This commit is contained in:
Michael Zhang 2020-09-24 03:54:08 -05:00
commit afcf9577d2
Signed by: michael
GPG Key ID: BDA47A31A3C8EE6B
12 changed files with 662 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

126
Cargo.lock generated Normal file
View File

@ -0,0 +1,126 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b"
[[package]]
name = "capgen"
version = "0.1.0"
dependencies = [
"anyhow",
"maplit",
"regex",
"thiserror",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "memchr"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
[[package]]
name = "proc-macro2"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36e28516df94f3dd551a587da5357459d9b36d945a7c37c3557928c1c2ff2a2c"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
[[package]]
name = "syn"
version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6690e3e9f692504b941dc6c3b188fd28df054f7fb8469ab40680df52fdcc842b"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "thiserror"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static",
]
[[package]]
name = "unicode-xid"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"

13
Cargo.toml Normal file
View File

@ -0,0 +1,13 @@
[package]
name = "capgen"
version = "0.1.0"
authors = ["Michael Zhang <iptq@protonmail.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.32"
regex = "1.3.9"
thiserror = "1.0.20"
maplit = "1.0.2"

2
rustfmt.toml Normal file
View File

@ -0,0 +1,2 @@
max_width = 75
wrap_comments = true

View File

@ -0,0 +1,28 @@
use std::collections::HashSet;
use crate::grammar::Grammar;
use crate::lr0item::Lr0Item;
/// Collection of sets of LR(0) items derived from a [`Grammar`].
///
/// Construction of the collection is not implemented yet: `build` only
/// resets the collection.
pub struct CanonicalCollection<'g> {
    /// Grammar the item sets are (to be) derived from.
    grammar: &'g Grammar,
    /// Item sets collected so far.
    ///
    /// Stored in a `Vec` rather than a `HashSet`: `HashSet<T>` does not
    /// implement `Hash`, so nothing could ever be inserted into a
    /// `HashSet<HashSet<Lr0Item>>`.
    sets: Vec<HashSet<Lr0Item>>,
}

impl<'g> CanonicalCollection<'g> {
    /// Creates an empty collection bound to `grammar`.
    pub fn new(grammar: &'g Grammar) -> Self {
        CanonicalCollection {
            grammar,
            sets: Vec::new(),
        }
    }

    /// Removes every item set from the collection.
    pub fn clear(&mut self) {
        self.sets.clear();
    }

    /// Rebuilds the collection from scratch.
    ///
    /// Currently this only clears the existing sets; adding the start
    /// items and expanding them is still to be written.
    pub fn build(&mut self) {
        self.clear();
        // add the start items
    }
}

181
src/grammar/context_sets.rs Normal file
View File

@ -0,0 +1,181 @@
use std::collections::{HashMap, HashSet};
use crate::grammar::Grammar;
use crate::utils::MapOfSet;
/// Placeholder type for a grammar's context sets.
///
/// It has no fields yet; `Grammar::build_context_sets` in this file
/// stores its result on the grammar's `first_sets` field instead.
#[derive(Debug, Default)]
pub struct ContextSets {}
/// Error type for this module.
///
/// It has no variants yet, so no value of this type can be constructed.
#[derive(Debug, Error)]
pub enum Error {}
impl Grammar {
    /// Computes the FIRST set of every grammar symbol and stores the
    /// result in `self.first_sets`, replacing whatever was there.
    ///
    /// The sets are grown by fixed-point iteration until a full pass
    /// adds nothing new:
    ///
    /// * for a terminal `a`, FIRST(a) = {a};
    /// * for a production `X -> Y1 Y2 .. Yk`, the non-ε members of
    ///   FIRST(Yi) are added to FIRST(X) as long as every earlier
    ///   `Yj` can derive ε;
    /// * ε is added to FIRST(X) only when the production is `X -> ε`
    ///   or every `Yi` can derive ε.
    ///
    /// The empty string is spelled `"ε"` in productions and in the
    /// resulting sets.
    pub fn build_context_sets(&mut self) {
        const EPSILON: &str = "ε";

        // first set construction: dragon book page 189
        let mut first_sets = MapOfSet::<String, String>::default();
        let grammar_symbols = self.symbol_names();

        loop {
            // Additions are computed against a snapshot of
            // `first_sets` and applied once the pass is finished.
            let mut to_add = HashSet::<(String, String)>::new();

            for &name in grammar_symbols.iter() {
                if self.is_terminal(name) {
                    if !first_sets.key_contains(name, name) {
                        to_add
                            .insert((name.to_owned(), name.to_owned()));
                    }
                    continue;
                }

                let productions = match self.productions.get(name) {
                    Some(productions) => productions,
                    None => continue,
                };

                for production in productions {
                    // X -> ε is a production
                    let is_epsilon_production = production.len() == 1
                        && production[0] == EPSILON;

                    // X is a nonterminal and X -> Y1 Y2 .. Yk: walk
                    // the right-hand side while every symbol seen so
                    // far can derive ε.
                    let mut all_nullable = true;
                    for symbol in production {
                        let first_of_symbol =
                            match first_sets.get(symbol) {
                                Some(set) => set,
                                None => {
                                    // Nothing known about this symbol
                                    // (yet), so it cannot be skipped.
                                    all_nullable = false;
                                    break;
                                }
                            };

                        for candidate in first_of_symbol {
                            // ε of a single Yi must not leak into
                            // FIRST(X); it is handled below, once the
                            // whole right-hand side is known to be
                            // nullable.
                            if candidate.as_str() != EPSILON
                                && !first_sets
                                    .key_contains(name, candidate.as_str())
                            {
                                to_add.insert((
                                    name.to_owned(),
                                    candidate.clone(),
                                ));
                            }
                        }

                        if !first_of_symbol.contains(EPSILON) {
                            all_nullable = false;
                            break;
                        }
                    }

                    if (is_epsilon_production || all_nullable)
                        && !first_sets.key_contains(name, EPSILON)
                    {
                        to_add.insert((
                            name.to_owned(),
                            EPSILON.to_owned(),
                        ));
                    }
                }
            }

            if to_add.is_empty() {
                break;
            }
            for (symbol_name, first_item) in to_add {
                first_sets.insert_item(symbol_name, first_item);
            }
        }

        self.first_sets = first_sets;
    }
}
#[cfg(test)]
mod tests {
    use anyhow::Result;

    use super::ContextSets;

    /// Asserts that the FIRST set stored for each listed symbol is
    /// *exactly* the listed set of symbols.
    ///
    /// Comparing whole sets (rather than checking containment) also
    /// catches members that should not be there.
    macro_rules! assert_first_sets {
        (
            $set:expr,
            $(
                $lhs:ident
                ->
                $($arg:ident)*
            ),* $(,)?
        ) => {
            $({
                let first_set =
                    $set.get(stringify!($lhs)).expect("should exist");
                let mut expected =
                    std::collections::HashSet::<String>::new();
                $(
                    expected.insert(stringify!($arg).to_owned());
                )*
                assert_eq!(
                    first_set,
                    &expected,
                    "FIRST({})",
                    stringify!($lhs)
                );
            })*
        }
    }

    #[test]
    fn example1() -> Result<()> {
        let mut grammar = make_grammar! {
            E -> T E_,
            E_ -> P T E_ | ε,
            T -> F T_,
            T_ -> A F T_ | ε,
            F -> LP E RP | num
            %
            P -> r"\+",
            A -> r"\*",
            LP -> r"\(",
            RP -> r"\)",
            num -> r"[0-9]+",
        };
        grammar.build_context_sets();
        assert_first_sets! {
            grammar.first_sets,
            E -> LP num,
            T -> LP num,
            F -> LP num,
            E_ -> P ε,
            T_ -> A ε,
        };
        Ok(())
    }

    #[test]
    fn example2() -> Result<()> {
        let mut grammar = make_grammar! {
            S -> A C B | C b b | B a,
            A -> d a | B C,
            B -> g | ε,
            C -> h | ε,
            %
            a -> "a",
            b -> "b",
            d -> "d",
            g -> "g",
            h -> "h",
        };
        grammar.build_context_sets();
        assert_first_sets! {
            grammar.first_sets,
            S -> a b d g h ε,
            A -> d g h ε,
            B -> g ε,
            C -> h ε,
        };
        Ok(())
    }
}

138
src/grammar/mod.rs Normal file
View File

@ -0,0 +1,138 @@
mod context_sets;
use std::collections::{HashMap, HashSet};
use regex::Regex;
use crate::lr0item::Lr0Item;
use crate::utils::MapOfSet;
/// A grammar: its start symbols, terminals, productions, and the FIRST
/// sets computed from them.
#[derive(Debug)]
pub struct Grammar {
/// The start symbols
pub start: HashSet<String>,
/// Map of terminals from their names to the regex used to identify
/// them
pub terminals: HashMap<String, Regex>,
/// Map from a nonterminal's name to its alternatives; each
/// alternative is the sequence of symbol names on its right-hand side
pub productions: HashMap<String, Vec<Vec<String>>>,
/// FIRST sets keyed by symbol name; overwritten by
/// `build_context_sets`
pub first_sets: MapOfSet<String, String>,
}
impl Grammar {
    /// Returns the names of all terminals followed by the names of all
    /// nonterminals (the keys of `productions`).
    ///
    /// Both groups come from hash maps, so the order within each group
    /// is unspecified. A name that is both a terminal and a
    /// nonterminal appears twice.
    pub fn symbol_names(&self) -> Vec<&str> {
        self.terminals
            .keys()
            .map(|s| s.as_str())
            .chain(self.productions.keys().map(|s| s.as_str()))
            .collect()
    }

    /// Returns `true` if `name` is a key of `terminals`.
    pub fn is_terminal(&self, name: impl AsRef<str>) -> bool {
        self.terminals.contains_key(name.as_ref())
    }

    /// Returns `true` if `name` is a key of `productions`.
    pub fn is_nonterminal(&self, name: impl AsRef<str>) -> bool {
        self.productions.contains_key(name.as_ref())
    }

    /// Runs every analysis pass; currently only the FIRST-set
    /// construction.
    pub fn build(&mut self) {
        self.build_context_sets();
    }

    /// Computes the closure of a set of LR(0) items.
    ///
    /// Starting from `items`, for every item `A -> α . B β` whose next
    /// symbol `B` has productions, the item `B -> . γ` is added for
    /// each production `B -> γ`, repeating until nothing new appears.
    ///
    /// Items whose dot is at the end of the right-hand side have no
    /// next symbol and contribute nothing.
    pub fn closure(&self, items: HashSet<Lr0Item>) -> HashSet<Lr0Item> {
        // closure described in dragon book page 223
        let mut closed = items;
        loop {
            let mut pending = HashSet::new();
            for item in &closed {
                // `get` rather than `next_symbol`: the latter indexes
                // `rhs` directly and panics on complete items.
                let next = match item.rhs.get(item.dot) {
                    Some(symbol) => symbol,
                    None => continue,
                };
                let productions = match self.productions.get(next) {
                    Some(productions) => productions,
                    None => continue,
                };
                for production in productions {
                    let candidate = Lr0Item {
                        lhs: next.clone(),
                        rhs: production.clone(),
                        dot: 0,
                    };
                    if !closed.contains(&candidate) {
                        pending.insert(candidate);
                    }
                }
            }
            if pending.is_empty() {
                break;
            }
            closed.extend(pending);
        }
        closed
    }

    /// Computes GOTO(`items`, `symbol`).
    ///
    /// Every item `A -> α . X β` in `items` whose next symbol is
    /// `symbol` is advanced to `A -> α X . β`; the result is the
    /// closure of those advanced items. It is empty when no item in
    /// `items` has `symbol` right after its dot.
    pub fn goto(
        &self,
        items: HashSet<Lr0Item>,
        symbol: &str,
    ) -> HashSet<Lr0Item> {
        let kernel: HashSet<Lr0Item> = items
            .into_iter()
            .filter(|item| {
                item.rhs.get(item.dot).map(String::as_str)
                    == Some(symbol)
            })
            .map(|mut item| {
                item.dot += 1;
                item
            })
            .collect();
        self.closure(kernel)
    }
}
#[cfg(test)]
mod tests {
    use crate::lr0item::Lr0Item;

    /// Closure of `{ E_ -> . E }` for the augmented expression grammar.
    #[test]
    fn closure1() {
        let grammar = make_grammar! {
            E_ -> E,
            E -> E p T | T,
            T -> T a F | F,
            F -> lp E rp | num,
            %
            p -> r"\+",
            a -> r"\*",
            lp -> r"\(",
            rp -> r"\)",
            num -> r"[0-9]+",
        };
        let item = Lr0Item {
            lhs: string!(E_),
            rhs: vec![string!(E)],
            dot: 0,
        };
        let j = grammar.closure(hashset! { item });

        // Builds an item with the dot in front of the first symbol.
        let initial = |lhs: &str, rhs: &[&str]| Lr0Item {
            lhs: lhs.to_owned(),
            rhs: rhs.iter().map(|s| (*s).to_owned()).collect(),
            dot: 0,
        };
        let expected = hashset! {
            // E_ -> . E
            initial("E_", &["E"]),
            // E -> . E p T
            initial("E", &["E", "p", "T"]),
            // E -> . T
            initial("E", &["T"]),
            // T -> . T a F
            initial("T", &["T", "a", "F"]),
            // T -> . F
            initial("T", &["F"]),
            // F -> . lp E rp
            initial("F", &["lp", "E", "rp"]),
            // F -> . num
            initial("F", &["num"]),
        };
        assert_eq!(j, expected);
    }
}

13
src/lr.rs Normal file
View File

@ -0,0 +1,13 @@
use crate::grammar::Grammar;
/// An LR parser description; at present it only owns the grammar it
/// was created for.
pub struct LR {
/// The grammar this value was built from.
pub grammar: Grammar,
}
/// Errors that building an [`LR`] value can report.
///
/// It has no variants yet. `Debug` is derived so that a
/// `Result<_, Error>` can be `unwrap`ped or `expect`ed.
#[derive(Debug)]
pub enum Error {}
impl LR {
/// Builds an `LR` value from `grammar`.
///
/// Not implemented yet: the body is `todo!()`, so calling this
/// panics. The function is private to this module.
fn build(grammar: Grammar) -> Result<Self, Error> {
todo!()
}
}

29
src/lr0item.rs Normal file
View File

@ -0,0 +1,29 @@
use std::fmt::{self, Display};
/// An LR(0) item: a production `lhs -> rhs` with a dot marking how much
/// of the right-hand side has been seen.
///
/// lr0 items described in dragon book page 221
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Lr0Item {
    /// Name of the nonterminal on the left-hand side.
    pub lhs: String,
    /// Symbol names on the right-hand side, in order.
    pub rhs: Vec<String>,
    /// Position of the dot: the index in `rhs` of the next symbol.
    /// `dot == rhs.len()` means the item is complete.
    pub dot: usize,
}

impl Lr0Item {
    /// Returns the symbol immediately after the dot.
    ///
    /// # Panics
    ///
    /// Panics if the dot is at (or past) the end of `rhs`, i.e. the
    /// item is complete and has no next symbol.
    pub fn next_symbol(&self) -> &str {
        self.rhs[self.dot].as_ref()
    }
}

impl Display for Lr0Item {
    /// Formats the item as `lhs -> a . b c`, with ` .` placed before
    /// the symbol at index `dot`, or after the last symbol when the
    /// item is complete.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{} ->", self.lhs)?;
        for (i, sym) in self.rhs.iter().enumerate() {
            if i == self.dot {
                write!(f, " .")?;
            }
            write!(f, " {}", sym)?;
        }
        // The loop never reaches index `rhs.len()`, so a complete item
        // needs its dot written here.
        if self.dot >= self.rhs.len() {
            write!(f, " .")?;
        }
        Ok(())
    }
}

45
src/macros.rs Normal file
View File

@ -0,0 +1,45 @@
/// Expands to an owned `String` containing the text of the given
/// identifier, e.g. `string!(E_)` yields `"E_"`.
macro_rules! string {
    ($name:ident) => {
        String::from(stringify!($name))
    };
}
/// Builds a `crate::grammar::Grammar` from an inline description.
///
/// The input is a comma-separated list of rules `lhs -> a b | c d`,
/// optionally followed by `%` and a comma-separated list of terminal
/// definitions `name -> regex_expr`.
///
/// In the generated grammar:
/// * `start` contains the left-hand side of every rule;
/// * `productions` maps each left-hand side to its alternatives;
/// * `terminals` maps each terminal name to `regex::Regex::new(expr)`,
///   which is unwrapped and therefore panics on an invalid pattern;
/// * `first_sets` starts out empty.
///
/// NOTE(review): every rule's left-hand side ends up in `start`;
/// confirm that this is intended rather than only the first rule.
macro_rules! make_grammar {
(
// comma-separated grammar rules
$(
$lhs:ident
->
$(
$($arg:ident)*
)|* $(|)?
),* $(,)?
// optional terminal section, introduced by a percent sign
$(
%
$(
$lhs_:ident
->
$re:expr
),* $(,)?
)?
) => {
crate::grammar::Grammar {
start: hashset!{ $(string!($lhs),)* },
productions: hashmap!{
$(string!($lhs) => vec![
$(vec![
$(string!($arg),)*
],)*
],)*
},
terminals: hashmap!{
$(
$(string!($lhs_) => regex::Regex::new($re).unwrap(),)*
)?
},
first_sets: crate::utils::MapOfSet::default(),
}
};
}

27
src/main.rs Normal file
View File

@ -0,0 +1,27 @@
#[macro_use]
extern crate maplit;
#[macro_use]
extern crate thiserror;
#[macro_use]
mod macros;
mod canonical_collection;
mod grammar;
mod lr;
mod lr0item;
mod utils;
use std::fs::File;
use std::io::Read;
use anyhow::Result;
/// Reads `test.bnf` from the current directory into memory and prints
/// a greeting. Fails if the file cannot be opened or is not valid
/// UTF-8. The file's contents are not used yet.
fn main() -> Result<()> {
    let mut contents = String::new();
    File::open("test.bnf")?.read_to_string(&mut contents)?;
    println!("Hello, world!");
    Ok(())
}

59
src/utils.rs Normal file
View File

@ -0,0 +1,59 @@
use std::borrow::Borrow;
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Debug};
use std::hash::Hash;
/// A map from keys to sets of values (a multimap without duplicate
/// values per key), backed by `HashMap<K, HashSet<V>>`.
pub struct MapOfSet<K, V>(HashMap<K, HashSet<V>>);

impl<K: Debug, V: Debug> Debug for MapOfSet<K, V> {
    /// Formats exactly like the underlying map.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl<K, V> Default for MapOfSet<K, V> {
    /// Creates an empty map. Implemented by hand so that `K` and `V`
    /// do not need to implement `Default` themselves.
    fn default() -> Self {
        Self(HashMap::new())
    }
}

impl<K: Hash + Eq, V: Hash + Eq> MapOfSet<K, V> {
    /// Adds `value` to the set stored under `key`, creating the set if
    /// `key` is new. Adding a value that is already present is a
    /// no-op.
    pub fn insert_item(&mut self, key: K, value: V) {
        // The entry API hashes `key` once for both the lookup and the
        // insertion of a fresh set.
        self.0.entry(key).or_default().insert(value);
    }

    /// Returns the set stored under `key`, or `None` if nothing was
    /// ever inserted for it.
    pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&HashSet<V>>
    where
        K: Borrow<Q>,
        Q: Hash + Eq,
    {
        self.0.get(key)
    }

    /// Returns `true` if `value` is in the set stored under `key`;
    /// `false` if it is not, or if `key` has no set at all.
    pub fn key_contains<QK: ?Sized, QV: ?Sized>(
        &self,
        key: &QK,
        value: &QV,
    ) -> bool
    where
        K: Borrow<QK>,
        QK: Hash + Eq,
        V: Borrow<QV>,
        QV: Hash + Eq,
    {
        self.0
            .get(key)
            .map_or(false, |container| container.contains(value))
    }

    /// Iterates over every key together with its set, in unspecified
    /// order.
    pub fn iter(&self) -> impl Iterator<Item = (&K, &HashSet<V>)> {
        self.0.iter()
    }
}