From fd032e092d875ac7696034f0e5d185c919c3ac7a Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Sun, 24 Jan 2021 07:37:04 -0600 Subject: [PATCH] switch to lalrpop --- Cargo.lock | 323 ++++++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 6 +- build.rs | 3 + src/ast.rs | 3 +- src/main.rs | 2 +- src/parser.lalrpop | 47 +++++++ src/parser.rs | 100 +++++++------- 7 files changed, 421 insertions(+), 63 deletions(-) create mode 100644 build.rs create mode 100644 src/parser.lalrpop diff --git a/Cargo.lock b/Cargo.lock index 697958d..6a77eab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,14 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -15,6 +24,27 @@ version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1" +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "ascii-canvas" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff8eb72df928aafb99fe5d37b383f2fe25bd2a765e3e5f7c365916b6f2463a29" +dependencies = [ + "term", +] + [[package]] name = "atty" version = "0.2.14" @@ -32,12 +62,44 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "blake2b_simd" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + [[package]] name = "byteorder" version = "1.4.2" @@ -71,6 +133,12 @@ dependencies = [ "vec_map", ] +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + [[package]] name = "cranelift" version = "0.69.0" @@ -178,6 +246,72 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "lazy_static", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "diff" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" + +[[package]] +name = "dirs" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "ena" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7402b94a93c24e742487327a7cd839dc9d36fec9de9fb25b09f2dae459f36c3" +dependencies = [ + "log", +] + +[[package]] +name = "fixedbitset" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + [[package]] name = "gust" version = "0.1.0" @@ -186,9 +320,11 @@ dependencies = [ "cranelift", "cranelift-module", "cranelift-object", + "lalrpop", + "lalrpop-util", "lazy_static", "parking_lot", - "peg", + "regex", "structopt", "target-lexicon", ] @@ -236,6 +372,47 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + +[[package]] +name = "lalrpop" +version = "0.19.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a71d75b267b3299da9ccff4dd80d73325b5d8adcd76fe97cf92725eb7c6f122" +dependencies = [ + "ascii-canvas", + "atty", + "bit-set", + "diff", + "ena", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", +] + +[[package]] +name = "lalrpop-util" +version = "0.19.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ebbd90154472db6267a7d28ca08fea7788e5619fef10f2398155cb74c08f77a" +dependencies = [ + "regex", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -266,6 +443,18 @@ dependencies = [ "cfg-if 0.1.10", ] +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "object" version = "0.22.0" @@ -276,6 +465,12 @@ dependencies = [ "indexmap", ] +[[package]] +name = "once_cell" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" + [[package]] name = "parking_lot" version = "0.11.1" @@ -302,31 +497,35 @@ dependencies = [ ] [[package]] -name = "peg" -version = "0.6.3" +name = "petgraph" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f76678828272f177ac33b7e2ac2e3e73cc6c1cd1e3e387928aa69562fa51367" +checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" dependencies = [ - "peg-macros", - "peg-runtime", + "fixedbitset", + "indexmap", ] [[package]] -name = "peg-macros" -version = "0.6.3" +name = "phf_shared" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "636d60acf97633e48d266d7415a9355d4389cea327a193f87df395d88cd2b14d" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" dependencies = [ - "peg-runtime", - "proc-macro2", - "quote", + "siphasher", ] [[package]] -name = "peg-runtime" -version = "0.6.3" +name = "pico-args" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555b1514d2d99d78150d3c799d4c357a3e2c2a8062cd108e93a06d9057629c5" +checksum = "28b9b4df73455c861d7cbf8be42f01d3b373ed7f02e378d55fa84eafc6f638b1" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro-error" @@ -376,6 +575,17 @@ version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" +[[package]] +name = "redox_users" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" +dependencies = [ + "getrandom", + "redox_syscall", + "rust-argon2", +] + [[package]] name = "regalloc" version = "0.0.31" @@ -387,6 +597,36 @@ dependencies = [ "smallvec", ] +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "rust-argon2" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" +dependencies = [ + "base64", + "blake2b_simd", + "constant_time_eq", + "crossbeam-utils", +] + [[package]] name = "rustc-hash" version = "1.1.0" @@ -399,12 +639,30 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "siphasher" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" + [[package]] name = "smallvec" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +[[package]] +name = "string_cache" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared", + "precomputed-hash", +] + [[package]] name = "strsim" version = "0.8.0" @@ -452,6 +710,17 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee5a98e506fb7231a304c3a1bd7c132a55016cf65001e0282480665870dfcb9" +[[package]] +name = "term" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42" +dependencies = [ + "byteorder", + "dirs", + "winapi", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -481,6 +750,24 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301bdd13d23c49672926be451130892d274d3ba0b410c18e00daa7990ff38d99" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "unicode-segmentation" version = "1.7.1" @@ -511,6 +798,12 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index ea92902..f3d0786 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,12 @@ anyhow = "1.0.38" cranelift = "0.69.0" cranelift-module = "0.69.0" cranelift-object = "0.69.0" +lalrpop-util = "0.19.4" lazy_static = "1.4.0" parking_lot = "0.11.1" -peg = "0.6.3" +regex = "1.4.3" structopt = "0.3.21" target-lexicon = "0.11.1" + +[build-dependencies] +lalrpop = "0.19.4" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..ca5c283 --- /dev/null +++ b/build.rs @@ -0,0 +1,3 @@ +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/src/ast.rs b/src/ast.rs index 61c2ade..9520e70 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -11,7 +11,8 @@ pub enum Decl { #[derive(Debug)] pub struct Func { pub name: Ident, - pub body: Expr, + pub stmts: Vec, + pub ret: Expr, } #[derive(Debug)] diff --git a/src/main.rs b/src/main.rs index fda5a09..f054e8e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,7 +30,7 @@ fn main() -> Result<()> { contents }; - let parsed = parser::program(&contents)?; + let parsed = parser::parse(&contents)?; println!("parsed: {:?}", parsed); let namespaces = Namespaces::create(parsed); diff --git a/src/parser.lalrpop b/src/parser.lalrpop new file mode 100644 index 0000000..c7bbd37 --- /dev/null +++ b/src/parser.lalrpop @@ -0,0 +1,47 @@ +use crate::ast::*; + +grammar; + +pub Program: Program = { + => { Program { decls } } +}; + +Decls: Vec = Decl*; + +Decl: Decl = { + "fn" "{" "}" => { Decl::Func(Func{ name, stmts, ret }) }, +}; + +Stmt: Stmt = { + ";" => Stmt::Expr(expr), + "return" ";" => Stmt::Return(expr), +}; + +BlockExpr: Expr = "{" "}" => Expr::Seq(stmts, Box::new(expr)); + +Expr: Expr = { + "(" ")" => Expr::FuncCall(name), + Lit => Expr::Lit(<>), +}; + +Lit: Lit = { + Int => Lit::Int(<>), +}; + +// + +Ident: Ident = r"[A-Za-z][A-Za-z0-9_]*|_[A-Za-z0-9_]+" => Ident(<>.to_owned()); +Int: i64 = r"[0-9]+" => <>.parse::().unwrap(); + +// + +Sep: Vec = { + )*> => match e { + None => v, + Some(e) => { + let mut v = v; + v.push(e); + v + } + } +}; diff --git a/src/parser.rs b/src/parser.rs index cfad11f..2bb11ce 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,50 +1,60 @@ -use crate::ast::*; +use anyhow::Result; +use lalrpop_util::lalrpop_mod; -peg::parser! { - grammar parser() for str { - pub rule program() -> Program - = decls:decls() _? ![_] { Program { decls } } +use crate::ast::Program; - rule decls() -> Vec - = hd:decl() _? tl:decls() { let mut tl = tl; tl.insert(0, hd); tl } - / { vec![] } +lalrpop_mod!(parser); - rule decl() -> Decl - = d:func() { Decl::Func(d) } +// peg::parser! { +// grammar parser() for str { +// pub rule program() -> Program +// = decls:decls() _? ![_] { Program { decls } } +// +// rule decls() -> Vec +// = hd:decl() _? tl:decls() { let mut tl = tl; tl.insert(0, hd); tl } +// / { vec![] } +// +// rule decl() -> Decl +// = d:func() { Decl::Func(d) } +// +// rule func() -> Func +// = "fn" _ name:ident() _? "{" _? body:expr() _? "}" { Func { name, body } } +// / "fn" _ name:ident() _? body:blockexpr() { Func { name, body } } +// +// rule stmts() -> Vec +// = hd:stmt() _? tl:stmts() { let mut tl = tl; tl.insert(0, hd); tl } +// / { vec![] } +// +// rule stmt() -> Stmt +// = "return" _ v:expr() _? ";" { Stmt::Return(v) } +// / v:expr() _? ";" { Stmt::Expr(v) } +// +// rule blockexpr() -> Expr +// = "{" _? s:stmts() _? e:expr() "}" { Expr::Seq(s, Box::new(e)) } +// +// rule expr() -> Expr +// = blockexpr() +// / func:ident() _? "(" _? ")" { Expr::FuncCall(func) } +// / v:lit() { Expr::Lit(v) } +// +// rule lit() -> Lit +// = v:int() { Lit::Int(v) } +// +// rule int() -> i64 +// = s:$(['0'..='9']+) { s.parse::().unwrap() } +// +// rule ident() -> Ident +// = s:$([ 'a'..='z' | 'A'..='Z'] ['a'..='z' | 'A'..='Z' | '0'..='9' ]*) { Ident(s.to_string()) } +// / s:$("_" ['a'..='z' | 'A'..='Z' | '0'..='9' ]+) { Ident(s.to_string()) } +// / expected!("identifier") +// +// rule _() = [' ' | '\n' | '\t']+ +// } +// } - rule func() -> Func - = "fn" _ name:ident() _? "{" _? body:expr() _? "}" { Func { name, body } } - / "fn" _ name:ident() _? body:blockexpr() { Func { name, body } } - - rule stmts() -> Vec - = hd:stmt() _? tl:stmts() { let mut tl = tl; tl.insert(0, hd); tl } - / { vec![] } - - rule stmt() -> Stmt - = "return" _ v:expr() _? ";" { Stmt::Return(v) } - / v:expr() _? ";" { Stmt::Expr(v) } - - rule blockexpr() -> Expr - = "{" _? s:stmts() _? e:expr() "}" { Expr::Seq(s, Box::new(e)) } - - rule expr() -> Expr - = blockexpr() - / func:ident() _? "(" _? ")" { Expr::FuncCall(func) } - / v:lit() { Expr::Lit(v) } - - rule lit() -> Lit - = v:int() { Lit::Int(v) } - - rule int() -> i64 - = s:$(['0'..='9']+) { s.parse::().unwrap() } - - rule ident() -> Ident - = s:$([ 'a'..='z' | 'A'..='Z'] ['a'..='z' | 'A'..='Z' | '0'..='9' ]*) { Ident(s.to_string()) } - / s:$("_" ['a'..='z' | 'A'..='Z' | '0'..='9' ]+) { Ident(s.to_string()) } - / expected!("identifier") - - rule _() = [' ' | '\n' | '\t']+ - } +pub fn parse(s: impl AsRef) -> Result { + let s = s.as_ref().to_owned(); + let parser = parser::ProgramParser::new(); + let res = parser.parse(&s).unwrap(); + Ok(res) } - -pub use self::parser::*;