From 215683173e66de0b185558d825a268f32551d438 Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Tue, 19 Jul 2022 01:30:25 -0500 Subject: [PATCH] Lots of work on revamping the type system for structs, also stubs for future pieces --- .build.yml | 17 + Cargo.lock | 92 +++++ Cargo.toml | 5 + README.md | 14 + bin/e0c.rs | 47 ++- docs/.gitignore | 1 + docs/book.toml | 6 + docs/src/SUMMARY.md | 3 + docs/src/chapter_1.md | 1 + examples/struct.e0 | 7 +- flake.nix | 10 +- rt/Cargo.toml | 9 + rt/src/aarch64/mod.rs | 1 + rt/src/aarch64/syscall.rs | 0 rt/src/gc.rs | 8 + rt/src/lib.rs | 23 ++ rt/src/start.rs | 4 + rt/src/x86_64/linux.rs | 12 + rt/src/x86_64/mod.rs | 4 + rt/src/x86_64/syscall.rs | 45 +++ src/ast/mod.rs | 42 ++- src/ast/typeck.rs | 600 +++++++++++++++++++++++++++++++++ src/ast/typeck_bidi.rs | 149 ++++++++ src/ast/typed.rs | 532 ----------------------------- src/ast/types.rs | 57 ++++ src/codegen/llvm_ir/expr.rs | 18 +- src/codegen/llvm_ir/if_else.rs | 2 +- src/codegen/llvm_ir/mod.rs | 61 ++-- src/codegen/llvm_ir/stmts.rs | 2 +- src/codegen/mod.rs | 7 +- src/lib.rs | 8 +- src/parser.lalrpop | 90 +++-- src/utils.rs | 62 ++-- std/prelude.e0 | 0 34 files changed, 1292 insertions(+), 647 deletions(-) create mode 100644 .build.yml create mode 100644 README.md create mode 100644 docs/.gitignore create mode 100644 docs/book.toml create mode 100644 docs/src/SUMMARY.md create mode 100644 docs/src/chapter_1.md create mode 100644 rt/Cargo.toml create mode 100644 rt/src/aarch64/mod.rs create mode 100644 rt/src/aarch64/syscall.rs create mode 100644 rt/src/gc.rs create mode 100644 rt/src/lib.rs create mode 100644 rt/src/start.rs create mode 100644 rt/src/x86_64/linux.rs create mode 100644 rt/src/x86_64/mod.rs create mode 100644 rt/src/x86_64/syscall.rs create mode 100644 src/ast/typeck.rs create mode 100644 src/ast/typeck_bidi.rs delete mode 100644 src/ast/typed.rs create mode 100644 src/ast/types.rs create mode 100644 std/prelude.e0 diff --git a/.build.yml b/.build.yml new file mode 100644 index 0000000..8bb9b29 --- /dev/null +++ b/.build.yml @@ -0,0 +1,17 @@ +image: archlinux +packages: + - rust + - rsync +sources: + - https://git.sr.ht/~mzhang/e0 +secrets: + - 0b26b413-7901-41c3-a4e2-3c752228ffcb +tasks: + - test: | + cd e0 + cargo test --all + - doc-upload: | + cd e0 + cargo doc --workspace --no-deps + echo "mzhang.io ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBzBZ+QmM4EO3Fwc1ZcvWV2IY9VF04T0H9brorGj9Udp" >> ~/.ssh/known_hosts + rsync -azvrP target/doc/ sourcehutBuilds@mzhang.io:/mnt/storage/svcdata/blog-public/e0 diff --git a/Cargo.lock b/Cargo.lock index fc2b31a..14dfacb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,6 +109,16 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -148,11 +158,17 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "codespan-reporting", + "frunk", "lalrpop", "lalrpop-util", "tempfile", ] +[[package]] +name = "e0-rt" +version = "0.1.0" + [[package]] name = "either" version = "1.7.0" @@ -183,6 +199,70 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "frunk" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cd67cf7d54b7e72d0ea76f3985c3747d74aee43e0218ad993b7903ba7a5395e" +dependencies = [ + "frunk_core", + "frunk_derives", + "frunk_proc_macros", +] + +[[package]] +name = "frunk_core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1246cf43ec80bf8b2505b5c360b8fb999c97dabd17dbb604d85558d5cbc25482" + +[[package]] +name = "frunk_derives" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dbc4f084ec5a3f031d24ccedeb87ab2c3189a2f33b8d070889073837d5ea09e" +dependencies = [ + "frunk_proc_macro_helpers", + "quote", + "syn", +] + +[[package]] +name = "frunk_proc_macro_helpers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99f11257f106c6753f5ffcb8e601fb39c390a088017aaa55b70c526bff15f63e" +dependencies = [ + "frunk_core", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "frunk_proc_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a078bd8459eccbb85e0b007b8f756585762a72a9efc53f359b371c3b6351dbcc" +dependencies = [ + "frunk_core", + "frunk_proc_macros_impl", + "proc-macro-hack", +] + +[[package]] +name = "frunk_proc_macros_impl" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ffba99f0fa4f57e42f57388fbb9a0ca863bc2b4261f3c5570fed579d5df6c32" +dependencies = [ + "frunk_core", + "frunk_proc_macro_helpers", + "proc-macro-hack", + "quote", + "syn", +] + [[package]] name = "getrandom" version = "0.2.7" @@ -402,6 +482,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.40" @@ -595,6 +681,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + [[package]] name = "unicode-xid" version = "0.2.3" diff --git a/Cargo.toml b/Cargo.toml index 26ce390..8c4a47b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,9 @@ name = "e0" version = "0.1.0" edition = "2021" +[workspace] +members = ["rt"] + [[bin]] name = "e0c" path = "./bin/e0c.rs" @@ -14,6 +17,8 @@ path = "./bin/e0pkg.rs" [dependencies] anyhow = "1.0.56" clap = { version = "3.1.8", features = ["derive"] } +codespan-reporting = "0.11.1" +frunk = "0.4.0" lalrpop-util = { version = "0.19.7", features = ["lexer"] } tempfile = "3.3.0" diff --git a/README.md b/README.md new file mode 100644 index 0000000..af57ef8 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# e0: Experimental Language #0 + +`e0` is an experimental language created for the purpose of practicing making a +simple language ecosystem. The language is strongly typed but with only a +limited set of primitives, and compiles down to LLVM IR. + +Compilation process +------------------- + +``` +e0 source code => llvm bitcode text => clang => link against e0-rt => binary +``` + +Currently clang is required as there are no plans for custom codegen yet. diff --git a/bin/e0c.rs b/bin/e0c.rs index 8eeca3c..7406f43 100644 --- a/bin/e0c.rs +++ b/bin/e0c.rs @@ -1,12 +1,19 @@ use std::fs::{self, File}; use std::io::Write; use std::path::PathBuf; +use std::process::ExitCode; use anyhow::Result; use clap::Parser; -use e0::codegen::CodegenBackend; +use codespan_reporting::diagnostic::{Diagnostic, Label}; +use codespan_reporting::files::SimpleFiles; +use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; +use codespan_reporting::term::{self, Config as CodespanConfig}; +use e0::ast::typeck_bidi::TypeChecker; use e0::codegen::llvm_ir::LlvmIrCodegen; +use e0::codegen::CodegenBackend; use e0::parser::ProgramParser; +use lalrpop_util::ParseError; #[derive(Debug, Parser)] struct Opt { @@ -21,15 +28,45 @@ struct Opt { emit_ast: Option, } -fn main() -> Result<()> { +fn main() -> Result { let opts = Opt::parse(); + // Set up reporting + let mut files = SimpleFiles::new(); + let writer = StandardStream::stderr(ColorChoice::Always); + let config = CodespanConfig::default(); + let contents = fs::read_to_string(&opts.path)?; + let file_id = files.add(opts.path.display().to_string(), &contents); let parser = ProgramParser::new(); - let ast = parser.parse(&contents).unwrap(); + let ast = match parser.parse(&contents) { + Ok(v) => v, + Err(err) => { + let loc = match err { + ParseError::InvalidToken { location } + | ParseError::UnrecognizedEOF { location, .. } => (location, location), + ParseError::UnrecognizedToken { ref token, .. } + | ParseError::ExtraToken { ref token } => (token.0, token.2), + _ => todo!(), + }; + let diagnostic = Diagnostic::error() + .with_labels(vec![Label::primary(file_id, loc.0..loc.1)]) + .with_message(err.to_string()); + term::emit(&mut writer.lock(), &config, &files, &diagnostic)?; + return Ok(ExitCode::FAILURE); + } + }; - let typed_ast = e0::ast::typed::convert(ast)?; + let type_checker = TypeChecker::default(); + let typed_ast = match type_checker.convert(ast) { + Ok(v) => v, + Err(err) => { + let diagnostic = Diagnostic::error().with_message(err.to_string()); + term::emit(&mut writer.lock(), &config, &files, &diagnostic)?; + return Ok(ExitCode::FAILURE); + } + }; if let Some(path) = opts.emit_ast { let mut file = File::create(&path)?; @@ -53,5 +90,5 @@ fn main() -> Result<()> { // println!("Emitted."); // } - Ok(()) + Ok(ExitCode::SUCCESS) } diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..7585238 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +book diff --git a/docs/book.toml b/docs/book.toml new file mode 100644 index 0000000..2260cdd --- /dev/null +++ b/docs/book.toml @@ -0,0 +1,6 @@ +[book] +authors = ["Michael Zhang"] +language = "en" +multilingual = false +src = "src" +title = "e0 Reference" diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md new file mode 100644 index 0000000..7390c82 --- /dev/null +++ b/docs/src/SUMMARY.md @@ -0,0 +1,3 @@ +# Summary + +- [Chapter 1](./chapter_1.md) diff --git a/docs/src/chapter_1.md b/docs/src/chapter_1.md new file mode 100644 index 0000000..b743fda --- /dev/null +++ b/docs/src/chapter_1.md @@ -0,0 +1 @@ +# Chapter 1 diff --git a/examples/struct.e0 b/examples/struct.e0 index a3b3f0c..67936c8 100644 --- a/examples/struct.e0 +++ b/examples/struct.e0 @@ -3,11 +3,14 @@ type IntPair = struct { snd: int, } -fn main() -> int { - let x = IntPair { +fn getX() -> IntPair { + return new IntPair { fst: 4, snd: 6, }; +} +fn main() -> int { + let x = getX(); return x.fst + x.snd; } diff --git a/flake.nix b/flake.nix index 9f0e298..1c127bf 100644 --- a/flake.nix +++ b/flake.nix @@ -22,9 +22,17 @@ cargo-edit cargo-watch clangUseLLVM - (toolchain.withComponents [ "clippy" "rustc" "rust-src" ]) + mdbook + (toolchain.withComponents [ + "cargo" + "clippy" + "rustc" + "rust-src" + "rustfmt" + ]) ]; inputsFrom = with myPkgs; [ e0 ]; + CARGO_UNSTABLE_SPARSE_REGISTRY = "true"; }; diff --git a/rt/Cargo.toml b/rt/Cargo.toml new file mode 100644 index 0000000..3aa2bcb --- /dev/null +++ b/rt/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "e0-rt" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[dependencies] diff --git a/rt/src/aarch64/mod.rs b/rt/src/aarch64/mod.rs new file mode 100644 index 0000000..6285171 --- /dev/null +++ b/rt/src/aarch64/mod.rs @@ -0,0 +1 @@ +pub mod syscall; diff --git a/rt/src/aarch64/syscall.rs b/rt/src/aarch64/syscall.rs new file mode 100644 index 0000000..e69de29 diff --git a/rt/src/gc.rs b/rt/src/gc.rs new file mode 100644 index 0000000..c1b8ed0 --- /dev/null +++ b/rt/src/gc.rs @@ -0,0 +1,8 @@ +//! Simple concurrent mark-and-sweep garbage collector. + +struct Allocator { +} + +#[no_mangle] +pub extern "C" fn __e0_gc_init() { +} diff --git a/rt/src/lib.rs b/rt/src/lib.rs new file mode 100644 index 0000000..18e7b99 --- /dev/null +++ b/rt/src/lib.rs @@ -0,0 +1,23 @@ +#![feature(lang_items)] +#![cfg_attr(not(test), no_std)] + +#[cfg(target_arch = "aarch64")] +mod aarch64; + +#[cfg(target_arch = "x86_64")] +mod x86_64; + +pub mod gc; +pub mod start; + +use core::panic::PanicInfo; + +#[cfg(not(test))] +#[panic_handler] +fn panic(_panic: &PanicInfo<'_>) -> ! { + loop {} +} + +#[cfg(not(test))] +#[lang = "eh_personality"] +extern "C" fn eh_personality() {} diff --git a/rt/src/start.rs b/rt/src/start.rs new file mode 100644 index 0000000..581727c --- /dev/null +++ b/rt/src/start.rs @@ -0,0 +1,4 @@ +#[no_mangle] +pub extern "C" fn __e0_rt_init() { + // Set up thread-local storage +} diff --git a/rt/src/x86_64/linux.rs b/rt/src/x86_64/linux.rs new file mode 100644 index 0000000..e1582b6 --- /dev/null +++ b/rt/src/x86_64/linux.rs @@ -0,0 +1,12 @@ +use core::mem::size_of; + +#[repr(C)] +pub struct CloneArgs { + pub flags: u64, +} + +#[no_mangle] +#[inline(always)] +pub extern "C" fn clone3(flags: CloneArgs) { + let size = size_of::(); +} diff --git a/rt/src/x86_64/mod.rs b/rt/src/x86_64/mod.rs new file mode 100644 index 0000000..bfeb661 --- /dev/null +++ b/rt/src/x86_64/mod.rs @@ -0,0 +1,4 @@ +pub mod syscall; + +#[cfg(target_os = "linux")] +pub mod linux; diff --git a/rt/src/x86_64/syscall.rs b/rt/src/x86_64/syscall.rs new file mode 100644 index 0000000..0811ab2 --- /dev/null +++ b/rt/src/x86_64/syscall.rs @@ -0,0 +1,45 @@ +use core::arch::asm; + +#[no_mangle] +#[inline(always)] +pub extern "C" fn __syscall0(n: u64) -> u64 { + let mut res: u64; + unsafe { + asm!( + "syscall", + in("rax") n, + lateout("rax") res, + out("rcx") _, + out("r11") _, + ); + } + res +} + +#[no_mangle] +#[inline(always)] +pub extern "C" fn __syscall2(n: u64, a1: u64, a2: u64) -> u64 { + let mut res: u64; + unsafe { + asm!( + "syscall", + in("rax") n, + in("rdi") a1, + in("rsi") a2, + lateout("rax") res, + out("rcx") _, + out("r11") _, + ); + } + res +} + +#[cfg(test)] +mod tests { + use super::__syscall0; + + #[test] + fn run() { + println!("{}", __syscall0(102)); + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 3eb75f9..ae0a81e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,28 +1,44 @@ -pub mod typed; +// pub mod typeck; +pub mod typeck_bidi; +pub mod types; + +use self::types::Type; #[derive(Debug)] pub enum Decl { Func(Func), + TypeDef(TypeDef), } #[derive(Debug)] pub struct Func { pub name: String, - pub args: Vec, - pub return_ty: Type, + pub args: Vec>, + pub return_ty: T, pub stmts: Vec>, +} + +#[derive(Debug)] +pub struct TypeDef { + pub name: String, + pub def: T, +} + +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct Arg { + pub name: String, pub ty: T, } -#[derive(Debug, Clone)] -pub struct Arg { +#[derive(Debug)] +pub struct NameValue { pub name: String, - pub ty: Type, + pub expr: Expr, } #[derive(Debug)] pub enum Stmt { - Let(String, Option, Expr), + Let(String, T, Expr), Return(Option>), IfElse(IfElse), } @@ -52,6 +68,9 @@ pub enum ExprKind { Var(String), BinOp(Box>, Op, Box>), Call(String, Vec>), + + StructNew(String, Vec>), + FieldAccess(Box>, String), } #[derive(Copy, Clone, Debug)] @@ -73,12 +92,3 @@ impl Op { } } } - -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -pub enum Type { - Int, - Bool, - - StructInst(Vec<(String, Type)>), - Func(Vec, Box), -} diff --git a/src/ast/typeck.rs b/src/ast/typeck.rs new file mode 100644 index 0000000..a805177 --- /dev/null +++ b/src/ast/typeck.rs @@ -0,0 +1,600 @@ +//! Hindley-Milner-shaped type-checker. + +use std::collections::{HashMap, HashSet}; + +use anyhow::Result; + +use crate::{ast::Expr, utils::LayeredEnv}; + +use super::{ + types::{TypeS, TypeTC}, + Arg, NameValue, +}; +use super::{Decl, ElseClause, ExprKind, Func, IfElse, Op, Stmt, Type}; + +type Assignments = HashMap; + +#[derive(Default)] +pub struct TypeChecker { + assignments: Assignments, + ctr: usize, + struct_env: LayeredEnv>>, + type_env: LayeredEnv, + constraints: HashSet, +} + +impl TypeChecker { + /// Convert an AST with source types to an AST with final types. + pub fn convert(mut self, ast: Vec>) -> Result>> { + // First pass, gather all of the type signatures in the top level + self.type_env.push(); + for decl in ast.iter() { + match decl { + Decl::Func(func) => { + let name = func.name.clone(); + let args_ty = func.args.iter().map(|arg| arg.ty.clone()).collect(); + let ty = TypeS::Func(args_ty, Box::new(func.return_ty.clone())); + let ty = self.convert_TypeS_to_TypeTC(ty); + self.type_env.insert(name, ty); + } + + Decl::TypeDef(typedef) => match &typedef.def { + TypeS::Struct(strct) => { + let strct = strct + .iter() + .map(|arg| Arg { + name: arg.name.clone(), + ty: self.convert_TypeS_to_TypeTC(arg.ty.clone()), + }) + .collect(); + self.struct_env.insert(typedef.name.clone(), strct); + } + _ => {} + }, + } + } + + // Now, type-check each function separately + let mut new_decl = Vec::new(); + for decl in ast.iter() { + match decl { + Decl::Func(func) => { + self.type_env.push(); + + for arg in func.args.iter() { + let arg_ty = self.convert_TypeS_to_TypeTC(arg.ty.clone()); + self.type_env.insert(arg.name.clone(), arg_ty); + } + + let constraints = HashSet::new(); + let new_stmts = self.annotate_stmts(&func.stmts)?; + + let mut args_ty = Vec::new(); + let mut args_func = Vec::new(); + for arg in func.args.iter() { + let ty = self.convert_TypeS_to_TypeTC(arg.ty.clone()); + args_ty.push(ty.clone()); + args_func.push(Arg { + name: arg.name.clone(), + ty, + }); + } + + let return_ty = self.convert_TypeS_to_TypeTC(func.return_ty.clone()); + let decorated_func: Func = Func { + name: func.name.clone(), + args: args_func, + return_ty, + stmts: new_stmts, + }; + + self.type_env.pop(); + + self.unify_constraints(&constraints)?; + println!("Func: {:?}", decorated_func); + println!("Constraints:"); + for Constraint(left, right) in constraints { + println!("- {:?} == {:?}", left, right); + } + + println!("Assignments:"); + for (num, ty) in self.assignments.iter() { + println!("- {} : {:?}", num, ty); + } + + let typed_func = self.substitute_in_func(decorated_func)?; + new_decl.push(Decl::Func(typed_func)); + } + + _ => {} + } + } + + self.type_env.pop(); + Ok(new_decl) + } + + /// Convert a type from the source code into a type used for type-checking. + #[allow(non_snake_case)] + fn convert_TypeS_to_TypeTC(&mut self, ty: TypeS) -> TypeTC { + match ty { + TypeS::Infer => self.type_var(), + TypeS::Named(name) => match self.struct_env.lookup(&name) { + Some(ty) => TypeTC::Struct(ty.clone()), + None => self.type_var(), + }, + + TypeS::Unit => TypeTC::Unit, + TypeS::Int => TypeTC::Int, + TypeS::Bool => TypeTC::Bool, + + TypeS::Func(args, ret) => { + let args = args + .into_iter() + .map(|arg| self.convert_TypeS_to_TypeTC(arg)) + .collect(); + let ret = self.convert_TypeS_to_TypeTC(*ret); + TypeTC::Func(args, Box::new(ret)) + } + + TypeS::Struct(fields) => TypeTC::Struct( + fields + .into_iter() + .map(|arg| Arg { + name: arg.name, + ty: self.convert_TypeS_to_TypeTC(arg.ty), + }) + .collect(), + ), + } + } + + #[allow(non_snake_case)] + fn convert_TypeTC_to_Type(&self, ty: &TypeTC) -> Result { + Ok(match ty { + TypeTC::Var(n) => match self.assignments.get(&n) { + Some(v) => self.convert_TypeTC_to_Type(&v)?, + None => bail!("Unsolved constraint variable {n}"), + }, + + TypeTC::Unit => Type::Unit, + TypeTC::Int => Type::Int, + TypeTC::Bool => Type::Bool, + + TypeTC::Func(args, ret) => { + let args = args + .into_iter() + .map(|arg| self.convert_TypeTC_to_Type(&arg)) + .collect::>()?; + let ret = self.convert_TypeTC_to_Type(&ret)?; + Type::Func(args, Box::new(ret)) + } + + TypeTC::Struct(fields) => Type::Struct( + fields + .into_iter() + .map(|arg| { + Ok(Arg { + name: arg.name.clone(), + ty: self.convert_TypeTC_to_Type(&arg.ty)?, + }) + }) + .collect::>()?, + ), + }) + } + + fn annotate_stmts( + &mut self, + stmts: impl AsRef<[Stmt]>, + ) -> Result>> { + let stmts = stmts.as_ref(); + let mut new_stmts = Vec::new(); + self.type_env.push(); + + for stmt in stmts.iter() { + match stmt { + Stmt::Return(ret_val) => { + let new_stmt = match ret_val { + Some(v) => Some(self.annotate_expr(v)?), + None => None, + }; + new_stmts.push(Stmt::Return(new_stmt)); + } + + Stmt::Let(name, ty, body) => { + let ty = self.convert_TypeS_to_TypeTC(ty.clone()); + let new_stmt = + Stmt::Let(name.clone(), ty.clone(), self.annotate_expr(body)?); + + self.type_env.insert(name.to_owned(), ty); + new_stmts.push(new_stmt); + } + + Stmt::IfElse(if_else) => { + let new_stmt = Stmt::IfElse(self.annotate_if_else(&if_else)?); + new_stmts.push(new_stmt); + } + } + } + + self.type_env.pop(); + Ok(new_stmts) + } + + fn annotate_expr(&mut self, expr: &Expr) -> Result> { + Ok(match &expr.kind { + ExprKind::Int(n) => Expr { + kind: ExprKind::Int(*n), + ty: TypeTC::Int, + }, + + ExprKind::Var(name) => { + let ty = match self.type_env.lookup(name) { + Some(v) => v.clone(), + None => self.type_var(), + }; + Expr { + kind: ExprKind::Var(name.clone()), + ty, + } + } + + ExprKind::BinOp(left, op, right) => { + let left = self.annotate_expr(left)?; + let right = self.annotate_expr(right)?; + let output = self.type_var(); + + op.constraints(self, &left.ty, &right.ty, &output); + + Expr { + kind: ExprKind::BinOp(Box::new(left), *op, Box::new(right)), + ty: output, + } + } + + ExprKind::Call(func_name, args) => { + let mut args_annot = Vec::new(); + let ret_ty = self.type_var(); + + let (func_args_ty, func_ret_ty) = match self.type_env.lookup(func_name) + { + Some(TypeTC::Func(args, ret)) => (args.clone(), *ret.clone()), + Some(_) => bail!("Not a function"), + None => bail!("Name not found"), + }; + + self.constrain(ret_ty.clone(), func_ret_ty); + + for (arg, expected_ty) in args.iter().zip(func_args_ty.iter()) { + let arg_annot = self.annotate_expr(arg)?; + self.constrain(arg_annot.ty.clone(), expected_ty.clone()); + args_annot.push(arg_annot); + } + + Expr { + kind: ExprKind::Call(func_name.to_string(), args_annot), + ty: ret_ty, + } + } + + ExprKind::StructNew(struct_ref, values) => { + let struct_def = match self.struct_env.lookup(struct_ref) { + Some(v) => v.clone(), + None => bail!("No struct named {struct_ref:?}"), + }; + + let mut values2 = Vec::new(); + for (arg, value) in struct_def.iter().zip(values.iter()) { + let ty = self.type_var(); + let expr = self.annotate_expr(&value.expr)?; + self.constrain(arg.ty.clone(), ty.clone()); + values2.push(NameValue { + name: value.name.clone(), + expr, + }); + } + + Expr { + kind: ExprKind::StructNew(struct_ref.to_owned(), values2), + ty: TypeTC::Struct(struct_def.to_vec()), + } + } + ExprKind::FieldAccess(expr, field_name) => { + let struct_ty = self.type_var(); + let field_ty = self.type_var(); + + let expr = self.annotate_expr(&expr)?; + self.require_field(struct_ty, field_name, field_ty.clone()); + + Expr { + kind: ExprKind::FieldAccess(Box::new(expr), field_name.to_owned()), + ty: field_ty, + } + } + }) + } + + fn annotate_if_else( + &mut self, + if_else: &IfElse, + ) -> Result> { + let converted_cond = self.annotate_expr(&if_else.cond)?; + let converted_body = self.annotate_stmts(&if_else.body)?; + + let else_clause = match &if_else.else_clause { + Some(ElseClause::If(if_else2)) => { + Some(ElseClause::If(Box::new(self.annotate_if_else(&if_else2)?))) + } + Some(ElseClause::Body(stmts)) => { + Some(ElseClause::Body(self.annotate_stmts(&stmts)?)) + } + None => None, + }; + + Ok(IfElse { + cond: converted_cond, + body: converted_body, + else_clause, + }) + } + + fn substitute_in_expr_kind( + &self, + expr_kind: ExprKind, + ) -> Result> { + Ok(match expr_kind { + ExprKind::Int(n) => ExprKind::Int(n), + ExprKind::Var(name) => ExprKind::Var(name), + ExprKind::BinOp(left, op, right) => { + let left = self.substitute_in_expr(*left)?; + let right = self.substitute_in_expr(*right)?; + ExprKind::BinOp(Box::new(left), op, Box::new(right)) + } + ExprKind::Call(func, args) => { + let args = args + .into_iter() + .map(|arg| self.substitute_in_expr(arg)) + .collect::>()?; + ExprKind::Call(func, args) + } + ExprKind::StructNew(struct_ref, values) => { + let values = values + .into_iter() + .map(|nv| { + Ok(NameValue { + name: nv.name, + expr: self.substitute_in_expr(nv.expr)?, + }) + }) + .collect::>()?; + ExprKind::StructNew(struct_ref, values) + } + ExprKind::FieldAccess(expr, field_name) => { + let expr = self.substitute_in_expr(*expr)?; + ExprKind::FieldAccess(Box::new(expr), field_name) + } + }) + } + + fn substitute_in_expr(&self, expr: Expr) -> Result> { + Ok(Expr { + kind: self.substitute_in_expr_kind(expr.kind)?, + ty: self.convert_TypeTC_to_Type(&expr.ty)?, + }) + } + + fn substitute_in_if_else( + &self, + if_else: IfElse, + ) -> Result> { + let cond = self.substitute_in_expr(if_else.cond)?; + let body = self.substitute_in_stmts(if_else.body)?; + + let else_clause = match if_else.else_clause { + Some(ElseClause::If(if_else2)) => Some(ElseClause::If(Box::new( + self.substitute_in_if_else(*if_else2)?, + ))), + Some(ElseClause::Body(body)) => { + Some(ElseClause::Body(self.substitute_in_stmts(body)?)) + } + None => None, + }; + + Ok(IfElse { + cond, + body, + else_clause, + }) + } + + fn substitute_in_stmts( + &self, + stmts: Vec>, + ) -> Result>> { + stmts + .into_iter() + .map(|stmt| { + Ok(match stmt { + Stmt::Let(name, ty, body) => { + let ty = self.convert_TypeTC_to_Type(&ty)?; + Stmt::Let(name, ty, self.substitute_in_expr(body)?) + } + + Stmt::Return(ret_val) => Stmt::Return(match ret_val { + Some(v) => Some(self.substitute_in_expr(v)?), + None => None, + }), + + Stmt::IfElse(if_else) => { + Stmt::IfElse(self.substitute_in_if_else(if_else)?) + } + }) + }) + .collect() + } + + fn substitute_in_func(&self, func: Func) -> Result> { + let args = func + .args + .into_iter() + .map(|arg| { + Ok(Arg { + name: arg.name, + ty: self.convert_TypeTC_to_Type(&arg.ty)?, + }) + }) + .collect::>()?; + let return_ty = self.convert_TypeTC_to_Type(&func.return_ty)?; + Ok(Func { + name: func.name, + args, + return_ty, + stmts: self.substitute_in_stmts(func.stmts)?, + }) + } + + fn substitute_types(&mut self, ty: &TypeTC) -> TypeTC { + match ty { + TypeTC::Var(n) => match self.assignments.get(&n) { + Some(ty2) => ty2.clone(), + None => ty.clone(), + }, + TypeTC::Func(args, ret) => { + let args = args + .into_iter() + .map(|arg| self.substitute_types(arg)) + .collect(); + let ret = self.substitute_types(&*ret); + TypeTC::Func(args, Box::new(ret)) + } + TypeTC::Struct(fields) => TypeTC::Struct(fields.to_vec()), + TypeTC::Unit | TypeTC::Int | TypeTC::Bool => ty.clone(), + } + } + + fn unify_constraints( + &mut self, + constraints: &HashSet, + ) -> Result<()> { + for constraint in constraints.iter() { + match constraint { + Constraint2::Eq(left, right) => { + let left = self.substitute_types(left); + let right = self.substitute_types(right); + self.unify_single(left, right)?; + } + Constraint2::HasField(struct_ty, field_name, field_ty) => { + let struct_ty = self.substitute_types(struct_ty); + let field_ty = self.substitute_types(field_ty); + + match struct_ty { + TypeTC::Struct(fields) => {} + TypeTC::Var(n) => {} + _ => bail!( + "Expected struct with field {field_name}, got {struct_ty:?}" + ), + } + } + } + } + + Ok(()) + } + + fn unify_single(&mut self, left: TypeTC, right: TypeTC) -> Result<()> { + match (left, right) { + // Unify basic types + (TypeTC::Unit, TypeTC::Unit) + | (TypeTC::Int, TypeTC::Int) + | (TypeTC::Bool, TypeTC::Bool) => {} + + // Unify variables + (TypeTC::Var(n), o) | (o, TypeTC::Var(n)) => { + self.assignments.insert(n, o); + } + + // Unify functions + ( + TypeTC::Func(left_args, left_ret), + TypeTC::Func(right_args, right_ret), + ) => { + let mut new_constraints = HashSet::new(); + for (left_arg, right_arg) in + left_args.into_iter().zip(right_args.into_iter()) + { + new_constraints.insert(Constraint2::Eq(left_arg, right_arg)); + } + new_constraints.insert(Constraint2::Eq(*left_ret, *right_ret)); + self.unify_constraints(&new_constraints)?; + } + + (left, right) => bail!("Mismatching types {left:?} vs. {right:?}"), + }; + + Ok(()) + } + + fn type_var(&mut self) -> TypeTC { + TypeTC::Var(self.gen_int()) + } + + fn gen_int(&mut self) -> usize { + let id = self.ctr; + self.ctr += 1; + id + } + + fn constrain(&mut self, left: TypeTC, right: TypeTC) { + if left == right { + // No op, return now + return; + } + self.constraints.insert(Constraint2::Eq(left, right)); + } + + fn require_field( + &mut self, + left: TypeTC, + right_name: &str, + right_ty: TypeTC, + ) { + self.constraints.insert(Constraint2::HasField( + left, + right_name.to_owned(), + right_ty, + )); + } +} + +impl Op { + fn constraints( + &self, + tc: &mut TypeChecker, + left: &TypeTC, + right: &TypeTC, + output: &TypeTC, + ) { + match self { + Op::Plus => { + tc.constrain(left.clone(), TypeTC::Int); + tc.constrain(right.clone(), TypeTC::Int); + tc.constrain(output.clone(), TypeTC::Int); + } + Op::LessThan | Op::GreaterThan => { + tc.constrain(left.clone(), TypeTC::Int); + tc.constrain(right.clone(), TypeTC::Int); + tc.constrain(output.clone(), TypeTC::Bool); + } + } + } +} + +#[derive(Debug, Hash, Eq, PartialEq)] +struct Constraint(TypeTC, TypeTC); + +#[derive(Debug, Hash, Eq, PartialEq)] +enum Constraint2 { + Eq(TypeTC, TypeTC), + HasField(TypeTC, String, TypeTC), +} diff --git a/src/ast/typeck_bidi.rs b/src/ast/typeck_bidi.rs new file mode 100644 index 0000000..3ceca2d --- /dev/null +++ b/src/ast/typeck_bidi.rs @@ -0,0 +1,149 @@ +use std::collections::{HashSet, VecDeque}; + +use anyhow::Result; + +use crate::utils::LayeredEnv; + +use super::types::{Type, TypeS, TypeTC}; +use super::{Arg, Decl, Expr, ExprKind, Stmt}; + +#[derive(Default)] +pub struct TypeChecker { + ctr: usize, + type_env: LayeredEnv, + struct_env: LayeredEnv>>, +} + +impl TypeChecker { + pub fn convert(mut self, ast: Vec>) -> Result>> { + // First pass, gather all of the type signatures in the top level + self.type_env.push(); + for decl in ast.iter() { + match decl { + Decl::Func(func) => { + let name = func.name.clone(); + let args_ty = func.args.iter().map(|arg| arg.ty.clone()).collect(); + let ty = TypeS::Func(args_ty, Box::new(func.return_ty.clone())); + let ty = self.convert_TypeS_to_TypeTC(ty); + self.type_env.insert(name, ty); + } + + Decl::TypeDef(typedef) => match &typedef.def { + TypeS::Struct(strct) => { + let strct = strct + .iter() + .map(|arg| Arg { + name: arg.name.clone(), + ty: self.convert_TypeS_to_TypeTC(arg.ty.clone()), + }) + .collect(); + self.struct_env.insert(typedef.name.clone(), strct); + } + _ => {} + }, + } + } + + // Now, type-check each function separately + // let mut new_decl = Vec::new(); + for decl in ast.iter() { + if let Decl::Func(func) = decl { + self.type_env.push(); + + // Stmt queue. If there's not enough information to figure out the type, + // punt it to the back of the queue. Eventually we stitch it back + // together in order. + let mut stmts: VecDeque<_> = func.stmts.iter().enumerate().collect(); + + while !stmts.is_empty() { + let next_stmt = stmts.pop_front(); + + // self.infer_stmt(next_stmt); + } + + self.type_env.pop(); + } + } + + self.type_env.pop(); + todo!() + } + + fn infer_stmt(&mut self, stmt: Stmt) -> Result> { + todo!() + } + + fn infer_expr(&mut self, expr: Expr) -> Result> { + Ok(match expr.kind { + ExprKind::Int(n) => Expr { + kind: ExprKind::Int(n), + ty: TypeTC::Int, + }, + ExprKind::Var(_) => todo!(), + ExprKind::BinOp(_, _, _) => todo!(), + ExprKind::Call(_, _) => todo!(), + ExprKind::StructNew(_, _) => todo!(), + ExprKind::FieldAccess(_, _) => todo!(), + }) + } + + fn check_expr( + &mut self, + expr: Expr, + ty: TypeTC, + ) -> Result> { + Ok(match expr.kind { + ExprKind::Int(_) => self.infer_expr(expr)?, + ExprKind::Var(_) => todo!(), + ExprKind::BinOp(_, _, _) => todo!(), + ExprKind::Call(_, _) => todo!(), + ExprKind::StructNew(_, _) => todo!(), + ExprKind::FieldAccess(_, _) => todo!(), + }) + } + + /// Convert a type from the source code into a type used for type-checking. + #[allow(non_snake_case)] + fn convert_TypeS_to_TypeTC(&mut self, ty: TypeS) -> TypeTC { + match ty { + TypeS::Infer => self.type_var(), + TypeS::Named(name) => match self.struct_env.lookup(&name) { + Some(ty) => TypeTC::Struct(ty.clone()), + None => self.type_var(), + }, + + TypeS::Unit => TypeTC::Unit, + TypeS::Int => TypeTC::Int, + TypeS::Bool => TypeTC::Bool, + + TypeS::Func(args, ret) => { + let args = args + .into_iter() + .map(|arg| self.convert_TypeS_to_TypeTC(arg)) + .collect(); + let ret = self.convert_TypeS_to_TypeTC(*ret); + TypeTC::Func(args, Box::new(ret)) + } + + TypeS::Struct(fields) => TypeTC::Struct( + fields + .into_iter() + .map(|arg| Arg { + name: arg.name, + ty: self.convert_TypeS_to_TypeTC(arg.ty), + }) + .collect(), + ), + } + } + + fn type_var(&mut self) -> TypeTC { + TypeTC::Var(self.gen_int()) + } + + fn gen_int(&mut self) -> usize { + let id = self.ctr; + self.ctr += 1; + id + } +} diff --git a/src/ast/typed.rs b/src/ast/typed.rs deleted file mode 100644 index e8e0551..0000000 --- a/src/ast/typed.rs +++ /dev/null @@ -1,532 +0,0 @@ -use std::{ - collections::{HashMap, HashSet}, - mem, -}; - -use anyhow::Result; - -use crate::ast::Expr; - -use super::{Decl, ElseClause, ExprKind, Func, IfElse, Op, Stmt, Type}; - -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -pub enum Type_ { - Var(usize), - - Int, - Bool, - - StructInst(Vec<(String, Type_)>), - Func(Vec, Box), -} - -impl Type_ { - fn from_type(ty: Type) -> Self { - match ty { - Type::Int => Type_::Int, - Type::Bool => Type_::Bool, - - Type::Func(args, ret) => { - let args = args.into_iter().map(|arg| Type_::from_type(arg)).collect(); - let ret = Type_::from_type(*ret); - Type_::Func(args, Box::new(ret)) - } - Type::StructInst(fields) => Type_::StructInst( - fields - .into_iter() - .map(|(name, ty)| (name, Type_::from_type(ty))) - .collect(), - ), - } - } - - fn convert(&self, assignments: &Assignments) -> Result { - Ok(match self { - Type_::Var(n) => match assignments.get(&n) { - Some(v) => v.convert(assignments)?, - None => bail!("Unsolved constraint variable {n}"), - }, - - Type_::Int => Type::Int, - Type_::Bool => Type::Bool, - - Type_::Func(args, ret) => { - let args = args - .into_iter() - .map(|arg| arg.convert(assignments)) - .collect::>()?; - let ret = ret.convert(assignments)?; - Type::Func(args, Box::new(ret)) - } - Type_::StructInst(fields) => Type::StructInst( - fields - .into_iter() - .map(|(name, ty)| Ok((name.clone(), ty.convert(assignments)?))) - .collect::>()?, - ), - }) - } -} - -impl Op { - fn constraints( - &self, - ctx: &mut AnnotationContext, - left: &Type_, - right: &Type_, - output: &Type_, - ) { - match self { - Op::Plus => { - ctx.constrain(left.clone(), Type_::Int); - ctx.constrain(right.clone(), Type_::Int); - ctx.constrain(output.clone(), Type_::Int); - } - Op::LessThan | Op::GreaterThan => { - ctx.constrain(left.clone(), Type_::Int); - ctx.constrain(right.clone(), Type_::Int); - ctx.constrain(output.clone(), Type_::Bool); - } - } - } -} - -#[derive(Debug, Hash, Eq, PartialEq)] -struct Constraint(Type_, Type_); - -#[derive(Debug, Default)] -struct Env { - parent: Option>, - local_type_map: HashMap, -} - -impl Env { - pub fn lookup(&self, name: impl AsRef) -> Option<&Type_> { - match self.local_type_map.get(name.as_ref()) { - Some(v) => Some(v), - None => match &self.parent { - Some(p) => p.lookup(name), - None => None, - }, - } - } -} - -struct AnnotationContext<'a> { - counter: usize, - constraints: &'a mut HashSet, - current_env: Option, -} - -impl<'a> AnnotationContext<'a> { - pub fn type_var(&mut self) -> Type_ { - Type_::Var(self.gen_int()) - } - - pub fn gen_int(&mut self) -> usize { - let id = self.counter; - self.counter += 1; - id - } - - pub fn constrain(&mut self, left: Type_, right: Type_) { - if left == right { - // No op, return now - return; - } - self.constraints.insert(Constraint(left, right)); - } - - pub fn lookup(&self, name: impl AsRef) -> Option<&Type_> { - self.current_env.as_ref().unwrap().lookup(name) - } - - pub fn define_var(&mut self, name: impl AsRef, ty: Type_) { - self - .current_env - .as_mut() - .unwrap() - .local_type_map - .insert(name.as_ref().to_string(), ty); - } - - pub fn push_scope(&mut self) { - self.current_env = Some(Env { - parent: Some(Box::new(self.current_env.take().unwrap())), - local_type_map: Default::default(), - }); - } - - pub fn pop_scope(&mut self) { - self.current_env = - Some(*self.current_env.take().unwrap().parent.take().unwrap()); - } -} - -fn annotate_stmts( - ctx: &mut AnnotationContext, - stmts: impl AsRef<[Stmt<()>]>, -) -> Result>> { - let stmts = stmts.as_ref(); - let mut new_stmts = Vec::new(); - ctx.push_scope(); - - for stmt in stmts.iter() { - match stmt { - Stmt::Return(ret_val) => { - let new_stmt = match ret_val { - Some(v) => Some(annotate_expr(ctx, v)?), - None => None, - }; - new_stmts.push(Stmt::Return(new_stmt)); - } - - Stmt::Let(name, ty, body) => { - let new_stmt = - Stmt::Let(name.clone(), ty.clone(), annotate_expr(ctx, body)?); - let ty = match ty { - Some(v) => Type_::from_type(v.clone()), - None => ctx.type_var(), - }; - ctx.define_var(name, ty); - new_stmts.push(new_stmt); - } - - Stmt::IfElse(if_else) => { - let new_stmt = Stmt::IfElse(annotate_if_else(ctx, &if_else)?); - new_stmts.push(new_stmt); - } - } - } - - ctx.pop_scope(); - Ok(new_stmts) -} - -fn annotate_expr( - ctx: &mut AnnotationContext, - expr: &Expr<()>, -) -> Result> { - Ok(match &expr.kind { - ExprKind::Int(n) => Expr { - kind: ExprKind::Int(*n), - ty: Type_::Int, - }, - - ExprKind::Var(name) => { - let ty = match ctx.lookup(name) { - Some(v) => v.clone(), - None => ctx.type_var(), - }; - Expr { - kind: ExprKind::Var(name.clone()), - ty, - } - } - - ExprKind::BinOp(left, op, right) => { - let left = annotate_expr(ctx, left)?; - let right = annotate_expr(ctx, right)?; - let output = ctx.type_var(); - - op.constraints(ctx, &left.ty, &right.ty, &output); - - Expr { - kind: ExprKind::BinOp(Box::new(left), *op, Box::new(right)), - ty: output, - } - } - - ExprKind::Call(func_name, args) => { - let mut args_annot = Vec::new(); - let ret_ty = ctx.type_var(); - - let (func_args_ty, func_ret_ty) = - match ctx.current_env.as_ref().unwrap().lookup(func_name) { - Some(Type_::Func(args, ret)) => (args.clone(), *ret.clone()), - Some(_) => bail!("Not a function"), - None => bail!("Name not found"), - }; - - ctx.constrain(ret_ty.clone(), func_ret_ty); - - for (arg, expected_ty) in args.iter().zip(func_args_ty.iter()) { - let arg_annot = annotate_expr(ctx, arg)?; - ctx.constrain(arg_annot.ty.clone(), expected_ty.clone()); - args_annot.push(arg_annot); - } - - Expr { - kind: ExprKind::Call(func_name.to_string(), args_annot), - ty: ret_ty, - } - } - }) -} - -fn annotate_if_else( - ctx: &mut AnnotationContext, - if_else: &IfElse<()>, -) -> Result> { - let converted_cond = annotate_expr(ctx, &if_else.cond)?; - let converted_body = annotate_stmts(ctx, &if_else.body)?; - - let else_clause = match &if_else.else_clause { - Some(ElseClause::If(if_else2)) => { - Some(ElseClause::If(Box::new(annotate_if_else(ctx, &if_else2)?))) - } - Some(ElseClause::Body(stmts)) => { - Some(ElseClause::Body(annotate_stmts(ctx, &stmts)?)) - } - None => None, - }; - - Ok(IfElse { - cond: converted_cond, - body: converted_body, - else_clause, - }) -} - -fn collect_info( - env: Env, - func: &Func<()>, -) -> Result<(Func, HashSet, Env)> { - let mut constraints = HashSet::new(); - let mut ctx = AnnotationContext { - counter: 0, - constraints: &mut constraints, - current_env: Some(env), - }; - let new_stmts = annotate_stmts(&mut ctx, &func.stmts)?; - - let args_ = func - .args - .iter() - .cloned() - .map(|arg| Type_::from_type(arg.ty)) - .collect(); - let total_ty = - Type_::Func(args_, Box::new(Type_::from_type(func.return_ty.clone()))); - - let func = Func { - name: func.name.clone(), - args: func.args.clone(), - return_ty: func.return_ty.clone(), - stmts: new_stmts, - ty: total_ty, - }; - - let env = ctx.current_env.unwrap(); - mem::drop(ctx.constraints); - Ok((func, constraints, env)) -} - -type Assignments = HashMap; - -fn substitute_types(assignments: &Assignments, ty: &Type_) -> Type_ { - match ty { - Type_::Var(n) => match assignments.get(&n) { - Some(ty2) => ty2.clone(), - None => ty.clone(), - }, - Type_::Func(args, ret) => { - let args = args - .into_iter() - .map(|arg| substitute_types(assignments, arg)) - .collect(); - let ret = substitute_types(assignments, &*ret); - Type_::Func(args, Box::new(ret)) - } - Type_::StructInst(fields) => Type_::StructInst( - fields - .into_iter() - .map(|(name, ty)| (name.clone(), substitute_types(assignments, ty))) - .collect(), - ), - Type_::Int | Type_::Bool => ty.clone(), - } -} - -fn unify_constraints(constraints: &HashSet) -> Result { - let mut assignments = HashMap::new(); - - for Constraint(left, right) in constraints { - let left = substitute_types(&assignments, left); - let right = substitute_types(&assignments, right); - unify_single(&mut assignments, left, right)?; - } - - Ok(assignments) -} - -fn unify_single( - assignments: &mut Assignments, - left: Type_, - right: Type_, -) -> Result<()> { - match (left, right) { - (Type_::Int, Type_::Int) | (Type_::Bool, Type_::Bool) => {} - - (Type_::Var(n), o) | (o, Type_::Var(n)) => { - assignments.insert(n, o); - } - - (Type_::Func(left_args, left_ret), Type_::Func(right_args, right_ret)) => { - let mut new_constraints = HashSet::new(); - for (left_arg, right_arg) in - left_args.into_iter().zip(right_args.into_iter()) - { - new_constraints.insert(Constraint(left_arg, right_arg)); - } - new_constraints.insert(Constraint(*left_ret, *right_ret)); - assignments.extend(unify_constraints(&new_constraints)?); - } - - (left, right) => bail!("Mismatching types {left:?} vs. {right:?}"), - }; - - Ok(()) -} - -fn substitute_in_expr_kind( - assignments: &Assignments, - expr_kind: ExprKind, -) -> Result> { - Ok(match expr_kind { - ExprKind::Int(n) => ExprKind::Int(n), - ExprKind::Var(name) => ExprKind::Var(name), - ExprKind::BinOp(left, op, right) => { - let left = substitute_in_expr(assignments, *left)?; - let right = substitute_in_expr(assignments, *right)?; - ExprKind::BinOp(Box::new(left), op, Box::new(right)) - } - ExprKind::Call(func, args) => { - let args = args - .into_iter() - .map(|arg| substitute_in_expr(assignments, arg)) - .collect::>()?; - ExprKind::Call(func, args) - } - }) -} - -fn substitute_in_expr( - assignments: &Assignments, - expr: Expr, -) -> Result> { - Ok(Expr { - kind: substitute_in_expr_kind(assignments, expr.kind)?, - ty: expr.ty.convert(assignments)?, - }) -} - -fn substitute_in_if_else( - assignments: &Assignments, - if_else: IfElse, -) -> Result> { - let cond = substitute_in_expr(assignments, if_else.cond)?; - let body = substitute_in_stmts(assignments, if_else.body)?; - - let else_clause = match if_else.else_clause { - Some(ElseClause::If(if_else2)) => Some(ElseClause::If(Box::new( - substitute_in_if_else(assignments, *if_else2)?, - ))), - Some(ElseClause::Body(body)) => { - Some(ElseClause::Body(substitute_in_stmts(assignments, body)?)) - } - None => None, - }; - - Ok(IfElse { - cond, - body, - else_clause, - }) -} - -fn substitute_in_stmts( - assignments: &Assignments, - stmts: Vec>, -) -> Result>> { - stmts - .into_iter() - .map(|stmt| { - Ok(match stmt { - Stmt::Let(name, ty, body) => { - Stmt::Let(name, ty, substitute_in_expr(assignments, body)?) - } - - Stmt::Return(ret_val) => Stmt::Return(match ret_val { - Some(v) => Some(substitute_in_expr(assignments, v)?), - None => None, - }), - - Stmt::IfElse(if_else) => { - Stmt::IfElse(substitute_in_if_else(assignments, if_else)?) - } - }) - }) - .collect() -} - -fn substitute_in_func( - assignments: &Assignments, - func: Func, -) -> Result> { - Ok(Func { - name: func.name, - args: func.args, - return_ty: func.return_ty, - stmts: substitute_in_stmts(assignments, func.stmts)?, - ty: func.ty.convert(assignments)?, - }) -} - -pub fn convert(ast: Vec>) -> Result>> { - // First pass, gather all of the type signatures in the top level - let mut top_level_env = Env::default(); - for decl in ast.iter() { - match decl { - super::Decl::Func(func) => { - let name = func.name.clone(); - let args_ty = func.args.iter().map(|arg| arg.ty.clone()).collect(); - let ty = Type::Func(args_ty, Box::new(func.return_ty.clone())); - top_level_env - .local_type_map - .insert(name, Type_::from_type(ty)); - } - } - } - - // Now, type-check each function separately - let mut new_decl = Vec::new(); - let mut env = top_level_env; - for decl in ast.iter() { - match decl { - Decl::Func(func) => { - let mut scoped_env = Env { - parent: Some(Box::new(env)), - local_type_map: HashMap::new(), - }; - - for arg in func.args.iter() { - scoped_env - .local_type_map - .insert(arg.name.clone(), Type_::from_type(arg.ty.clone())); - } - - let (decorated_func, constraints, env2) = - collect_info(scoped_env, func)?; - env = *env2.parent.unwrap(); - - let assignments = unify_constraints(&constraints)?; - - let typed_func = substitute_in_func(&assignments, decorated_func)?; - new_decl.push(Decl::Func(typed_func)); - } - } - } - - Ok(new_decl) -} diff --git a/src/ast/types.rs b/src/ast/types.rs new file mode 100644 index 0000000..35c65f8 --- /dev/null +++ b/src/ast/types.rs @@ -0,0 +1,57 @@ +use super::Arg; + +pub trait IType { + fn func_ty(args: impl AsRef<[Self]>, ret: Self) -> Self + where + Self: Sized; +} + +/// The type, as written in the source code. Notably, this includes type +/// variables that cannot appear in the final type. +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum TypeS { + /// Type is not given in the source, will infer it later. + Infer, + + Unit, + Bool, + Int, + + Struct(Vec>), + Func(Vec, Box), + Named(String), +} + +impl IType for TypeS { + fn func_ty(args: impl AsRef<[Self]>, ret: Self) -> Self + where + Self: Sized, + { + TypeS::Func(args.as_ref().to_vec(), Box::new(ret)) + } +} + +/// The "real" type, that is modeled. +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum Type { + Unit, + Bool, + Int, + + Struct(Vec>), + Func(Vec, Box), +} + +/// Types used during type-checking. Notably, this includes the constraint +/// variable Var +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum TypeTC { + Var(usize), + + Unit, + Bool, + Int, + + Struct(Vec>), + Func(Vec, Box), +} diff --git a/src/codegen/llvm_ir/expr.rs b/src/codegen/llvm_ir/expr.rs index b3514a8..65cdf22 100644 --- a/src/codegen/llvm_ir/expr.rs +++ b/src/codegen/llvm_ir/expr.rs @@ -2,7 +2,7 @@ use std::io::Write; use anyhow::Result; -use crate::ast::{Expr, ExprKind, Op, Type}; +use crate::ast::{types::Type, Expr, ExprKind, Op}; use super::{type_to_llvm, LlvmIrCodegen}; @@ -25,19 +25,15 @@ impl LlvmIrCodegen { match &expr.kind { ExprKind::Int(n) => { expr_ref = format!("{}", n); - // writeln!(self.writer, "%{} = alloca {}", expr_id, ty_str)?; - // writeln!( - // self.writer, - // "store {} {}, {}* %{}", - // ty_str, n, ty_str, expr_id - // )?; } + ExprKind::Var(name) => match self.var_env.lookup(name) { Some((_, name)) => { expr_ref = name.clone(); } None => bail!("Unbound name {name:?}"), }, + ExprKind::BinOp(left, op, right) => { let result_ty = match op.check_types(&left.ty, &right.ty) { Some(v) => type_to_llvm(&v), @@ -75,6 +71,7 @@ impl LlvmIrCodegen { )?, } } + ExprKind::Call(func, args) => match self.var_env.lookup(func) { Some((Type::Func(func_args_ty, func_ret_ty), func_name)) => { // Clone these so we aren't depending on a reference @@ -106,6 +103,13 @@ impl LlvmIrCodegen { } _ => bail!("No function with name {func:?}"), }, + + ExprKind::StructNew(struct_ref, values) => { + todo!() + } + ExprKind::FieldAccess(expr, field_name) => { + todo!() + } } Ok(ExprLlvm(expr_ref, expr)) diff --git a/src/codegen/llvm_ir/if_else.rs b/src/codegen/llvm_ir/if_else.rs index 1cf8338..3dd09b6 100644 --- a/src/codegen/llvm_ir/if_else.rs +++ b/src/codegen/llvm_ir/if_else.rs @@ -2,7 +2,7 @@ use std::io::Write; use anyhow::Result; -use crate::ast::{ElseClause, IfElse, Type}; +use crate::ast::{ElseClause, IfElse, types::Type}; use super::LlvmIrCodegen; diff --git a/src/codegen/llvm_ir/mod.rs b/src/codegen/llvm_ir/mod.rs index aeaaded..313dd09 100644 --- a/src/codegen/llvm_ir/mod.rs +++ b/src/codegen/llvm_ir/mod.rs @@ -6,7 +6,8 @@ use std::io::Write; use anyhow::Result; -use crate::ast::{Decl, Type}; +use crate::ast::Arg; +use crate::ast::{types::Type, Decl}; use crate::utils::LayeredEnv; use super::CodegenBackend; @@ -14,15 +15,17 @@ use super::CodegenBackend; pub struct LlvmIrCodegen { ctr: usize, var_env: LayeredEnv, + /// Mapping from source code name to type def and LLVM struct name + struct_env: LayeredEnv>, String)>, writer: W, } impl LlvmIrCodegen { pub fn new(writer: W) -> Self { - let env = LayeredEnv::new(); LlvmIrCodegen { ctr: 0, - var_env: env, + var_env: LayeredEnv::default(), + struct_env: LayeredEnv::default(), writer, } } @@ -45,30 +48,45 @@ impl LlvmIrCodegen { fn type_to_llvm(ty: &Type) -> String { match ty { + Type::Unit => String::from("void"), Type::Int => String::from("i32"), Type::Bool => String::from("i1"), - Type::StructInst(_) => todo!(), + Type::Struct(_) => todo!(), Type::Func(_, _) => todo!(), } } impl CodegenBackend for LlvmIrCodegen { fn convert(&mut self, program: Vec>) -> Result<()> { - // First, create a global environment and add all functions, so they can be - // called within other functions + // First, create a global environment and add all functions and types, so + // they can be referenced within other functions // - // This is in a separate loop so there's no dependency on function order - self.var_env = LayeredEnv::new(); - for func in program.iter().filter_map(|decl| match decl { - Decl::Func(v) => Some(v), - _ => None, - }) { - let func_name = match func.name.as_str() { - "main" => String::from("@main"), - _ => self.gensym(Some("@"), format!("func.{}", func.name)), - }; - let value = (func.ty.clone(), func_name); - self.var_env.insert(func.name.clone(), value); + // This is in a separate loop so there's no dependency on order + for decl in program.iter() { + match decl { + Decl::Func(func) => { + let func_name = match func.name.as_str() { + "main" => String::from("@main"), + _ => self.gensym(Some("@"), format!("func.{}", func.name)), + }; + let func_ty = Type::Func( + func.args.iter().map(|arg| arg.ty.clone()).collect(), + Box::new(func.return_ty.clone()), + ); + let value = (func_ty, func_name); + self.var_env.insert(func.name.clone(), value); + } + + Decl::TypeDef(typedef) => match &typedef.def { + Type::Struct(strct) => { + let name = + self.gensym(Some("%"), format!("struct.{}", typedef.name)); + let value = (strct.clone(), name); + self.struct_env.insert(typedef.name.clone(), value); + } + _ => {} + }, + } } // Convert all functions @@ -76,10 +94,7 @@ impl CodegenBackend for LlvmIrCodegen { Decl::Func(v) => Some(v), _ => None, }) { - let (_, ret_ty) = match &func.ty { - Type::Func(args, ret) => (args, ret), - _ => unreachable!(""), - }; + let ret_ty = &func.return_ty; let (_, func_name) = self.var_env.lookup(&func.name).expect("Just inserted."); let func_name = func_name.to_owned(); @@ -100,7 +115,7 @@ impl CodegenBackend for LlvmIrCodegen { writeln!( self.writer, "define {} {} ({}) {{", - type_to_llvm(ret_ty), + type_to_llvm(&ret_ty), func_name, args_str.join(", "), )?; diff --git a/src/codegen/llvm_ir/stmts.rs b/src/codegen/llvm_ir/stmts.rs index c1d968f..9a5a96d 100644 --- a/src/codegen/llvm_ir/stmts.rs +++ b/src/codegen/llvm_ir/stmts.rs @@ -2,7 +2,7 @@ use std::io::Write; use anyhow::Result; -use crate::ast::{Stmt, Type}; +use crate::ast::{Stmt, types::Type}; use super::{type_to_llvm, LlvmIrCodegen}; diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 09ac3bf..d533c9f 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,8 +1,13 @@ +//! This module contains everything related to codegen. +//! +//! Although written in a style that could possibly support multiple backends, +//! the only planned backend is the [LLVM IR][llvm_ir] backend. + pub mod llvm_ir; use anyhow::Result; -use crate::ast::{Decl, Type}; +use crate::ast::{Decl, types::Type}; pub trait CodegenBackend { fn convert(&mut self, program: Vec>) -> Result<()>; diff --git a/src/lib.rs b/src/lib.rs index bbc6c3f..9a47dcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,15 @@ +#![doc = include_str!("../README.md")] + #[macro_use] extern crate lalrpop_util; #[macro_use] extern crate anyhow; -lalrpop_mod!(pub parser); +/// This module contains an auto-generated parser created by LALRPOP. +pub mod parser { + lalrpop_mod!(parser); + pub use self::parser::*; +} pub mod ast; pub mod codegen; diff --git a/src/parser.lalrpop b/src/parser.lalrpop index 5273b37..ec203d9 100644 --- a/src/parser.lalrpop +++ b/src/parser.lalrpop @@ -1,67 +1,93 @@ -use crate::ast::*; +use crate::ast::{*, types::*}; grammar; -pub Program: Vec> = Decl* => <>; +pub Program: Vec> = Decl* => <>; -Decl: Decl<()> = { +Decl: Decl = { Func => Decl::Func(<>), + TypeDef => Decl::TypeDef(<>), }; -Func: Func<()> = { +Func: Func = { "fn" "(" ")" "->" "{" "}" => - Func { name, args, return_ty, stmts, ty: (), }, + Func { name, args, return_ty, stmts }, + "fn" "(" ")" "{" "}" => + Func { name, args, return_ty: TypeS::Unit, stmts }, }; -Args: Vec = Punct<",", Arg>? => <>.unwrap_or_else(|| Vec::new()); +TypeDef: TypeDef = { + "type" "=" => TypeDef { name, def }, +}; -Arg: Arg = ":" => Arg { name, ty }; +Args: Vec> = Punct<",", Arg>? => <>.unwrap_or_else(|| Vec::new()); -Stmt: Stmt<()> = { +Arg: Arg = ":" => Arg { name, ty }; + +NameValue: NameValue = ":" => NameValue { name, expr }; + +Stmt: Stmt = { "let" "=" ";" => - Stmt::Let(name, ty, expr), + Stmt::Let(name, ty.unwrap_or(TypeS::Infer), expr), "return" ";" => Stmt::Return(expr), IfElse => Stmt::IfElse(<>), }; -ColonType: Type = ":" => ty; +ColonType: TypeS = ":" => ty; -IfElse: IfElse<()> = +IfElse: IfElse = "if" "{" "}" => IfElse { cond, body, else_clause }; -Else: ElseClause<()> = "else" => else_clause; -Else_: ElseClause<()> = { +Else: ElseClause = "else" => else_clause; +Else_: ElseClause = { IfElse => ElseClause::If(Box::new(<>)), "{" "}" => ElseClause::Body(body), }; -Expr: Expr<()> = { +Expr: Expr = { #[precedence(level = "0")] "(" ")" => expr, #[precedence(level = "0")] - r"-?[0-9]+" => Expr { kind: ExprKind::Int(<>.parse::().unwrap()), ty: () }, + r"-?[0-9]+" => Expr { + kind: ExprKind::Int(<>.parse::().unwrap()), + ty: TypeS::Infer, + }, #[precedence(level = "0")] - Ident => Expr { kind: ExprKind::Var(<>), ty: () }, + "new" "{" > "}" => Expr { + kind: ExprKind::StructNew(name, values), + ty: TypeS::Infer, + }, + + #[precedence(level = "1")] + Ident => Expr { kind: ExprKind::Var(<>), ty: TypeS::Infer }, #[precedence(level = "2")] + #[assoc(side = "left")] + "." => + Expr { kind: ExprKind::FieldAccess(Box::new(expr), field), ty: TypeS::Infer }, + + #[precedence(level = "3")] "(" ?> ")" => - Expr { kind: ExprKind::Call(func, args.unwrap_or_else(|| vec![])), ty: () }, + Expr { + kind: ExprKind::Call(func, args.unwrap_or_else(|| vec![])), + ty: TypeS::Infer, + }, #[precedence(level = "8")] #[assoc(side = "left")] => Expr { kind: ExprKind::BinOp(Box::new(left), op, Box::new(right)), - ty: (), + ty: TypeS::Infer, }, #[precedence(level = "13")] #[assoc(side = "none")] => Expr { kind: ExprKind::BinOp(Box::new(left), op, Box::new(right)), - ty: (), + ty: TypeS::Infer, }, }; @@ -74,22 +100,24 @@ CompareOp: Op = { ">" => Op::GreaterThan, }; -Type: Type = { - "int" => Type::Int, +Type: TypeS = { + "int" => TypeS::Int, + "struct" "{" > "}" => TypeS::Struct(fields), + => TypeS::Named(name), }; Ident: String = r"([A-Za-z][A-Za-z0-9_]*)|(_[A-Za-z0-9_]+)" => <>.to_string(); Punct: Vec = { P => vec![], - )?> => { - match rest { - Some(rest) => { - let (_, mut vec) = rest; - vec.insert(0, first); - vec - } - None => vec![first], - } - }, + + T => vec![<>], + + P => vec![first], + + P > => { + let mut vec = rest; + vec.insert(0, first); + vec + } }; diff --git a/src/utils.rs b/src/utils.rs index 9be5dfb..9d6874a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,11 +1,47 @@ use std::collections::HashMap; use std::hash::Hash; +/// Layered environment that lets us push and pop frames and lookup across +/// layers. #[derive(Debug)] pub struct LayeredEnv { inner: Option>, } +impl Default for LayeredEnv { + fn default() -> Self { + LayeredEnv { + inner: Some(InnerEnv::new()), + } + } +} + +impl LayeredEnv { + pub fn insert(&mut self, key: K, value: V) { + self.inner.as_mut().unwrap().map.insert(key, value); + } + + pub fn push(&mut self) { + let inner = self.inner.take().unwrap(); + self.inner = Some(InnerEnv::with_parent(inner)); + } + + pub fn pop(&mut self) { + let inner = self.inner.take().unwrap(); + self.inner = Some(*inner.parent.unwrap()); + } + + /// Look up the key within the environment. + /// + /// If a key exists in multiple layers at once, the top-most one (of the + /// stack) will shadow all the others. + /// + /// TODO: Make a "lookup" function that traverses all environments. + pub fn lookup(&self, key: &K) -> Option<&V> { + self.inner.as_ref().unwrap().lookup(key) + } +} + #[derive(Debug)] struct InnerEnv { parent: Option>>, @@ -37,29 +73,3 @@ impl InnerEnv { } } } - -impl LayeredEnv { - pub fn new() -> Self { - LayeredEnv { - inner: Some(InnerEnv::new()), - } - } - - pub fn insert(&mut self, key: K, value: V) { - self.inner.as_mut().unwrap().map.insert(key, value); - } - - pub fn push(&mut self) { - let inner = self.inner.take().unwrap(); - self.inner = Some(InnerEnv::with_parent(inner)); - } - - pub fn pop(&mut self) { - let inner = self.inner.take().unwrap(); - self.inner = Some(*inner.parent.unwrap()); - } - - pub fn lookup(&self, key: &K) -> Option<&V> { - self.inner.as_ref().unwrap().lookup(key) - } -} diff --git a/std/prelude.e0 b/std/prelude.e0 new file mode 100644 index 0000000..e69de29