Lots of work on revamping the type system for structs, also stubs for future pieces

This commit is contained in:
Michael Zhang 2022-07-19 01:30:25 -05:00
parent f7e4bfa8cf
commit 215683173e
Signed by: michael
GPG key ID: BDA47A31A3C8EE6B
34 changed files with 1292 additions and 647 deletions

17
.build.yml Normal file
View file

@ -0,0 +1,17 @@
image: archlinux
packages:
- rust
- rsync
sources:
- https://git.sr.ht/~mzhang/e0
secrets:
- 0b26b413-7901-41c3-a4e2-3c752228ffcb
tasks:
- test: |
cd e0
cargo test --all
- doc-upload: |
cd e0
cargo doc --workspace --no-deps
echo "mzhang.io ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBzBZ+QmM4EO3Fwc1ZcvWV2IY9VF04T0H9brorGj9Udp" >> ~/.ssh/known_hosts
rsync -azvrP target/doc/ sourcehutBuilds@mzhang.io:/mnt/storage/svcdata/blog-public/e0

92
Cargo.lock generated
View file

@ -109,6 +109,16 @@ dependencies = [
"os_str_bytes", "os_str_bytes",
] ]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]] [[package]]
name = "crunchy" name = "crunchy"
version = "0.2.2" version = "0.2.2"
@ -148,11 +158,17 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"codespan-reporting",
"frunk",
"lalrpop", "lalrpop",
"lalrpop-util", "lalrpop-util",
"tempfile", "tempfile",
] ]
[[package]]
name = "e0-rt"
version = "0.1.0"
[[package]] [[package]]
name = "either" name = "either"
version = "1.7.0" version = "1.7.0"
@ -183,6 +199,70 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "frunk"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cd67cf7d54b7e72d0ea76f3985c3747d74aee43e0218ad993b7903ba7a5395e"
dependencies = [
"frunk_core",
"frunk_derives",
"frunk_proc_macros",
]
[[package]]
name = "frunk_core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1246cf43ec80bf8b2505b5c360b8fb999c97dabd17dbb604d85558d5cbc25482"
[[package]]
name = "frunk_derives"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dbc4f084ec5a3f031d24ccedeb87ab2c3189a2f33b8d070889073837d5ea09e"
dependencies = [
"frunk_proc_macro_helpers",
"quote",
"syn",
]
[[package]]
name = "frunk_proc_macro_helpers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99f11257f106c6753f5ffcb8e601fb39c390a088017aaa55b70c526bff15f63e"
dependencies = [
"frunk_core",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "frunk_proc_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a078bd8459eccbb85e0b007b8f756585762a72a9efc53f359b371c3b6351dbcc"
dependencies = [
"frunk_core",
"frunk_proc_macros_impl",
"proc-macro-hack",
]
[[package]]
name = "frunk_proc_macros_impl"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ffba99f0fa4f57e42f57388fbb9a0ca863bc2b4261f3c5570fed579d5df6c32"
dependencies = [
"frunk_core",
"frunk_proc_macro_helpers",
"proc-macro-hack",
"quote",
"syn",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.7" version = "0.2.7"
@ -402,6 +482,12 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.40" version = "1.0.40"
@ -595,6 +681,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
[[package]]
name = "unicode-width"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
[[package]] [[package]]
name = "unicode-xid" name = "unicode-xid"
version = "0.2.3" version = "0.2.3"

View file

@ -3,6 +3,9 @@ name = "e0"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[workspace]
members = ["rt"]
[[bin]] [[bin]]
name = "e0c" name = "e0c"
path = "./bin/e0c.rs" path = "./bin/e0c.rs"
@ -14,6 +17,8 @@ path = "./bin/e0pkg.rs"
[dependencies] [dependencies]
anyhow = "1.0.56" anyhow = "1.0.56"
clap = { version = "3.1.8", features = ["derive"] } clap = { version = "3.1.8", features = ["derive"] }
codespan-reporting = "0.11.1"
frunk = "0.4.0"
lalrpop-util = { version = "0.19.7", features = ["lexer"] } lalrpop-util = { version = "0.19.7", features = ["lexer"] }
tempfile = "3.3.0" tempfile = "3.3.0"

14
README.md Normal file
View file

@ -0,0 +1,14 @@
# e0: Experimental Language #0
`e0` is an experimental language created for the purpose of practicing making a
simple language ecosystem. The language is strongly typed but with only a
limited set of primitives, and compiles down to LLVM IR.
Compilation process
-------------------
```
e0 source code => LLVM IR (text) => clang => link against e0-rt => binary
```
Currently clang is required as there are no plans for custom codegen yet.

View file

@ -1,12 +1,19 @@
use std::fs::{self, File}; use std::fs::{self, File};
use std::io::Write; use std::io::Write;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::ExitCode;
use anyhow::Result; use anyhow::Result;
use clap::Parser; use clap::Parser;
use e0::codegen::CodegenBackend; use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::files::SimpleFiles;
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
use codespan_reporting::term::{self, Config as CodespanConfig};
use e0::ast::typeck_bidi::TypeChecker;
use e0::codegen::llvm_ir::LlvmIrCodegen; use e0::codegen::llvm_ir::LlvmIrCodegen;
use e0::codegen::CodegenBackend;
use e0::parser::ProgramParser; use e0::parser::ProgramParser;
use lalrpop_util::ParseError;
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
struct Opt { struct Opt {
@ -21,15 +28,45 @@ struct Opt {
emit_ast: Option<PathBuf>, emit_ast: Option<PathBuf>,
} }
fn main() -> Result<()> { fn main() -> Result<ExitCode> {
let opts = Opt::parse(); let opts = Opt::parse();
// Set up reporting
let mut files = SimpleFiles::new();
let writer = StandardStream::stderr(ColorChoice::Always);
let config = CodespanConfig::default();
let contents = fs::read_to_string(&opts.path)?; let contents = fs::read_to_string(&opts.path)?;
let file_id = files.add(opts.path.display().to_string(), &contents);
let parser = ProgramParser::new(); let parser = ProgramParser::new();
let ast = parser.parse(&contents).unwrap(); let ast = match parser.parse(&contents) {
Ok(v) => v,
Err(err) => {
let loc = match err {
ParseError::InvalidToken { location }
| ParseError::UnrecognizedEOF { location, .. } => (location, location),
ParseError::UnrecognizedToken { ref token, .. }
| ParseError::ExtraToken { ref token } => (token.0, token.2),
_ => todo!(),
};
let diagnostic = Diagnostic::error()
.with_labels(vec![Label::primary(file_id, loc.0..loc.1)])
.with_message(err.to_string());
term::emit(&mut writer.lock(), &config, &files, &diagnostic)?;
return Ok(ExitCode::FAILURE);
}
};
let typed_ast = e0::ast::typed::convert(ast)?; let type_checker = TypeChecker::default();
let typed_ast = match type_checker.convert(ast) {
Ok(v) => v,
Err(err) => {
let diagnostic = Diagnostic::error().with_message(err.to_string());
term::emit(&mut writer.lock(), &config, &files, &diagnostic)?;
return Ok(ExitCode::FAILURE);
}
};
if let Some(path) = opts.emit_ast { if let Some(path) = opts.emit_ast {
let mut file = File::create(&path)?; let mut file = File::create(&path)?;
@ -53,5 +90,5 @@ fn main() -> Result<()> {
// println!("Emitted."); // println!("Emitted.");
// } // }
Ok(()) Ok(ExitCode::SUCCESS)
} }

1
docs/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
book

6
docs/book.toml Normal file
View file

@ -0,0 +1,6 @@
[book]
authors = ["Michael Zhang"]
language = "en"
multilingual = false
src = "src"
title = "e0 Reference"

3
docs/src/SUMMARY.md Normal file
View file

@ -0,0 +1,3 @@
# Summary
- [Chapter 1](./chapter_1.md)

1
docs/src/chapter_1.md Normal file
View file

@ -0,0 +1 @@
# Chapter 1

View file

@ -3,11 +3,14 @@ type IntPair = struct {
snd: int, snd: int,
} }
fn main() -> int { fn getX() -> IntPair {
let x = IntPair { return new IntPair {
fst: 4, fst: 4,
snd: 6, snd: 6,
}; };
}
fn main() -> int {
let x = getX();
return x.fst + x.snd; return x.fst + x.snd;
} }

View file

@ -22,9 +22,17 @@
cargo-edit cargo-edit
cargo-watch cargo-watch
clangUseLLVM clangUseLLVM
(toolchain.withComponents [ "clippy" "rustc" "rust-src" ]) mdbook
(toolchain.withComponents [
"cargo"
"clippy"
"rustc"
"rust-src"
"rustfmt"
])
]; ];
inputsFrom = with myPkgs; [ e0 ]; inputsFrom = with myPkgs; [ e0 ];
CARGO_UNSTABLE_SPARSE_REGISTRY = "true"; CARGO_UNSTABLE_SPARSE_REGISTRY = "true";
}; };

9
rt/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "e0-rt"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]

1
rt/src/aarch64/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod syscall;

View file

8
rt/src/gc.rs Normal file
View file

@ -0,0 +1,8 @@
//! Simple concurrent mark-and-sweep garbage collector.
/// Placeholder for the collector's allocator state; it declares no fields yet.
struct Allocator {
}
/// Exported (unmangled, C ABI) initialization hook for the garbage collector.
///
/// Currently a stub: the body is empty, so calling it has no effect.
#[no_mangle]
pub extern "C" fn __e0_gc_init() {
}

23
rt/src/lib.rs Normal file
View file

@ -0,0 +1,23 @@
#![feature(lang_items)]
#![cfg_attr(not(test), no_std)]
#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(target_arch = "x86_64")]
mod x86_64;
pub mod gc;
pub mod start;
use core::panic::PanicInfo;
/// Panic handler used for non-test builds, where the crate is `no_std`.
///
/// The panic information is ignored and the handler spins forever, because it
/// must never return.
#[cfg(not(test))]
#[panic_handler]
fn panic(_panic: &PanicInfo<'_>) -> ! {
loop {}
}
/// Empty `eh_personality` lang item, defined only for non-test builds.
///
/// NOTE(review): presumably needed because the crate is `no_std` — confirm it
/// is still required on the targeted toolchain.
#[cfg(not(test))]
#[lang = "eh_personality"]
extern "C" fn eh_personality() {}

4
rt/src/start.rs Normal file
View file

@ -0,0 +1,4 @@
/// Exported (unmangled, C ABI) runtime initialization hook.
///
/// Currently a stub: nothing is executed yet; the comment in the body records
/// the intended first step.
#[no_mangle]
pub extern "C" fn __e0_rt_init() {
// Set up thread-local storage
}

12
rt/src/x86_64/linux.rs Normal file
View file

@ -0,0 +1,12 @@
use core::mem::size_of;
/// C-layout argument record taken by [`clone3`]; only `flags` is declared so far.
///
/// NOTE(review): presumably meant to mirror the kernel's `struct clone_args` —
/// confirm and add the remaining fields before using it for a real system call.
#[repr(C)]
pub struct CloneArgs {
/// Flag bits for the clone operation.
pub flags: u64,
}
/// Stub for a `clone3` wrapper.
///
/// It only computes the size of [`CloneArgs`] and discards it; no system call
/// is issued and the `flags` argument is never read.
#[no_mangle]
#[inline(always)]
pub extern "C" fn clone3(flags: CloneArgs) {
// Computed but not used yet.
let size = size_of::<CloneArgs>();
}

4
rt/src/x86_64/mod.rs Normal file
View file

@ -0,0 +1,4 @@
pub mod syscall;
#[cfg(target_os = "linux")]
pub mod linux;

45
rt/src/x86_64/syscall.rs Normal file
View file

@ -0,0 +1,45 @@
use core::arch::asm;
/// Issues system call number `n` with no arguments.
///
/// Returns whatever value the kernel leaves in `rax`, unmodified.
#[no_mangle]
#[inline(always)]
pub extern "C" fn __syscall0(n: u64) -> u64 {
    let ret: u64;
    // SAFETY: the operand list declares every register this sequence touches
    // (`rax` in/out, `rcx` and `r11` clobbered). Whether the call is otherwise
    // sound depends on which system call `n` selects.
    unsafe {
        asm!(
            "syscall",
            // `rax` carries the call number in and the result out.
            inlateout("rax") n => ret,
            out("rcx") _,
            out("r11") _,
        );
    }
    ret
}
/// Issues system call number `n` with two arguments, `a1` and `a2`.
///
/// The arguments are passed in `rdi` and `rsi`; the value the kernel leaves in
/// `rax` is returned unmodified.
#[no_mangle]
#[inline(always)]
pub extern "C" fn __syscall2(n: u64, a1: u64, a2: u64) -> u64 {
    let ret: u64;
    // SAFETY: the operand list declares every register this sequence touches
    // (`rax` in/out, `rdi`/`rsi` inputs, `rcx` and `r11` clobbered). Whether
    // the call is otherwise sound depends on the system call and its arguments.
    unsafe {
        asm!(
            "syscall",
            // `rax` carries the call number in and the result out.
            inlateout("rax") n => ret,
            in("rdi") a1,
            in("rsi") a2,
            out("rcx") _,
            out("r11") _,
        );
    }
    ret
}
// Smoke test for the raw system-call wrappers.
#[cfg(test)]
mod tests {
use super::__syscall0;
// Only checks that issuing the call does not crash; the printed result is
// not asserted against anything.
// NOTE(review): 102 is presumably `getuid` on x86_64 Linux — confirm.
#[test]
fn run() {
println!("{}", __syscall0(102));
}
}

View file

@ -1,28 +1,44 @@
pub mod typed; // pub mod typeck;
pub mod typeck_bidi;
pub mod types;
use self::types::Type;
#[derive(Debug)] #[derive(Debug)]
pub enum Decl<T> { pub enum Decl<T> {
Func(Func<T>), Func(Func<T>),
TypeDef(TypeDef<T>),
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Func<T> { pub struct Func<T> {
pub name: String, pub name: String,
pub args: Vec<Arg>, pub args: Vec<Arg<T>>,
pub return_ty: Type, pub return_ty: T,
pub stmts: Vec<Stmt<T>>, pub stmts: Vec<Stmt<T>>,
}
#[derive(Debug)]
pub struct TypeDef<T> {
pub name: String,
pub def: T,
}
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Arg<T> {
pub name: String,
pub ty: T, pub ty: T,
} }
#[derive(Debug, Clone)] #[derive(Debug)]
pub struct Arg { pub struct NameValue<T> {
pub name: String, pub name: String,
pub ty: Type, pub expr: Expr<T>,
} }
#[derive(Debug)] #[derive(Debug)]
pub enum Stmt<T> { pub enum Stmt<T> {
Let(String, Option<Type>, Expr<T>), Let(String, T, Expr<T>),
Return(Option<Expr<T>>), Return(Option<Expr<T>>),
IfElse(IfElse<T>), IfElse(IfElse<T>),
} }
@ -52,6 +68,9 @@ pub enum ExprKind<T> {
Var(String), Var(String),
BinOp(Box<Expr<T>>, Op, Box<Expr<T>>), BinOp(Box<Expr<T>>, Op, Box<Expr<T>>),
Call(String, Vec<Expr<T>>), Call(String, Vec<Expr<T>>),
StructNew(String, Vec<NameValue<T>>),
FieldAccess(Box<Expr<T>>, String),
} }
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
@ -73,12 +92,3 @@ impl Op {
} }
} }
} }
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub enum Type {
Int,
Bool,
StructInst(Vec<(String, Type)>),
Func(Vec<Type>, Box<Type>),
}

600
src/ast/typeck.rs Normal file
View file

@ -0,0 +1,600 @@
//! Hindley-Milner-shaped type-checker.
use std::collections::{HashMap, HashSet};
use anyhow::Result;
use crate::{ast::Expr, utils::LayeredEnv};
use super::{
types::{TypeS, TypeTC},
Arg, NameValue,
};
use super::{Decl, ElseClause, ExprKind, Func, IfElse, Op, Stmt, Type};
type Assignments = HashMap<usize, TypeTC>;
/// State of the constraint-based type checker.
#[derive(Default)]
pub struct TypeChecker {
/// Types assigned to type variables so far, keyed by variable id.
assignments: Assignments,
/// Next fresh type-variable id handed out by `gen_int`.
ctr: usize,
/// Field lists of the struct typedefs registered by `convert`, keyed by typedef name.
struct_env: LayeredEnv<String, Vec<Arg<TypeTC>>>,
/// Types of the names in scope (top-level functions, arguments, `let` bindings).
type_env: LayeredEnv<String, TypeTC>,
/// Constraints recorded by `constrain` and `require_field`.
constraints: HashSet<Constraint2>,
}
impl TypeChecker {
/// Convert an AST with source types to an AST with final types.
///
/// Works in two phases:
///
/// 1. Register every struct typedef, then every top-level function signature.
///    Structs go first so a signature may name a struct declared later in the file.
/// 2. For each function: annotate its body, solve the constraints gathered while
///    doing so, and substitute the solution back into the function.
///
/// Only functions are emitted; type definitions are not carried into the result.
///
/// # Errors
///
/// Fails when annotation, unification or the final substitution fails (for
/// example on mismatching types or an unsolved type variable).
///
/// NOTE(review): `return` expressions are not yet constrained against the
/// function's declared return type, and `struct_env` is used without a prior
/// `push()` — confirm `LayeredEnv::insert` tolerates that.
pub fn convert(mut self, ast: Vec<Decl<TypeS>>) -> Result<Vec<Decl<Type>>> {
    // First pass, gather all of the type signatures in the top level
    self.type_env.push();

    // Struct definitions first, so signatures can refer to them regardless of order.
    for decl in ast.iter() {
        if let Decl::TypeDef(typedef) = decl {
            if let TypeS::Struct(strct) = &typedef.def {
                let strct = strct
                    .iter()
                    .map(|arg| Arg {
                        name: arg.name.clone(),
                        ty: self.convert_TypeS_to_TypeTC(arg.ty.clone()),
                    })
                    .collect();
                self.struct_env.insert(typedef.name.clone(), strct);
            }
        }
    }

    for decl in ast.iter() {
        if let Decl::Func(func) = decl {
            let args_ty = func.args.iter().map(|arg| arg.ty.clone()).collect();
            let ty = TypeS::Func(args_ty, Box::new(func.return_ty.clone()));
            let ty = self.convert_TypeS_to_TypeTC(ty);
            self.type_env.insert(func.name.clone(), ty);
        }
    }

    // Now, type-check each function separately
    let mut new_decl = Vec::new();
    for decl in ast.iter() {
        let func = match decl {
            Decl::Func(func) => func,
            Decl::TypeDef(_) => continue,
        };

        self.type_env.push();

        // Convert every argument exactly once, so the type variable visible in
        // the body's scope is the same one recorded in the signature.
        let mut args_func = Vec::new();
        for arg in func.args.iter() {
            let ty = self.convert_TypeS_to_TypeTC(arg.ty.clone());
            self.type_env.insert(arg.name.clone(), ty.clone());
            args_func.push(Arg {
                name: arg.name.clone(),
                ty,
            });
        }

        let new_stmts = self.annotate_stmts(&func.stmts)?;
        let return_ty = self.convert_TypeS_to_TypeTC(func.return_ty.clone());

        let decorated_func: Func<TypeTC> = Func {
            name: func.name.clone(),
            args: args_func,
            return_ty,
            stmts: new_stmts,
        };

        self.type_env.pop();

        // Solve the constraints that annotation actually recorded. Taking them
        // out of `self` also leaves a clean slate for the next function.
        let constraints = std::mem::take(&mut self.constraints);
        self.unify_constraints(&constraints)?;

        println!("Func: {:?}", decorated_func);
        println!("Constraints:");
        for constraint in constraints.iter() {
            println!("- {:?}", constraint);
        }
        println!("Assignments:");
        for (num, ty) in self.assignments.iter() {
            println!("- {} : {:?}", num, ty);
        }

        let typed_func = self.substitute_in_func(decorated_func)?;
        new_decl.push(Decl::Func(typed_func));
    }

    self.type_env.pop();
    Ok(new_decl)
}
/// Lower a source-level type into the representation used during type-checking.
///
/// `Infer`, and any name with no registered struct, becomes a fresh type
/// variable. A named struct expands to its registered field list. Function and
/// struct types are converted component by component, in declaration order.
#[allow(non_snake_case)]
fn convert_TypeS_to_TypeTC(&mut self, ty: TypeS) -> TypeTC {
    match ty {
        TypeS::Unit => TypeTC::Unit,
        TypeS::Int => TypeTC::Int,
        TypeS::Bool => TypeTC::Bool,
        TypeS::Infer => self.type_var(),
        TypeS::Named(name) => {
            if let Some(fields) = self.struct_env.lookup(&name) {
                return TypeTC::Struct(fields.clone());
            }
            self.type_var()
        }
        TypeS::Func(args, ret) => {
            let mut arg_tys = Vec::new();
            for arg in args {
                arg_tys.push(self.convert_TypeS_to_TypeTC(arg));
            }
            let ret_ty = self.convert_TypeS_to_TypeTC(*ret);
            TypeTC::Func(arg_tys, Box::new(ret_ty))
        }
        TypeS::Struct(fields) => {
            let mut converted = Vec::new();
            for field in fields {
                converted.push(Arg {
                    name: field.name,
                    ty: self.convert_TypeS_to_TypeTC(field.ty),
                });
            }
            TypeTC::Struct(converted)
        }
    }
}
/// Turn a type-checking type into a final type by following the recorded
/// assignments of its type variables.
///
/// # Errors
///
/// Fails with "Unsolved constraint variable" when a type variable has no
/// assignment.
#[allow(non_snake_case)]
fn convert_TypeTC_to_Type(&self, ty: &TypeTC) -> Result<Type> {
    let resolved = match ty {
        TypeTC::Unit => Type::Unit,
        TypeTC::Int => Type::Int,
        TypeTC::Bool => Type::Bool,
        TypeTC::Var(n) => match self.assignments.get(n) {
            Some(assigned) => self.convert_TypeTC_to_Type(assigned)?,
            None => bail!("Unsolved constraint variable {n}"),
        },
        TypeTC::Func(args, ret) => {
            let mut arg_tys = Vec::new();
            for arg in args {
                arg_tys.push(self.convert_TypeTC_to_Type(arg)?);
            }
            let ret_ty = self.convert_TypeTC_to_Type(ret)?;
            Type::Func(arg_tys, Box::new(ret_ty))
        }
        TypeTC::Struct(fields) => {
            let mut converted = Vec::new();
            for field in fields {
                converted.push(Arg {
                    name: field.name.clone(),
                    ty: self.convert_TypeTC_to_Type(&field.ty)?,
                });
            }
            Type::Struct(converted)
        }
    };
    Ok(resolved)
}
/// Annotate a block of statements with type-checking types.
///
/// The block gets its own scope in `type_env`. Each `let` introduces its name
/// into that scope *after* its initializer has been annotated, and the
/// binding's type is constrained to equal the initializer's type so that
/// un-annotated bindings can be inferred.
///
/// # Errors
///
/// Propagates any error from annotating a nested expression or `if`/`else`.
fn annotate_stmts(
    &mut self,
    stmts: impl AsRef<[Stmt<TypeS>]>,
) -> Result<Vec<Stmt<TypeTC>>> {
    let stmts = stmts.as_ref();
    let mut new_stmts = Vec::new();

    self.type_env.push();
    for stmt in stmts.iter() {
        match stmt {
            Stmt::Return(ret_val) => {
                let new_stmt = match ret_val {
                    Some(v) => Some(self.annotate_expr(v)?),
                    None => None,
                };
                new_stmts.push(Stmt::Return(new_stmt));
            }
            Stmt::Let(name, ty, body) => {
                let ty = self.convert_TypeS_to_TypeTC(ty.clone());
                let body = self.annotate_expr(body)?;
                // Without this, the variable introduced for an inferred `let`
                // is never related to anything and stays unsolved.
                self.constrain(ty.clone(), body.ty.clone());
                self.type_env.insert(name.to_owned(), ty.clone());
                new_stmts.push(Stmt::Let(name.clone(), ty, body));
            }
            Stmt::IfElse(if_else) => {
                let new_stmt = Stmt::IfElse(self.annotate_if_else(if_else)?);
                new_stmts.push(new_stmt);
            }
        }
    }
    self.type_env.pop();

    Ok(new_stmts)
}
fn annotate_expr(&mut self, expr: &Expr<TypeS>) -> Result<Expr<TypeTC>> {
Ok(match &expr.kind {
ExprKind::Int(n) => Expr {
kind: ExprKind::Int(*n),
ty: TypeTC::Int,
},
ExprKind::Var(name) => {
let ty = match self.type_env.lookup(name) {
Some(v) => v.clone(),
None => self.type_var(),
};
Expr {
kind: ExprKind::Var(name.clone()),
ty,
}
}
ExprKind::BinOp(left, op, right) => {
let left = self.annotate_expr(left)?;
let right = self.annotate_expr(right)?;
let output = self.type_var();
op.constraints(self, &left.ty, &right.ty, &output);
Expr {
kind: ExprKind::BinOp(Box::new(left), *op, Box::new(right)),
ty: output,
}
}
ExprKind::Call(func_name, args) => {
let mut args_annot = Vec::new();
let ret_ty = self.type_var();
let (func_args_ty, func_ret_ty) = match self.type_env.lookup(func_name)
{
Some(TypeTC::Func(args, ret)) => (args.clone(), *ret.clone()),
Some(_) => bail!("Not a function"),
None => bail!("Name not found"),
};
self.constrain(ret_ty.clone(), func_ret_ty);
for (arg, expected_ty) in args.iter().zip(func_args_ty.iter()) {
let arg_annot = self.annotate_expr(arg)?;
self.constrain(arg_annot.ty.clone(), expected_ty.clone());
args_annot.push(arg_annot);
}
Expr {
kind: ExprKind::Call(func_name.to_string(), args_annot),
ty: ret_ty,
}
}
ExprKind::StructNew(struct_ref, values) => {
let struct_def = match self.struct_env.lookup(struct_ref) {
Some(v) => v.clone(),
None => bail!("No struct named {struct_ref:?}"),
};
let mut values2 = Vec::new();
for (arg, value) in struct_def.iter().zip(values.iter()) {
let ty = self.type_var();
let expr = self.annotate_expr(&value.expr)?;
self.constrain(arg.ty.clone(), ty.clone());
values2.push(NameValue {
name: value.name.clone(),
expr,
});
}
Expr {
kind: ExprKind::StructNew(struct_ref.to_owned(), values2),
ty: TypeTC::Struct(struct_def.to_vec()),
}
}
ExprKind::FieldAccess(expr, field_name) => {
let struct_ty = self.type_var();
let field_ty = self.type_var();
let expr = self.annotate_expr(&expr)?;
self.require_field(struct_ty, field_name, field_ty.clone());
Expr {
kind: ExprKind::FieldAccess(Box::new(expr), field_name.to_owned()),
ty: field_ty,
}
}
})
}
/// Annotate an `if`/`else` chain: first the condition, then the body, then the
/// optional `else` clause (either a nested `if` or a plain block).
///
/// # Errors
///
/// Propagates the first error from annotating any of those parts.
fn annotate_if_else(
    &mut self,
    if_else: &IfElse<TypeS>,
) -> Result<IfElse<TypeTC>> {
    let cond = self.annotate_expr(&if_else.cond)?;
    let body = self.annotate_stmts(&if_else.body)?;

    let else_clause = if let Some(clause) = &if_else.else_clause {
        Some(match clause {
            ElseClause::If(nested) => {
                ElseClause::If(Box::new(self.annotate_if_else(nested)?))
            }
            ElseClause::Body(stmts) => ElseClause::Body(self.annotate_stmts(stmts)?),
        })
    } else {
        None
    };

    Ok(IfElse {
        cond,
        body,
        else_clause,
    })
}
/// Rebuild an expression node with every nested expression converted to final
/// types; the node's own shape, names and operators are kept as they are.
///
/// # Errors
///
/// Propagates the first failure from converting a nested expression.
fn substitute_in_expr_kind(
    &self,
    expr_kind: ExprKind<TypeTC>,
) -> Result<ExprKind<Type>> {
    let substituted = match expr_kind {
        ExprKind::Int(n) => ExprKind::Int(n),
        ExprKind::Var(name) => ExprKind::Var(name),
        ExprKind::BinOp(left, op, right) => {
            let left = Box::new(self.substitute_in_expr(*left)?);
            let right = Box::new(self.substitute_in_expr(*right)?);
            ExprKind::BinOp(left, op, right)
        }
        ExprKind::Call(func, args) => {
            let mut typed_args = Vec::new();
            for arg in args {
                typed_args.push(self.substitute_in_expr(arg)?);
            }
            ExprKind::Call(func, typed_args)
        }
        ExprKind::StructNew(struct_ref, values) => {
            let mut typed_values = Vec::new();
            for nv in values {
                typed_values.push(NameValue {
                    name: nv.name,
                    expr: self.substitute_in_expr(nv.expr)?,
                });
            }
            ExprKind::StructNew(struct_ref, typed_values)
        }
        ExprKind::FieldAccess(expr, field_name) => {
            let receiver = Box::new(self.substitute_in_expr(*expr)?);
            ExprKind::FieldAccess(receiver, field_name)
        }
    };
    Ok(substituted)
}
/// Convert one annotated expression to final types: its node first, then its
/// own type.
///
/// # Errors
///
/// Fails if either conversion fails.
fn substitute_in_expr(&self, expr: Expr<TypeTC>) -> Result<Expr<Type>> {
    let kind = self.substitute_in_expr_kind(expr.kind)?;
    let ty = self.convert_TypeTC_to_Type(&expr.ty)?;
    Ok(Expr { kind, ty })
}
/// Convert an annotated `if`/`else` chain to final types: condition, body, then
/// the optional `else` clause.
///
/// # Errors
///
/// Propagates the first failure from converting any of those parts.
fn substitute_in_if_else(
    &self,
    if_else: IfElse<TypeTC>,
) -> Result<IfElse<Type>> {
    let cond = self.substitute_in_expr(if_else.cond)?;
    let body = self.substitute_in_stmts(if_else.body)?;

    let else_clause = if let Some(clause) = if_else.else_clause {
        Some(match clause {
            ElseClause::If(nested) => {
                ElseClause::If(Box::new(self.substitute_in_if_else(*nested)?))
            }
            ElseClause::Body(stmts) => ElseClause::Body(self.substitute_in_stmts(stmts)?),
        })
    } else {
        None
    };

    Ok(IfElse {
        cond,
        body,
        else_clause,
    })
}
/// Convert a list of annotated statements to final types, in order.
///
/// # Errors
///
/// Stops at, and returns, the first conversion failure.
fn substitute_in_stmts(
    &self,
    stmts: Vec<Stmt<TypeTC>>,
) -> Result<Vec<Stmt<Type>>> {
    let mut typed = Vec::new();
    for stmt in stmts {
        let converted = match stmt {
            Stmt::Let(name, ty, body) => {
                let ty = self.convert_TypeTC_to_Type(&ty)?;
                let body = self.substitute_in_expr(body)?;
                Stmt::Let(name, ty, body)
            }
            Stmt::Return(None) => Stmt::Return(None),
            Stmt::Return(Some(value)) => {
                Stmt::Return(Some(self.substitute_in_expr(value)?))
            }
            Stmt::IfElse(if_else) => Stmt::IfElse(self.substitute_in_if_else(if_else)?),
        };
        typed.push(converted);
    }
    Ok(typed)
}
/// Convert an annotated function to final types: arguments, return type, then
/// the body statements.
///
/// # Errors
///
/// Propagates the first conversion failure.
fn substitute_in_func(&self, func: Func<TypeTC>) -> Result<Func<Type>> {
    let mut args = Vec::new();
    for arg in func.args {
        args.push(Arg {
            name: arg.name,
            ty: self.convert_TypeTC_to_Type(&arg.ty)?,
        });
    }

    let return_ty = self.convert_TypeTC_to_Type(&func.return_ty)?;
    let stmts = self.substitute_in_stmts(func.stmts)?;

    Ok(Func {
        name: func.name,
        args,
        return_ty,
        stmts,
    })
}
fn substitute_types(&mut self, ty: &TypeTC) -> TypeTC {
match ty {
TypeTC::Var(n) => match self.assignments.get(&n) {
Some(ty2) => ty2.clone(),
None => ty.clone(),
},
TypeTC::Func(args, ret) => {
let args = args
.into_iter()
.map(|arg| self.substitute_types(arg))
.collect();
let ret = self.substitute_types(&*ret);
TypeTC::Func(args, Box::new(ret))
}
TypeTC::Struct(fields) => TypeTC::Struct(fields.to_vec()),
TypeTC::Unit | TypeTC::Int | TypeTC::Bool => ty.clone(),
}
}
fn unify_constraints(
&mut self,
constraints: &HashSet<Constraint2>,
) -> Result<()> {
for constraint in constraints.iter() {
match constraint {
Constraint2::Eq(left, right) => {
let left = self.substitute_types(left);
let right = self.substitute_types(right);
self.unify_single(left, right)?;
}
Constraint2::HasField(struct_ty, field_name, field_ty) => {
let struct_ty = self.substitute_types(struct_ty);
let field_ty = self.substitute_types(field_ty);
match struct_ty {
TypeTC::Struct(fields) => {}
TypeTC::Var(n) => {}
_ => bail!(
"Expected struct with field {field_name}, got {struct_ty:?}"
),
}
}
}
}
Ok(())
}
fn unify_single(&mut self, left: TypeTC, right: TypeTC) -> Result<()> {
match (left, right) {
// Unify basic types
(TypeTC::Unit, TypeTC::Unit)
| (TypeTC::Int, TypeTC::Int)
| (TypeTC::Bool, TypeTC::Bool) => {}
// Unify variables
(TypeTC::Var(n), o) | (o, TypeTC::Var(n)) => {
self.assignments.insert(n, o);
}
// Unify functions
(
TypeTC::Func(left_args, left_ret),
TypeTC::Func(right_args, right_ret),
) => {
let mut new_constraints = HashSet::new();
for (left_arg, right_arg) in
left_args.into_iter().zip(right_args.into_iter())
{
new_constraints.insert(Constraint2::Eq(left_arg, right_arg));
}
new_constraints.insert(Constraint2::Eq(*left_ret, *right_ret));
self.unify_constraints(&new_constraints)?;
}
(left, right) => bail!("Mismatching types {left:?} vs. {right:?}"),
};
Ok(())
}
/// Create a fresh type variable, using the next id from `gen_int`.
fn type_var(&mut self) -> TypeTC {
TypeTC::Var(self.gen_int())
}
/// Return the current counter value and advance the counter by one.
fn gen_int(&mut self) -> usize {
let id = self.ctr;
self.ctr += 1;
id
}
/// Record that `left` and `right` must be the same type.
///
/// Nothing is recorded when the two are already syntactically equal.
fn constrain(&mut self, left: TypeTC, right: TypeTC) {
    if left != right {
        self.constraints.insert(Constraint2::Eq(left, right));
    }
}
/// Record that type `left` must have a field called `right_name` whose type is
/// `right_ty`.
fn require_field(
&mut self,
left: TypeTC,
right_name: &str,
right_ty: TypeTC,
) {
self.constraints.insert(Constraint2::HasField(
left,
right_name.to_owned(),
right_ty,
));
}
}
impl Op {
    /// Record the typing constraints of applying this operator.
    ///
    /// Both operands must be `Int`. The result is `Int` for `Plus` and `Bool`
    /// for the comparison operators.
    fn constraints(
        &self,
        tc: &mut TypeChecker,
        left: &TypeTC,
        right: &TypeTC,
        output: &TypeTC,
    ) {
        // Only the result type depends on the operator.
        let result_ty = match self {
            Op::Plus => TypeTC::Int,
            Op::LessThan | Op::GreaterThan => TypeTC::Bool,
        };

        tc.constrain(left.clone(), TypeTC::Int);
        tc.constrain(right.clone(), TypeTC::Int);
        tc.constrain(output.clone(), result_ty);
    }
}
/// A pair of types.
///
/// NOTE(review): the checker in this file records `Constraint2` values instead;
/// this type looks like a leftover — confirm before removing.
#[derive(Debug, Hash, Eq, PartialEq)]
struct Constraint(TypeTC, TypeTC);
/// A requirement recorded while annotating a function body.
#[derive(Debug, Hash, Eq, PartialEq)]
enum Constraint2 {
/// The two types must be equal.
Eq(TypeTC, TypeTC),
/// The first type must have a field with the given name and type.
HasField(TypeTC, String, TypeTC),
}

149
src/ast/typeck_bidi.rs Normal file
View file

@ -0,0 +1,149 @@
use std::collections::{HashSet, VecDeque};
use anyhow::Result;
use crate::utils::LayeredEnv;
use super::types::{Type, TypeS, TypeTC};
use super::{Arg, Decl, Expr, ExprKind, Stmt};
/// State of the bidirectional type checker (work in progress).
#[derive(Default)]
pub struct TypeChecker {
/// Next fresh type-variable id handed out by `gen_int`.
ctr: usize,
/// Types of the names in scope; `convert` registers top-level functions here.
type_env: LayeredEnv<String, TypeTC>,
/// Field lists of the struct typedefs registered by `convert`, keyed by typedef name.
struct_env: LayeredEnv<String, Vec<Arg<TypeTC>>>,
}
impl TypeChecker {
/// Type-check a source AST (work in progress).
///
/// The first pass registers every top-level function signature and struct
/// typedef. The second pass is still a stub: it walks each function's
/// statements without checking them, and the function then ends in `todo!()`,
/// so it panics instead of returning.
pub fn convert(mut self, ast: Vec<Decl<TypeS>>) -> Result<Vec<Decl<Type>>> {
// First pass, gather all of the type signatures in the top level
self.type_env.push();
for decl in ast.iter() {
match decl {
Decl::Func(func) => {
let name = func.name.clone();
let args_ty = func.args.iter().map(|arg| arg.ty.clone()).collect();
let ty = TypeS::Func(args_ty, Box::new(func.return_ty.clone()));
let ty = self.convert_TypeS_to_TypeTC(ty);
self.type_env.insert(name, ty);
}
Decl::TypeDef(typedef) => match &typedef.def {
TypeS::Struct(strct) => {
let strct = strct
.iter()
.map(|arg| Arg {
name: arg.name.clone(),
ty: self.convert_TypeS_to_TypeTC(arg.ty.clone()),
})
.collect();
self.struct_env.insert(typedef.name.clone(), strct);
}
// Typedefs of any other type are ignored.
_ => {}
},
}
}
// Now, type-check each function separately
// let mut new_decl = Vec::new();
for decl in ast.iter() {
if let Decl::Func(func) = decl {
self.type_env.push();
// Stmt queue. If there's not enough information to figure out the type,
// punt it to the back of the queue. Eventually we stitch it back
// together in order.
let mut stmts: VecDeque<_> = func.stmts.iter().enumerate().collect();
while !stmts.is_empty() {
// Currently the statement is only dequeued; inference is not wired up yet.
let next_stmt = stmts.pop_front();
// self.infer_stmt(next_stmt);
}
self.type_env.pop();
}
}
self.type_env.pop();
todo!()
}
/// Infer the types in a single statement. Not implemented yet: always panics
/// via `todo!()`.
fn infer_stmt(&mut self, stmt: Stmt<TypeS>) -> Result<Stmt<TypeTC>> {
todo!()
}
/// Infer the type of an expression.
///
/// Only integer literals are handled so far (they are `Int`); every other
/// expression kind panics via `todo!()`.
fn infer_expr(&mut self, expr: Expr<TypeS>) -> Result<Expr<TypeTC>> {
Ok(match expr.kind {
ExprKind::Int(n) => Expr {
kind: ExprKind::Int(n),
ty: TypeTC::Int,
},
ExprKind::Var(_) => todo!(),
ExprKind::BinOp(_, _, _) => todo!(),
ExprKind::Call(_, _) => todo!(),
ExprKind::StructNew(_, _) => todo!(),
ExprKind::FieldAccess(_, _) => todo!(),
})
}
fn check_expr(
&mut self,
expr: Expr<TypeS>,
ty: TypeTC,
) -> Result<Expr<TypeTC>> {
Ok(match expr.kind {
ExprKind::Int(_) => self.infer_expr(expr)?,
ExprKind::Var(_) => todo!(),
ExprKind::BinOp(_, _, _) => todo!(),
ExprKind::Call(_, _) => todo!(),
ExprKind::StructNew(_, _) => todo!(),
ExprKind::FieldAccess(_, _) => todo!(),
})
}
/// Lower a source-level type into the representation used during type-checking.
///
/// `Infer`, and any name with no registered struct, becomes a fresh type
/// variable. A named struct expands to its registered field list. Function and
/// struct types are converted component by component, in declaration order.
#[allow(non_snake_case)]
fn convert_TypeS_to_TypeTC(&mut self, ty: TypeS) -> TypeTC {
    match ty {
        TypeS::Unit => TypeTC::Unit,
        TypeS::Int => TypeTC::Int,
        TypeS::Bool => TypeTC::Bool,
        TypeS::Infer => self.type_var(),
        TypeS::Named(name) => {
            if let Some(fields) = self.struct_env.lookup(&name) {
                return TypeTC::Struct(fields.clone());
            }
            self.type_var()
        }
        TypeS::Func(args, ret) => {
            let mut arg_tys = Vec::new();
            for arg in args {
                arg_tys.push(self.convert_TypeS_to_TypeTC(arg));
            }
            let ret_ty = self.convert_TypeS_to_TypeTC(*ret);
            TypeTC::Func(arg_tys, Box::new(ret_ty))
        }
        TypeS::Struct(fields) => {
            let mut converted = Vec::new();
            for field in fields {
                converted.push(Arg {
                    name: field.name,
                    ty: self.convert_TypeS_to_TypeTC(field.ty),
                });
            }
            TypeTC::Struct(converted)
        }
    }
}
/// Create a fresh type variable, using the next id from `gen_int`.
fn type_var(&mut self) -> TypeTC {
TypeTC::Var(self.gen_int())
}
/// Return the current counter value and advance the counter by one.
fn gen_int(&mut self) -> usize {
let id = self.ctr;
self.ctr += 1;
id
}
}

View file

@ -1,532 +0,0 @@
use std::{
collections::{HashMap, HashSet},
mem,
};
use anyhow::Result;
use crate::ast::Expr;
use super::{Decl, ElseClause, ExprKind, Func, IfElse, Op, Stmt, Type};
/// Type representation used while type-checking: the same shapes as `Type`,
/// plus numbered type variables.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub enum Type_ {
/// A type variable, identified by its number.
Var(usize),
Int,
Bool,
/// A struct type, given as its (field name, field type) pairs.
StructInst(Vec<(String, Type_)>),
/// A function type: argument types and return type.
Func(Vec<Type_>, Box<Type_>),
}
impl Type_ {
    /// Lift a source `Type` into a checking type; the result contains no type
    /// variables.
    fn from_type(ty: Type) -> Self {
        match ty {
            Type::Int => Type_::Int,
            Type::Bool => Type_::Bool,
            Type::Func(args, ret) => {
                let mut lifted_args = Vec::new();
                for arg in args {
                    lifted_args.push(Type_::from_type(arg));
                }
                let lifted_ret = Type_::from_type(*ret);
                Type_::Func(lifted_args, Box::new(lifted_ret))
            }
            Type::StructInst(fields) => {
                let mut lifted_fields = Vec::new();
                for (name, field_ty) in fields {
                    lifted_fields.push((name, Type_::from_type(field_ty)));
                }
                Type_::StructInst(lifted_fields)
            }
        }
    }

    /// Resolve this checking type to a final `Type` by following `assignments`
    /// for every type variable it contains.
    ///
    /// # Errors
    ///
    /// Fails with "Unsolved constraint variable" when a type variable has no
    /// assignment.
    fn convert(&self, assignments: &Assignments) -> Result<Type> {
        let resolved = match self {
            Type_::Int => Type::Int,
            Type_::Bool => Type::Bool,
            Type_::Var(n) => match assignments.get(n) {
                Some(assigned) => assigned.convert(assignments)?,
                None => bail!("Unsolved constraint variable {n}"),
            },
            Type_::Func(args, ret) => {
                let mut resolved_args = Vec::new();
                for arg in args {
                    resolved_args.push(arg.convert(assignments)?);
                }
                let resolved_ret = ret.convert(assignments)?;
                Type::Func(resolved_args, Box::new(resolved_ret))
            }
            Type_::StructInst(fields) => {
                let mut resolved_fields = Vec::new();
                for (name, field_ty) in fields {
                    resolved_fields.push((name.clone(), field_ty.convert(assignments)?));
                }
                Type::StructInst(resolved_fields)
            }
        };
        Ok(resolved)
    }
}
impl Op {
    /// Record the typing constraints of applying this operator.
    ///
    /// Both operands must be `Int`. The result is `Int` for `Plus` and `Bool`
    /// for the comparison operators.
    fn constraints(
        &self,
        ctx: &mut AnnotationContext,
        left: &Type_,
        right: &Type_,
        output: &Type_,
    ) {
        // Only the result type depends on the operator.
        let result_ty = match self {
            Op::Plus => Type_::Int,
            Op::LessThan | Op::GreaterThan => Type_::Bool,
        };

        ctx.constrain(left.clone(), Type_::Int);
        ctx.constrain(right.clone(), Type_::Int);
        ctx.constrain(output.clone(), result_ty);
    }
}
/// A pair of types recorded as a constraint by `AnnotationContext::constrain`.
#[derive(Debug, Hash, Eq, PartialEq)]
struct Constraint(Type_, Type_);
/// One scope of variable types, linked to its enclosing scope.
#[derive(Debug, Default)]
struct Env {
/// The enclosing scope, or `None` for the outermost one.
parent: Option<Box<Env>>,
/// Types of the names defined directly in this scope.
local_type_map: HashMap<String, Type_>,
}
impl Env {
    /// Find the type bound to `name`, searching this scope first and then each
    /// enclosing scope in turn. Returns `None` when no scope defines it.
    pub fn lookup(&self, name: impl AsRef<str>) -> Option<&Type_> {
        if let Some(found) = self.local_type_map.get(name.as_ref()) {
            return Some(found);
        }
        self.parent.as_ref().and_then(|parent| parent.lookup(name))
    }
}
/// Mutable state threaded through annotation of a function body.
struct AnnotationContext<'a> {
/// Next fresh type-variable id.
counter: usize,
/// Set that collects the constraints recorded during annotation.
constraints: &'a mut HashSet<Constraint>,
/// Innermost scope; the scope methods unwrap it, so they panic when it is `None`.
current_env: Option<Env>,
}
impl<'a> AnnotationContext<'a> {
    /// Create a fresh type variable.
    pub fn type_var(&mut self) -> Type_ {
        let id = self.gen_int();
        Type_::Var(id)
    }

    /// Return the current counter value and advance the counter by one.
    pub fn gen_int(&mut self) -> usize {
        let id = self.counter;
        self.counter += 1;
        id
    }

    /// Record that `left` and `right` must be the same type; nothing is
    /// recorded when they are already syntactically equal.
    pub fn constrain(&mut self, left: Type_, right: Type_) {
        if left != right {
            self.constraints.insert(Constraint(left, right));
        }
    }

    /// Look `name` up in the current scope chain.
    ///
    /// # Panics
    ///
    /// Panics when there is no current scope.
    pub fn lookup(&self, name: impl AsRef<str>) -> Option<&Type_> {
        let env = self.current_env.as_ref().unwrap();
        env.lookup(name)
    }

    /// Bind `name` to `ty` in the innermost scope.
    ///
    /// # Panics
    ///
    /// Panics when there is no current scope.
    pub fn define_var(&mut self, name: impl AsRef<str>, ty: Type_) {
        let env = self.current_env.as_mut().unwrap();
        env.local_type_map.insert(name.as_ref().to_string(), ty);
    }

    /// Enter a new, empty scope whose parent is the current one.
    ///
    /// # Panics
    ///
    /// Panics when there is no current scope.
    pub fn push_scope(&mut self) {
        let enclosing = self.current_env.take().unwrap();
        self.current_env = Some(Env {
            parent: Some(Box::new(enclosing)),
            local_type_map: Default::default(),
        });
    }

    /// Leave the innermost scope and return to its parent.
    ///
    /// # Panics
    ///
    /// Panics when there is no current scope or it has no parent.
    pub fn pop_scope(&mut self) {
        let mut innermost = self.current_env.take().unwrap();
        let enclosing = innermost.parent.take().unwrap();
        self.current_env = Some(*enclosing);
    }
}
/// Annotates a block of statements with type-checking types.
///
/// The block is processed inside its own scope: a scope is pushed before the
/// first statement and popped after the last, so `let` bindings made here are
/// not visible to the caller afterwards.
///
/// # Errors
///
/// Propagates any error raised while annotating a contained expression or
/// `if`/`else` statement.
fn annotate_stmts(
    ctx: &mut AnnotationContext,
    stmts: impl AsRef<[Stmt<()>]>,
) -> Result<Vec<Stmt<Type_>>> {
    let stmts = stmts.as_ref();
    let mut new_stmts = Vec::with_capacity(stmts.len());
    ctx.push_scope();
    for stmt in stmts.iter() {
        match stmt {
            Stmt::Return(ret_val) => {
                let new_stmt = match ret_val {
                    Some(v) => Some(annotate_expr(ctx, v)?),
                    None => None,
                };
                new_stmts.push(Stmt::Return(new_stmt));
            }
            Stmt::Let(name, ty, body) => {
                // The initializer is annotated before the name is defined, so
                // it cannot refer to the binding it initializes.
                let body = annotate_expr(ctx, body)?;
                let var_ty = match ty {
                    Some(v) => Type_::from_type(v.clone()),
                    None => ctx.type_var(),
                };
                // Tie the variable's type to its initializer. Without this an
                // un-annotated binding stays an unsolved variable, and an
                // annotated one is never checked against the value it gets.
                ctx.constrain(var_ty.clone(), body.ty.clone());
                ctx.define_var(name, var_ty);
                new_stmts.push(Stmt::Let(name.clone(), ty.clone(), body));
            }
            Stmt::IfElse(if_else) => {
                let new_stmt = Stmt::IfElse(annotate_if_else(ctx, if_else)?);
                new_stmts.push(new_stmt);
            }
        }
    }
    ctx.pop_scope();
    Ok(new_stmts)
}
/// Annotates an expression tree with type-checking types, recording the
/// constraints its operators and calls impose.
///
/// # Errors
///
/// Fails when a call names something that is not bound, is not a function,
/// or is given a different number of arguments than the function declares.
fn annotate_expr(
    ctx: &mut AnnotationContext,
    expr: &Expr<()>,
) -> Result<Expr<Type_>> {
    Ok(match &expr.kind {
        ExprKind::Int(n) => Expr {
            kind: ExprKind::Int(*n),
            ty: Type_::Int,
        },
        ExprKind::Var(name) => {
            // NOTE(review): a name that is not in scope receives a fresh
            // variable instead of an error; confirm this is intended.
            let ty = match ctx.lookup(name) {
                Some(v) => v.clone(),
                None => ctx.type_var(),
            };
            Expr {
                kind: ExprKind::Var(name.clone()),
                ty,
            }
        }
        ExprKind::BinOp(left, op, right) => {
            let left = annotate_expr(ctx, left)?;
            let right = annotate_expr(ctx, right)?;
            let output = ctx.type_var();
            op.constraints(ctx, &left.ty, &right.ty, &output);
            Expr {
                kind: ExprKind::BinOp(Box::new(left), *op, Box::new(right)),
                ty: output,
            }
        }
        ExprKind::Call(func_name, args) => {
            let ret_ty = ctx.type_var();
            let (func_args_ty, func_ret_ty) = match ctx.lookup(func_name) {
                Some(Type_::Func(args, ret)) => (args.clone(), *ret.clone()),
                Some(_) => bail!("Not a function"),
                None => bail!("Name not found"),
            };
            // `zip` stops at the shorter side, so without this check surplus
            // arguments would be dropped from the tree and missing ones
            // would go unnoticed.
            if args.len() != func_args_ty.len() {
                bail!(
                    "Function {} expects {} argument(s) but {} were given",
                    func_name,
                    func_args_ty.len(),
                    args.len()
                );
            }
            ctx.constrain(ret_ty.clone(), func_ret_ty);
            let mut args_annot = Vec::with_capacity(args.len());
            for (arg, expected_ty) in args.iter().zip(func_args_ty.iter()) {
                let arg_annot = annotate_expr(ctx, arg)?;
                ctx.constrain(arg_annot.ty.clone(), expected_ty.clone());
                args_annot.push(arg_annot);
            }
            Expr {
                kind: ExprKind::Call(func_name.to_string(), args_annot),
                ty: ret_ty,
            }
        }
    })
}
fn annotate_if_else(
ctx: &mut AnnotationContext,
if_else: &IfElse<()>,
) -> Result<IfElse<Type_>> {
let converted_cond = annotate_expr(ctx, &if_else.cond)?;
let converted_body = annotate_stmts(ctx, &if_else.body)?;
let else_clause = match &if_else.else_clause {
Some(ElseClause::If(if_else2)) => {
Some(ElseClause::If(Box::new(annotate_if_else(ctx, &if_else2)?)))
}
Some(ElseClause::Body(stmts)) => {
Some(ElseClause::Body(annotate_stmts(ctx, &stmts)?))
}
None => None,
};
Ok(IfElse {
cond: converted_cond,
body: converted_body,
else_clause,
})
}
fn collect_info(
env: Env,
func: &Func<()>,
) -> Result<(Func<Type_>, HashSet<Constraint>, Env)> {
let mut constraints = HashSet::new();
let mut ctx = AnnotationContext {
counter: 0,
constraints: &mut constraints,
current_env: Some(env),
};
let new_stmts = annotate_stmts(&mut ctx, &func.stmts)?;
let args_ = func
.args
.iter()
.cloned()
.map(|arg| Type_::from_type(arg.ty))
.collect();
let total_ty =
Type_::Func(args_, Box::new(Type_::from_type(func.return_ty.clone())));
let func = Func {
name: func.name.clone(),
args: func.args.clone(),
return_ty: func.return_ty.clone(),
stmts: new_stmts,
ty: total_ty,
};
let env = ctx.current_env.unwrap();
mem::drop(ctx.constraints);
Ok((func, constraints, env))
}
/// Solution of the constraint set: maps a constraint variable's id to the
/// type assigned to it.
type Assignments = HashMap<usize, Type_>;
/// Returns a copy of `ty` in which every constraint variable that has an
/// entry in `assignments` is replaced by its assigned type.
///
/// Replacement is a single step: the assigned type is inserted as it is and
/// is not itself substituted again. Variables without an entry are kept.
fn substitute_types(assignments: &Assignments, ty: &Type_) -> Type_ {
    match ty {
        Type_::Int | Type_::Bool => ty.clone(),
        Type_::Var(n) => assignments.get(n).unwrap_or(ty).clone(),
        Type_::StructInst(fields) => {
            let fields = fields
                .into_iter()
                .map(|(name, field_ty)| {
                    (name.clone(), substitute_types(assignments, field_ty))
                })
                .collect();
            Type_::StructInst(fields)
        }
        Type_::Func(params, ret) => {
            let params = params
                .iter()
                .map(|param| substitute_types(assignments, param))
                .collect();
            let ret = Box::new(substitute_types(assignments, ret));
            Type_::Func(params, ret)
        }
    }
}
fn unify_constraints(constraints: &HashSet<Constraint>) -> Result<Assignments> {
let mut assignments = HashMap::new();
for Constraint(left, right) in constraints {
let left = substitute_types(&assignments, left);
let right = substitute_types(&assignments, right);
unify_single(&mut assignments, left, right)?;
}
Ok(assignments)
}
/// Unifies two types, recording in `assignments` whatever variable bindings
/// are needed to make them equal.
///
/// # Errors
///
/// Fails when the types cannot be made equal: different type constructors,
/// function types with a different number of arguments, or a variable that
/// would have to contain itself.
fn unify_single(
    assignments: &mut Assignments,
    left: Type_,
    right: Type_,
) -> Result<()> {
    /// Reports whether constraint variable `var` appears anywhere in `ty`.
    fn occurs(var: usize, ty: &Type_) -> bool {
        match ty {
            Type_::Var(n) => *n == var,
            Type_::Int | Type_::Bool => false,
            Type_::Func(args, ret) => {
                args.iter().any(|arg| occurs(var, arg)) || occurs(var, ret)
            }
            Type_::StructInst(fields) => {
                fields.into_iter().any(|(_, ty)| occurs(var, ty))
            }
        }
    }

    // Identical types need no work. Checking this first also keeps a variable
    // from being bound to itself (which later lowering would follow forever)
    // and lets two identical struct types unify.
    if left == right {
        return Ok(());
    }

    match (left, right) {
        (Type_::Var(n), other) | (other, Type_::Var(n)) => {
            // Binding a variable to a type that mentions the same variable
            // would describe an infinitely large type.
            if occurs(n, &other) {
                bail!("Infinite type: variable {n} occurs in {other:?}");
            }
            assignments.insert(n, other);
        }
        (Type_::Func(left_args, left_ret), Type_::Func(right_args, right_ret)) => {
            // `zip` below stops at the shorter list, so arity has to be
            // compared explicitly.
            if left_args.len() != right_args.len() {
                bail!(
                    "Mismatching function arity: {} vs. {} argument(s)",
                    left_args.len(),
                    right_args.len()
                );
            }
            let mut new_constraints = HashSet::new();
            for (left_arg, right_arg) in
                left_args.into_iter().zip(right_args.into_iter())
            {
                new_constraints.insert(Constraint(left_arg, right_arg));
            }
            new_constraints.insert(Constraint(*left_ret, *right_ret));
            assignments.extend(unify_constraints(&new_constraints)?);
        }
        (left, right) => bail!("Mismatching types {left:?} vs. {right:?}"),
    };
    Ok(())
}
/// Rebuilds an expression node, lowering every sub-expression from
/// type-checking types to final types.
///
/// # Errors
///
/// Propagates the first failure from lowering a sub-expression.
fn substitute_in_expr_kind(
    assignments: &Assignments,
    expr_kind: ExprKind<Type_>,
) -> Result<ExprKind<Type>> {
    let lowered = match expr_kind {
        // Leaves carry no nested expressions and are rebuilt as they are.
        ExprKind::Int(value) => ExprKind::Int(value),
        ExprKind::Var(ident) => ExprKind::Var(ident),
        ExprKind::Call(callee, call_args) => {
            let mut lowered_args = Vec::with_capacity(call_args.len());
            for arg in call_args {
                lowered_args.push(substitute_in_expr(assignments, arg)?);
            }
            ExprKind::Call(callee, lowered_args)
        }
        ExprKind::BinOp(lhs, op, rhs) => {
            // Left operand first, so its errors take precedence.
            let lhs = substitute_in_expr(assignments, *lhs)?;
            let rhs = substitute_in_expr(assignments, *rhs)?;
            ExprKind::BinOp(Box::new(lhs), op, Box::new(rhs))
        }
    };
    Ok(lowered)
}
/// Lowers an annotated expression: first its node (and thereby all nested
/// expressions), then its own type.
///
/// # Errors
///
/// Fails if a nested expression fails to lower or if the expression's type
/// still contains an unsolved variable.
fn substitute_in_expr(
    assignments: &Assignments,
    expr: Expr<Type_>,
) -> Result<Expr<Type>> {
    let Expr { kind, ty } = expr;
    let kind = substitute_in_expr_kind(assignments, kind)?;
    let ty = ty.convert(assignments)?;
    Ok(Expr { kind, ty })
}
/// Lowers an annotated `if`/`else` statement: the condition, then the body,
/// then the optional `else if` chain or `else` block.
///
/// # Errors
///
/// Propagates the first failure from any of those parts.
fn substitute_in_if_else(
    assignments: &Assignments,
    if_else: IfElse<Type_>,
) -> Result<IfElse<Type>> {
    let IfElse {
        cond,
        body,
        else_clause,
    } = if_else;

    let cond = substitute_in_expr(assignments, cond)?;
    let body = substitute_in_stmts(assignments, body)?;
    let else_clause = match else_clause {
        None => None,
        Some(ElseClause::Body(stmts)) => {
            Some(ElseClause::Body(substitute_in_stmts(assignments, stmts)?))
        }
        Some(ElseClause::If(nested)) => {
            let nested = substitute_in_if_else(assignments, *nested)?;
            Some(ElseClause::If(Box::new(nested)))
        }
    };

    Ok(IfElse {
        cond,
        body,
        else_clause,
    })
}
/// Lowers a list of annotated statements, in order, to statements carrying
/// final types.
///
/// # Errors
///
/// Stops at and returns the first failure.
fn substitute_in_stmts(
    assignments: &Assignments,
    stmts: Vec<Stmt<Type_>>,
) -> Result<Vec<Stmt<Type>>> {
    let mut lowered = Vec::with_capacity(stmts.len());
    for stmt in stmts {
        let new_stmt = match stmt {
            Stmt::IfElse(if_else) => {
                Stmt::IfElse(substitute_in_if_else(assignments, if_else)?)
            }
            Stmt::Return(ret_val) => {
                // A bare `return;` has no expression to lower.
                let ret_val = ret_val
                    .map(|value| substitute_in_expr(assignments, value))
                    .transpose()?;
                Stmt::Return(ret_val)
            }
            Stmt::Let(name, ty, body) => {
                let body = substitute_in_expr(assignments, body)?;
                Stmt::Let(name, ty, body)
            }
        };
        lowered.push(new_stmt);
    }
    Ok(lowered)
}
/// Lowers an annotated function: its body statements first, then its own
/// function type. Name, arguments and declared return type pass through
/// unchanged.
///
/// # Errors
///
/// Fails if the body or the function type cannot be lowered.
fn substitute_in_func(
    assignments: &Assignments,
    func: Func<Type_>,
) -> Result<Func<Type>> {
    let Func {
        name,
        args,
        return_ty,
        stmts,
        ty,
    } = func;
    let stmts = substitute_in_stmts(assignments, stmts)?;
    let ty = ty.convert(assignments)?;
    Ok(Func {
        name,
        args,
        return_ty,
        stmts,
        ty,
    })
}
/// Type-checks a whole program, turning the untyped declarations into
/// declarations annotated with final types.
///
/// Works in two passes: the first registers every function's signature in a
/// top-level environment, so functions can call each other regardless of
/// declaration order; the second annotates, solves and lowers each function
/// on its own.
///
/// # Errors
///
/// Fails on the first function whose annotation, constraint solving or
/// lowering fails.
pub fn convert(ast: Vec<Decl<()>>) -> Result<Vec<Decl<Type>>> {
    // Pass 1: collect the signature of every top-level function.
    let mut globals = Env::default();
    for decl in &ast {
        match decl {
            Decl::Func(func) => {
                let param_tys =
                    func.args.iter().map(|arg| arg.ty.clone()).collect();
                let signature =
                    Type::Func(param_tys, Box::new(func.return_ty.clone()));
                globals
                    .local_type_map
                    .insert(func.name.clone(), Type_::from_type(signature));
            }
        }
    }

    // Pass 2: check each function in a scope holding its parameters.
    let mut typed_decls = Vec::new();
    for decl in &ast {
        match decl {
            Decl::Func(func) => {
                // The global environment is moved into the function scope as
                // its parent and recovered once the function is annotated.
                let function_scope = Env {
                    parent: Some(Box::new(globals)),
                    local_type_map: func
                        .args
                        .iter()
                        .map(|arg| {
                            (arg.name.clone(), Type_::from_type(arg.ty.clone()))
                        })
                        .collect(),
                };
                let (annotated, constraints, returned_scope) =
                    collect_info(function_scope, func)?;
                globals = *returned_scope.parent.unwrap();

                let assignments = unify_constraints(&constraints)?;
                let typed = substitute_in_func(&assignments, annotated)?;
                typed_decls.push(Decl::Func(typed));
            }
        }
    }
    Ok(typed_decls)
}

57
src/ast/types.rs Normal file
View file

@ -0,0 +1,57 @@
use super::Arg;
/// Interface shared by the type representations in this module.
pub trait IType {
/// Builds the function type that takes `args` and returns `ret`.
fn func_ty(args: impl AsRef<[Self]>, ret: Self) -> Self
where
Self: Sized;
}
/// The type, as written in the source code. Notably, this includes forms
/// (`Infer`, `Named`) that have no counterpart in the final [`Type`].
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub enum TypeS {
/// Type is not given in the source, will infer it later.
Infer,
/// The unit type.
Unit,
/// The boolean type.
Bool,
/// The integer type.
Int,
/// A struct type, given by its list of fields.
Struct(Vec<Arg<TypeS>>),
/// A function type: argument types and return type.
Func(Vec<TypeS>, Box<TypeS>),
/// A type referred to by name.
Named(String),
}
impl IType for TypeS {
    /// Builds a [`TypeS::Func`] from copies of the given argument types and
    /// the given return type.
    fn func_ty(args: impl AsRef<[Self]>, ret: Self) -> Self
    where
        Self: Sized,
    {
        let params: Vec<TypeS> = args.as_ref().iter().cloned().collect();
        let ret = Box::new(ret);
        TypeS::Func(params, ret)
    }
}
/// The "real" type, that is modeled.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub enum Type {
/// The unit type.
Unit,
/// The boolean type.
Bool,
/// The integer type.
Int,
/// A struct type, given by its list of fields.
Struct(Vec<Arg<Type>>),
/// A function type: argument types and return type.
Func(Vec<Type>, Box<Type>),
}
/// Types used during type-checking. Notably, this includes the constraint
/// variable Var
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub enum TypeTC {
/// A constraint variable, identified by its numeric id.
Var(usize),
/// The unit type.
Unit,
/// The boolean type.
Bool,
/// The integer type.
Int,
/// A struct type, given by its list of fields.
Struct(Vec<Arg<TypeTC>>),
/// A function type: argument types and return type.
Func(Vec<TypeTC>, Box<TypeTC>),
}

View file

@ -2,7 +2,7 @@ use std::io::Write;
use anyhow::Result; use anyhow::Result;
use crate::ast::{Expr, ExprKind, Op, Type}; use crate::ast::{types::Type, Expr, ExprKind, Op};
use super::{type_to_llvm, LlvmIrCodegen}; use super::{type_to_llvm, LlvmIrCodegen};
@ -25,19 +25,15 @@ impl<W: Write> LlvmIrCodegen<W> {
match &expr.kind { match &expr.kind {
ExprKind::Int(n) => { ExprKind::Int(n) => {
expr_ref = format!("{}", n); expr_ref = format!("{}", n);
// writeln!(self.writer, "%{} = alloca {}", expr_id, ty_str)?;
// writeln!(
// self.writer,
// "store {} {}, {}* %{}",
// ty_str, n, ty_str, expr_id
// )?;
} }
ExprKind::Var(name) => match self.var_env.lookup(name) { ExprKind::Var(name) => match self.var_env.lookup(name) {
Some((_, name)) => { Some((_, name)) => {
expr_ref = name.clone(); expr_ref = name.clone();
} }
None => bail!("Unbound name {name:?}"), None => bail!("Unbound name {name:?}"),
}, },
ExprKind::BinOp(left, op, right) => { ExprKind::BinOp(left, op, right) => {
let result_ty = match op.check_types(&left.ty, &right.ty) { let result_ty = match op.check_types(&left.ty, &right.ty) {
Some(v) => type_to_llvm(&v), Some(v) => type_to_llvm(&v),
@ -75,6 +71,7 @@ impl<W: Write> LlvmIrCodegen<W> {
)?, )?,
} }
} }
ExprKind::Call(func, args) => match self.var_env.lookup(func) { ExprKind::Call(func, args) => match self.var_env.lookup(func) {
Some((Type::Func(func_args_ty, func_ret_ty), func_name)) => { Some((Type::Func(func_args_ty, func_ret_ty), func_name)) => {
// Clone these so we aren't depending on a reference // Clone these so we aren't depending on a reference
@ -106,6 +103,13 @@ impl<W: Write> LlvmIrCodegen<W> {
} }
_ => bail!("No function with name {func:?}"), _ => bail!("No function with name {func:?}"),
}, },
ExprKind::StructNew(struct_ref, values) => {
todo!()
}
ExprKind::FieldAccess(expr, field_name) => {
todo!()
}
} }
Ok(ExprLlvm(expr_ref, expr)) Ok(ExprLlvm(expr_ref, expr))

View file

@ -2,7 +2,7 @@ use std::io::Write;
use anyhow::Result; use anyhow::Result;
use crate::ast::{ElseClause, IfElse, Type}; use crate::ast::{ElseClause, IfElse, types::Type};
use super::LlvmIrCodegen; use super::LlvmIrCodegen;

View file

@ -6,7 +6,8 @@ use std::io::Write;
use anyhow::Result; use anyhow::Result;
use crate::ast::{Decl, Type}; use crate::ast::Arg;
use crate::ast::{types::Type, Decl};
use crate::utils::LayeredEnv; use crate::utils::LayeredEnv;
use super::CodegenBackend; use super::CodegenBackend;
@ -14,15 +15,17 @@ use super::CodegenBackend;
pub struct LlvmIrCodegen<W> { pub struct LlvmIrCodegen<W> {
ctr: usize, ctr: usize,
var_env: LayeredEnv<String, (Type, String)>, var_env: LayeredEnv<String, (Type, String)>,
/// Mapping from source code name to type def and LLVM struct name
struct_env: LayeredEnv<String, (Vec<Arg<Type>>, String)>,
writer: W, writer: W,
} }
impl<W> LlvmIrCodegen<W> { impl<W> LlvmIrCodegen<W> {
pub fn new(writer: W) -> Self { pub fn new(writer: W) -> Self {
let env = LayeredEnv::new();
LlvmIrCodegen { LlvmIrCodegen {
ctr: 0, ctr: 0,
var_env: env, var_env: LayeredEnv::default(),
struct_env: LayeredEnv::default(),
writer, writer,
} }
} }
@ -45,30 +48,45 @@ impl<W> LlvmIrCodegen<W> {
fn type_to_llvm(ty: &Type) -> String { fn type_to_llvm(ty: &Type) -> String {
match ty { match ty {
Type::Unit => String::from("void"),
Type::Int => String::from("i32"), Type::Int => String::from("i32"),
Type::Bool => String::from("i1"), Type::Bool => String::from("i1"),
Type::StructInst(_) => todo!(), Type::Struct(_) => todo!(),
Type::Func(_, _) => todo!(), Type::Func(_, _) => todo!(),
} }
} }
impl<W: Write> CodegenBackend for LlvmIrCodegen<W> { impl<W: Write> CodegenBackend for LlvmIrCodegen<W> {
fn convert(&mut self, program: Vec<Decl<Type>>) -> Result<()> { fn convert(&mut self, program: Vec<Decl<Type>>) -> Result<()> {
// First, create a global environment and add all functions, so they can be // First, create a global environment and add all functions and types, so
// called within other functions // they can be referenced within other functions
// //
// This is in a separate loop so there's no dependency on function order // This is in a separate loop so there's no dependency on order
self.var_env = LayeredEnv::new(); for decl in program.iter() {
for func in program.iter().filter_map(|decl| match decl { match decl {
Decl::Func(v) => Some(v), Decl::Func(func) => {
_ => None, let func_name = match func.name.as_str() {
}) { "main" => String::from("@main"),
let func_name = match func.name.as_str() { _ => self.gensym(Some("@"), format!("func.{}", func.name)),
"main" => String::from("@main"), };
_ => self.gensym(Some("@"), format!("func.{}", func.name)), let func_ty = Type::Func(
}; func.args.iter().map(|arg| arg.ty.clone()).collect(),
let value = (func.ty.clone(), func_name); Box::new(func.return_ty.clone()),
self.var_env.insert(func.name.clone(), value); );
let value = (func_ty, func_name);
self.var_env.insert(func.name.clone(), value);
}
Decl::TypeDef(typedef) => match &typedef.def {
Type::Struct(strct) => {
let name =
self.gensym(Some("%"), format!("struct.{}", typedef.name));
let value = (strct.clone(), name);
self.struct_env.insert(typedef.name.clone(), value);
}
_ => {}
},
}
} }
// Convert all functions // Convert all functions
@ -76,10 +94,7 @@ impl<W: Write> CodegenBackend for LlvmIrCodegen<W> {
Decl::Func(v) => Some(v), Decl::Func(v) => Some(v),
_ => None, _ => None,
}) { }) {
let (_, ret_ty) = match &func.ty { let ret_ty = &func.return_ty;
Type::Func(args, ret) => (args, ret),
_ => unreachable!(""),
};
let (_, func_name) = let (_, func_name) =
self.var_env.lookup(&func.name).expect("Just inserted."); self.var_env.lookup(&func.name).expect("Just inserted.");
let func_name = func_name.to_owned(); let func_name = func_name.to_owned();
@ -100,7 +115,7 @@ impl<W: Write> CodegenBackend for LlvmIrCodegen<W> {
writeln!( writeln!(
self.writer, self.writer,
"define {} {} ({}) {{", "define {} {} ({}) {{",
type_to_llvm(ret_ty), type_to_llvm(&ret_ty),
func_name, func_name,
args_str.join(", "), args_str.join(", "),
)?; )?;

View file

@ -2,7 +2,7 @@ use std::io::Write;
use anyhow::Result; use anyhow::Result;
use crate::ast::{Stmt, Type}; use crate::ast::{Stmt, types::Type};
use super::{type_to_llvm, LlvmIrCodegen}; use super::{type_to_llvm, LlvmIrCodegen};

View file

@ -1,8 +1,13 @@
//! This module contains everything related to codegen.
//!
//! Although written in a style that could possibly support multiple backends,
//! the only planned backend is the [LLVM IR][llvm_ir] backend.
pub mod llvm_ir; pub mod llvm_ir;
use anyhow::Result; use anyhow::Result;
use crate::ast::{Decl, Type}; use crate::ast::{Decl, types::Type};
pub trait CodegenBackend { pub trait CodegenBackend {
fn convert(&mut self, program: Vec<Decl<Type>>) -> Result<()>; fn convert(&mut self, program: Vec<Decl<Type>>) -> Result<()>;

View file

@ -1,9 +1,15 @@
#![doc = include_str!("../README.md")]
#[macro_use] #[macro_use]
extern crate lalrpop_util; extern crate lalrpop_util;
#[macro_use] #[macro_use]
extern crate anyhow; extern crate anyhow;
lalrpop_mod!(pub parser); /// This module contains an auto-generated parser created by LALRPOP.
pub mod parser {
lalrpop_mod!(parser);
pub use self::parser::*;
}
pub mod ast; pub mod ast;
pub mod codegen; pub mod codegen;

View file

@ -1,67 +1,93 @@
use crate::ast::*; use crate::ast::{*, types::*};
grammar; grammar;
pub Program: Vec<Decl<()>> = Decl* => <>; pub Program: Vec<Decl<TypeS>> = Decl* => <>;
Decl: Decl<()> = { Decl: Decl<TypeS> = {
Func => Decl::Func(<>), Func => Decl::Func(<>),
TypeDef => Decl::TypeDef(<>),
}; };
Func: Func<()> = { Func: Func<TypeS> = {
"fn" <name:Ident> "(" <args:Args> ")" "->" <return_ty:Type> "{" <stmts:Stmt*> "}" => "fn" <name:Ident> "(" <args:Args> ")" "->" <return_ty:Type> "{" <stmts:Stmt*> "}" =>
Func { name, args, return_ty, stmts, ty: (), }, Func { name, args, return_ty, stmts },
"fn" <name:Ident> "(" <args:Args> ")" "{" <stmts:Stmt*> "}" =>
Func { name, args, return_ty: TypeS::Unit, stmts },
}; };
Args: Vec<Arg> = Punct<",", Arg>? => <>.unwrap_or_else(|| Vec::new()); TypeDef: TypeDef<TypeS> = {
"type" <name:Ident> "=" <def:Type> => TypeDef { name, def },
};
Arg: Arg = <name:Ident> ":" <ty:Type> => Arg { name, ty }; Args: Vec<Arg<TypeS>> = Punct<",", Arg>? => <>.unwrap_or_else(|| Vec::new());
Stmt: Stmt<()> = { Arg: Arg<TypeS> = <name:Ident> ":" <ty:Type> => Arg { name, ty };
NameValue: NameValue<TypeS> = <name:Ident> ":" <expr:Expr> => NameValue { name, expr };
Stmt: Stmt<TypeS> = {
"let" <name:Ident> <ty:ColonType?> "=" <expr:Expr> ";" => "let" <name:Ident> <ty:ColonType?> "=" <expr:Expr> ";" =>
Stmt::Let(name, ty, expr), Stmt::Let(name, ty.unwrap_or(TypeS::Infer), expr),
"return" <expr:Expr?> ";" => Stmt::Return(expr), "return" <expr:Expr?> ";" => Stmt::Return(expr),
IfElse => Stmt::IfElse(<>), IfElse => Stmt::IfElse(<>),
}; };
ColonType: Type = ":" <ty:Type> => ty; ColonType: TypeS = ":" <ty:Type> => ty;
IfElse: IfElse<()> = IfElse: IfElse<TypeS> =
"if" <cond:Expr> "{" <body:Stmt*> "}" <else_clause:Else?> => "if" <cond:Expr> "{" <body:Stmt*> "}" <else_clause:Else?> =>
IfElse { cond, body, else_clause }; IfElse { cond, body, else_clause };
Else: ElseClause<()> = "else" <else_clause:Else_> => else_clause; Else: ElseClause<TypeS> = "else" <else_clause:Else_> => else_clause;
Else_: ElseClause<()> = { Else_: ElseClause<TypeS> = {
IfElse => ElseClause::If(Box::new(<>)), IfElse => ElseClause::If(Box::new(<>)),
"{" <body:Stmt*> "}" => ElseClause::Body(body), "{" <body:Stmt*> "}" => ElseClause::Body(body),
}; };
Expr: Expr<()> = { Expr: Expr<TypeS> = {
#[precedence(level = "0")] #[precedence(level = "0")]
"(" <expr:Expr> ")" => expr, "(" <expr:Expr> ")" => expr,
#[precedence(level = "0")] #[precedence(level = "0")]
r"-?[0-9]+" => Expr { kind: ExprKind::Int(<>.parse::<i64>().unwrap()), ty: () }, r"-?[0-9]+" => Expr {
kind: ExprKind::Int(<>.parse::<i64>().unwrap()),
ty: TypeS::Infer,
},
#[precedence(level = "0")] #[precedence(level = "0")]
Ident => Expr { kind: ExprKind::Var(<>), ty: () }, "new" <name:Ident> "{" <values:Punct<",", NameValue>> "}" => Expr {
kind: ExprKind::StructNew(name, values),
ty: TypeS::Infer,
},
#[precedence(level = "1")]
Ident => Expr { kind: ExprKind::Var(<>), ty: TypeS::Infer },
#[precedence(level = "2")] #[precedence(level = "2")]
#[assoc(side = "left")]
<expr:Expr> "." <field:Ident> =>
Expr { kind: ExprKind::FieldAccess(Box::new(expr), field), ty: TypeS::Infer },
#[precedence(level = "3")]
<func:Ident> "(" <args:Punct<",", Expr>?> ")" => <func:Ident> "(" <args:Punct<",", Expr>?> ")" =>
Expr { kind: ExprKind::Call(func, args.unwrap_or_else(|| vec![])), ty: () }, Expr {
kind: ExprKind::Call(func, args.unwrap_or_else(|| vec![])),
ty: TypeS::Infer,
},
#[precedence(level = "8")] #[precedence(level = "8")]
#[assoc(side = "left")] #[assoc(side = "left")]
<left:Expr> <op:AddOp> <right:Expr> => Expr { <left:Expr> <op:AddOp> <right:Expr> => Expr {
kind: ExprKind::BinOp(Box::new(left), op, Box::new(right)), kind: ExprKind::BinOp(Box::new(left), op, Box::new(right)),
ty: (), ty: TypeS::Infer,
}, },
#[precedence(level = "13")] #[precedence(level = "13")]
#[assoc(side = "none")] #[assoc(side = "none")]
<left:Expr> <op:CompareOp> <right:Expr> => Expr { <left:Expr> <op:CompareOp> <right:Expr> => Expr {
kind: ExprKind::BinOp(Box::new(left), op, Box::new(right)), kind: ExprKind::BinOp(Box::new(left), op, Box::new(right)),
ty: (), ty: TypeS::Infer,
}, },
}; };
@ -74,22 +100,24 @@ CompareOp: Op = {
">" => Op::GreaterThan, ">" => Op::GreaterThan,
}; };
Type: Type = { Type: TypeS = {
"int" => Type::Int, "int" => TypeS::Int,
"struct" "{" <fields:Punct<",", Arg>> "}" => TypeS::Struct(fields),
<name:Ident> => TypeS::Named(name),
}; };
Ident: String = r"([A-Za-z][A-Za-z0-9_]*)|(_[A-Za-z0-9_]+)" => <>.to_string(); Ident: String = r"([A-Za-z][A-Za-z0-9_]*)|(_[A-Za-z0-9_]+)" => <>.to_string();
Punct<P, T>: Vec<T> = { Punct<P, T>: Vec<T> = {
P => vec![], P => vec![],
<first:T> <rest:(P Punct<P, T>)?> => {
match rest { T => vec![<>],
Some(rest) => {
let (_, mut vec) = rest; <first:T> P => vec![first],
vec.insert(0, first);
vec <first:T> P <rest:Punct<P, T>> => {
} let mut vec = rest;
None => vec![first], vec.insert(0, first);
} vec
}, }
}; };

View file

@ -1,11 +1,47 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::hash::Hash; use std::hash::Hash;
/// Layered environment that lets us push and pop frames and lookup across
/// layers.
#[derive(Debug)] #[derive(Debug)]
pub struct LayeredEnv<K, V> { pub struct LayeredEnv<K, V> {
inner: Option<InnerEnv<K, V>>, inner: Option<InnerEnv<K, V>>,
} }
impl<K: Hash + Eq, V> Default for LayeredEnv<K, V> {
fn default() -> Self {
LayeredEnv {
inner: Some(InnerEnv::new()),
}
}
}
impl<K: Hash + Eq, V> LayeredEnv<K, V> {
pub fn insert(&mut self, key: K, value: V) {
self.inner.as_mut().unwrap().map.insert(key, value);
}
pub fn push(&mut self) {
let inner = self.inner.take().unwrap();
self.inner = Some(InnerEnv::with_parent(inner));
}
pub fn pop(&mut self) {
let inner = self.inner.take().unwrap();
self.inner = Some(*inner.parent.unwrap());
}
/// Look up the key within the environment.
///
/// If a key exists in multiple layers at once, the top-most one (of the
/// stack) will shadow all the others.
///
/// TODO: Make a "lookup" function that traverses all environments.
pub fn lookup(&self, key: &K) -> Option<&V> {
self.inner.as_ref().unwrap().lookup(key)
}
}
#[derive(Debug)] #[derive(Debug)]
struct InnerEnv<K, V> { struct InnerEnv<K, V> {
parent: Option<Box<InnerEnv<K, V>>>, parent: Option<Box<InnerEnv<K, V>>>,
@ -37,29 +73,3 @@ impl<K: Hash + Eq, V> InnerEnv<K, V> {
} }
} }
} }
impl<K: Hash + Eq, V> LayeredEnv<K, V> {
pub fn new() -> Self {
LayeredEnv {
inner: Some(InnerEnv::new()),
}
}
pub fn insert(&mut self, key: K, value: V) {
self.inner.as_mut().unwrap().map.insert(key, value);
}
pub fn push(&mut self) {
let inner = self.inner.take().unwrap();
self.inner = Some(InnerEnv::with_parent(inner));
}
pub fn pop(&mut self) {
let inner = self.inner.take().unwrap();
self.inner = Some(*inner.parent.unwrap());
}
pub fn lookup(&self, key: &K) -> Option<&V> {
self.inner.as_ref().unwrap().lookup(key)
}
}

0
std/prelude.e0 Normal file
View file