diff --git a/bin/main.ml b/bin/main.ml
index c8e6ccb..f71da4e 100644
--- a/bin/main.ml
+++ b/bin/main.ml
@@ -1,6 +1,63 @@
 open E0
+open E0.Util
+
+module I = Parser.MenhirInterpreter
+
+(** [get_parse_error env] returns the message that [Parser_messages] holds
+    for the state on top of the parser stack in [env], or a generic
+    fallback when the stack is empty or the state has no message. *)
+let get_parse_error env =
+  match I.stack env with
+  | lazy Nil -> "Invalid syntax"
+  | lazy (Cons (I.Element (state, _, _, _), _)) ->
+    (* Trimmed so the caller decides on the trailing newline.
+       NOTE(review): compiled messages seem to end with one -- confirm. *)
+    try String.trim (E0.Parser_messages.message (I.number state)) with
+    | Not_found -> "invalid syntax (no specific message for this error)"
+
+(** [parse lexbuf checkpoint] runs the Menhir incremental parser from
+    [checkpoint], reading a token from [lexbuf] with [Lexer.f] each time
+    input is needed, and returns the accepted value.
+    @raise Syntax_error when the parser enters error handling or rejects
+    the input. *)
+let rec parse lexbuf (checkpoint : Ast.program I.checkpoint) =
+  match checkpoint with
+  | I.InputNeeded _env ->
+    let token = Lexer.f lexbuf in
+    let startp = lexbuf.lex_start_p
+    and endp = lexbuf.lex_curr_p in
+    let checkpoint = I.offer checkpoint (token, startp, endp) in
+    parse lexbuf checkpoint
+  | I.Shifting _
+  | I.AboutToReduce _ ->
+    let checkpoint = I.resume checkpoint in
+    parse lexbuf checkpoint
+  | I.HandlingError env ->
+    let line, pos = Util.get_lexing_position lexbuf in
+    let err = get_parse_error env in
+    raise (Syntax_error (Some (line, pos), err))
+  | I.Accepted v -> v
+  | I.Rejected ->
+    raise (Syntax_error (None, "invalid syntax (parser rejected the input)"))
+
+(** [try_parse lexbuf] parses a whole program from [lexbuf]. Returns
+    [Ok program] on success and [Error message] when [Syntax_error] is
+    raised; any other exception propagates. *)
+let try_parse lexbuf =
+  try
+    let program = parse lexbuf (Parser.Incremental.program lexbuf.lex_curr_p) in
+    Ok program
+  with
+  | Util.Syntax_error (Some (line, col), err) ->
+    Error (Printf.sprintf "Syntax error on line %d, character %d: %s" line col err)
+  | Util.Syntax_error (None, err) -> Error (Printf.sprintf "Syntax error: %s" err)
 
-let () = print_endline "Hello, World!"
 
 let () =
-  Lexing.from_channel stdin |> Parser.program Lexer.f
+  let p = Lexing.from_channel stdin |> try_parse in
+  match p with
+  | Error msg ->
+    (* Report on stderr, newline-terminated, and fail the process. *)
+    Printf.eprintf "Could not load program: %s\n" msg;
+    exit 1
+  | Ok p -> Ast.show_program p |> print_endline
diff --git a/examples/basic.e0 b/examples/basic.e0
index bdc49bf..b52ff19 100644
--- a/examples/basic.e0
+++ b/examples/basic.e0
@@ -1,3 +1,3 @@
-fn main(argc: u32, argv: **u8) -> u8 {
-    return 42
+fn main() {
+    return 42;
 }
diff --git a/lib/ast.ml b/lib/ast.ml
index ccd93e8..23d4b64 100644
--- a/lib/ast.ml
+++ b/lib/ast.ml
@@ -3,20 +3,53 @@ type op
   | OpSub
   | OpMul
   | OpDiv
+[@@deriving show]
 
 type lit
-  = LitInt of int64
+  = LitInt of int
+  | LitNegInt of int (** integer literal lexed with a leading minus sign *)
   | LitFloat of float
+[@@deriving show]
+
+type ty
+  = TySizedInt
+  | TyGenericInt
+  | TyFunc of ty list * ty
+  | TyPointer of ty
+  | TyStruct of (string * ty) list
+[@@deriving show]
 
 type expr
-  = ExprLit of lit
+  = ExprUnit
+  | ExprLit of lit
   | ExprBin of expr * op * expr
+  | ExprAnnot of expr * ty
+[@@deriving show]
+
+type pat
+  = PatName of string
+[@@deriving show]
+
+type stmt
+  = StmtLet of pat * expr
+  | StmtReturn of expr
+[@@deriving show]
+
+type block =
+  { stmts : stmt list
+  ; ret : expr (** NOTE(review): presumably the value of the block; the parser currently always stores [ExprUnit] -- confirm *)
+  }
+[@@deriving show]
 
 type func =
   { name : string
+  ; body : block
   }
+[@@deriving show]
 
 type decl
   = DeclFunc of func
+[@@deriving show]
 
 type program = decl list
+[@@deriving show]
diff --git a/lib/dune b/lib/dune
index 1466ae9..d1fdba1 100644
--- a/lib/dune
+++ b/lib/dune
@@ -1,7 +1,16 @@
 (library
- (name e0))
+ (name e0)
+ (libraries menhirLib)
+ (preprocess (pps ppx_deriving.show ppx_deriving.ord)))
 
 (ocamllex lexer)
 
 (menhir
+ (flags --table)
  (modules parser))
+
+; https://baturin.org/blog/declarative-parse-error-reporting-with-menhir/
+(rule
+ (targets parser_messages.ml)
+ (deps parser.messages parser.mly)
+ (action (with-stdout-to %{targets} (run menhir --compile-errors %{deps}))))
diff --git a/lib/lexer.mll b/lib/lexer.mll
index ea6f136..060bd85 100644
--- a/lib/lexer.mll
+++ b/lib/lexer.mll
@@ -1,5 +1,21 @@
 { open Parser }
 
 rule f = parse
+| '\n' { Lexing.new_line lexbuf; f lexbuf }
+| [' ' '\t' '\r'] { f lexbuf }
 | "fn" { KWD_FN }
-| ['a'-'z']* as i { IDENT i }
+| "return" { KWD_RETURN }
+| "(" { PUNCT_LPAREN }
+| ")" { PUNCT_RPAREN }
+| "->" { PUNCT_RARROW }
+| "{" { PUNCT_LBRACE }
+| "}" { PUNCT_RBRACE }
+| ":" { PUNCT_COLON }
+| ";" { PUNCT_SEMI }
+(* [as i] binds the whole match, minus sign included, so the conversion already yields a negative value. *)
+| "-" ['0'-'9']+ as i { LIT_NEG_INT (int_of_string i) }
+| ['0'-'9']+ as i { LIT_INT (int_of_string i) }
+(* One or more letters: a pattern that accepts the empty string would return an empty IDENT without consuming input. *)
+| ['a'-'z' 'A'-'Z']+ as i { IDENT i }
+| eof { EOF }
+| _ as c { raise (Util.Syntax_error (Some (Util.get_lexing_position lexbuf), Printf.sprintf "unexpected character %C" c)) }
diff --git a/lib/parser.messages b/lib/parser.messages
new file mode 100644
index 0000000..a06c15f
--- /dev/null
+++ b/lib/parser.messages
@@ -0,0 +1,108 @@
+program: PUNCT_RPAREN
+##
+## Ends in an error in state: 0.
+##
+## program' -> . program [ # ]
+##
+## The known suffix of the stack is as follows:
+##
+##
+
+Expected a function declaration starting with 'fn'.
+
+program: KWD_FN PUNCT_RPAREN
+##
+## Ends in an error in state: 1.
+##
+## func -> KWD_FN . IDENT PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
+##
+## The known suffix of the stack is as follows:
+## KWD_FN
+##
+
+Expected a function name after 'fn'.
+
+program: KWD_FN IDENT PUNCT_RPAREN
+##
+## Ends in an error in state: 2.
+##
+## func -> KWD_FN IDENT . PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
+##
+## The known suffix of the stack is as follows:
+## KWD_FN IDENT
+##
+
+Expected '(' after the function name.
+
+program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RBRACE
+##
+## Ends in an error in state: 3.
+##
+## func -> KWD_FN IDENT PUNCT_LPAREN . PUNCT_RPAREN body [ KWD_FN # ]
+##
+## The known suffix of the stack is as follows:
+## KWD_FN IDENT PUNCT_LPAREN
+##
+
+Expected ')' after '('.
+
+program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_RPAREN
+##
+## Ends in an error in state: 4.
+##
+## func -> KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN . body [ KWD_FN # ]
+##
+## The known suffix of the stack is as follows:
+## KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN
+##
+
+Expected '{' to start the function body.
+
+program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RPAREN
+##
+## Ends in an error in state: 5.
+##
+## body -> PUNCT_LBRACE . list(stmt) PUNCT_RBRACE [ KWD_FN # ]
+##
+## The known suffix of the stack is as follows:
+## PUNCT_LBRACE
+##
+
+Expected a statement or '}' after '{'.
+
+program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN PUNCT_RPAREN
+##
+## Ends in an error in state: 6.
+##
+## stmt -> KWD_RETURN . expr [ PUNCT_RBRACE KWD_RETURN ]
+##
+## The known suffix of the stack is as follows:
+## KWD_RETURN
+##
+
+Expected an expression after 'return'.
+
+program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN LIT_NEG_INT PUNCT_RPAREN
+##
+## Ends in an error in state: 9.
+##
+## list(stmt) -> stmt . list(stmt) [ PUNCT_RBRACE ]
+##
+## The known suffix of the stack is as follows:
+## stmt
+##
+
+Expected another statement or '}' to close the function body.
+
+program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RBRACE PUNCT_RPAREN
+##
+## Ends in an error in state: 17.
+##
+## list(decl) -> decl . list(decl) [ # ]
+##
+## The known suffix of the stack is as follows:
+## decl
+##
+
+Expected another function declaration or the end of the input.
+
diff --git a/lib/parser.mly b/lib/parser.mly
index c02b266..1d2b98d 100644
--- a/lib/parser.mly
+++ b/lib/parser.mly
@@ -2,7 +2,18 @@
 open Ast
 %}
 
+%token EOF
 %token KWD_FN
+%token KWD_RETURN
+%token PUNCT_LPAREN
+%token PUNCT_RPAREN
+%token PUNCT_RARROW
+%token PUNCT_LBRACE
+%token PUNCT_RBRACE
+%token PUNCT_COLON
+%token PUNCT_SEMI
+%token <int> LIT_NEG_INT
+%token <int> LIT_INT
 %token <string> IDENT
 
 %start program
@@ -10,10 +21,20 @@
 %%
 
 program:
-| decls=decl* { decls }
+| decls=decl* EOF { decls }
 
 decl:
 | func=func { DeclFunc func }
 
 func:
-| KWD_FN name=IDENT { { name = name } }
+| KWD_FN name=IDENT PUNCT_LPAREN PUNCT_RPAREN body=body { { name; body } }
+
+body:
+| PUNCT_LBRACE stmts=stmt* PUNCT_RBRACE { { stmts; ret=ExprUnit } }
+
+stmt:
+| KWD_RETURN expr=expr PUNCT_SEMI { StmtReturn expr }
+
+expr:
+| i=LIT_NEG_INT { ExprLit (LitNegInt i) }
+| i=LIT_INT { ExprLit (LitInt i) }
diff --git a/lib/typeck.ml b/lib/typeck.ml
new file mode 100644
index 0000000..aca0c72
--- /dev/null
+++ b/lib/typeck.ml
@@ -0,0 +1,3 @@
+(* Hindley-Milner type checking *)
+
+
diff --git a/lib/util.ml b/lib/util.ml
new file mode 100644
index 0000000..8e45ecd
--- /dev/null
+++ b/lib/util.ml
@@ -0,0 +1,10 @@
+(** Raised on a syntax error. Carries an optional [(line, column)]
+    position and a message. *)
+exception Syntax_error of ((int * int) option * string)
+
+(** [get_lexing_position lexbuf] is the [(line, column)] at which the
+    current lexeme of [lexbuf] starts. The column is the offset from the
+    beginning of the line plus one. *)
+let get_lexing_position lexbuf =
+  let { Lexing.pos_lnum; pos_bol; pos_cnum; _ } = Lexing.lexeme_start_p lexbuf in
+  (pos_lnum, pos_cnum - pos_bol + 1)
diff --git a/spec.md b/spec.md
index 5cb6f90..2e128ca 100644
--- a/spec.md
+++ b/spec.md
@@ -7,3 +7,5 @@ Types
 
 IR
 --
+
+