This commit is contained in:
Michael Zhang 2022-03-08 02:52:20 -06:00
parent 5015468bd4
commit e66b515826
Signed by untrusted user who does not match committer: michael
GPG key ID: BDA47A31A3C8EE6B
10 changed files with 251 additions and 10 deletions

View file

@ -1,6 +1,51 @@
open E0
open E0.Util
module I = Parser.MenhirInterpreter
let get_parse_error env =
match I.stack env with
| lazy Nil -> "Invalid syntax"
| lazy (Cons (I.Element (state, _, _, _), _)) ->
try (E0.Parser_messages.message (I.number state)) with
| Not_found -> "invalid syntax (no specific message for this eror)"
let rec parse lexbuf (checkpoint : Ast.program I.checkpoint) =
match checkpoint with
| I.InputNeeded _env ->
let token = Lexer.f lexbuf in
let startp = lexbuf.lex_start_p
and endp = lexbuf.lex_curr_p in
let checkpoint = I.offer checkpoint (token, startp, endp) in
parse lexbuf checkpoint
| I.Shifting _
| I.AboutToReduce _ ->
let checkpoint = I.resume checkpoint in
parse lexbuf checkpoint
| I.HandlingError _env ->
let line, pos = Util.get_lexing_position lexbuf in
let err = get_parse_error _env in
raise (Syntax_error (Some (line, pos), err))
| I.Accepted v -> v
| I.Rejected ->
raise (Syntax_error (None, "invalid syntax (parser rejected the input)"))
let try_parse lexbuf =
try
let program = parse lexbuf (Parser.Incremental.program lexbuf.lex_curr_p) in
Ok program
with
| Util.Syntax_error (pos, err) ->
begin
match pos with
| Some (line, pos) ->
Error (Printf.sprintf "Syntax error on line %d, character %d: %s" line pos err)
| None -> Error (Printf.sprintf "Syntax error: %s" err)
end
let () = print_endline "Hello, World!"
let () =
Lexing.from_channel stdin |> Parser.program Lexer.f
let p = Lexing.from_channel stdin |> try_parse in
match p with
| Error msg -> Printf.eprintf "Could not load program: %s" msg
| Ok p -> Ast.show_program p |> print_endline

View file

@ -1,3 +1,3 @@
fn main(argc: u32, argv: **u8) -> u8 {
return 42
fn main() {
return 42;
}

View file

@ -3,20 +3,53 @@ type op
| OpSub
| OpMul
| OpDiv
[@@deriving show]
type lit
= LitInt of int64
= LitInt of int
| LitNegInt of int
| LitFloat of float
[@@deriving show]
type ty
= TySizedInt
| TyGenericInt
| TyFunc of ty list * ty
| TyPointer of ty
| TyStruct of (string * ty) list
[@@deriving show]
type expr
= ExprLit of lit
= ExprUnit
| ExprLit of lit
| ExprBin of expr * op * expr
| ExprAnnot of expr * ty
[@@deriving show]
type pat
= PatName of string
[@@deriving show]
type stmt
= StmtLet of pat * expr
| StmtReturn of expr
[@@deriving show]
type block =
{ stmts : stmt list
; ret : expr
}
[@@deriving show]
type func =
{ name : string
; body : block
}
[@@deriving show]
type decl
= DeclFunc of func
[@@deriving show]
type program = decl list
[@@deriving show]

View file

@ -1,7 +1,16 @@
(library
(name e0))
(name e0)
(libraries menhirLib)
(preprocess (pps ppx_deriving.show ppx_deriving.ord)))
(ocamllex lexer)
(menhir
(flags --table)
(modules parser))
; https://baturin.org/blog/declarative-parse-error-reporting-with-menhir/
(rule
(targets parser_messages.ml)
(deps parser.messages parser.mly)
(action (with-stdout-to %{targets} (run menhir --compile-errors %{deps}))))

View file

@ -1,5 +1,18 @@
{ open Parser }
rule f = parse
| '\n' { Lexing.new_line lexbuf; f lexbuf }
| [' ' '\t'] { f lexbuf }
| "fn" { KWD_FN }
| ['a'-'z']* as i { IDENT i }
| "return" { KWD_RETURN }
| "(" { PUNCT_LPAREN }
| ")" { PUNCT_RPAREN }
| "->" { PUNCT_RARROW }
| "{" { PUNCT_LBRACE }
| "}" { PUNCT_RBRACE }
| ":" { PUNCT_COLON }
| ";" { PUNCT_SEMI }
| "-" ['0'-'9']+ as i { LIT_NEG_INT (- (int_of_string i)) }
| ['0'-'9']+ as i { LIT_INT (int_of_string i) }
| ['a'-'z' 'A'-'Z']* as i { IDENT i }
| eof { EOF }

108
lib/parser.messages Normal file
View file

@ -0,0 +1,108 @@
program: PUNCT_RPAREN
##
## Ends in an error in state: 0.
##
## program' -> . program [ # ]
##
## The known suffix of the stack is as follows:
##
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN PUNCT_RPAREN
##
## Ends in an error in state: 1.
##
## func -> KWD_FN . IDENT PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
##
## The known suffix of the stack is as follows:
## KWD_FN
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_RPAREN
##
## Ends in an error in state: 2.
##
## func -> KWD_FN IDENT . PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
##
## The known suffix of the stack is as follows:
## KWD_FN IDENT
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RBRACE
##
## Ends in an error in state: 3.
##
## func -> KWD_FN IDENT PUNCT_LPAREN . PUNCT_RPAREN body [ KWD_FN # ]
##
## The known suffix of the stack is as follows:
## KWD_FN IDENT PUNCT_LPAREN
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_RPAREN
##
## Ends in an error in state: 4.
##
## func -> KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN . body [ KWD_FN # ]
##
## The known suffix of the stack is as follows:
## KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RPAREN
##
## Ends in an error in state: 5.
##
## body -> PUNCT_LBRACE . list(stmt) PUNCT_RBRACE [ KWD_FN # ]
##
## The known suffix of the stack is as follows:
## PUNCT_LBRACE
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN PUNCT_RPAREN
##
## Ends in an error in state: 6.
##
## stmt -> KWD_RETURN . expr [ PUNCT_RBRACE KWD_RETURN ]
##
## The known suffix of the stack is as follows:
## KWD_RETURN
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN LIT_NEG_INT PUNCT_RPAREN
##
## Ends in an error in state: 9.
##
## list(stmt) -> stmt . list(stmt) [ PUNCT_RBRACE ]
##
## The known suffix of the stack is as follows:
## stmt
##
<YOUR SYNTAX ERROR MESSAGE HERE>
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RBRACE PUNCT_RPAREN
##
## Ends in an error in state: 17.
##
## list(decl) -> decl . list(decl) [ # ]
##
## The known suffix of the stack is as follows:
## decl
##
<YOUR SYNTAX ERROR MESSAGE HERE>

View file

@ -2,7 +2,18 @@
open Ast
%}
%token EOF
%token KWD_FN
%token KWD_RETURN
%token PUNCT_LPAREN
%token PUNCT_RPAREN
%token PUNCT_RARROW
%token PUNCT_LBRACE
%token PUNCT_RBRACE
%token PUNCT_COLON
%token PUNCT_SEMI
%token <int> LIT_NEG_INT
%token <int> LIT_INT
%token <string> IDENT
%start <program> program
@ -10,10 +21,20 @@
%%
program:
| decls=decl* { decls }
| decls=decl* EOF { decls }
decl:
| func=func { DeclFunc func }
func:
| KWD_FN name=IDENT { { name = name } }
| KWD_FN name=IDENT PUNCT_LPAREN PUNCT_RPAREN body=body { { name; body } }
body:
| PUNCT_LBRACE stmts=stmt* PUNCT_RBRACE { { stmts; ret=ExprUnit } }
stmt:
| KWD_RETURN expr=expr PUNCT_SEMI { StmtReturn expr }
expr:
| i=LIT_NEG_INT { ExprLit (LitNegInt i) }
| i=LIT_INT { ExprLit (LitInt i) }

3
lib/typeck.ml Normal file
View file

@ -0,0 +1,3 @@
(* Hindley milner type checking *)

7
lib/util.ml Normal file
View file

@ -0,0 +1,7 @@
exception Syntax_error of ((int * int) option * string)
let get_lexing_position lexbuf =
let p = Lexing.lexeme_start_p lexbuf in
let line_number = p.Lexing.pos_lnum in
let column = p.Lexing.pos_cnum - p.Lexing.pos_bol + 1 in
(line_number, column)

View file

@ -7,3 +7,5 @@ Types
IR
--