Parsing
This commit is contained in:
parent
5015468bd4
commit
e66b515826
10 changed files with 251 additions and 10 deletions
49
bin/main.ml
49
bin/main.ml
|
@ -1,6 +1,51 @@
|
||||||
open E0
|
open E0
|
||||||
|
open E0.Util
|
||||||
|
|
||||||
|
module I = Parser.MenhirInterpreter
|
||||||
|
|
||||||
|
let get_parse_error env =
|
||||||
|
match I.stack env with
|
||||||
|
| lazy Nil -> "Invalid syntax"
|
||||||
|
| lazy (Cons (I.Element (state, _, _, _), _)) ->
|
||||||
|
try (E0.Parser_messages.message (I.number state)) with
|
||||||
|
| Not_found -> "invalid syntax (no specific message for this eror)"
|
||||||
|
|
||||||
|
let rec parse lexbuf (checkpoint : Ast.program I.checkpoint) =
|
||||||
|
match checkpoint with
|
||||||
|
| I.InputNeeded _env ->
|
||||||
|
let token = Lexer.f lexbuf in
|
||||||
|
let startp = lexbuf.lex_start_p
|
||||||
|
and endp = lexbuf.lex_curr_p in
|
||||||
|
let checkpoint = I.offer checkpoint (token, startp, endp) in
|
||||||
|
parse lexbuf checkpoint
|
||||||
|
| I.Shifting _
|
||||||
|
| I.AboutToReduce _ ->
|
||||||
|
let checkpoint = I.resume checkpoint in
|
||||||
|
parse lexbuf checkpoint
|
||||||
|
| I.HandlingError _env ->
|
||||||
|
let line, pos = Util.get_lexing_position lexbuf in
|
||||||
|
let err = get_parse_error _env in
|
||||||
|
raise (Syntax_error (Some (line, pos), err))
|
||||||
|
| I.Accepted v -> v
|
||||||
|
| I.Rejected ->
|
||||||
|
raise (Syntax_error (None, "invalid syntax (parser rejected the input)"))
|
||||||
|
|
||||||
|
let try_parse lexbuf =
|
||||||
|
try
|
||||||
|
let program = parse lexbuf (Parser.Incremental.program lexbuf.lex_curr_p) in
|
||||||
|
Ok program
|
||||||
|
with
|
||||||
|
| Util.Syntax_error (pos, err) ->
|
||||||
|
begin
|
||||||
|
match pos with
|
||||||
|
| Some (line, pos) ->
|
||||||
|
Error (Printf.sprintf "Syntax error on line %d, character %d: %s" line pos err)
|
||||||
|
| None -> Error (Printf.sprintf "Syntax error: %s" err)
|
||||||
|
end
|
||||||
|
|
||||||
let () = print_endline "Hello, World!"
|
|
||||||
|
|
||||||
let () =
|
let () =
|
||||||
Lexing.from_channel stdin |> Parser.program Lexer.f
|
let p = Lexing.from_channel stdin |> try_parse in
|
||||||
|
match p with
|
||||||
|
| Error msg -> Printf.eprintf "Could not load program: %s" msg
|
||||||
|
| Ok p -> Ast.show_program p |> print_endline
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
fn main(argc: u32, argv: **u8) -> u8 {
|
fn main() {
|
||||||
return 42
|
return 42;
|
||||||
}
|
}
|
||||||
|
|
37
lib/ast.ml
37
lib/ast.ml
|
@ -3,20 +3,53 @@ type op
|
||||||
| OpSub
|
| OpSub
|
||||||
| OpMul
|
| OpMul
|
||||||
| OpDiv
|
| OpDiv
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
type lit
|
type lit
|
||||||
= LitInt of int64
|
= LitInt of int
|
||||||
|
| LitNegInt of int
|
||||||
| LitFloat of float
|
| LitFloat of float
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
|
type ty
|
||||||
|
= TySizedInt
|
||||||
|
| TyGenericInt
|
||||||
|
| TyFunc of ty list * ty
|
||||||
|
| TyPointer of ty
|
||||||
|
| TyStruct of (string * ty) list
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
type expr
|
type expr
|
||||||
= ExprLit of lit
|
= ExprUnit
|
||||||
|
| ExprLit of lit
|
||||||
| ExprBin of expr * op * expr
|
| ExprBin of expr * op * expr
|
||||||
|
| ExprAnnot of expr * ty
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
|
type pat
|
||||||
|
= PatName of string
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
|
type stmt
|
||||||
|
= StmtLet of pat * expr
|
||||||
|
| StmtReturn of expr
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
|
type block =
|
||||||
|
{ stmts : stmt list
|
||||||
|
; ret : expr
|
||||||
|
}
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
type func =
|
type func =
|
||||||
{ name : string
|
{ name : string
|
||||||
|
; body : block
|
||||||
}
|
}
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
type decl
|
type decl
|
||||||
= DeclFunc of func
|
= DeclFunc of func
|
||||||
|
[@@deriving show]
|
||||||
|
|
||||||
type program = decl list
|
type program = decl list
|
||||||
|
[@@deriving show]
|
||||||
|
|
11
lib/dune
11
lib/dune
|
@ -1,7 +1,16 @@
|
||||||
(library
|
(library
|
||||||
(name e0))
|
(name e0)
|
||||||
|
(libraries menhirLib)
|
||||||
|
(preprocess (pps ppx_deriving.show ppx_deriving.ord)))
|
||||||
|
|
||||||
(ocamllex lexer)
|
(ocamllex lexer)
|
||||||
|
|
||||||
(menhir
|
(menhir
|
||||||
|
(flags --table)
|
||||||
(modules parser))
|
(modules parser))
|
||||||
|
|
||||||
|
; https://baturin.org/blog/declarative-parse-error-reporting-with-menhir/
|
||||||
|
(rule
|
||||||
|
(targets parser_messages.ml)
|
||||||
|
(deps parser.messages parser.mly)
|
||||||
|
(action (with-stdout-to %{targets} (run menhir --compile-errors %{deps}))))
|
||||||
|
|
|
@ -1,5 +1,18 @@
|
||||||
{ open Parser }
|
{ open Parser }
|
||||||
|
|
||||||
rule f = parse
|
rule f = parse
|
||||||
|
| '\n' { Lexing.new_line lexbuf; f lexbuf }
|
||||||
|
| [' ' '\t'] { f lexbuf }
|
||||||
| "fn" { KWD_FN }
|
| "fn" { KWD_FN }
|
||||||
| ['a'-'z']* as i { IDENT i }
|
| "return" { KWD_RETURN }
|
||||||
|
| "(" { PUNCT_LPAREN }
|
||||||
|
| ")" { PUNCT_RPAREN }
|
||||||
|
| "->" { PUNCT_RARROW }
|
||||||
|
| "{" { PUNCT_LBRACE }
|
||||||
|
| "}" { PUNCT_RBRACE }
|
||||||
|
| ":" { PUNCT_COLON }
|
||||||
|
| ";" { PUNCT_SEMI }
|
||||||
|
| "-" ['0'-'9']+ as i { LIT_NEG_INT (- (int_of_string i)) }
|
||||||
|
| ['0'-'9']+ as i { LIT_INT (int_of_string i) }
|
||||||
|
| ['a'-'z' 'A'-'Z']* as i { IDENT i }
|
||||||
|
| eof { EOF }
|
||||||
|
|
108
lib/parser.messages
Normal file
108
lib/parser.messages
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
program: PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 0.
|
||||||
|
##
|
||||||
|
## program' -> . program [ # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
##
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 1.
|
||||||
|
##
|
||||||
|
## func -> KWD_FN . IDENT PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## KWD_FN
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 2.
|
||||||
|
##
|
||||||
|
## func -> KWD_FN IDENT . PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## KWD_FN IDENT
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RBRACE
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 3.
|
||||||
|
##
|
||||||
|
## func -> KWD_FN IDENT PUNCT_LPAREN . PUNCT_RPAREN body [ KWD_FN # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## KWD_FN IDENT PUNCT_LPAREN
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 4.
|
||||||
|
##
|
||||||
|
## func -> KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN . body [ KWD_FN # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 5.
|
||||||
|
##
|
||||||
|
## body -> PUNCT_LBRACE . list(stmt) PUNCT_RBRACE [ KWD_FN # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## PUNCT_LBRACE
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 6.
|
||||||
|
##
|
||||||
|
## stmt -> KWD_RETURN . expr [ PUNCT_RBRACE KWD_RETURN ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## KWD_RETURN
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN LIT_NEG_INT PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 9.
|
||||||
|
##
|
||||||
|
## list(stmt) -> stmt . list(stmt) [ PUNCT_RBRACE ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## stmt
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
||||||
|
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RBRACE PUNCT_RPAREN
|
||||||
|
##
|
||||||
|
## Ends in an error in state: 17.
|
||||||
|
##
|
||||||
|
## list(decl) -> decl . list(decl) [ # ]
|
||||||
|
##
|
||||||
|
## The known suffix of the stack is as follows:
|
||||||
|
## decl
|
||||||
|
##
|
||||||
|
|
||||||
|
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||||
|
|
|
@ -2,7 +2,18 @@
|
||||||
open Ast
|
open Ast
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
%token EOF
|
||||||
%token KWD_FN
|
%token KWD_FN
|
||||||
|
%token KWD_RETURN
|
||||||
|
%token PUNCT_LPAREN
|
||||||
|
%token PUNCT_RPAREN
|
||||||
|
%token PUNCT_RARROW
|
||||||
|
%token PUNCT_LBRACE
|
||||||
|
%token PUNCT_RBRACE
|
||||||
|
%token PUNCT_COLON
|
||||||
|
%token PUNCT_SEMI
|
||||||
|
%token <int> LIT_NEG_INT
|
||||||
|
%token <int> LIT_INT
|
||||||
%token <string> IDENT
|
%token <string> IDENT
|
||||||
|
|
||||||
%start <program> program
|
%start <program> program
|
||||||
|
@ -10,10 +21,20 @@
|
||||||
%%
|
%%
|
||||||
|
|
||||||
program:
|
program:
|
||||||
| decls=decl* { decls }
|
| decls=decl* EOF { decls }
|
||||||
|
|
||||||
decl:
|
decl:
|
||||||
| func=func { DeclFunc func }
|
| func=func { DeclFunc func }
|
||||||
|
|
||||||
func:
|
func:
|
||||||
| KWD_FN name=IDENT { { name = name } }
|
| KWD_FN name=IDENT PUNCT_LPAREN PUNCT_RPAREN body=body { { name; body } }
|
||||||
|
|
||||||
|
body:
|
||||||
|
| PUNCT_LBRACE stmts=stmt* PUNCT_RBRACE { { stmts; ret=ExprUnit } }
|
||||||
|
|
||||||
|
stmt:
|
||||||
|
| KWD_RETURN expr=expr PUNCT_SEMI { StmtReturn expr }
|
||||||
|
|
||||||
|
expr:
|
||||||
|
| i=LIT_NEG_INT { ExprLit (LitNegInt i) }
|
||||||
|
| i=LIT_INT { ExprLit (LitInt i) }
|
||||||
|
|
3
lib/typeck.ml
Normal file
3
lib/typeck.ml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
(* Hindley milner type checking *)
|
||||||
|
|
||||||
|
|
7
lib/util.ml
Normal file
7
lib/util.ml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
exception Syntax_error of ((int * int) option * string)
|
||||||
|
|
||||||
|
let get_lexing_position lexbuf =
|
||||||
|
let p = Lexing.lexeme_start_p lexbuf in
|
||||||
|
let line_number = p.Lexing.pos_lnum in
|
||||||
|
let column = p.Lexing.pos_cnum - p.Lexing.pos_bol + 1 in
|
||||||
|
(line_number, column)
|
2
spec.md
2
spec.md
|
@ -7,3 +7,5 @@ Types
|
||||||
|
|
||||||
IR
|
IR
|
||||||
--
|
--
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue