Parsing
This commit is contained in:
parent
5015468bd4
commit
e66b515826
10 changed files with 251 additions and 10 deletions
49
bin/main.ml
49
bin/main.ml
|
@ -1,6 +1,51 @@
|
|||
open E0
|
||||
open E0.Util
|
||||
|
||||
module I = Parser.MenhirInterpreter
|
||||
|
||||
(** [get_parse_error env] returns the diagnostic text for the parser state
    on top of [env]'s stack.

    The text is looked up in [E0.Parser_messages] using the number of the
    topmost state.  A generic message is returned when the stack is empty or
    when the lookup raises [Not_found]. *)
let get_parse_error env =
  match I.stack env with
  | lazy Nil -> "Invalid syntax"
  | lazy (Cons (I.Element (state, _, _, _), _)) ->
    (* Parenthesised so the [exception] arm belongs to the lookup only. *)
    (match E0.Parser_messages.message (I.number state) with
     | msg -> msg
     | exception Not_found ->
       "invalid syntax (no specific message for this error)")
|
||||
|
||||
(** [parse lexbuf checkpoint] drives Menhir's incremental parser from
    [checkpoint] until it accepts or fails, reading a token from [lexbuf]
    with [Lexer.f] each time the parser asks for input.

    Returns the accepted [Ast.program].

    @raise Syntax_error with the current lexing position and the text from
    [get_parse_error] when the parser enters error handling, or with no
    position when the parser rejects the input. *)
let rec parse lexbuf (checkpoint : Ast.program I.checkpoint) =
  match checkpoint with
  | I.Accepted program -> program
  | I.Rejected ->
    raise (Syntax_error (None, "invalid syntax (parser rejected the input)"))
  | I.HandlingError env ->
    let line, pos = Util.get_lexing_position lexbuf in
    let err = get_parse_error env in
    raise (Syntax_error (Some (line, pos), err))
  | I.Shifting _ | I.AboutToReduce _ ->
    (* Nothing to supply: let the parser take its next internal step. *)
    parse lexbuf (I.resume checkpoint)
  | I.InputNeeded _ ->
    (* Lex first; the token's positions are then read back from [lexbuf]. *)
    let token = Lexer.f lexbuf in
    let startp = lexbuf.Lexing.lex_start_p in
    let endp = lexbuf.Lexing.lex_curr_p in
    parse lexbuf (I.offer checkpoint (token, startp, endp))
|
||||
|
||||
(** [try_parse lexbuf] parses a whole program from [lexbuf], starting the
    incremental parser at the buffer's current position.

    Returns [Ok program] on success.  A [Util.Syntax_error] raised while
    parsing becomes [Error msg]; [msg] names the line and character when the
    exception carries a position.  No other exception is caught here. *)
let try_parse lexbuf =
  match parse lexbuf (Parser.Incremental.program lexbuf.Lexing.lex_curr_p) with
  | program -> Ok program
  | exception Util.Syntax_error (Some (line, column), err) ->
    Error
      (Printf.sprintf "Syntax error on line %d, character %d: %s" line column err)
  | exception Util.Syntax_error (None, err) ->
    Error (Printf.sprintf "Syntax error: %s" err)
|
||||
|
||||
let () = print_endline "Hello, World!"
|
||||
|
||||
(** Entry point: parse a program from standard input and print its AST
    (rendered by [Ast.show_program]) on standard output.

    When parsing fails, the reason is written to standard error and the
    process exits with status 1 so that callers can detect the failure. *)
let () =
  match Lexing.from_channel stdin |> try_parse with
  | Ok program -> Ast.show_program program |> print_endline
  | Error msg ->
    (* Trim so the report ends with exactly one newline whether or not [msg]
       already carries one. *)
    Printf.eprintf "Could not load program: %s\n" (String.trim msg);
    exit 1
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
fn main(argc: u32, argv: **u8) -> u8 {
|
||||
return 42
|
||||
fn main() {
|
||||
return 42;
|
||||
}
|
||||
|
|
37
lib/ast.ml
37
lib/ast.ml
|
@ -3,20 +3,53 @@ type op
|
|||
| OpSub
|
||||
| OpMul
|
||||
| OpDiv
|
||||
[@@deriving show]
|
||||
|
||||
type lit
|
||||
= LitInt of int64
|
||||
= LitInt of int
|
||||
| LitNegInt of int
|
||||
| LitFloat of float
|
||||
[@@deriving show]
|
||||
|
||||
type ty
|
||||
= TySizedInt
|
||||
| TyGenericInt
|
||||
| TyFunc of ty list * ty
|
||||
| TyPointer of ty
|
||||
| TyStruct of (string * ty) list
|
||||
[@@deriving show]
|
||||
|
||||
type expr
|
||||
= ExprLit of lit
|
||||
= ExprUnit
|
||||
| ExprLit of lit
|
||||
| ExprBin of expr * op * expr
|
||||
| ExprAnnot of expr * ty
|
||||
[@@deriving show]
|
||||
|
||||
type pat
|
||||
= PatName of string
|
||||
[@@deriving show]
|
||||
|
||||
type stmt
|
||||
= StmtLet of pat * expr
|
||||
| StmtReturn of expr
|
||||
[@@deriving show]
|
||||
|
||||
type block =
|
||||
{ stmts : stmt list
|
||||
; ret : expr
|
||||
}
|
||||
[@@deriving show]
|
||||
|
||||
type func =
|
||||
{ name : string
|
||||
; body : block
|
||||
}
|
||||
[@@deriving show]
|
||||
|
||||
type decl
|
||||
= DeclFunc of func
|
||||
[@@deriving show]
|
||||
|
||||
type program = decl list
|
||||
[@@deriving show]
|
||||
|
|
11
lib/dune
11
lib/dune
|
@ -1,7 +1,16 @@
|
|||
(library
|
||||
(name e0))
|
||||
(name e0)
|
||||
(libraries menhirLib)
|
||||
(preprocess (pps ppx_deriving.show ppx_deriving.ord)))
|
||||
|
||||
(ocamllex lexer)
|
||||
|
||||
(menhir
|
||||
(flags --table)
|
||||
(modules parser))
|
||||
|
||||
; https://baturin.org/blog/declarative-parse-error-reporting-with-menhir/
|
||||
(rule
|
||||
(targets parser_messages.ml)
|
||||
(deps parser.messages parser.mly)
|
||||
(action (with-stdout-to %{targets} (run menhir --compile-errors %{deps}))))
|
||||
|
|
|
@ -1,5 +1,18 @@
|
|||
{
open Parser

(* Raise [Util.Syntax_error] positioned at the start of the current lexeme. *)
let syntax_error lexbuf msg =
  raise (Util.Syntax_error (Some (Util.get_lexing_position lexbuf), msg))

(* Convert the text of an integer literal (optionally signed).  Overflow is
   reported as a syntax error instead of escaping as [Failure]. *)
let int_literal lexbuf text =
  match int_of_string_opt text with
  | Some n -> n
  | None ->
    syntax_error lexbuf
      (Printf.sprintf "integer literal %s is out of range" text)
}

(* [f lexbuf] returns the next token of the input.

   - Newlines and blanks are skipped; newlines also advance the line counter.
   - Keywords are listed before the identifier rule: ocamllex picks the
     longest match and, on a tie, the first rule, so "fn" and "return" must
     come first to be recognised as keywords.
   - The identifier rule uses [+]: a rule that can match the empty string
     would return a token without consuming any input.
   - [as] binds looser than concatenation, so in the negative-literal rule
     [i] is the whole lexeme including the sign and must not be negated
     again.  NOTE(review): this makes LIT_NEG_INT carry the negative value;
     confirm that consumers of [LitNegInt] expect that.
   - Any other character is reported as a syntax error. *)
rule f = parse
  | '\n' { Lexing.new_line lexbuf; f lexbuf }
  | [' ' '\t'] { f lexbuf }
  | "fn" { KWD_FN }
  | "return" { KWD_RETURN }
  | "(" { PUNCT_LPAREN }
  | ")" { PUNCT_RPAREN }
  | "->" { PUNCT_RARROW }
  | "{" { PUNCT_LBRACE }
  | "}" { PUNCT_RBRACE }
  | ":" { PUNCT_COLON }
  | ";" { PUNCT_SEMI }
  | '-' ['0'-'9']+ as i { LIT_NEG_INT (int_literal lexbuf i) }
  | ['0'-'9']+ as i { LIT_INT (int_literal lexbuf i) }
  | ['a'-'z' 'A'-'Z']+ as i { IDENT i }
  | eof { EOF }
  | _ as c
    { syntax_error lexbuf (Printf.sprintf "unexpected character %C" c) }
|
||||
|
|
108
lib/parser.messages
Normal file
108
lib/parser.messages
Normal file
|
@ -0,0 +1,108 @@
|
|||
program: PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 0.
|
||||
##
|
||||
## program' -> . program [ # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
##
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 1.
|
||||
##
|
||||
## func -> KWD_FN . IDENT PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## KWD_FN
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 2.
|
||||
##
|
||||
## func -> KWD_FN IDENT . PUNCT_LPAREN PUNCT_RPAREN body [ KWD_FN # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## KWD_FN IDENT
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RBRACE
|
||||
##
|
||||
## Ends in an error in state: 3.
|
||||
##
|
||||
## func -> KWD_FN IDENT PUNCT_LPAREN . PUNCT_RPAREN body [ KWD_FN # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## KWD_FN IDENT PUNCT_LPAREN
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 4.
|
||||
##
|
||||
## func -> KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN . body [ KWD_FN # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 5.
|
||||
##
|
||||
## body -> PUNCT_LBRACE . list(stmt) PUNCT_RBRACE [ KWD_FN # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## PUNCT_LBRACE
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 6.
|
||||
##
|
||||
## stmt -> KWD_RETURN . expr [ PUNCT_RBRACE KWD_RETURN ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## KWD_RETURN
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE KWD_RETURN LIT_NEG_INT PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 9.
|
||||
##
|
||||
## list(stmt) -> stmt . list(stmt) [ PUNCT_RBRACE ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## stmt
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
||||
program: KWD_FN IDENT PUNCT_LPAREN PUNCT_RPAREN PUNCT_LBRACE PUNCT_RBRACE PUNCT_RPAREN
|
||||
##
|
||||
## Ends in an error in state: 17.
|
||||
##
|
||||
## list(decl) -> decl . list(decl) [ # ]
|
||||
##
|
||||
## The known suffix of the stack is as follows:
|
||||
## decl
|
||||
##
|
||||
|
||||
<YOUR SYNTAX ERROR MESSAGE HERE>
|
||||
|
|
@ -2,7 +2,18 @@
|
|||
open Ast
|
||||
%}
|
||||
|
||||
%token EOF
|
||||
%token KWD_FN
|
||||
%token KWD_RETURN
|
||||
%token PUNCT_LPAREN
|
||||
%token PUNCT_RPAREN
|
||||
%token PUNCT_RARROW
|
||||
%token PUNCT_LBRACE
|
||||
%token PUNCT_RBRACE
|
||||
%token PUNCT_COLON
|
||||
%token PUNCT_SEMI
|
||||
%token <int> LIT_NEG_INT
|
||||
%token <int> LIT_INT
|
||||
%token <string> IDENT
|
||||
|
||||
%start <program> program
|
||||
|
@ -10,10 +21,20 @@
|
|||
%%
|
||||
|
||||
program:
|
||||
| decls=decl* { decls }
|
||||
| decls=decl* EOF { decls }
|
||||
|
||||
decl:
|
||||
| func=func { DeclFunc func }
|
||||
|
||||
func:
|
||||
| KWD_FN name=IDENT { { name = name } }
|
||||
| KWD_FN name=IDENT PUNCT_LPAREN PUNCT_RPAREN body=body { { name; body } }
|
||||
|
||||
body:
|
||||
| PUNCT_LBRACE stmts=stmt* PUNCT_RBRACE { { stmts; ret=ExprUnit } }
|
||||
|
||||
stmt:
|
||||
| KWD_RETURN expr=expr PUNCT_SEMI { StmtReturn expr }
|
||||
|
||||
expr:
|
||||
| i=LIT_NEG_INT { ExprLit (LitNegInt i) }
|
||||
| i=LIT_INT { ExprLit (LitInt i) }
|
||||
|
|
3
lib/typeck.ml
Normal file
3
lib/typeck.ml
Normal file
|
@ -0,0 +1,3 @@
|
|||
(* Hindley-Milner type checking *)
|
||||
|
||||
|
7
lib/util.ml
Normal file
7
lib/util.ml
Normal file
|
@ -0,0 +1,7 @@
|
|||
(** Raised when the input cannot be parsed.  Carries an optional
    [(line, column)] position and a message describing the problem. *)
exception Syntax_error of ((int * int) option * string)
|
||||
|
||||
(** [get_lexing_position lexbuf] returns [(line, column)] for the start of
    the lexeme most recently matched in [lexbuf].  The column is 1-based:
    it is the offset from the beginning of the line, plus one. *)
let get_lexing_position lexbuf =
  let { Lexing.pos_lnum; pos_bol; pos_cnum; _ } =
    Lexing.lexeme_start_p lexbuf
  in
  (pos_lnum, pos_cnum - pos_bol + 1)
|
2
spec.md
2
spec.md
|
@ -7,3 +7,5 @@ Types
|
|||
|
||||
IR
|
||||
--
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue