From 3bde699fbe3a397dc556eb4d29a811d9a043ab90 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Thu, 12 Jun 2014 09:08:38 -0700 Subject: [PATCH] feat(frontends/lean/parser): add parse_level Signed-off-by: Leonardo de Moura --- src/frontends/lean/builtin_cmds.cpp | 15 +++- src/frontends/lean/builtin_exprs.cpp | 15 ++++ src/frontends/lean/parse_table.cpp | 2 +- src/frontends/lean/parse_table.h | 4 +- src/frontends/lean/parser.cpp | 127 +++++++++++++++++++++++++++ src/frontends/lean/parser.h | 20 ++++- src/frontends/lean/token_table.cpp | 7 +- src/tests/frontends/lean/scanner.cpp | 8 +- tests/lean/run/t2.lean | 9 ++ tests/lua/token_table.lua | 4 +- 10 files changed, 198 insertions(+), 13 deletions(-) create mode 100644 tests/lean/run/t2.lean diff --git a/src/frontends/lean/builtin_cmds.cpp b/src/frontends/lean/builtin_cmds.cpp index 3647d5a9b..22a006670 100644 --- a/src/frontends/lean/builtin_cmds.cpp +++ b/src/frontends/lean/builtin_cmds.cpp @@ -9,6 +9,18 @@ Author: Leonardo de Moura namespace lean { static name g_raw("raw"); + +environment universe_cmd(parser & p) { + if (p.curr_is_identifier()) { + // TODO(Leo): take namespace and scopes into account + name n = p.get_name_val(); + p.next(); + return p.env().add_global_level(n); + } else { + throw parser_error("invalid universe declaration, identifier expected", p.cmd_pos()); + } +} + environment print_cmd(parser & p) { if (p.curr() == scanner::token_kind::String) { p.regular_stream() << p.get_str_val() << "\n"; @@ -25,7 +37,8 @@ environment print_cmd(parser & p) { cmd_table init_cmd_table() { cmd_table r; - add_cmd(r, cmd_info("print", "print a string", print_cmd)); + add_cmd(r, cmd_info("print", "print a string", print_cmd)); + add_cmd(r, cmd_info("universe", "declare a global universe level", universe_cmd)); return r; } diff --git a/src/frontends/lean/builtin_exprs.cpp b/src/frontends/lean/builtin_exprs.cpp index 944b73673..8352aa984 100644 --- a/src/frontends/lean/builtin_exprs.cpp +++ b/src/frontends/lean/builtin_exprs.cpp @@ -5,9 +5,23 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ #include "frontends/lean/builtin_exprs.h" +#include "frontends/lean/parser.h" namespace lean { namespace notation { +static name g_llevel_curly(".{"); +static name g_rcurly("}"); + +static expr parse_Type(parser & p, unsigned, expr const *) { + if (p.curr_is_token(g_llevel_curly)) { + p.next(); + level l = p.parse_level(); + p.check_token_next(g_rcurly, "invalid Type expression, '}' expected"); + return mk_sort(l); + } else { + return mk_sort(p.mk_new_level_param()); + } +} parse_table init_nud_table() { action Expr(mk_expr_action()); @@ -19,6 +33,7 @@ parse_table init_nud_table() { r.add({transition("(", Expr), transition(")", Skip)}, x0); r.add({transition("fun", Binders), transition(",", mk_scoped_expr_action(x0))}, x0); r.add({transition("Pi", Binders), transition(",", mk_scoped_expr_action(x0, 0, false))}, x0); + r.add({transition("Type", mk_ext_action(parse_Type))}, x0); return r; } diff --git a/src/frontends/lean/parse_table.cpp b/src/frontends/lean/parse_table.cpp index 5877b9699..e7af2fb60 100644 --- a/src/frontends/lean/parse_table.cpp +++ b/src/frontends/lean/parse_table.cpp @@ -145,7 +145,7 @@ action mk_exprs_action(name const & sep, expr const & rec, expr const & ini, boo action mk_scoped_expr_action(expr const & rec, unsigned rb, bool lambda) { return action(new scoped_expr_action_cell(rec, rb, lambda)); } -action mk_ext_parse_action(parse_fn const & fn) { return action(new ext_action_cell(fn)); } +action mk_ext_action(parse_fn const & fn) { return action(new ext_action_cell(fn)); } struct parse_table::cell { bool m_nud; diff --git a/src/frontends/lean/parse_table.h b/src/frontends/lean/parse_table.h index 413b3bd71..54a422840 100644 --- a/src/frontends/lean/parse_table.h +++ b/src/frontends/lean/parse_table.h @@ -61,7 +61,7 @@ public: friend action mk_binder_action(); friend action mk_binders_action(); friend action mk_scoped_expr_action(expr const & rec, unsigned rbp, bool lambda); - friend action mk_ext_parse_action(parse_fn const & fn); + friend action mk_ext_action(parse_fn const & fn); action_kind kind() const; unsigned rbp() const; @@ -81,7 +81,7 @@ action mk_exprs_action(name const & sep, expr const & rec, expr const & ini, boo action mk_binder_action(); action mk_binders_action(); action mk_scoped_expr_action(expr const & rec, unsigned rbp = 0, bool lambda = true); -action mk_proc_action(parse_fn const & fn); +action mk_ext_action(parse_fn const & fn); class transition { name m_token; diff --git a/src/frontends/lean/parser.cpp b/src/frontends/lean/parser.cpp index adf0e485b..0172b18c5 100644 --- a/src/frontends/lean/parser.cpp +++ b/src/frontends/lean/parser.cpp @@ -174,6 +174,15 @@ bool parser::curr_is_token(name const & tk) const { get_token_info().value() == tk; } +bool parser::curr_is_token_or_id(name const & tk) const { + if (curr() == scanner::token_kind::Keyword || curr() == scanner::token_kind::CommandKeyword) + return get_token_info().value() == tk; + else if (curr() == scanner::token_kind::Identifier) + return get_name_val() == tk; + else + return false; +} + void parser::check_token_next(name const & tk, char const * msg) { if (!curr_is_token(tk)) throw parser_error(msg, pos()); @@ -209,6 +218,124 @@ static name g_lcurly("{"); static name g_rcurly("}"); static name g_lbracket("["); static name g_rbracket("]"); +static name g_add("+"); +static name g_max("max"); +static name g_imax("imax"); +static name g_cup("\u2294"); +static unsigned g_level_add_prec = 10; +static unsigned g_level_cup_prec = 5; + +unsigned parser::parse_small_nat() { + auto p = pos(); + mpz val = get_num_val().get_numerator(); + next(); + lean_assert(val >= 0); + if (!val.is_unsigned_int()) + throw parser_error("invalid level expression, value does not fit in a machine integer", p); + return val.get_unsigned_int(); +} + +static level lift(level l, unsigned k) { + while (k > 0) { + k--; + l = mk_succ(l); + } + return l; +} + +unsigned parser::curr_level_lbp() const { + if (curr_is_token(g_cup)) + return g_level_cup_prec; + else if (curr_is_token(g_add)) + return g_level_add_prec; + else + return 0; +} + +level parser::parse_max_imax(bool is_max) { + auto p = pos(); + next(); + buffer lvls; + while (curr_is_identifier() || curr_is_numeral() || curr_is_token(g_lparen)) { + lvls.push_back(parse_level()); + } + if (lvls.size() < 2) + throw parser_error("invalid level expression, max must have at least two arguments", p); + unsigned i = lvls.size() - 1; + level r = lvls[i]; + while (i > 0) { + --i; + if (is_max) + r = mk_max(lvls[i], r); + else + r = mk_imax(lvls[i], r); + } + return r; +} + +level parser::parse_level_id() { + auto p = pos(); + name id = get_name_val(); + next(); + auto it = m_local_level_decls.find(id); + if (it != m_local_level_decls.end()) + return it->second.first; + if (m_env.is_global_level(id)) + return mk_global_univ(id); + throw parser_error(sstream() << "unknown level '" << id << "'", p); +} + +level parser::parse_level_nud() { + if (curr_is_token_or_id(g_max)) { + return parse_max_imax(true); + } else if (curr_is_token_or_id(g_imax)) { + return parse_max_imax(false); + } else if (curr_is_token(g_lparen)) { + next(); + level l = parse_level(); + check_token_next(g_rparen, "invalid level expression, ')' expected"); + return l; + } else if (curr_is_numeral()) { + unsigned k = parse_small_nat(); + return lift(level(), k); + } else if (curr_is_identifier()) { + return parse_level_id(); + } else { + throw parser_error("invalid level expression", pos()); + } +} + +level parser::parse_level_led(level left) { + auto p = pos(); + if (curr_is_token(g_cup)) { + next(); + level right = parse_level(g_level_cup_prec); + return mk_max(left, right); + } else if (curr_is_token(g_add)) { + next(); + if (curr_is_numeral()) { + unsigned k = parse_small_nat(); + return lift(left, k); + } else { + throw parser_error("invalid level expression, right hand side of '+' (aka universe lift operator) must be a numeral", p); + } + } else { + throw parser_error("invalid level expression", p); + } +} + +level parser::parse_level(unsigned rbp) { + level left = parse_level_nud(); + while (rbp < curr_level_lbp()) { + left = parse_level_led(left); + } + return left; +} + +level parser::mk_new_level_param() { + // TODO(Leo) + return level(); +} /** \brief Parse ID ':' expr, where the expression represents the type of the identifier. */ parameter parser::parse_binder_core(binder_info const & bi) { diff --git a/src/frontends/lean/parser.h b/src/frontends/lean/parser.h index 8f5e99e1f..cf9844076 100644 --- a/src/frontends/lean/parser.h +++ b/src/frontends/lean/parser.h @@ -44,7 +44,9 @@ struct interrupt_parser {}; class parser { typedef std::pair local_entry; + typedef std::pair local_level_entry; typedef scoped_map local_decls; + typedef scoped_map local_level_decls; environment m_env; io_state m_ios; @@ -56,6 +58,7 @@ class parser { scanner m_scanner; scanner::token_kind m_curr; local_decls m_local_decls; + local_level_decls m_local_level_decls; pos_info m_last_cmd_pos; pos_info m_last_script_pos; unsigned m_next_tag_idx; @@ -94,10 +97,18 @@ class parser { cmd_table const & cmds() const { return cfg().m_cmds; } parse_table const & nud() const { return cfg().m_nud; } parse_table const & led() const { return cfg().m_led; } + /** \brief Return true if the current token is a keyword named \c tk or an identifier named \c tk */ + bool curr_is_token_or_id(name const & tk) const; void add_local_entry(name const & n, expr const & e); void add_local_decl(expr const & t); + unsigned curr_level_lbp() const; + level parse_max_imax(bool is_max); + level parse_level_id(); + level parse_level_nud(); + level parse_level_led(level left); + void parse_command(); void parse_script(bool as_expr = false); bool parse_commands(); @@ -126,6 +137,10 @@ public: script_state * ss() const { return m_ss; } void parse_names(buffer> & result); + unsigned parse_small_nat(); + + level parse_level(unsigned rbp = 0); + level mk_new_level_param(); parameter parse_binder(); void parse_binders(buffer & r); @@ -145,6 +160,7 @@ public: expr rec_save_pos(expr const & e, pos_info p); pos_info pos_of(expr const & e, pos_info default_pos); pos_info pos_of(expr const & e) { return pos_of(e, pos()); } + pos_info cmd_pos() const { return m_last_cmd_pos; } /** \brief Read the next token. */ void scan() { m_curr = m_scanner.scan(m_env); } @@ -152,9 +168,11 @@ public: scanner::token_kind curr() const { return m_curr; } /** \brief Return true iff the current token is an identifier */ bool curr_is_identifier() const { return curr() == scanner::token_kind::Identifier; } + /** \brief Return true iff the current token is a numeral */ + bool curr_is_numeral() const { return curr() == scanner::token_kind::Numeral; } /** \brief Read the next token if the current one is not End-of-file. */ void next() { if (m_curr != scanner::token_kind::Eof) scan(); } - /** \brief Return true iff the current token is equal to \c tk */ + /** \brief Return true iff the current token is a keyword (or command keyword) named \c tk */ bool curr_is_token(name const & tk) const; /** \brief Check current token, and move to next characther, throw exception if current token is not \c tk. */ void check_token_next(name const & tk, char const * msg); diff --git a/src/frontends/lean/token_table.cpp b/src/frontends/lean/token_table.cpp index 3597fdb46..ad9d4aeee 100644 --- a/src/frontends/lean/token_table.cpp +++ b/src/frontends/lean/token_table.cpp @@ -39,16 +39,19 @@ static char const * g_lambda_unicode = "\u03BB"; static char const * g_pi_unicode = "\u03A0"; static char const * g_forall_unicode = "\u2200"; static char const * g_arrow_unicode = "\u2192"; +static char const * g_cup = "\u2294"; + token_table init_token_table() { token_table t; char const * builtin[] = {"fun", "Pi", "let", "in", "have", "show", "by", "from", "(", ")", "{", "}", "[", "]", ".{", "Type", "...", ",", ".", ":", "calc", ":=", "--", "(*", "(--", "->", - "proof", "qed", "private", "raw", "Bool", nullptr}; + "proof", "qed", "private", "raw", "Bool", "+", g_cup, nullptr}; char const * commands[] = {"theorem", "axiom", "variable", "definition", "evaluate", "check", "print", "variables", "end", "namespace", "section", "import", - "abbreviation", "inductive", "record", "structure", "module", nullptr}; + "abbreviation", "inductive", "record", "structure", "module", "universe", + nullptr}; std::pair aliases[] = {{g_lambda_unicode, "fun"}, {"forall", "Pi"}, {g_forall_unicode, "Pi"}, {g_pi_unicode, "Pi"}, diff --git a/src/tests/frontends/lean/scanner.cpp b/src/tests/frontends/lean/scanner.cpp index 2b4a6027a..3afe06f24 100644 --- a/src/tests/frontends/lean/scanner.cpp +++ b/src/tests/frontends/lean/scanner.cpp @@ -147,12 +147,12 @@ static void tst2() { // check_name("x.10", name(name("x"), 10)); check_name("x.bla", name({"x", "bla"})); - scan_error("+++"); + scan_error("***"); environment env; token_table s = mk_default_token_table(); - s = add_token(s, "+++", "tplus"); + s = add_token(s, "***", "tplus"); env = update_token_table(env, s); - check_keyword("+++", "tplus", env); + check_keyword("***", "tplus", env); s = add_token(s, "+", "plus"); env = update_token_table(env, s); check("x+y", {tk::Identifier, tk::Keyword, tk::Identifier}, env); @@ -188,7 +188,7 @@ static void tst3() { scan_error("\"\\"); scan_error("\"\\a"); scan("\"\naaa\""); - scan_error("foo.+ 01"); + scan_error("foo.* 01"); scan("10.0."); scan("{ } . forall exists let in \u2200 := _"); } diff --git a/tests/lean/run/t2.lean b/tests/lean/run/t2.lean new file mode 100644 index 000000000..a47f1120d --- /dev/null +++ b/tests/lean/run/t2.lean @@ -0,0 +1,9 @@ +universe l +universe u +print raw Type +print raw Type.{1} +print raw Type.{2} +print raw Type.{l+1} +print raw Type.{max l u 1} +print raw Type.{imax l+1 u 1} +print raw Type.{imax l+1 l u} \ No newline at end of file diff --git a/tests/lua/token_table.lua b/tests/lua/token_table.lua index 890dbb9a5..6b6b2d251 100644 --- a/tests/lua/token_table.lua +++ b/tests/lua/token_table.lua @@ -22,8 +22,8 @@ s = s:add_command_token("tast", "tst2") s = s:add_command_token("tests", "tst3") s = s:add_command_token("fests", "tst4") s = s:add_command_token("tes", "tst5") -s = s:add_token("++", "++", 65) -s = s:add_token("++-", "plusminus") +s = s:add_token("**", "**", 65) +s = s:add_token("**-", "ttimeminus") assert(token_table_size(s) == 7) display_token_table(s)