From 5a008717a4aa38ed98335d4ff8664fdc0473cbb4 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Wed, 11 Jun 2014 20:56:10 -0700 Subject: [PATCH] feat(frontends/lean/parser): add parse_notation Signed-off-by: Leonardo de Moura --- src/frontends/lean/CMakeLists.txt | 3 +- src/frontends/lean/builtin_cmds.cpp | 8 +- src/frontends/lean/builtin_exprs.cpp | 45 ++++++++ src/frontends/lean/builtin_exprs.h | 12 +++ src/frontends/lean/parse_table.h | 3 + src/frontends/lean/parser.cpp | 152 ++++++++++++++++++++++----- src/frontends/lean/parser.h | 9 +- src/frontends/lean/parser_config.cpp | 5 +- src/frontends/lean/token_table.cpp | 4 +- src/kernel/abstract.h | 4 +- src/tests/frontends/lean/scanner.cpp | 4 +- 11 files changed, 213 insertions(+), 36 deletions(-) create mode 100644 src/frontends/lean/builtin_exprs.cpp create mode 100644 src/frontends/lean/builtin_exprs.h diff --git a/src/frontends/lean/CMakeLists.txt b/src/frontends/lean/CMakeLists.txt index c9f1ddc94..eecc28636 100644 --- a/src/frontends/lean/CMakeLists.txt +++ b/src/frontends/lean/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(lean_frontend register_module.cpp token_table.cpp scanner.cpp parse_table.cpp parser_config.cpp parser.cpp -parser_pos_provider.cpp builtin_cmds.cpp builtin_tactic_cmds.cpp) +parser_pos_provider.cpp builtin_cmds.cpp builtin_tactic_cmds.cpp +builtin_exprs.cpp) target_link_libraries(lean_frontend ${LEAN_LIBS}) diff --git a/src/frontends/lean/builtin_cmds.cpp b/src/frontends/lean/builtin_cmds.cpp index 75d4acf62..3647d5a9b 100644 --- a/src/frontends/lean/builtin_cmds.cpp +++ b/src/frontends/lean/builtin_cmds.cpp @@ -8,10 +8,15 @@ Author: Leonardo de Moura #include "frontends/lean/parser.h" namespace lean { +static name g_raw("raw"); environment print_cmd(parser & p) { if (p.curr() == scanner::token_kind::String) { - regular(p.env(), p.ios()) << p.get_str_val() << "\n"; + p.regular_stream() << p.get_str_val() << "\n"; p.next(); + } else if (p.curr_is_token(g_raw)) { + p.next(); + expr e = p.parse_expr(); + p.regular_stream() << e << "\n"; } else { throw parser_error("invalid print command", p.pos()); } @@ -31,4 +36,3 @@ cmd_table get_builtin_cmds() { return *r; } } - diff --git a/src/frontends/lean/builtin_exprs.cpp b/src/frontends/lean/builtin_exprs.cpp new file mode 100644 index 000000000..944b73673 --- /dev/null +++ b/src/frontends/lean/builtin_exprs.cpp @@ -0,0 +1,45 @@ +/* +Copyright (c) 2014 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura +*/ +#include "frontends/lean/builtin_exprs.h" + +namespace lean { +namespace notation { + +parse_table init_nud_table() { + action Expr(mk_expr_action()); + action Skip(mk_skip_action()); + action Binders(mk_binders_action()); + expr x0 = mk_var(0); + parse_table r; + r.add({transition("Bool", Skip)}, Bool); + r.add({transition("(", Expr), transition(")", Skip)}, x0); + r.add({transition("fun", Binders), transition(",", mk_scoped_expr_action(x0))}, x0); + r.add({transition("Pi", Binders), transition(",", mk_scoped_expr_action(x0, 0, false))}, x0); + return r; +} + +parse_table init_led_table() { + parse_table r; + // TODO(Leo) + return r; +} +} + +parse_table get_builtin_nud_table() { + static optional r; + if (!r) + r = notation::init_nud_table(); + return *r; +} + +parse_table get_builtin_led_table() { + static optional r; + if (!r) + r = notation::init_led_table(); + return *r; +} +} diff --git a/src/frontends/lean/builtin_exprs.h b/src/frontends/lean/builtin_exprs.h new file mode 100644 index 000000000..abc2adfa9 --- /dev/null +++ b/src/frontends/lean/builtin_exprs.h @@ -0,0 +1,12 @@ +/* +Copyright (c) 2014 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura +*/ +#pragma once +#include "frontends/lean/parse_table.h" +namespace lean { +parse_table get_builtin_nud_table(); +parse_table get_builtin_led_table(); +} diff --git a/src/frontends/lean/parse_table.h b/src/frontends/lean/parse_table.h index d729b9d8b..413b3bd71 100644 --- a/src/frontends/lean/parse_table.h +++ b/src/frontends/lean/parse_table.h @@ -113,6 +113,9 @@ public: bool is_nud() const; parse_table add(unsigned num, transition const * ts, expr const & a, bool overload) const; + parse_table add(std::initializer_list const & ts, expr const & a) const { + return add(ts.size(), ts.begin(), a, true); + } parse_table merge(parse_table const & s, bool overload) const; optional> find(name const & tk) const; list const & is_accepting() const; diff --git a/src/frontends/lean/parser.cpp b/src/frontends/lean/parser.cpp index 0613c04f4..adf0e485b 100644 --- a/src/frontends/lean/parser.cpp +++ b/src/frontends/lean/parser.cpp @@ -12,7 +12,7 @@ Author: Leonardo de Moura #include "util/sstream.h" #include "kernel/for_each_fn.h" #include "kernel/abstract.h" -#include "library/io_state_stream.h" +#include "kernel/instantiate.h" #include "library/parser_nested_exception.h" #include "library/aliases.h" #include "library/private.h" @@ -56,16 +56,16 @@ void parser::updt_options() { } void parser::display_error_pos(unsigned line, unsigned pos) { - regular(m_env, m_ios) << get_stream_name() << ":" << line << ":"; + regular_stream() << get_stream_name() << ":" << line << ":"; if (pos != static_cast(-1)) - regular(m_env, m_ios) << pos << ":"; - regular(m_env, m_ios) << " error:"; + regular_stream() << pos << ":"; + regular_stream() << " error:"; } void parser::display_error_pos(pos_info p) { display_error_pos(p.first, p.second); } void parser::display_error(char const * msg, unsigned line, unsigned pos) { display_error_pos(line, pos); - regular(m_env, m_ios) << " " << msg << endl; + regular_stream() << " " << msg << endl; } void parser::display_error(char const * msg, pos_info p) { @@ -74,7 +74,7 @@ void parser::display_error(char const * msg, pos_info p) { void parser::display_error(exception const & ex) { parser_pos_provider pos_provider(m_pos_table, get_stream_name(), m_last_cmd_pos); - ::lean::display_error(regular(m_env, m_ios), &pos_provider, ex); + ::lean::display_error(regular_stream(), &pos_provider, ex); } void parser::throw_parser_exception(char const * msg, pos_info p) { @@ -99,7 +99,7 @@ void parser::protected_call(std::function && f, std::function && // CATCH(display_error(ex), // throw parser_exception(ex.what(), m_strm_name.c_str(), ex.m_pos.first, ex.m_pos.second)); } catch (parser_exception & ex) { - CATCH(regular(m_env, m_ios) << ex.what() << endl, + CATCH(regular_stream() << ex.what() << endl, throw); } catch (parser_error & ex) { CATCH(display_error(ex.what(), ex.m_pos), @@ -107,7 +107,7 @@ void parser::protected_call(std::function && f, std::function && } catch (interrupted & ex) { reset_interrupt(); if (m_verbose) - regular(m_env, m_ios) << "!!!Interrupted!!!" << endl; + regular_stream() << "!!!Interrupted!!!" << endl; sync(); if (m_use_exceptions) throw; @@ -238,12 +238,12 @@ parameter parser::parse_binder() { } else if (curr_is_token(g_lcurly)) { next(); auto p = parse_binder_core(mk_implicit_binder_info()); - check_token_next(g_rparen, "invalid binder, '}' expected"); + check_token_next(g_rcurly, "invalid binder, '}' expected"); return p; } else if (curr_is_token(g_lbracket)) { next(); auto p = parse_binder_core(mk_cast_binder_info()); - check_token_next(g_rparen, "invalid binder, ']' expected"); + check_token_next(g_rbracket, "invalid binder, ']' expected"); return p; } else { throw parser_error("invalid binder, '(', '{', '[' or identifier expected", pos()); @@ -282,11 +282,11 @@ void parser::parse_binders_core(buffer & r) { } else if (curr_is_token(g_lcurly)) { next(); parse_binder_block(r, mk_implicit_binder_info()); - check_token_next(g_rparen, "invalid binder, '}' expected"); + check_token_next(g_rcurly, "invalid binder, '}' expected"); } else if (curr_is_token(g_lbracket)) { next(); parse_binder_block(r, mk_cast_binder_info()); - check_token_next(g_rparen, "invalid binder, ']' expected"); + check_token_next(g_rbracket, "invalid binder, ']' expected"); } else { return; } @@ -301,15 +301,112 @@ void parser::parse_binders(buffer & r) { throw parser_error("invalid binder, '(', '{', '[' or identifier expected", pos()); } -expr parser::parse_nud_keyword() { - // TODO(Leo) - return expr(); +expr parser::parse_notation(parse_table t, expr * left) { + lean_assert(curr() == scanner::token_kind::Keyword); + auto p = pos(); + buffer args; + buffer ps; + if (left) + args.push_back(*left); + while (true) { + if (curr() != scanner::token_kind::Keyword) + break; + auto p = t.find(get_token_info().value()); + if (!p) + break; + next(); + notation::action const & a = p->first; + switch (a.kind()) { + case notation::action_kind::Skip: + break; + case notation::action_kind::Expr: + args.push_back(parse_expr(a.rbp())); + break; + case notation::action_kind::Exprs: { + buffer r_args; + r_args.push_back(parse_expr(a.rbp())); + while (curr_is_token(a.get_sep())) { + r_args.push_back(parse_expr(a.rbp())); + } + expr r = instantiate(a.get_initial(), args.size(), args.data()); + if (a.is_fold_right()) { + unsigned i = r_args.size(); + while (!i > 0) { + --i; + args.push_back(r_args[i]); + args.push_back(r); + r = instantiate(a.get_rec(), args.size(), args.data()); + args.pop_back(); args.pop_back(); + } + } else { + for (unsigned i = 0; i < r_args.size(); i++) { + args.push_back(r_args[i]); + args.push_back(r); + r = instantiate(a.get_rec(), args.size(), args.data()); + args.pop_back(); args.pop_back(); + } + } + args.push_back(r); + break; + } + case notation::action_kind::Binder: + ps.push_back(parse_binder()); + break; + case notation::action_kind::Binders: + parse_binders(ps); + break; + case notation::action_kind::ScopedExpr: { + expr r = parse_scoped_expr(ps, a.rbp()); + if (is_var(a.get_rec(), 0)) { + r = abstract(ps, r, a.use_lambda_abstraction()); + } else { + unsigned i = ps.size(); + while (i > 0) { + --i; + expr const & l = ps[i].m_local; + if (a.use_lambda_abstraction()) + r = Fun(l, r, ps[i].m_bi); + else + r = Pi(l, r, ps[i].m_bi); + args.push_back(r); + r = instantiate(a.get_rec(), args.size(), args.data()); + args.pop_back(); + } + } + args.push_back(r); + break; + } + case notation::action_kind::Ext: + args.push_back(a.get_parse_fn()(*this, args.size(), args.data())); + break; + } + t = p->second; + } + list const & as = t.is_accepting(); + if (is_nil(as)) { + if (p == pos()) + throw parser_error(sstream() << "invalid expression", pos()); + else + throw parser_error(sstream() << "invalid expression starting at " << p.first << ":" << p.second, pos()); + } + buffer cs; + for (expr const & a : as) { + cs.push_back(instantiate(a, args.size(), args.data())); + } + return mk_choice(cs.size(), cs.data()); } -expr parser::parse_led_keyword(expr /* left */) { - // TODO(Leo) - return expr(); +expr parser::parse_nud_notation() { + return parse_notation(cfg().m_nud, nullptr); } + +expr parser::parse_led_notation(expr left) { + if (cfg().m_led.find(get_token_info().value())) + return parse_notation(cfg().m_led, &left); + else + return mk_app(left, parse_nud_notation(), pos_of(left)); +} + expr parser::parse_id() { auto p = pos(); name id = get_name_val(); @@ -352,7 +449,7 @@ expr parser::parse_string_expr() { expr parser::parse_nud() { switch (curr()) { - case scanner::token_kind::Keyword: return parse_nud_keyword(); + case scanner::token_kind::Keyword: return parse_nud_notation(); case scanner::token_kind::Identifier: return parse_id(); case scanner::token_kind::Numeral: return parse_numeral_expr(); case scanner::token_kind::Decimal: return parse_decimal_expr(); @@ -363,7 +460,7 @@ expr parser::parse_nud() { expr parser::parse_led(expr left) { switch (curr()) { - case scanner::token_kind::Keyword: return parse_led_keyword(left); + case scanner::token_kind::Keyword: return parse_led_notation(left); case scanner::token_kind::Identifier: return mk_app(left, parse_id(), pos_of(left)); case scanner::token_kind::Numeral: return mk_app(left, parse_numeral_expr(), pos_of(left)); case scanner::token_kind::Decimal: return mk_app(left, parse_decimal_expr(), pos_of(left)); @@ -373,10 +470,17 @@ expr parser::parse_led(expr left) { } unsigned parser::curr_lbp() const { - if (curr() == scanner::token_kind::Keyword) + switch (curr()) { + case scanner::token_kind::Keyword: return get_token_info().precedence(); - else + case scanner::token_kind::CommandKeyword: case scanner::token_kind::Eof: + case scanner::token_kind::ScriptBlock: return 0; + case scanner::token_kind::Identifier: case scanner::token_kind::Numeral: + case scanner::token_kind::Decimal: case scanner::token_kind::String: + return std::numeric_limits::max(); + } + lean_unreachable(); // LCOV_EXCL_LINE } expr parser::parse_expr(unsigned rbp) { @@ -413,9 +517,9 @@ expr parser::abstract(unsigned num_params, parameter const * ps, expr const & e, name const & n = local_pp_name(l); expr type = ::lean::abstract(mlocal_type(l), i, locals.data()); if (lambda) - r = mk_lambda(n, type, r); + r = mk_lambda(n, type, r, ps[i].m_bi); else - r = mk_pi(n, type, r); + r = mk_pi(n, type, r, ps[i].m_bi); } return r; } diff --git a/src/frontends/lean/parser.h b/src/frontends/lean/parser.h index ab39ea810..8f5e99e1f 100644 --- a/src/frontends/lean/parser.h +++ b/src/frontends/lean/parser.h @@ -15,6 +15,7 @@ Author: Leonardo de Moura #include "kernel/environment.h" #include "kernel/expr_maps.h" #include "library/io_state.h" +#include "library/io_state_stream.h" #include "library/kernel_bindings.h" #include "frontends/lean/scanner.h" #include "frontends/lean/parser_config.h" @@ -101,10 +102,11 @@ class parser { void parse_script(bool as_expr = false); bool parse_commands(); unsigned curr_lbp() const; + expr parse_notation(parse_table t, expr * left); + expr parse_nud_notation(); + expr parse_led_notation(expr left); expr parse_nud(); expr parse_led(expr left); - expr parse_nud_keyword(); - expr parse_led_keyword(expr left); expr parse_id(); expr parse_numeral_expr(); expr parse_decimal_expr(); @@ -163,6 +165,9 @@ public: token_info const & get_token_info() const { return m_scanner.get_token_info(); } std::string const & get_stream_name() const { return m_scanner.get_stream_name(); } + regular regular_stream() const { return regular(env(), ios()); } + diagnostic diagnostic_stream() const { return diagnostic(env(), ios()); } + /** parse all commands in the input stream */ bool operator()() { return parse_commands(); } }; diff --git a/src/frontends/lean/parser_config.cpp b/src/frontends/lean/parser_config.cpp index 0532f13fc..c4057f255 100644 --- a/src/frontends/lean/parser_config.cpp +++ b/src/frontends/lean/parser_config.cpp @@ -5,12 +5,15 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ #include "frontends/lean/parser_config.h" +#include "frontends/lean/builtin_exprs.h" #include "frontends/lean/builtin_cmds.h" #include "frontends/lean/builtin_tactic_cmds.h" namespace lean { parser_config::parser_config() { m_tokens = mk_default_token_table(); + m_nud = get_builtin_nud_table(); + m_led = get_builtin_led_table(); m_cmds = get_builtin_cmds(); m_tactic_cmds = get_builtin_tactic_cmds(); } @@ -31,11 +34,9 @@ static parser_ext const & get_extension(environment const & env) { static environment update(environment const & env, parser_ext const & ext) { return env.update(g_ext.m_ext_id, std::make_shared(ext)); } - parser_config const & get_parser_config(environment const & env) { return get_extension(env).m_cfg; } - environment update_parser_config(environment const & env, parser_config const & c) { parser_ext ext = get_extension(env); ext.m_cfg = c; diff --git a/src/frontends/lean/token_table.cpp b/src/frontends/lean/token_table.cpp index d9519bb84..3597fdb46 100644 --- a/src/frontends/lean/token_table.cpp +++ b/src/frontends/lean/token_table.cpp @@ -42,9 +42,9 @@ static char const * g_arrow_unicode = "\u2192"; token_table init_token_table() { token_table t; - char const * builtin[] = {"fun", "Pi", "let", "in", "have", "show", "by", "from", "(", ")", "{", "}", + char const * builtin[] = {"fun", "Pi", "let", "in", "have", "show", "by", "from", "(", ")", "{", "}", "[", "]", ".{", "Type", "...", ",", ".", ":", "calc", ":=", "--", "(*", "(--", "->", - "proof", "qed", "private", nullptr}; + "proof", "qed", "private", "raw", "Bool", nullptr}; char const * commands[] = {"theorem", "axiom", "variable", "definition", "evaluate", "check", "print", "variables", "end", "namespace", "section", "import", diff --git a/src/kernel/abstract.h b/src/kernel/abstract.h index 1836d9ee3..d0e0680f5 100644 --- a/src/kernel/abstract.h +++ b/src/kernel/abstract.h @@ -64,7 +64,9 @@ expr Pi(std::initializer_list> const & l, /** \brief Create a Pi-expression by abstracting the given local constants over b */ expr Pi(unsigned num, expr const * locals, expr const & b); template expr Pi(T const & locals, expr const & b) { return Pi(locals.size(), locals.data(), b); } -inline expr Pi(expr const & local, expr const & b) { return Pi(1, &local, b); } +inline expr Pi(expr const & local, expr const & b, binder_info const & bi = binder_info()) { + return Pi(local_pp_name(local), mlocal_type(local), abstract(b, local), bi); +} /** - \brief Create a Let expression (Let x := v in b), the term b is abstracted using abstract(b, x). -*/ diff --git a/src/tests/frontends/lean/scanner.cpp b/src/tests/frontends/lean/scanner.cpp index 3b5a5e6e8..2b4a6027a 100644 --- a/src/tests/frontends/lean/scanner.cpp +++ b/src/tests/frontends/lean/scanner.cpp @@ -141,7 +141,7 @@ static void tst2() { scan("x.name"); scan("x.foo"); check("x.name", {tk::Identifier}); - check("fun (x : Bool), x", {tk::Keyword, tk::Keyword, tk::Identifier, tk::Keyword, tk::Identifier, + check("fun (x : Bool), x", {tk::Keyword, tk::Keyword, tk::Identifier, tk::Keyword, tk::Keyword, tk::Keyword, tk::Keyword, tk::Identifier}); check_name("x10", name("x10")); // check_name("x.10", name(name("x"), 10)); @@ -176,7 +176,7 @@ static void tst2() { scan("0..1"); check("0..1", {tk::Numeral, tk::Keyword, tk::Keyword, tk::Numeral}); scan("theorem a : Bool axiom b : Int"); - check("theorem a : Bool axiom b : Int", {tk::CommandKeyword, tk::Identifier, tk::Keyword, tk::Identifier, + check("theorem a : Bool axiom b : Int", {tk::CommandKeyword, tk::Identifier, tk::Keyword, tk::Keyword, tk::CommandKeyword, tk::Identifier, tk::Keyword, tk::Identifier}); scan("foo \"ttk\\\"\" : Int"); check("foo \"ttk\\\"\" : Int", {tk::Identifier, tk::String, tk::Keyword, tk::Identifier});