From b2c877b0c3b7c545582634b9630af2178932db58 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Mon, 19 Aug 2013 09:35:19 -0700 Subject: [PATCH] Add comments to parser.cpp Signed-off-by: Leonardo de Moura --- src/frontend/parser.cpp | 234 +++++++++++++++++++++++++++++++++++----- 1 file changed, 208 insertions(+), 26 deletions(-) diff --git a/src/frontend/parser.cpp b/src/frontend/parser.cpp index 066c68f0e..159e52b29 100644 --- a/src/frontend/parser.cpp +++ b/src/frontend/parser.cpp @@ -19,6 +19,8 @@ Author: Leonardo de Moura #include "printer.h" namespace lean { +// ========================================== +// Builtin commands static name g_definition_kwd("Definition"); static name g_variable_kwd("Variable"); static name g_theorem_kwd("Theorem"); @@ -36,6 +38,11 @@ static name g_postfix_kwd("Postfix"); static name g_mixfixl_kwd("Mixfixl"); static name g_mixfixr_kwd("Mixfixr"); static name g_mixfixc_kwd("Mixfixc"); +/** \brief Table/List with all builtin command keywords */ +static list g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd, + g_show_kwd, g_check_kwd, g_env_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd, + g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd}; +// ========================================== // ========================================== // Support for parsing levels @@ -46,13 +53,22 @@ static unsigned g_level_plus_prec = 10; static unsigned g_level_cup_prec = 5; // ========================================== +// ========================================== +// Auxiliary constant used to mark applications of overloaded operators. static name g_overload_name(name(name(name(0u), "parser"), "overload")); static expr g_overload = mk_constant(g_overload_name); +// ========================================== -static list g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd, g_show_kwd, g_check_kwd, g_env_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd, g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd}; +/** + \brief Functional object for parsing -/** \brief Functional object for parsing */ -struct parser_fn { + \remark It is an instance of a Pratt parser + (http://en.wikipedia.org/wiki/Pratt_parser) described in the paper + "Top down operator precedence". This algorithm is super simple, + and it is easy to support user-defined infix/prefix/postfix/mixfix + operators. +*/ +class parser_fn { typedef scoped_map local_decls; typedef std::unordered_map builtins; frontend m_frontend; @@ -72,6 +88,10 @@ struct parser_fn { parser_error(char const * msg):exception(msg) {} }; + /** + \brief Auxiliar struct for creating/destroying a new scope for + local declarations. + */ struct mk_scope { parser_fn & m_fn; local_decls::mk_scope m_scope; @@ -80,16 +100,27 @@ struct parser_fn { ~mk_scope() { m_fn.m_num_local_decls = m_old_num_local_decls; } }; + /** \brief Read the next token. */ void scan() { m_curr = m_scanner.scan(); } + /** \brief Return the current token */ scanner::token curr() const { return m_curr; } + /** \brief Read the next token if the current one is not End-of-file. */ void next() { if (m_curr != scanner::token::Eof) scan(); } + /** \brief Keep consume tokens until we find a Command or End-of-file. */ void sync() { while (curr() != scanner::token::CommandId && curr() != scanner::token::Eof) next(); } + + /** \brief Return the name associated with the current token. */ name const & curr_name() const { return m_scanner.get_name_val(); } + /** \brief Return the numeral associated with the current token. */ mpq const & curr_num() const { return m_scanner.get_num_val(); } + /** + \brief Check if the current token is \c t, and move to the + next one. If the current token is not \c t, it throws a parser error. + */ void check_next(scanner::token t, char const * msg) { if (curr() == t) next(); @@ -97,23 +128,42 @@ struct parser_fn { throw parser_error(msg); } + /** \brief Return true iff the current toke is an identifier */ bool curr_is_identifier() const { return curr() == scanner::token::Id; } + /** \brief Return true iff the current toke is an integer */ bool curr_is_int() const { return curr() == scanner::token::IntVal; } + /** \brief Return true iff the current toke is a '(' */ bool curr_is_lparen() const { return curr() == scanner::token::LeftParen; } + /** \brief Return true iff the current toke is a ':' */ bool curr_is_colon() const { return curr() == scanner::token::Colon; } + /** \brief Return true iff the current toke is a ',' */ bool curr_is_comma() const { return curr() == scanner::token::Comma; } + /** \brief Return true iff the current toke is an 'in' token */ bool curr_is_in() const { return curr() == scanner::token::In; } + /** \brief Throws a parser error if the current token is not an identifier. */ void check_identifier(char const * msg) { if (!curr_is_identifier()) throw parser_error(msg); } + /** + \brief Throws a parser error if the current token is not an + identifier. If it is, move to the next token. + */ name check_identifier_next(char const * msg) { check_identifier(msg); name r = curr_name(); next(); return r; } + /** \brief Throws a parser error if the current token is not ':'. If it is, move to the next token. */ void check_colon_next(char const * msg) { check_next(scanner::token::Colon, msg); } + /** \brief Throws a parser error if the current token is not ','. If it is, move to the next token. */ void check_comma_next(char const * msg) { check_next(scanner::token::Comma, msg); } + /** \brief Throws a parser error if the current token is not '('. If it is, move to the next token. */ void check_lparen_next(char const * msg) { check_next(scanner::token::LeftParen, msg); } + /** \brief Throws a parser error if the current token is not ')'. If it is, move to the next token. */ void check_rparen_next(char const * msg) { check_next(scanner::token::RightParen, msg); } + /** \brief Throws a parser error if the current token is not ':='. If it is, move to the next token. */ void check_assign_next(char const * msg) { check_next(scanner::token::Assign, msg); } + /** \brief Throws a parser error if the current token is not an identifier named \c op. */ void check_name(name const & op, char const * msg) { if(!curr_is_identifier() || curr_name() != op) throw parser_error(msg); } + /** \brief Throws a parser error if the current token is not an identifier named \c op. If it is, move to the next token. */ void check_name_next(name const & op, char const * msg) { check_name(op, msg); next(); } + /** \brief Initialize \c m_builtins table with Lean builtin symbols that do not have notation associated with them. */ void init_builtins() { m_builtins["Bool"] = Bool; m_builtins["true"] = True; @@ -123,27 +173,15 @@ struct parser_fn { m_builtins["Int"] = Int; } - parser_fn(frontend & fe, std::istream & in, std::ostream * out, std::ostream * err, bool use_exceptions): - m_frontend(fe), - m_scanner(in), - m_formatter(mk_pp_formatter(fe)), - m_out(out), - m_err(err), - m_use_exceptions(use_exceptions) { - m_found_errors = false; - m_num_local_decls = 0; - m_scanner.set_command_keywords(g_command_keywords); - init_builtins(); - scan(); - } - [[ noreturn ]] void not_implemented_yet() { // TODO throw parser_error("not implemented yet"); } - // ========================================== - // Universe Level parsing + /** + @name Parse Universe levels + */ + /*@{*/ level parse_level_max() { next(); buffer lvls; @@ -221,6 +259,7 @@ struct parser_fn { } } + /** \brief Parse a universe level */ level parse_level(unsigned rbp = 0) { level left = parse_level_nud(); while (rbp < curr_level_lbp()) { @@ -228,8 +267,21 @@ struct parser_fn { } return left; } - // ========================================== + /*@}*/ + /** + @name Parse Expressions + */ + /*@{*/ + + /** + \brief Return the function associated with the given operator. + If the operator has been overloaded, it returns an expression + of the form (overload f_k ... (overload f_2 f_1) ...) + where f_i's are different options. + After we finish parsing, the procedure #elaborate will + resolve/decide which f_i should be used. + */ expr mk_fun(operator_info const & op) { list const & fs = op.get_internal_names(); lean_assert(!is_nil(fs)); @@ -237,38 +289,49 @@ struct parser_fn { expr r = mk_constant(*it); ++it; for (; it != fs.end(); ++it) - r = mk_app(g_overload, r, mk_constant(*it)); + r = mk_app(g_overload, mk_constant(*it), r); return r; } + /** \brief Parse a user defined prefix operator. */ expr parse_prefix(operator_info const & op) { expr arg = parse_expr(op.get_precedence()); return mk_app(mk_fun(op), arg); } + /** \brief Parse a user defined postfix operator. */ expr parse_postfix(expr const & left, operator_info const & op) { return mk_app(mk_fun(op), left); } + /** \brief Parse a user defined infix operator. */ expr parse_infix(expr const & left, operator_info const & op) { expr right = parse_expr(op.get_precedence()+1); return mk_app(mk_fun(op), left, right); } + /** \brief Parse a user defined infix-left operator. */ expr parse_infixl(expr const & left, operator_info const & op) { expr right = parse_expr(op.get_precedence()); return mk_app(mk_fun(op), left, right); } + /** \brief Parse a user defined infix-right operator. */ expr parse_infixr(expr const & left, operator_info const & op) { expr right = parse_expr(op.get_precedence()-1); return mk_app(mk_fun(op), left, right); } + /** + \brief Throws an error if the current token is not an identifier named \c op_part. + If it is, move to the next toke. The error message assumes + this method has been used when parsing mixfix operators. + */ void check_op_part(name const & op_part) { - check_name_next(op_part, "invalid expression, identifier expected"); + check_name_next(op_part, "invalid mixfix operator application, identifier expected"); } + /** \brief Auxiliary function for #parse_mixfixl and #parse_mixfixr */ void parse_mixfix_args(list const & ops, unsigned prec, buffer & args) { auto it = ops.begin(); ++it; @@ -279,6 +342,7 @@ struct parser_fn { } } + /** \brief Parse user defined mixfixl operator. It has the form: ID _ ... ID _ */ expr parse_mixfixl(operator_info const & op) { buffer args; args.push_back(mk_fun(op)); @@ -287,6 +351,7 @@ struct parser_fn { return mk_app(args.size(), args.data()); } + /** \brief Parse user defined mixfixr operator. It has the form: _ ID ... _ ID */ expr parse_mixfixr(expr const & left, operator_info const & op) { buffer args; args.push_back(mk_fun(op)); @@ -295,6 +360,7 @@ struct parser_fn { return mk_app(args.size(), args.data()); } + /** \brief Parse user defined mixfixc operator. It has the form: ID _ ID ... _ ID */ expr parse_mixfixc(operator_info const & op) { buffer args; args.push_back(mk_fun(op)); @@ -311,6 +377,11 @@ struct parser_fn { } } + /** + \brief Try to find an object (Definition or Postulate) named \c + id in the frontend/environment. If there isn't one, then tries + to check if \c id is a builtin symbol. If it is not throws an error. + */ expr get_name_ref(name const & id) { object const & obj = m_frontend.find_object(id); if (obj) { @@ -330,6 +401,14 @@ struct parser_fn { } } + /** + \brief Parse an identifier that has a "null denotation" (See + paper: "Top down operator precedence"). A nud identifier is a + token that appears at the beginning of a language construct. + In Lean, local declarations (i.e., local functions), user + defined prefix, mixfixl and mixfixc operators, and global + functions can begin a language construct. + */ expr parse_nud_id() { name id = curr_name(); next(); @@ -351,6 +430,15 @@ struct parser_fn { } } + /** + \brief Parse an identifier that has a "left denotation" (See + paper: "Top down operator precedence"). A left identifier is a + token that appears inside of a construct (to left of the rest + of the construct). In Lean, local declarations (i.e., function + application arguments), user defined infix, infixl, infixr, + mixfixr and global values (as function application arguments) + can appear inside of a construct. + */ expr parse_led_id(expr const & left) { name id = curr_name(); next(); @@ -373,18 +461,22 @@ struct parser_fn { } } + /** \brief Parse expr '=' expr. */ expr parse_eq(expr const & left) { next(); expr right = parse_expr(g_eq_precedence); return mk_eq(left, right); } + /** \brief Parse expr '->' expr. */ expr parse_arrow(expr const & left) { next(); + // The -1 is a trick to get right associativity in Pratt's parsers expr right = parse_expr(g_arrow_precedence-1); return mk_arrow(left, right); } + /** \brief Parse '(' expr ')'. */ expr parse_lparen() { next(); expr r = parse_expr(); @@ -392,6 +484,10 @@ struct parser_fn { return r; } + /** + \brief Parse a sequence of identifiers ID*. Store the + result in \c result. + */ void parse_names(buffer & result) { while (curr_is_identifier()) { result.push_back(curr_name()); @@ -399,6 +495,7 @@ struct parser_fn { } } + /** \brief Register the name \c n as a local declaration. */ void register_binding(name const & n) { unsigned lvl = m_num_local_decls; m_local_decls.insert(n, lvl); @@ -406,8 +503,11 @@ struct parser_fn { lean_assert(m_local_decls.find(n)->second == lvl); } + /** + \brief Parse ID ... ID ':' expr, where the expression + represents the type of the identifiers. + */ void parse_simple_bindings(buffer> & result) { - // ID ... ID : type buffer names; parse_names(names); check_colon_next("invalid binder, ':' expected"); @@ -422,6 +522,11 @@ struct parser_fn { } } + /** + \brief Parse a sequence of '(' ID ... ID ':' expr ')'. + + This is used when parsing lambda, Pi, forall/exists expressions and definitions. + */ void parse_bindings(buffer> & result) { if (curr_is_identifier()) { parse_simple_bindings(result); @@ -438,6 +543,10 @@ struct parser_fn { } } + /** + \brief Create a lambda/Pi abstraction, using the giving binders + and body. + */ expr mk_abstraction(bool is_lambda, buffer> const & bindings, expr const & body) { expr result = body; unsigned i = bindings.size(); @@ -451,6 +560,7 @@ struct parser_fn { return result; } + /** \brief Parse lambda/Pi abstraction. */ expr parse_abstraction(bool is_lambda) { next(); mk_scope scope(*this); @@ -461,14 +571,17 @@ struct parser_fn { return mk_abstraction(is_lambda, bindings, result); } + /** \brief Parse lambda abstraction. */ expr parse_lambda() { return parse_abstraction(true); } + /** \brief Parse Pi abstraction. */ expr parse_pi() { return parse_abstraction(false); } + /** \brief Parse Let expression. */ expr parse_let() { next(); mk_scope scope(*this); @@ -494,6 +607,7 @@ struct parser_fn { } } + /** \brief Parse an integer value. */ expr parse_int() { expr r = mk_int_value(m_scanner.get_num_val().get_numerator()); next(); @@ -508,6 +622,7 @@ struct parser_fn { not_implemented_yet(); } + /** \brief Parse 'Type' and 'Type' level expressions. */ expr parse_type() { next(); if (curr_is_identifier() || curr_is_int()) { @@ -517,6 +632,9 @@ struct parser_fn { } } + /** + \brief Auxiliary method used when processing the beginning of an expression. + */ expr parse_nud() { switch (curr()) { case scanner::token::Id: return parse_nud_id(); @@ -533,6 +651,9 @@ struct parser_fn { } } + /** + \brief Auxiliary method used when processing the 'inside' of an expression. + */ expr parse_led(expr const & left) { switch (curr()) { case scanner::token::Id: return parse_led_id(left); @@ -547,6 +668,7 @@ struct parser_fn { } } + /** \brief Return the binding power of the current token (when parsing expression). */ unsigned curr_lbp() { switch (curr()) { case scanner::token::Id: { @@ -572,6 +694,7 @@ struct parser_fn { } } + /** \brief Parse an expression */ expr parse_expr(unsigned rbp = 0) { expr left = parse_nud(); while (rbp < curr_lbp()) { @@ -579,12 +702,18 @@ struct parser_fn { } return left; } + /*@}*/ expr elaborate(expr const & e) { // TODO return e; } + /** + @name Parse Commands + */ + /*@{*/ + /** \brief Auxiliary method used for parsing definitions and theorems. */ void parse_def_core(bool is_definition) { next(); expr type, val; @@ -611,14 +740,29 @@ struct parser_fn { m_frontend.add_theorem(id, type, val); } + /** + \brief Parse a Definition. It has one of the following two forms: + + 1) 'Definition' ID ':' expr ':=' expr + + 2) 'Definition' ID bindings ':' expr ':=' expr + */ void parse_definition() { parse_def_core(true); } + /** + \brief Parse a Theorem. It has one of the following two forms: + + 1) 'Theorem' ID ':' expr ':=' expr + + 2) 'Theorem' ID bindings ':' expr ':=' expr + */ void parse_theorem() { parse_def_core(false); } + /** \brief Parse 'Variable' ID ':' expr */ void parse_variable() { next(); name id = check_identifier_next("invalid variable declaration, identifier expected"); @@ -627,6 +771,7 @@ struct parser_fn { m_frontend.add_var(id, type); } + /** \brief Parse 'Axiom' ID ':' expr */ void parse_axiom() { next(); name id = check_identifier_next("invalid axiom, identifier expected"); @@ -635,6 +780,7 @@ struct parser_fn { m_frontend.add_axiom(id, type); } + /** \brief Parse 'Eval' expr */ void parse_eval() { next(); expr v = elaborate(parse_expr()); @@ -642,12 +788,14 @@ struct parser_fn { (*m_out) << m_formatter(r) << std::endl; } + /** \brief Parse 'Show' expr */ void parse_show() { next(); expr v = elaborate(parse_expr()); (*m_out) << m_formatter(v) << std::endl; } + /** \brief Parse 'Check' expr */ void parse_check() { next(); expr v = elaborate(parse_expr()); @@ -655,12 +803,14 @@ struct parser_fn { (*m_out) << m_formatter(t) << std::endl; } + /** \brief Parse 'Environment' */ void parse_env() { next(); (*m_out) << m_frontend << std::endl; } - unsigned parse_precision() { + /** \brief Return the (optional) precedence of a user-defined operator. */ + unsigned parse_precedence() { if (curr() == scanner::token::IntVal) { mpz pval = curr_num().get_numerator(); if (!pval.is_unsigned_int()) @@ -673,13 +823,20 @@ struct parser_fn { } } + /** \brief Throw an error if the current token is not an identifier. If it is, move to next token. */ name parse_op_id() { return check_identifier_next("invalid operator definition, identifier expected"); } + /** + \brief Parse prefix/postfix/infix/infixl/infixr user operator + definitions. These definitions have the form: + + - fixity [Num] ID ID + */ void parse_op(fixity fx) { next(); - unsigned prec = parse_precision(); + unsigned prec = parse_precedence(); name op_id = parse_op_id(); name id = parse_op_id(); switch (fx) { @@ -692,9 +849,15 @@ struct parser_fn { } } + /** + \brief Parse mixfix/mixfixl/mixfixr user operator definition. + These definitions have the form: + + - fixity [NUM] ID ID+ ID + */ void parse_mixfix_op(fixity fx) { next(); - unsigned prec = parse_precision(); + unsigned prec = parse_precedence(); buffer parts; parts.push_back(parse_op_id()); parts.push_back(parse_op_id()); @@ -712,6 +875,7 @@ struct parser_fn { } } + /** \brief Parse a Lean command. */ void parse_command() { name const & cmd_id = curr_name(); if (cmd_id == g_definition_kwd) parse_definition(); @@ -732,6 +896,7 @@ struct parser_fn { else if (cmd_id == g_mixfixc_kwd) parse_mixfix_op(fixity::Mixfixc); else { lean_unreachable(); } } + /*@}*/ void error(char const * msg, unsigned line, unsigned pos) { lean_assert(m_err); @@ -743,6 +908,22 @@ struct parser_fn { sync(); } +public: + parser_fn(frontend & fe, std::istream & in, std::ostream * out, std::ostream * err, bool use_exceptions): + m_frontend(fe), + m_scanner(in), + m_formatter(mk_pp_formatter(fe)), + m_out(out), + m_err(err), + m_use_exceptions(use_exceptions) { + m_found_errors = false; + m_num_local_decls = 0; + m_scanner.set_command_keywords(g_command_keywords); + init_builtins(); + scan(); + } + + /** \brief Parse a sequence of commands. This method also perform error management. */ bool parse_commands() { while (true) { try { @@ -771,6 +952,7 @@ struct parser_fn { } } + /** \brief Parse an expression. */ expr parse_expr_main() { try { return elaborate(parse_expr());