Add comments to parser.cpp

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
2013-08-19 09:35:19 -07:00 · 2013-08-19 09:35:19 -07:00 · b2c877b0c3
commit b2c877b0c3
parent ce43c1cbae
1 changed files with 208 additions and 26 deletions
--- a/src/frontend/parser.cpp
+++ b/src/frontend/parser.cpp
@ -19,6 +19,8 @@ Author: Leonardo de Moura
 #include "printer.h"

 namespace lean {
+// ==========================================
+// Builtin commands
 static name g_definition_kwd("Definition");
 static name g_variable_kwd("Variable");
 static name g_theorem_kwd("Theorem");
@ -36,6 +38,11 @@ static name g_postfix_kwd("Postfix");
 static name g_mixfixl_kwd("Mixfixl");
 static name g_mixfixr_kwd("Mixfixr");
 static name g_mixfixc_kwd("Mixfixc");
+/** \brief Table/List with all builtin command keywords */
+static list<name> g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd,
+                                        g_show_kwd, g_check_kwd, g_env_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd,
+                                        g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd};
+// ==========================================

 // ==========================================
 // Support for parsing levels
@ -46,13 +53,22 @@ static unsigned g_level_plus_prec = 10;
 static unsigned g_level_cup_prec  = 5;
 // ==========================================

+// ==========================================
+// Auxiliary constant used to mark applications of overloaded operators.
 static name g_overload_name(name(name(name(0u), "parser"), "overload"));
 static expr g_overload = mk_constant(g_overload_name);
+// ==========================================

-static list<name> g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd, g_show_kwd, g_check_kwd, g_env_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd, g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd};
+/**
+    \brief Functional object for parsing

-/** \brief Functional object for parsing */
-struct parser_fn {
+    \remark It is an instance of a Pratt parser
+    (http://en.wikipedia.org/wiki/Pratt_parser) described in the paper
+    "Top down operator precedence". This algorithm is super simple,
+    and it is easy to support user-defined infix/prefix/postfix/mixfix
+    operators.
+*/
+class parser_fn {
    typedef scoped_map<name, unsigned, name_hash, name_eq> local_decls;
    typedef std::unordered_map<name, expr, name_hash, name_eq>  builtins;
    frontend       m_frontend;
@ -72,6 +88,10 @@ struct parser_fn {
        parser_error(char const * msg):exception(msg) {}
    };

+    /**
+        \brief Auxiliar struct for creating/destroying a new scope for
+        local declarations.
+    */
    struct mk_scope {
        parser_fn &           m_fn;
        local_decls::mk_scope m_scope;
@ -80,16 +100,27 @@ struct parser_fn {
        ~mk_scope() { m_fn.m_num_local_decls = m_old_num_local_decls; }
    };

+    /** \brief Read the next token. */
    void scan() { m_curr = m_scanner.scan(); }
+    /** \brief Return the current token */
    scanner::token curr() const { return m_curr; }
+    /** \brief Read the next token if the current one is not End-of-file. */
    void next() { if (m_curr != scanner::token::Eof) scan(); }
+    /** \brief Keep consume tokens until we find a Command or End-of-file. */
    void sync() {
        while (curr() != scanner::token::CommandId && curr() != scanner::token::Eof)
            next();
    }
+
+    /** \brief Return the name associated with the current token. */
    name const & curr_name() const { return m_scanner.get_name_val(); }
+    /** \brief Return the numeral associated with the current token. */
    mpq const & curr_num() const { return m_scanner.get_num_val(); }

+    /**
+        \brief Check if the current token is \c t, and move to the
+        next one. If the current token is not \c t, it throws a parser error.
+    */
    void check_next(scanner::token t, char const * msg) {
        if (curr() == t)
            next();
@ -97,23 +128,42 @@ struct parser_fn {
            throw parser_error(msg);
    }

+    /** \brief Return true iff the current toke is an identifier */
    bool curr_is_identifier() const { return curr() == scanner::token::Id; }
+    /** \brief Return true iff the current toke is an integer */
    bool curr_is_int() const { return curr() == scanner::token::IntVal; }
+    /** \brief Return true iff the current toke is a '(' */
    bool curr_is_lparen() const { return curr() == scanner::token::LeftParen; }
+    /** \brief Return true iff the current toke is a ':' */
    bool curr_is_colon() const { return curr() == scanner::token::Colon; }
+    /** \brief Return true iff the current toke is a ',' */
    bool curr_is_comma() const { return curr() == scanner::token::Comma; }
+    /** \brief Return true iff the current toke is an 'in' token */
    bool curr_is_in() const { return curr() == scanner::token::In; }

+    /** \brief Throws a parser error if the current token is not an identifier. */
    void check_identifier(char const * msg) { if (!curr_is_identifier()) throw parser_error(msg); }
+    /**
+        \brief Throws a parser error if the current token is not an
+        identifier. If it is, move to the next token.
+    */
    name check_identifier_next(char const * msg) { check_identifier(msg); name r = curr_name(); next(); return r; }
+    /** \brief Throws a parser error if the current token is not ':'. If it is, move to the next token. */
    void check_colon_next(char const * msg) { check_next(scanner::token::Colon, msg); }
+    /** \brief Throws a parser error if the current token is not ','. If it is, move to the next token. */
    void check_comma_next(char const * msg) { check_next(scanner::token::Comma, msg); }
+    /** \brief Throws a parser error if the current token is not '('. If it is, move to the next token. */
    void check_lparen_next(char const * msg) { check_next(scanner::token::LeftParen, msg); }
+    /** \brief Throws a parser error if the current token is not ')'. If it is, move to the next token. */
    void check_rparen_next(char const * msg) { check_next(scanner::token::RightParen, msg); }
+    /** \brief Throws a parser error if the current token is not ':='. If it is, move to the next token. */
    void check_assign_next(char const * msg) { check_next(scanner::token::Assign, msg); }
+    /** \brief Throws a parser error if the current token is not an identifier named \c op. */
    void check_name(name const & op, char const * msg) { if(!curr_is_identifier() || curr_name() != op) throw parser_error(msg); }
+    /** \brief Throws a parser error if the current token is not an identifier named \c op. If it is, move to the next token. */
    void check_name_next(name const & op, char const * msg) { check_name(op, msg); next(); }

+    /** \brief Initialize \c m_builtins table with Lean builtin symbols that do not have notation associated with them. */
    void init_builtins() {
        m_builtins["Bool"]   = Bool;
        m_builtins["true"]   = True;
@ -123,27 +173,15 @@ struct parser_fn {
        m_builtins["Int"]    = Int;
    }

-    parser_fn(frontend & fe, std::istream & in, std::ostream * out, std::ostream * err, bool use_exceptions):
-        m_frontend(fe),
-        m_scanner(in),
-        m_formatter(mk_pp_formatter(fe)),
-        m_out(out),
-        m_err(err),
-        m_use_exceptions(use_exceptions) {
-        m_found_errors = false;
-        m_num_local_decls = 0;
-        m_scanner.set_command_keywords(g_command_keywords);
-        init_builtins();
-        scan();
-    }
-
    [[ noreturn ]] void not_implemented_yet() {
        // TODO
        throw parser_error("not implemented yet");
    }

-    // ==========================================
-    // Universe Level parsing
+    /**
+       @name Parse Universe levels
+    */
+    /*@{*/
    level parse_level_max() {
        next();
        buffer<level> lvls;
@ -221,6 +259,7 @@ struct parser_fn {
        }
    }

+    /** \brief Parse a universe level */
    level parse_level(unsigned rbp = 0) {
        level left = parse_level_nud();
        while (rbp < curr_level_lbp()) {
@ -228,8 +267,21 @@ struct parser_fn {
        }
        return left;
    }
-    // ==========================================
+    /*@}*/

+    /**
+       @name Parse Expressions
+    */
+    /*@{*/
+
+    /**
+       \brief Return the function associated with the given operator.
+       If the operator has been overloaded, it returns an expression
+       of the form (overload f_k ... (overload f_2 f_1) ...)
+       where f_i's are different options.
+       After we finish parsing, the procedure #elaborate will
+       resolve/decide which f_i should be used.
+    */
    expr mk_fun(operator_info const & op) {
        list<name> const & fs = op.get_internal_names();
        lean_assert(!is_nil(fs));
@ -237,38 +289,49 @@ struct parser_fn {
        expr r = mk_constant(*it);
        ++it;
        for (; it != fs.end(); ++it)
-            r = mk_app(g_overload, r, mk_constant(*it));
+            r = mk_app(g_overload, mk_constant(*it), r);
        return r;
    }

+    /** \brief Parse a user defined prefix operator. */
    expr parse_prefix(operator_info const & op) {
        expr arg = parse_expr(op.get_precedence());
        return mk_app(mk_fun(op), arg);
    }

+    /** \brief Parse a user defined postfix operator. */
    expr parse_postfix(expr const & left, operator_info const & op) {
        return mk_app(mk_fun(op), left);
    }

+    /** \brief Parse a user defined infix operator. */
    expr parse_infix(expr const & left, operator_info const & op) {
        expr right = parse_expr(op.get_precedence()+1);
        return mk_app(mk_fun(op), left, right);
    }

+    /** \brief Parse a user defined infix-left operator. */
    expr parse_infixl(expr const & left, operator_info const & op) {
        expr right = parse_expr(op.get_precedence());
        return mk_app(mk_fun(op), left, right);
    }

+    /** \brief Parse a user defined infix-right operator. */
    expr parse_infixr(expr const & left, operator_info const & op) {
        expr right = parse_expr(op.get_precedence()-1);
        return mk_app(mk_fun(op), left, right);
    }

+    /**
+        \brief Throws an error if the current token is not an identifier named \c op_part.
+        If it is, move to the next toke. The error message assumes
+        this method has been used when parsing mixfix operators.
+    */
    void check_op_part(name const & op_part) {
-        check_name_next(op_part, "invalid expression, identifier expected");
+        check_name_next(op_part, "invalid mixfix operator application, identifier expected");
    }

+    /** \brief Auxiliary function for #parse_mixfixl and #parse_mixfixr */
    void parse_mixfix_args(list<name> const & ops, unsigned prec, buffer<expr> & args) {
        auto it = ops.begin();
        ++it;
@ -279,6 +342,7 @@ struct parser_fn {
        }
    }

+    /** \brief Parse user defined mixfixl operator. It has the form: ID _ ... ID _ */
    expr parse_mixfixl(operator_info const & op) {
        buffer<expr> args;
        args.push_back(mk_fun(op));
@ -287,6 +351,7 @@ struct parser_fn {
        return mk_app(args.size(), args.data());
    }

+    /** \brief Parse user defined mixfixr operator. It has the form: _ ID ... _ ID */
    expr parse_mixfixr(expr const & left, operator_info const & op) {
        buffer<expr> args;
        args.push_back(mk_fun(op));
@ -295,6 +360,7 @@ struct parser_fn {
        return mk_app(args.size(), args.data());
    }

+    /** \brief Parse user defined mixfixc operator. It has the form: ID _ ID ... _ ID */
    expr parse_mixfixc(operator_info const & op) {
        buffer<expr> args;
        args.push_back(mk_fun(op));
@ -311,6 +377,11 @@ struct parser_fn {
        }
    }

+    /**
+       \brief Try to find an object (Definition or Postulate) named \c
+       id in the frontend/environment. If there isn't one, then tries
+       to check if \c id is a builtin symbol. If it is not throws an error.
+    */
    expr get_name_ref(name const & id) {
        object const & obj = m_frontend.find_object(id);
        if (obj) {
@ -330,6 +401,14 @@ struct parser_fn {
        }
    }

+    /**
+        \brief Parse an identifier that has a "null denotation" (See
+        paper: "Top down operator precedence"). A nud identifier is a
+        token that appears at the beginning of a language construct.
+        In Lean, local declarations (i.e., local functions), user
+        defined prefix, mixfixl and mixfixc operators, and global
+        functions can begin a language construct.
+    */
    expr parse_nud_id() {
        name id = curr_name();
        next();
@ -351,6 +430,15 @@ struct parser_fn {
        }
    }

+    /**
+        \brief Parse an identifier that has a "left denotation" (See
+        paper: "Top down operator precedence"). A left identifier is a
+        token that appears inside of a construct (to left of the rest
+        of the construct). In Lean, local declarations (i.e., function
+        application arguments), user defined infix, infixl, infixr,
+        mixfixr and global values (as function application arguments)
+        can appear inside of a construct.
+    */
    expr parse_led_id(expr const & left) {
        name id = curr_name();
        next();
@ -373,18 +461,22 @@ struct parser_fn {
        }
    }

+    /** \brief Parse <tt>expr '=' expr</tt>. */
    expr parse_eq(expr const & left) {
        next();
        expr right = parse_expr(g_eq_precedence);
        return mk_eq(left, right);
    }

+    /** \brief Parse <tt>expr '->' expr</tt>. */
    expr parse_arrow(expr const & left) {
        next();
+        // The -1 is a trick to get right associativity in Pratt's parsers
        expr right = parse_expr(g_arrow_precedence-1);
        return mk_arrow(left, right);
    }

+    /** \brief Parse <tt>'(' expr ')'</tt>. */
    expr parse_lparen() {
        next();
        expr r = parse_expr();
@ -392,6 +484,10 @@ struct parser_fn {
        return r;
    }

+    /**
+        \brief Parse a sequence of identifiers <tt>ID*</tt>. Store the
+        result in \c result.
+    */
    void parse_names(buffer<name> & result) {
        while (curr_is_identifier()) {
            result.push_back(curr_name());
@ -399,6 +495,7 @@ struct parser_fn {
        }
    }

+    /** \brief Register the name \c n as a local declaration. */
    void register_binding(name const & n) {
        unsigned lvl = m_num_local_decls;
        m_local_decls.insert(n, lvl);
@ -406,8 +503,11 @@ struct parser_fn {
        lean_assert(m_local_decls.find(n)->second == lvl);
    }

+    /**
+        \brief Parse <tt>ID ... ID ':' expr</tt>, where the expression
+        represents the type of the identifiers.
+    */
    void parse_simple_bindings(buffer<std::pair<name, expr>> & result) {
-        // ID ... ID : type
        buffer<name> names;
        parse_names(names);
        check_colon_next("invalid binder, ':' expected");
@ -422,6 +522,11 @@ struct parser_fn {
        }
    }

+    /**
+        \brief Parse a sequence of <tt>'(' ID ... ID ':' expr ')'</tt>.
+
+        This is used when parsing lambda, Pi, forall/exists expressions and definitions.
+    */
    void parse_bindings(buffer<std::pair<name, expr>> & result) {
        if (curr_is_identifier()) {
            parse_simple_bindings(result);
@ -438,6 +543,10 @@ struct parser_fn {
        }
    }

+    /**
+        \brief Create a lambda/Pi abstraction, using the giving binders
+        and body.
+    */
    expr mk_abstraction(bool is_lambda, buffer<std::pair<name, expr>> const & bindings, expr const & body) {
        expr result = body;
        unsigned i = bindings.size();
@ -451,6 +560,7 @@ struct parser_fn {
        return result;
    }

+    /** \brief Parse lambda/Pi abstraction. */
    expr parse_abstraction(bool is_lambda) {
        next();
        mk_scope scope(*this);
@ -461,14 +571,17 @@ struct parser_fn {
        return mk_abstraction(is_lambda, bindings, result);
    }

+    /** \brief Parse lambda abstraction. */
    expr parse_lambda() {
        return parse_abstraction(true);
    }

+    /** \brief Parse Pi abstraction. */
    expr parse_pi() {
        return parse_abstraction(false);
    }

+    /** \brief Parse Let expression. */
    expr parse_let() {
        next();
        mk_scope scope(*this);
@ -494,6 +607,7 @@ struct parser_fn {
        }
    }

+    /** \brief Parse an integer value. */
    expr parse_int() {
        expr r = mk_int_value(m_scanner.get_num_val().get_numerator());
        next();
@ -508,6 +622,7 @@ struct parser_fn {
        not_implemented_yet();
    }

+    /** \brief Parse <tt>'Type'</tt> and <tt>'Type' level</tt> expressions. */
    expr parse_type() {
        next();
        if (curr_is_identifier() || curr_is_int()) {
@ -517,6 +632,9 @@ struct parser_fn {
        }
    }

+    /**
+       \brief Auxiliary method used when processing the beginning of an expression.
+    */
    expr parse_nud() {
        switch (curr()) {
        case scanner::token::Id:         return parse_nud_id();
@ -533,6 +651,9 @@ struct parser_fn {
        }
    }

+    /**
+       \brief Auxiliary method used when processing the 'inside' of an expression.
+    */
    expr parse_led(expr const & left) {
        switch (curr()) {
        case scanner::token::Id:         return parse_led_id(left);
@ -547,6 +668,7 @@ struct parser_fn {
        }
    }

+    /** \brief Return the binding power of the current token (when parsing expression). */
    unsigned curr_lbp() {
        switch (curr()) {
        case scanner::token::Id: {
@ -572,6 +694,7 @@ struct parser_fn {
        }
    }

+    /** \brief Parse an expression */
    expr parse_expr(unsigned rbp = 0) {
        expr left = parse_nud();
        while (rbp < curr_lbp()) {
@ -579,12 +702,18 @@ struct parser_fn {
        }
        return left;
    }
+    /*@}*/

    expr elaborate(expr const & e) {
        // TODO
        return e;
    }

+    /**
+       @name Parse Commands
+    */
+    /*@{*/
+    /** \brief Auxiliary method used for parsing definitions and theorems. */
    void parse_def_core(bool is_definition) {
        next();
        expr type, val;
@ -611,14 +740,29 @@ struct parser_fn {
            m_frontend.add_theorem(id, type, val);
    }

+    /**
+        \brief Parse a Definition. It has one of the following two forms:
+
+        1) 'Definition' ID ':' expr ':=' expr
+
+        2) 'Definition' ID bindings ':' expr ':=' expr
+    */
    void parse_definition() {
        parse_def_core(true);
    }

+    /**
+        \brief Parse a Theorem. It has one of the following two forms:
+
+        1) 'Theorem' ID ':' expr ':=' expr
+
+        2) 'Theorem' ID bindings ':' expr ':=' expr
+    */
    void parse_theorem() {
        parse_def_core(false);
    }

+    /** \brief Parse 'Variable' ID ':' expr */
    void parse_variable() {
        next();
        name id = check_identifier_next("invalid variable declaration, identifier expected");
@ -627,6 +771,7 @@ struct parser_fn {
        m_frontend.add_var(id, type);
    }

+    /** \brief Parse 'Axiom' ID ':' expr */
    void parse_axiom() {
        next();
        name id = check_identifier_next("invalid axiom, identifier expected");
@ -635,6 +780,7 @@ struct parser_fn {
        m_frontend.add_axiom(id, type);
    }

+    /** \brief Parse 'Eval' expr */
    void parse_eval() {
        next();
        expr v = elaborate(parse_expr());
@ -642,12 +788,14 @@ struct parser_fn {
        (*m_out) << m_formatter(r) << std::endl;
    }

+    /** \brief Parse 'Show' expr */
    void parse_show() {
        next();
        expr v = elaborate(parse_expr());
        (*m_out) << m_formatter(v) << std::endl;
    }

+    /** \brief Parse 'Check' expr */
    void parse_check() {
        next();
        expr v = elaborate(parse_expr());
@ -655,12 +803,14 @@ struct parser_fn {
        (*m_out) << m_formatter(t) << std::endl;
    }

+    /** \brief Parse 'Environment' */
    void parse_env() {
        next();
        (*m_out) << m_frontend << std::endl;
    }

-    unsigned parse_precision() {
+    /** \brief Return the (optional) precedence of a user-defined operator. */
+    unsigned parse_precedence() {
        if (curr() == scanner::token::IntVal) {
            mpz pval = curr_num().get_numerator();
            if (!pval.is_unsigned_int())
@ -673,13 +823,20 @@ struct parser_fn {
        }
    }

+    /** \brief Throw an error if the current token is not an identifier. If it is, move to next token. */
    name parse_op_id() {
        return check_identifier_next("invalid operator definition, identifier expected");
    }

+    /**
+        \brief Parse prefix/postfix/infix/infixl/infixr user operator
+        definitions. These definitions have the form:
+
+           - fixity [Num] ID ID
+    */
    void parse_op(fixity fx) {
        next();
-        unsigned prec = parse_precision();
+        unsigned prec = parse_precedence();
        name op_id = parse_op_id();
        name id    = parse_op_id();
        switch (fx) {
@ -692,9 +849,15 @@ struct parser_fn {
        }
    }

+    /**
+        \brief Parse mixfix/mixfixl/mixfixr user operator definition.
+        These definitions have the form:
+
+        - fixity [NUM] ID ID+ ID
+    */
    void parse_mixfix_op(fixity fx) {
        next();
-        unsigned prec = parse_precision();
+        unsigned prec = parse_precedence();
        buffer<name> parts;
        parts.push_back(parse_op_id());
        parts.push_back(parse_op_id());
@ -712,6 +875,7 @@ struct parser_fn {
        }
    }

+    /** \brief Parse a Lean command. */
    void parse_command() {
        name const & cmd_id = curr_name();
        if (cmd_id == g_definition_kwd)    parse_definition();
@ -732,6 +896,7 @@ struct parser_fn {
        else if (cmd_id == g_mixfixc_kwd)  parse_mixfix_op(fixity::Mixfixc);
        else { lean_unreachable(); }
    }
+    /*@}*/

    void error(char const * msg, unsigned line, unsigned pos) {
        lean_assert(m_err);
@ -743,6 +908,22 @@ struct parser_fn {
        sync();
    }

+public:
+    parser_fn(frontend & fe, std::istream & in, std::ostream * out, std::ostream * err, bool use_exceptions):
+        m_frontend(fe),
+        m_scanner(in),
+        m_formatter(mk_pp_formatter(fe)),
+        m_out(out),
+        m_err(err),
+        m_use_exceptions(use_exceptions) {
+        m_found_errors = false;
+        m_num_local_decls = 0;
+        m_scanner.set_command_keywords(g_command_keywords);
+        init_builtins();
+        scan();
+    }
+
+    /** \brief Parse a sequence of commands. This method also perform error management. */
    bool parse_commands() {
        while (true) {
            try {
@ -771,6 +952,7 @@ struct parser_fn {
        }
    }

+    /** \brief Parse an expression. */
    expr parse_expr_main() {
        try {
            return elaborate(parse_expr());