From b2c877b0c3b7c545582634b9630af2178932db58 Mon Sep 17 00:00:00 2001
From: Leonardo de Moura <leonardo@microsoft.com>
Date: Mon, 19 Aug 2013 09:35:19 -0700
Subject: [PATCH] Add comments to parser.cpp

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
---
 src/frontend/parser.cpp | 234 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 208 insertions(+), 26 deletions(-)
diff --git a/src/frontend/parser.cpp b/src/frontend/parser.cpp
index 066c68f0e..159e52b29 100644
--- a/src/frontend/parser.cpp
+++ b/src/frontend/parser.cpp
@@ -19,6 +19,8 @@ Author: Leonardo de Moura
 #include "printer.h"
 
 namespace lean {
+// ==========================================
+// Builtin commands
 static name g_definition_kwd("Definition");
 static name g_variable_kwd("Variable");
 static name g_theorem_kwd("Theorem");
@@ -36,6 +38,11 @@ static name g_postfix_kwd("Postfix");
 static name g_mixfixl_kwd("Mixfixl");
 static name g_mixfixr_kwd("Mixfixr");
 static name g_mixfixc_kwd("Mixfixc");
+/** \brief Table/List with all builtin command keywords */
+static list<name> g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd,
+                                        g_show_kwd, g_check_kwd, g_env_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd,
+                                        g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd};
+// ==========================================
 
 // ==========================================
 // Support for parsing levels
@@ -46,13 +53,22 @@ static unsigned g_level_plus_prec = 10;
 static unsigned g_level_cup_prec  = 5;
 // ==========================================
 
+// ==========================================
+// Auxiliary constant used to mark applications of overloaded operators.
 static name g_overload_name(name(name(name(0u), "parser"), "overload"));
 static expr g_overload = mk_constant(g_overload_name);
+// ==========================================
 
-static list<name> g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd, g_show_kwd, g_check_kwd, g_env_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd, g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd};
+/**
+    \brief Functional object for parsing
 
-/** \brief Functional object for parsing */
-struct parser_fn {
+    \remark It is an instance of a Pratt parser
+    (http://en.wikipedia.org/wiki/Pratt_parser) described in the paper
+    "Top down operator precedence". This algorithm is super simple,
+    and it is easy to support user-defined infix/prefix/postfix/mixfix
+    operators.
+*/
+class parser_fn {
     typedef scoped_map<name, unsigned, name_hash, name_eq> local_decls;
     typedef std::unordered_map<name, expr, name_hash, name_eq>  builtins;
     frontend       m_frontend;
@@ -72,6 +88,10 @@ struct parser_fn {
         parser_error(char const * msg):exception(msg) {}
     };
 
+    /**
+        \brief Auxiliar struct for creating/destroying a new scope for
+        local declarations.
+    */
     struct mk_scope {
         parser_fn &           m_fn;
         local_decls::mk_scope m_scope;
@@ -80,16 +100,27 @@ struct parser_fn {
         ~mk_scope() { m_fn.m_num_local_decls = m_old_num_local_decls; }
     };
 
+    /** \brief Read the next token. */
     void scan() { m_curr = m_scanner.scan(); }
+    /** \brief Return the current token */
     scanner::token curr() const { return m_curr; }
+    /** \brief Read the next token if the current one is not End-of-file. */
     void next() { if (m_curr != scanner::token::Eof) scan(); }
+    /** \brief Keep consume tokens until we find a Command or End-of-file. */
     void sync() {
         while (curr() != scanner::token::CommandId && curr() != scanner::token::Eof)
             next();
     }
+
+    /** \brief Return the name associated with the current token. */
     name const & curr_name() const { return m_scanner.get_name_val(); }
+    /** \brief Return the numeral associated with the current token. */
     mpq const & curr_num() const { return m_scanner.get_num_val(); }
 
+    /**
+        \brief Check if the current token is \c t, and move to the
+        next one. If the current token is not \c t, it throws a parser error.
+    */
     void check_next(scanner::token t, char const * msg) {
         if (curr() == t)
             next();
@@ -97,23 +128,42 @@ struct parser_fn {
             throw parser_error(msg);
     }
 
+    /** \brief Return true iff the current toke is an identifier */
     bool curr_is_identifier() const { return curr() == scanner::token::Id; }
+    /** \brief Return true iff the current toke is an integer */
     bool curr_is_int() const { return curr() == scanner::token::IntVal; }
+    /** \brief Return true iff the current toke is a '(' */
     bool curr_is_lparen() const { return curr() == scanner::token::LeftParen; }
+    /** \brief Return true iff the current toke is a ':' */
     bool curr_is_colon() const { return curr() == scanner::token::Colon; }
+    /** \brief Return true iff the current toke is a ',' */
     bool curr_is_comma() const { return curr() == scanner::token::Comma; }
+    /** \brief Return true iff the current toke is an 'in' token */
     bool curr_is_in() const { return curr() == scanner::token::In; }
 
+    /** \brief Throws a parser error if the current token is not an identifier. */
     void check_identifier(char const * msg) { if (!curr_is_identifier()) throw parser_error(msg); }
+    /**
+        \brief Throws a parser error if the current token is not an
+        identifier. If it is, move to the next token.
+    */
     name check_identifier_next(char const * msg) { check_identifier(msg); name r = curr_name(); next(); return r; }
+    /** \brief Throws a parser error if the current token is not ':'. If it is, move to the next token. */
     void check_colon_next(char const * msg) { check_next(scanner::token::Colon, msg); }
+    /** \brief Throws a parser error if the current token is not ','. If it is, move to the next token. */
     void check_comma_next(char const * msg) { check_next(scanner::token::Comma, msg); }
+    /** \brief Throws a parser error if the current token is not '('. If it is, move to the next token. */
     void check_lparen_next(char const * msg) { check_next(scanner::token::LeftParen, msg); }
+    /** \brief Throws a parser error if the current token is not ')'. If it is, move to the next token. */
     void check_rparen_next(char const * msg) { check_next(scanner::token::RightParen, msg); }
+    /** \brief Throws a parser error if the current token is not ':='. If it is, move to the next token. */
     void check_assign_next(char const * msg) { check_next(scanner::token::Assign, msg); }
+    /** \brief Throws a parser error if the current token is not an identifier named \c op. */
     void check_name(name const & op, char const * msg) { if(!curr_is_identifier() || curr_name() != op) throw parser_error(msg); }
+    /** \brief Throws a parser error if the current token is not an identifier named \c op. If it is, move to the next token. */
     void check_name_next(name const & op, char const * msg) { check_name(op, msg); next(); }
 
+    /** \brief Initialize \c m_builtins table with Lean builtin symbols that do not have notation associated with them. */
     void init_builtins() {
         m_builtins["Bool"]   = Bool;
         m_builtins["true"]   = True;
@@ -123,27 +173,15 @@ struct parser_fn {
         m_builtins["Int"]    = Int;
     }
 
-    parser_fn(frontend & fe, std::istream & in, std::ostream * out, std::ostream * err, bool use_exceptions):
-        m_frontend(fe),
-        m_scanner(in),
-        m_formatter(mk_pp_formatter(fe)),
-        m_out(out),
-        m_err(err),
-        m_use_exceptions(use_exceptions) {
-        m_found_errors = false;
-        m_num_local_decls = 0;
-        m_scanner.set_command_keywords(g_command_keywords);
-        init_builtins();
-        scan();
-    }
-
     [[ noreturn ]] void not_implemented_yet() {
         // TODO
         throw parser_error("not implemented yet");
     }
 
-    // ==========================================
-    // Universe Level parsing
+    /**
+       @name Parse Universe levels
+    */
+    /*@{*/
     level parse_level_max() {
         next();
         buffer<level> lvls;
@@ -221,6 +259,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse a universe level */
     level parse_level(unsigned rbp = 0) {
         level left = parse_level_nud();
         while (rbp < curr_level_lbp()) {
@@ -228,8 +267,21 @@ struct parser_fn {
         }
         return left;
     }
-    // ==========================================
+    /*@}*/
 
+    /**
+       @name Parse Expressions
+    */
+    /*@{*/
+
+    /**
+       \brief Return the function associated with the given operator.
+       If the operator has been overloaded, it returns an expression
+       of the form (overload f_k ... (overload f_2 f_1) ...)
+       where f_i's are different options.
+       After we finish parsing, the procedure #elaborate will
+       resolve/decide which f_i should be used.
+    */
     expr mk_fun(operator_info const & op) {
         list<name> const & fs = op.get_internal_names();
         lean_assert(!is_nil(fs));
@@ -237,38 +289,49 @@ struct parser_fn {
         expr r = mk_constant(*it);
         ++it;
         for (; it != fs.end(); ++it)
-            r = mk_app(g_overload, r, mk_constant(*it));
+            r = mk_app(g_overload, mk_constant(*it), r);
         return r;
     }
 
+    /** \brief Parse a user defined prefix operator. */
     expr parse_prefix(operator_info const & op) {
         expr arg = parse_expr(op.get_precedence());
         return mk_app(mk_fun(op), arg);
     }
 
+    /** \brief Parse a user defined postfix operator. */
     expr parse_postfix(expr const & left, operator_info const & op) {
         return mk_app(mk_fun(op), left);
     }
 
+    /** \brief Parse a user defined infix operator. */
     expr parse_infix(expr const & left, operator_info const & op) {
         expr right = parse_expr(op.get_precedence()+1);
         return mk_app(mk_fun(op), left, right);
     }
 
+    /** \brief Parse a user defined infix-left operator. */
     expr parse_infixl(expr const & left, operator_info const & op) {
         expr right = parse_expr(op.get_precedence());
         return mk_app(mk_fun(op), left, right);
     }
 
+    /** \brief Parse a user defined infix-right operator. */
     expr parse_infixr(expr const & left, operator_info const & op) {
         expr right = parse_expr(op.get_precedence()-1);
         return mk_app(mk_fun(op), left, right);
     }
 
+    /**
+        \brief Throws an error if the current token is not an identifier named \c op_part.
+        If it is, move to the next toke. The error message assumes
+        this method has been used when parsing mixfix operators.
+    */
     void check_op_part(name const & op_part) {
-        check_name_next(op_part, "invalid expression, identifier expected");
+        check_name_next(op_part, "invalid mixfix operator application, identifier expected");
     }
 
+    /** \brief Auxiliary function for #parse_mixfixl and #parse_mixfixr */
     void parse_mixfix_args(list<name> const & ops, unsigned prec, buffer<expr> & args) {
         auto it = ops.begin();
         ++it;
@@ -279,6 +342,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse user defined mixfixl operator. It has the form: ID _ ... ID _ */
     expr parse_mixfixl(operator_info const & op) {
         buffer<expr> args;
         args.push_back(mk_fun(op));
@@ -287,6 +351,7 @@ struct parser_fn {
         return mk_app(args.size(), args.data());
     }
 
+    /** \brief Parse user defined mixfixr operator. It has the form: _ ID ... _ ID */
     expr parse_mixfixr(expr const & left, operator_info const & op) {
         buffer<expr> args;
         args.push_back(mk_fun(op));
@@ -295,6 +360,7 @@ struct parser_fn {
         return mk_app(args.size(), args.data());
     }
 
+    /** \brief Parse user defined mixfixc operator. It has the form: ID _ ID ... _ ID */
     expr parse_mixfixc(operator_info const & op) {
         buffer<expr> args;
         args.push_back(mk_fun(op));
@@ -311,6 +377,11 @@ struct parser_fn {
         }
     }
 
+    /**
+       \brief Try to find an object (Definition or Postulate) named \c
+       id in the frontend/environment. If there isn't one, then tries
+       to check if \c id is a builtin symbol. If it is not throws an error.
+    */
     expr get_name_ref(name const & id) {
         object const & obj = m_frontend.find_object(id);
         if (obj) {
@@ -330,6 +401,14 @@ struct parser_fn {
         }
     }
 
+    /**
+        \brief Parse an identifier that has a "null denotation" (See
+        paper: "Top down operator precedence"). A nud identifier is a
+        token that appears at the beginning of a language construct.
+        In Lean, local declarations (i.e., local functions), user
+        defined prefix, mixfixl and mixfixc operators, and global
+        functions can begin a language construct.
+    */
     expr parse_nud_id() {
         name id = curr_name();
         next();
@@ -351,6 +430,15 @@ struct parser_fn {
         }
     }
 
+    /**
+        \brief Parse an identifier that has a "left denotation" (See
+        paper: "Top down operator precedence"). A left identifier is a
+        token that appears inside of a construct (to left of the rest
+        of the construct). In Lean, local declarations (i.e., function
+        application arguments), user defined infix, infixl, infixr,
+        mixfixr and global values (as function application arguments)
+        can appear inside of a construct.
+    */
     expr parse_led_id(expr const & left) {
         name id = curr_name();
         next();
@@ -373,18 +461,22 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse <tt>expr '=' expr</tt>. */
     expr parse_eq(expr const & left) {
         next();
         expr right = parse_expr(g_eq_precedence);
         return mk_eq(left, right);
     }
 
+    /** \brief Parse <tt>expr '->' expr</tt>. */
     expr parse_arrow(expr const & left) {
         next();
+        // The -1 is a trick to get right associativity in Pratt's parsers
         expr right = parse_expr(g_arrow_precedence-1);
         return mk_arrow(left, right);
     }
 
+    /** \brief Parse <tt>'(' expr ')'</tt>. */
     expr parse_lparen() {
         next();
         expr r = parse_expr();
@@ -392,6 +484,10 @@ struct parser_fn {
         return r;
     }
 
+    /**
+        \brief Parse a sequence of identifiers <tt>ID*</tt>. Store the
+        result in \c result.
+    */
     void parse_names(buffer<name> & result) {
         while (curr_is_identifier()) {
             result.push_back(curr_name());
@@ -399,6 +495,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Register the name \c n as a local declaration. */
     void register_binding(name const & n) {
         unsigned lvl = m_num_local_decls;
         m_local_decls.insert(n, lvl);
@@ -406,8 +503,11 @@ struct parser_fn {
         lean_assert(m_local_decls.find(n)->second == lvl);
     }
 
+    /**
+        \brief Parse <tt>ID ... ID ':' expr</tt>, where the expression
+        represents the type of the identifiers.
+    */
     void parse_simple_bindings(buffer<std::pair<name, expr>> & result) {
-        // ID ... ID : type
         buffer<name> names;
         parse_names(names);
         check_colon_next("invalid binder, ':' expected");
@@ -422,6 +522,11 @@ struct parser_fn {
         }
     }
 
+    /**
+        \brief Parse a sequence of <tt>'(' ID ... ID ':' expr ')'</tt>.
+
+        This is used when parsing lambda, Pi, forall/exists expressions and definitions.
+    */
     void parse_bindings(buffer<std::pair<name, expr>> & result) {
         if (curr_is_identifier()) {
             parse_simple_bindings(result);
@@ -438,6 +543,10 @@ struct parser_fn {
         }
     }
 
+    /**
+        \brief Create a lambda/Pi abstraction, using the giving binders
+        and body.
+    */
     expr mk_abstraction(bool is_lambda, buffer<std::pair<name, expr>> const & bindings, expr const & body) {
         expr result = body;
         unsigned i = bindings.size();
@@ -451,6 +560,7 @@ struct parser_fn {
         return result;
     }
 
+    /** \brief Parse lambda/Pi abstraction. */
     expr parse_abstraction(bool is_lambda) {
         next();
         mk_scope scope(*this);
@@ -461,14 +571,17 @@ struct parser_fn {
         return mk_abstraction(is_lambda, bindings, result);
     }
 
+    /** \brief Parse lambda abstraction. */
     expr parse_lambda() {
         return parse_abstraction(true);
     }
 
+    /** \brief Parse Pi abstraction. */
     expr parse_pi() {
         return parse_abstraction(false);
     }
 
+    /** \brief Parse Let expression. */
     expr parse_let() {
         next();
         mk_scope scope(*this);
@@ -494,6 +607,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse an integer value. */
     expr parse_int() {
         expr r = mk_int_value(m_scanner.get_num_val().get_numerator());
         next();
@@ -508,6 +622,7 @@ struct parser_fn {
         not_implemented_yet();
     }
 
+    /** \brief Parse <tt>'Type'</tt> and <tt>'Type' level</tt> expressions. */
     expr parse_type() {
         next();
         if (curr_is_identifier() || curr_is_int()) {
@@ -517,6 +632,9 @@ struct parser_fn {
         }
     }
 
+    /**
+       \brief Auxiliary method used when processing the beginning of an expression.
+    */
     expr parse_nud() {
         switch (curr()) {
         case scanner::token::Id:         return parse_nud_id();
@@ -533,6 +651,9 @@ struct parser_fn {
         }
     }
 
+    /**
+       \brief Auxiliary method used when processing the 'inside' of an expression.
+    */
     expr parse_led(expr const & left) {
         switch (curr()) {
         case scanner::token::Id:         return parse_led_id(left);
@@ -547,6 +668,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Return the binding power of the current token (when parsing expression). */
     unsigned curr_lbp() {
         switch (curr()) {
         case scanner::token::Id: {
@@ -572,6 +694,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse an expression */
     expr parse_expr(unsigned rbp = 0) {
         expr left = parse_nud();
         while (rbp < curr_lbp()) {
@@ -579,12 +702,18 @@ struct parser_fn {
         }
         return left;
     }
+    /*@}*/
 
     expr elaborate(expr const & e) {
         // TODO
         return e;
     }
 
+    /**
+       @name Parse Commands
+    */
+    /*@{*/
+    /** \brief Auxiliary method used for parsing definitions and theorems. */
     void parse_def_core(bool is_definition) {
         next();
         expr type, val;
@@ -611,14 +740,29 @@ struct parser_fn {
             m_frontend.add_theorem(id, type, val);
     }
 
+    /**
+        \brief Parse a Definition. It has one of the following two forms:
+
+        1) 'Definition' ID ':' expr ':=' expr
+
+        2) 'Definition' ID bindings ':' expr ':=' expr
+    */
     void parse_definition() {
         parse_def_core(true);
     }
 
+    /**
+        \brief Parse a Theorem. It has one of the following two forms:
+
+        1) 'Theorem' ID ':' expr ':=' expr
+
+        2) 'Theorem' ID bindings ':' expr ':=' expr
+    */
     void parse_theorem() {
         parse_def_core(false);
     }
 
+    /** \brief Parse 'Variable' ID ':' expr */
     void parse_variable() {
         next();
         name id = check_identifier_next("invalid variable declaration, identifier expected");
@@ -627,6 +771,7 @@ struct parser_fn {
         m_frontend.add_var(id, type);
     }
 
+    /** \brief Parse 'Axiom' ID ':' expr */
     void parse_axiom() {
         next();
         name id = check_identifier_next("invalid axiom, identifier expected");
@@ -635,6 +780,7 @@ struct parser_fn {
         m_frontend.add_axiom(id, type);
     }
 
+    /** \brief Parse 'Eval' expr */
     void parse_eval() {
         next();
         expr v = elaborate(parse_expr());
@@ -642,12 +788,14 @@ struct parser_fn {
         (*m_out) << m_formatter(r) << std::endl;
     }
 
+    /** \brief Parse 'Show' expr */
     void parse_show() {
         next();
         expr v = elaborate(parse_expr());
         (*m_out) << m_formatter(v) << std::endl;
     }
 
+    /** \brief Parse 'Check' expr */
     void parse_check() {
         next();
         expr v = elaborate(parse_expr());
@@ -655,12 +803,14 @@ struct parser_fn {
         (*m_out) << m_formatter(t) << std::endl;
     }
 
+    /** \brief Parse 'Environment' */
     void parse_env() {
         next();
         (*m_out) << m_frontend << std::endl;
     }
 
-    unsigned parse_precision() {
+    /** \brief Return the (optional) precedence of a user-defined operator. */
+    unsigned parse_precedence() {
         if (curr() == scanner::token::IntVal) {
             mpz pval = curr_num().get_numerator();
             if (!pval.is_unsigned_int())
@@ -673,13 +823,20 @@ struct parser_fn {
         }
     }
 
+    /** \brief Throw an error if the current token is not an identifier. If it is, move to next token. */
     name parse_op_id() {
         return check_identifier_next("invalid operator definition, identifier expected");
     }
 
+    /**
+        \brief Parse prefix/postfix/infix/infixl/infixr user operator
+        definitions. These definitions have the form:
+
+           - fixity [Num] ID ID
+    */
     void parse_op(fixity fx) {
         next();
-        unsigned prec = parse_precision();
+        unsigned prec = parse_precedence();
         name op_id = parse_op_id();
         name id    = parse_op_id();
         switch (fx) {
@@ -692,9 +849,15 @@ struct parser_fn {
         }
     }
 
+    /**
+        \brief Parse mixfix/mixfixl/mixfixr user operator definition.
+        These definitions have the form:
+
+        - fixity [NUM] ID ID+ ID
+    */
     void parse_mixfix_op(fixity fx) {
         next();
-        unsigned prec = parse_precision();
+        unsigned prec = parse_precedence();
         buffer<name> parts;
         parts.push_back(parse_op_id());
         parts.push_back(parse_op_id());
@@ -712,6 +875,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse a Lean command. */
     void parse_command() {
         name const & cmd_id = curr_name();
         if (cmd_id == g_definition_kwd)    parse_definition();
@@ -732,6 +896,7 @@ struct parser_fn {
         else if (cmd_id == g_mixfixc_kwd)  parse_mixfix_op(fixity::Mixfixc);
         else { lean_unreachable(); }
     }
+    /*@}*/
 
     void error(char const * msg, unsigned line, unsigned pos) {
         lean_assert(m_err);
@@ -743,6 +908,22 @@ struct parser_fn {
         sync();
     }
 
+public:
+    parser_fn(frontend & fe, std::istream & in, std::ostream * out, std::ostream * err, bool use_exceptions):
+        m_frontend(fe),
+        m_scanner(in),
+        m_formatter(mk_pp_formatter(fe)),
+        m_out(out),
+        m_err(err),
+        m_use_exceptions(use_exceptions) {
+        m_found_errors = false;
+        m_num_local_decls = 0;
+        m_scanner.set_command_keywords(g_command_keywords);
+        init_builtins();
+        scan();
+    }
+
+    /** \brief Parse a sequence of commands. This method also perform error management. */
     bool parse_commands() {
         while (true) {
             try {
@@ -771,6 +952,7 @@ struct parser_fn {
         }
     }
 
+    /** \brief Parse an expression. */
     expr parse_expr_main() {
         try {
             return elaborate(parse_expr());