Add parser skeleton

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
Leonardo de Moura 2013-08-17 18:13:55 -07:00
parent f1961ab33f
commit 685aeae43a
11 changed files with 496 additions and 15 deletions

View file

@ -1,2 +1,2 @@
add_library(frontend frontend.cpp operator_info.cpp scanner.cpp pp.cpp builtin_notation.cpp)
add_library(frontend frontend.cpp operator_info.cpp scanner.cpp parser.cpp pp.cpp builtin_notation.cpp)
target_link_libraries(frontend ${LEAN_LIBS})

View file

@ -13,8 +13,11 @@ namespace lean {
*/
void init_builtin_notation(frontend & f) {
f.add_prefix("\u00ac", 40, const_name(mk_not_fn()));
f.add_infixr("&&", 35, const_name(mk_and_fn()));
f.add_infixr("\u2227", 35, const_name(mk_and_fn()));
f.add_infixr("||", 30, const_name(mk_or_fn()));
f.add_infixr("\u2228", 30, const_name(mk_or_fn()));
f.add_infixr("=>", 25, const_name(mk_implies_fn()));
f.add_infixr("\u21D2", 25, const_name(mk_implies_fn()));
}
}

View file

@ -205,6 +205,10 @@ void frontend::add_var(name const & n, expr const & t) { return m_imp->m_env.add
object const & frontend::get_object(name const & n) const { return m_imp->m_env.get_object(n); }
object const & frontend::find_object(name const & n) const { return m_imp->m_env.find_object(n); }
bool frontend::has_object(name const & n) const { return m_imp->m_env.has_object(n); }
frontend::object_iterator frontend::begin_objects() const { return m_imp->m_env.begin_objects(); }
frontend::object_iterator frontend::end_objects() const { return m_imp->m_env.end_objects(); }
frontend::object_iterator frontend::begin_local_objects() const { return m_imp->m_env.begin_local_objects(); }
frontend::object_iterator frontend::end_local_objects() const { return m_imp->m_env.end_local_objects(); }
void frontend::add_infixl(name const & opn, unsigned p, name const & n) { m_imp->add_infixl(opn, p, n); }
void frontend::add_infixr(name const & opn, unsigned p, name const & n) { m_imp->add_infixr(opn, p, n); }
@ -214,4 +218,6 @@ void frontend::add_mixfixl(unsigned sz, name const * opns, unsigned p, name cons
void frontend::add_mixfixr(unsigned sz, name const * opns, unsigned p, name const & n) { m_imp->add_mixfixr(sz, opns, p, n); }
void frontend::add_mixfixc(unsigned sz, name const * opns, unsigned p, name const & n) { m_imp->add_mixfixc(sz, opns, p, n); }
operator_info frontend::find_op_for(name const & n) const { return m_imp->find_op_for(n); }
operator_info frontend::find_nud(name const & n) const { return m_imp->find_nud(n); }
operator_info frontend::find_led(name const & n) const { return m_imp->find_led(n); }
}

View file

@ -59,6 +59,11 @@ public:
object const & get_object(name const & n) const;
object const & find_object(name const & n) const;
bool has_object(name const & n) const;
typedef environment::object_iterator object_iterator;
object_iterator begin_objects() const;
object_iterator end_objects() const;
object_iterator begin_local_objects() const;
object_iterator end_local_objects() const;
// =======================================
// =======================================
@ -78,6 +83,10 @@ public:
return the null operator.
*/
operator_info find_op_for(name const & n) const;
operator_info find_nud(name const & n) const;
operator_info find_led(name const & n) const;
// =======================================
};
}

410
src/frontend/parser.cpp Normal file
View file

@ -0,0 +1,410 @@
/*
Copyright (c) 2013 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <unordered_map>
#include "frontend.h"
#include "scanner.h"
#include "scoped_map.h"
#include "exception.h"
#include "builtin.h"
#include "arith.h"
namespace lean {
static name g_definition_kwd("Definition");
static name g_variable_kwd("Variable");
static name g_theorem_kwd("Theorem");
static name g_axiom_kwd("Axiom");
static name g_universe_kwd("Universe");
static name g_eval_kwd("Eval");
static name g_show_kwd("Show");
static name g_infix_kwd("Infix");
static name g_infixl_kwd("Infixl");
static name g_infixr_kwd("Infixr");
static name g_prefix_kwd("Prefix");
static name g_postfix_kwd("Postfix");
static name g_mixfixl_kwd("Mixfixl");
static name g_mixfixr_kwd("Mixfixr");
static name g_mixfixc_kwd("Mixfixc");
static name g_overload_name(name(name(name(0u), "parser"), "overload"));
static expr g_overload = mk_constant(g_overload_name);
static list<name> g_command_keywords = {g_definition_kwd, g_variable_kwd, g_theorem_kwd, g_axiom_kwd, g_universe_kwd, g_eval_kwd, g_show_kwd, g_infix_kwd, g_infixl_kwd, g_infixr_kwd, g_prefix_kwd, g_postfix_kwd, g_mixfixl_kwd, g_mixfixr_kwd, g_mixfixc_kwd};
/** \brief Functional object for parsing */
struct parser_fn {
typedef scoped_map<name, unsigned, name_hash, name_eq> local_decls;
typedef std::unordered_map<name, expr, name_hash, name_eq> builtins;
frontend m_frontend;
scanner m_scanner;
std::ostream & m_err;
scanner::token m_curr;
bool m_use_exceptions;
bool m_found_errors;
local_decls m_local_decls;
builtins m_builtins;
/** \brief Exception used to track parsing erros, it does not leak outside of this class. */
struct parser_error : public exception {
parser_error(char const * msg):exception(msg) {}
};
void scan() { m_curr = m_scanner.scan(); }
scanner::token curr() const { return m_curr; }
void next() { if (m_curr != scanner::token::Eof) scan(); }
void sync() {
while (curr() != scanner::token::CommandId && curr() != scanner::token::Eof)
next();
}
name const & curr_name() const { return m_scanner.get_name_val(); }
void check_next(scanner::token t, char const * msg) {
if (curr() == t)
next();
else
throw parser_error(msg);
}
bool curr_is_identifier() const { return curr() == scanner::token::Id; }
void check_identifier(char const * msg) { if (!curr_is_identifier()) throw parser_error(msg); }
name check_identifier_next(char const * msg) { check_identifier(msg); name r = curr_name(); next(); return r; }
void check_colon_next(char const * msg) { check_next(scanner::token::Colon, msg); }
void check_rparen_next(char const * msg) { check_next(scanner::token::RightParen, msg); }
void init_builtins() {
m_builtins["Bool"] = Bool;
m_builtins["true"] = True;
m_builtins["false"] = False;
m_builtins["\u22A4"] = True;
m_builtins["\u22A5"] = False;
m_builtins["Int"] = Int;
}
parser_fn(frontend & fe, std::istream & in, std::ostream & err, bool use_exceptions):
m_frontend(fe),
m_scanner(in),
m_err(err),
m_use_exceptions(use_exceptions) {
m_found_errors = false;
m_scanner.set_command_keywords(g_command_keywords);
init_builtins();
scan();
}
[[ noreturn ]] void not_implemented_yet() {
throw parser_error("not implemented yet");
}
expr mk_fun(operator_info const & op) {
list<name> const & fs = op.get_internal_names();
lean_assert(!is_nil(fs));
auto it = fs.begin();
expr r = mk_constant(*it);
++it;
for (; it != fs.end(); ++it)
r = mk_app(g_overload, r, mk_constant(*it));
return r;
}
expr parse_prefix(operator_info const & op) {
expr arg = parse_expr(op.get_precedence());
return mk_app(mk_fun(op), arg);
}
expr parse_postfix(expr const & left, operator_info const & op) {
return mk_app(mk_fun(op), left);
}
expr parse_infix(expr const & left, operator_info const & op) {
expr right = parse_expr(op.get_precedence()+1);
return mk_app(mk_fun(op), left, right);
}
expr parse_infixl(expr const & left, operator_info const & op) {
expr right = parse_expr(op.get_precedence());
return mk_app(mk_fun(op), left, right);
}
expr parse_infixr(expr const & left, operator_info const & op) {
expr right = parse_expr(op.get_precedence()-1);
return mk_app(mk_fun(op), left, right);
}
expr parse_mixfixl(operator_info const & op) {
not_implemented_yet();
}
expr parse_mixfixr(expr const & left, operator_info const & op) {
not_implemented_yet();
}
expr parse_mixfixc(operator_info const & op) {
not_implemented_yet();
}
expr get_name_ref(name const & id) {
object const & obj = m_frontend.find_object(id);
if (obj) {
object_kind k = obj.kind();
if (k == object_kind::Definition || k == object_kind::Postulate)
return mk_constant(obj.get_name());
else
throw parser_error("invalid object reference, object is not an expression.");
}
else {
auto it = m_builtins.find(id);
if (it != m_builtins.end()) {
return it->second;
} else {
throw parser_error("unknown identifier");
}
}
}
expr parse_nud_id() {
name id = curr_name();
next();
auto it = m_local_decls.find(id);
if (it != m_local_decls.end()) {
return mk_var(m_local_decls.size() - it->second - 1);
} else {
operator_info op = m_frontend.find_nud(id);
if (op) {
switch (op.get_fixity()) {
case fixity::Prefix: return parse_prefix(op);
case fixity::Mixfixl: return parse_mixfixl(op);
case fixity::Mixfixc: return parse_mixfixc(op);
default: lean_unreachable(); return expr();
}
} else {
return get_name_ref(id);
}
}
}
expr parse_led_id(expr const & left) {
name id = curr_name();
next();
auto it = m_local_decls.find(id);
if (it != m_local_decls.end()) {
return mk_app(left, mk_var(m_local_decls.size() - it->second - 1));
} else {
operator_info op = m_frontend.find_led(id);
if (op) {
switch (op.get_fixity()) {
case fixity::Infix: return parse_infix(left, op);
case fixity::Infixl: return parse_infixl(left, op);
case fixity::Infixr: return parse_infixr(left, op);
case fixity::Mixfixr: return parse_mixfixr(left, op);
default: lean_unreachable(); return expr();
}
} else {
return mk_app(left, get_name_ref(id));
}
}
}
expr parse_lparen() {
next();
expr r = parse_expr();
check_rparen_next("invalid expression, ')' expected");
return r;
}
expr parse_lambda() {
not_implemented_yet();
}
expr parse_pi() {
not_implemented_yet();
}
expr parse_int() {
expr r = mk_int_value(m_scanner.get_num_val().get_numerator());
next();
return r;
}
expr parse_decimal() {
not_implemented_yet();
}
expr parse_string() {
not_implemented_yet();
}
expr parse_type() {
next();
// TODO universe
return Type();
}
expr parse_nud() {
switch (curr()) {
case scanner::token::Id: return parse_nud_id();
case scanner::token::LeftParen: return parse_lparen();
case scanner::token::Lambda: return parse_lambda();
case scanner::token::Pi: return parse_pi();
case scanner::token::IntVal: return parse_int();
case scanner::token::DecimalVal: return parse_decimal();
case scanner::token::StringVal: return parse_string();
case scanner::token::Type: return parse_type();
default:
throw parser_error("invalid expression, unexpected token");
}
}
expr parse_led(expr const & left) {
switch (curr()) {
case scanner::token::Id: return parse_led_id(left);
case scanner::token::LeftParen: return mk_app(left, parse_lparen());
case scanner::token::IntVal: return mk_app(left, parse_int());
case scanner::token::DecimalVal: return mk_app(left, parse_decimal());
case scanner::token::StringVal: return mk_app(left, parse_string());
case scanner::token::Type: return mk_app(left, parse_type());
default: return left;
}
}
unsigned curr_lbp() {
switch (curr()) {
case scanner::token::Id: {
name const & id = curr_name();
auto it = m_local_decls.find(id);
if (it != m_local_decls.end()) {
return 1;
} else {
operator_info op = m_frontend.find_led(id);
if (op)
return op.get_precedence();
else
return 1;
}
}
case scanner::token::LeftParen: case scanner::token::IntVal: case scanner::token::DecimalVal:
case scanner::token::StringVal: case scanner::token::Type:
return 1;
default:
return 0;
}
}
expr parse_expr(unsigned rbp = 0) {
expr left = parse_nud();
while (rbp < curr_lbp()) {
left = parse_led(left);
}
return left;
}
expr elaborate(expr const & e) {
// TODO
return e;
}
void parse_definition() {
next();
name id = check_identifier_next("invalid definition, identifier expected");
check_colon_next("invalid definition, ':' expected");
// TODO
}
void parse_variable() {
next();
name id = check_identifier_next("invalid variable declaration, identifier expected");
check_colon_next("invalid variable declaration, ':' expected");
expr type = elaborate(parse_expr());
m_frontend.add_var(id, type);
}
void parse_theorem() {
// TODO
}
void parse_axiom() {
next();
name id = check_identifier_next("invalid axiom, identifier expected");
check_colon_next("invalid axiom, ':' expected");
expr type = elaborate(parse_expr());
m_frontend.add_axiom(id, type);
}
void parse_eval() {
// TODO
}
void parse_show() {
// TODO
}
void parse_op(fixity fx) {
// TODO
}
void parse_mixfix_op(fixity fx) {
// TODO
}
void parse_command() {
name const & cmd_id = curr_name();
if (cmd_id == g_definition_kwd) parse_definition();
else if (cmd_id == g_variable_kwd) parse_variable();
else if (cmd_id == g_theorem_kwd) parse_theorem();
else if (cmd_id == g_axiom_kwd) parse_axiom();
else if (cmd_id == g_eval_kwd) parse_eval();
else if (cmd_id == g_show_kwd) parse_show();
else if (cmd_id == g_infix_kwd) parse_op(fixity::Infix);
else if (cmd_id == g_infixl_kwd) parse_op(fixity::Infixl);
else if (cmd_id == g_infixr_kwd) parse_op(fixity::Infixr);
else if (cmd_id == g_prefix_kwd) parse_op(fixity::Prefix);
else if (cmd_id == g_postfix_kwd) parse_op(fixity::Postfix);
else if (cmd_id == g_mixfixl_kwd) parse_mixfix_op(fixity::Mixfixl);
else if (cmd_id == g_mixfixr_kwd) parse_mixfix_op(fixity::Mixfixr);
else if (cmd_id == g_mixfixc_kwd) parse_mixfix_op(fixity::Mixfixc);
else { lean_unreachable(); }
}
void error(char const * msg, unsigned line, unsigned pos) {
m_err << "Error (line: " << line << ", pos: " << pos << ") " << msg << std::endl;
}
void error(char const * msg) {
error(msg, m_scanner.get_line(), m_scanner.get_pos());
sync();
}
bool parse_commands() {
while (true) {
try {
switch (curr()) {
case scanner::token::CommandId: parse_command(); break;
case scanner::token::Eof: return !m_found_errors;
default:
throw parser_error("Command expected");
}
} catch (parser_error & ex) {
m_found_errors = true;
if (m_use_exceptions) {
throw parser_exception(ex.what(), m_scanner.get_line(), m_scanner.get_pos());
} else {
error(ex.what());
}
} catch (exception & ex) {
m_found_errors = true;
if (m_use_exceptions) {
throw;
} else {
error(ex.what());
}
}
}
}
};
bool parse_commands(frontend & fe, std::istream & in, std::ostream & err, bool use_exceptions) {
return parser_fn(fe, in, err, use_exceptions).parse_commands();
}
}

13
src/frontend/parser.h Normal file
View file

@ -0,0 +1,13 @@
/*
Copyright (c) 2013 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#pragma once
#include <iostream>
#include "frontend.h"
namespace lean {
bool parse_commands(frontend & fe, std::istream & in, std::ostream & err = std::cerr, bool use_exceptions = false);
}

View file

@ -268,7 +268,7 @@ scanner::token scanner::read_number() {
}
if (is_decimal)
m_num_val /= q;
return is_decimal ? token::Decimal : token::Int;
return is_decimal ? token::DecimalVal : token::IntVal;
}
scanner::token scanner::read_string() {
@ -281,7 +281,7 @@ scanner::token scanner::read_string() {
throw_exception("unexpected end of string");
} else if (c == '\"') {
next();
return token::String;
return token::StringVal;
} else if (c == '\n') {
new_line();
} else if (c == '\\') {
@ -349,9 +349,9 @@ std::ostream & operator<<(std::ostream & out, scanner::token const & t) {
case scanner::token::Arrow: out << g_arrow_unicode; break;
case scanner::token::Id: out << "Id"; break;
case scanner::token::CommandId: out << "CId"; break;
case scanner::token::Int: out << "Int"; break;
case scanner::token::Decimal: out << "Dec"; break;
case scanner::token::String: out << "String"; break;
case scanner::token::IntVal: out << "Int"; break;
case scanner::token::DecimalVal: out << "Dec"; break;
case scanner::token::StringVal: out << "String"; break;
case scanner::token::Eq: out << "="; break;
case scanner::token::Assign: out << ":="; break;
case scanner::token::Type: out << "Type"; break;

View file

@ -19,7 +19,7 @@ class scanner {
public:
enum class token {
LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow,
Id, CommandId, Int, Decimal, String, Eq, Assign, Type, Eof
Id, CommandId, IntVal, DecimalVal, StringVal, Eq, Assign, Type, Eof
};
protected:
int m_spos; // position in the current line of the stream

View file

@ -4,3 +4,6 @@ add_test(frontend ${CMAKE_CURRENT_BINARY_DIR}/frontend_tst)
add_executable(scanner scanner.cpp)
target_link_libraries(scanner ${EXTRA_LIBS})
add_test(scanner ${CMAKE_CURRENT_BINARY_DIR}/scanner)
add_executable(parser parser.cpp)
target_link_libraries(parser ${EXTRA_LIBS})
add_test(parser ${CMAKE_CURRENT_BINARY_DIR}/parser)

View file

@ -0,0 +1,37 @@
/*
Copyright (c) 2013 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <sstream>
#include "parser.h"
#include "pp.h"
#include "printer.h"
#include "test.h"
using namespace lean;
static void parse(frontend & fe, char const * msg) {
frontend child = fe.mk_child();
std::istringstream in(msg);
if (parse_commands(child, in)) {
formatter fmt = mk_pp_formatter(child);
std::for_each(child.begin_local_objects(),
child.end_local_objects(),
[&](object const & obj) {
std::cout << fmt(obj) << "\n";
std::cout << obj << "\n";
});
}
}
static void tst1() {
frontend fe;
parse(fe,
"Variable x : Bool Variable y : Bool Axiom H : x && y || x => x");
}
int main() {
tst1();
return has_violations() ? 1 : 0;
}

View file

@ -24,9 +24,9 @@ static void scan(char const * str, list<name> const & cmds = list<name>()) {
std::cout << t;
if (t == st::Id || t == st::CommandId)
std::cout << "[" << s.get_name_val() << "]";
else if (t == st::Int || t == st::Decimal)
else if (t == st::IntVal || t == st::DecimalVal)
std::cout << "[" << s.get_num_val() << "]";
else if (t == st::String)
else if (t == st::StringVal)
std::cout << "[\"" << escaped(s.get_str_val().c_str()) << "\"]";
std::cout << " ";
}
@ -77,12 +77,12 @@ static void tst2() {
check("+++", {st::Id});
check("x+y", {st::Id, st::Id, st::Id});
check("(* testing *)", {});
check(" 2.31 ", {st::Decimal});
check(" 333 22", {st::Int, st::Int});
check(" 2.31 ", {st::DecimalVal});
check(" 333 22", {st::IntVal, st::IntVal});
check("Int -> Int", {st::Id, st::Arrow, st::Id});
check("Int --> Int", {st::Id, st::Id, st::Id});
check("x := 10", {st::Id, st::Assign, st::Int});
check("(x+1):Int", {st::LeftParen, st::Id, st::Id, st::Int, st::RightParen, st::Colon, st::Id});
check("x := 10", {st::Id, st::Assign, st::IntVal});
check("(x+1):Int", {st::LeftParen, st::Id, st::Id, st::IntVal, st::RightParen, st::Colon, st::Id});
check("{x}", {st::LeftCurlyBracket, st::Id, st::RightCurlyBracket});
check("\u03BB \u03A0 \u2192", {st::Lambda, st::Pi, st::Arrow});
scan("++\u2295++x\u2296\u2296");
@ -91,7 +91,7 @@ static void tst2() {
check_name("x10", name("x10"));
check_name("x::10", name(name("x"), 10));
check_name("x::10::bla::0", name(name(name(name("x"), 10), "bla"), 0u));
check("0::1", {st::Int, st::Colon, st::Colon, st::Int});
check("0::1", {st::IntVal, st::Colon, st::Colon, st::IntVal});
check_name("\u2296\u2296", name("\u2296\u2296"));
try {
scan("x::1000000000000000000");
@ -102,7 +102,7 @@ static void tst2() {
scan("Theorem a : Bool Axiom b : Int", list<name>({"Theorem", "Axiom"}));
check("Theorem a : Bool Axiom b : Int", {st::CommandId, st::Id, st::Colon, st::Id, st::CommandId, st::Id, st::Colon, st::Id}, list<name>({"Theorem", "Axiom"}));
scan("foo \"tst\\\"\" : Int");
check("foo \"tst\\\"\" : Int", {st::Id, st::String, st::Colon, st::Id});
check("foo \"tst\\\"\" : Int", {st::Id, st::StringVal, st::Colon, st::Id});
try {
scan("\"foo");
lean_unreachable();