refactor(frontends/lean/scanner): use the parser configuration in the environment

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
Leonardo de Moura 2014-06-10 10:59:12 -07:00
parent e2adb101d5
commit d3e3301208
4 changed files with 78 additions and 45 deletions

View file

@ -12,6 +12,9 @@ struct parser_ext : public environment_extension {
token_table m_tokens; token_table m_tokens;
parse_table m_nud; parse_table m_nud;
parse_table m_led; parse_table m_led;
parser_ext() {
m_tokens = mk_default_token_table();
}
}; };
struct parser_ext_reg { struct parser_ext_reg {

View file

@ -8,6 +8,7 @@ Author: Leonardo de Moura
#include <string> #include <string>
#include "util/exception.h" #include "util/exception.h"
#include "frontends/lean/scanner.h" #include "frontends/lean/scanner.h"
#include "frontends/lean/parser_config.h"
namespace lean { namespace lean {
void scanner::next() { void scanner::next() {
@ -211,7 +212,7 @@ auto scanner::read_key_cmd_id() -> token_kind {
static char const * error_msg = "unexpected token"; static char const * error_msg = "unexpected token";
char c = curr(); char c = curr();
unsigned num_cs = 1; // number of characters read unsigned num_cs = 1; // number of characters read
token_table const * it = find(m_tokens, c); token_table const * it = find(*m_tokens, c);
token_info const * info = nullptr; token_info const * info = nullptr;
unsigned key_size = 0; unsigned key_size = 0;
if (it) { if (it) {
@ -278,7 +279,8 @@ static name g_begin_script_tk("(*");
static name g_begin_comment_tk("--"); static name g_begin_comment_tk("--");
static name g_begin_comment_block_tk("(--"); static name g_begin_comment_block_tk("(--");
auto scanner::scan() -> token_kind { auto scanner::scan(environment const & env) -> token_kind {
m_tokens = &get_token_table(env);
while (true) { while (true) {
char c = curr(); char c = curr();
m_pos = m_spos; m_pos = m_spos;
@ -318,8 +320,8 @@ auto scanner::scan() -> token_kind {
} }
} }
scanner::scanner(token_table const & tks, std::istream & strm, char const * strm_name): scanner::scanner(std::istream & strm, char const * strm_name):
m_tokens(tks), m_stream(strm), m_spos(0), m_sline(1), m_curr(0), m_pos(0), m_line(1), m_tokens(nullptr), m_stream(strm), m_spos(0), m_sline(1), m_curr(0), m_pos(0), m_line(1),
m_token_info(nullptr) { m_token_info(nullptr) {
m_stream_name = strm_name ? strm_name : "[unknown]"; m_stream_name = strm_name ? strm_name : "[unknown]";
next(); next();

View file

@ -8,8 +8,10 @@ Author: Leonardo de Moura
#include <iostream> #include <iostream>
#include "util/name.h" #include "util/name.h"
#include "util/numerics/mpq.h" #include "util/numerics/mpq.h"
#include "kernel/environment.h"
#include "frontends/lean/token_table.h" #include "frontends/lean/token_table.h"
namespace lean { namespace lean {
/** /**
\brief Scanner. The behavior of the scanner is controlled using a token set. \brief Scanner. The behavior of the scanner is controlled using a token set.
@ -22,22 +24,22 @@ class scanner {
public: public:
enum class token_kind {Keyword, CommandKeyword, ScriptBlock, Identifier, Numeral, Decimal, String, Eof}; enum class token_kind {Keyword, CommandKeyword, ScriptBlock, Identifier, Numeral, Decimal, String, Eof};
protected: protected:
token_table m_tokens; token_table const * m_tokens;
std::istream & m_stream; std::istream & m_stream;
std::string m_stream_name; std::string m_stream_name;
int m_spos; // current position int m_spos; // current position
int m_sline; // current line int m_sline; // current line
char m_curr; // current char; char m_curr; // current char;
int m_pos; // start position of the token int m_pos; // start position of the token
int m_line; // line of the token int m_line; // line of the token
name m_name_val; name m_name_val;
token_info const * m_token_info; token_info const * m_token_info;
mpq m_num_val; mpq m_num_val;
std::string m_buffer; std::string m_buffer;
std::string m_aux_buffer; std::string m_aux_buffer;
[[ noreturn ]] void throw_exception(char const * msg) const; [[ noreturn ]] void throw_exception(char const * msg) const;
void next(); void next();
@ -60,11 +62,11 @@ protected:
token_kind read_key_cmd_id(); token_kind read_key_cmd_id();
public: public:
scanner(token_table const & tks, std::istream & strm, char const * strm_name = nullptr); scanner(std::istream & strm, char const * strm_name = nullptr);
int get_line() const { return m_line; } int get_line() const { return m_line; }
int get_pos() const { return m_pos; } int get_pos() const { return m_pos; }
token_kind scan(); token_kind scan(environment const & env);
mpq const & get_num_val() const { return m_num_val; } mpq const & get_num_val() const { return m_num_val; }
name const & get_name_val() const { return m_name_val; } name const & get_name_val() const { return m_name_val; }

View file

@ -9,15 +9,16 @@ Author: Leonardo de Moura
#include "util/escaped.h" #include "util/escaped.h"
#include "util/exception.h" #include "util/exception.h"
#include "frontends/lean/scanner.h" #include "frontends/lean/scanner.h"
#include "frontends/lean/parser_config.h"
using namespace lean; using namespace lean;
#define tk scanner::token_kind #define tk scanner::token_kind
static void scan(char const * str, token_table set = mk_default_token_table()) { static void scan(char const * str, environment const & env = environment()) {
std::istringstream in(str); std::istringstream in(str);
scanner s(set, in, "[string]"); scanner s(in, "[string]");
while (true) { while (true) {
tk k = s.scan(); tk k = s.scan(env);
if (k == tk::Eof) if (k == tk::Eof)
break; break;
if (k == tk::Identifier) if (k == tk::Identifier)
@ -35,9 +36,9 @@ static void scan(char const * str, token_table set = mk_default_token_table()) {
std::cout << "\n"; std::cout << "\n";
} }
static void scan_success(char const * str, token_table set = mk_default_token_table()) { static void scan_success(char const * str, environment const & env = environment()) {
try { try {
scan(str, set); scan(str, env);
} catch (exception & ex) { } catch (exception & ex) {
std::cout << "ERROR: " << ex.what() << "\n"; std::cout << "ERROR: " << ex.what() << "\n";
lean_unreachable(); lean_unreachable();
@ -54,13 +55,13 @@ static void scan_error(char const * str) {
} }
static void check(char const * str, std::initializer_list<tk> const & l, static void check(char const * str, std::initializer_list<tk> const & l,
token_table set = mk_default_token_table()) { environment const & env = environment()) {
try { try {
auto it = l.begin(); auto it = l.begin();
std::istringstream in(str); std::istringstream in(str);
scanner s(set, in, "[string]"); scanner s(in, "[string]");
while (true) { while (true) {
tk k = s.scan(); tk k = s.scan(env);
if (k == tk::Eof) { if (k == tk::Eof) {
lean_assert(it == l.end()); lean_assert(it == l.end());
return; return;
@ -75,37 +76,40 @@ static void check(char const * str, std::initializer_list<tk> const & l,
} }
} }
static void check_name(char const * str, name const & expected, token_table set = mk_default_token_table()) { static void check_name(char const * str, name const & expected, environment const & env = environment()) {
std::istringstream in(str); std::istringstream in(str);
scanner s(set, in, "[string]"); scanner s(in, "[string]");
tk k = s.scan(); tk k = s.scan(env);
lean_assert(k == tk::Identifier); lean_assert(k == tk::Identifier);
lean_assert(s.get_name_val() == expected); lean_assert(s.get_name_val() == expected);
lean_assert(s.scan() == tk::Eof); lean_assert(s.scan(env) == tk::Eof);
} }
static void check_keyword(char const * str, name const & expected, token_table set = mk_default_token_table()) { static void check_keyword(char const * str, name const & expected, environment const & env = environment()) {
std::istringstream in(str); std::istringstream in(str);
scanner s(set, in, "[string]"); scanner s(in, "[string]");
tk k = s.scan(); tk k = s.scan(env);
lean_assert(k == tk::Keyword); lean_assert(k == tk::Keyword);
lean_assert(s.get_token_info().value() == expected); lean_assert(s.get_token_info().value() == expected);
lean_assert(s.scan() == tk::Eof); lean_assert(s.scan(env) == tk::Eof);
} }
static void tst1() { static void tst1() {
token_table s = mk_default_token_table(); environment env;
token_table s = get_token_table(env);
s = add_token(s, "+", "plus"); s = add_token(s, "+", "plus");
s = add_token(s, "=", "eq"); s = add_token(s, "=", "eq");
env = update_token_table(env, s);
scan_success("a..a"); scan_success("a..a");
check("a..a", {tk::Identifier, tk::Keyword, tk::Keyword, tk::Identifier}); check("a..a", {tk::Identifier, tk::Keyword, tk::Keyword, tk::Identifier});
check("Type.{0}", {tk::Keyword, tk::Keyword, tk::Numeral, tk::Keyword}); check("Type.{0}", {tk::Keyword, tk::Keyword, tk::Numeral, tk::Keyword});
s = add_token(s, "ab+cde", "weird1"); s = add_token(s, "ab+cde", "weird1");
s = add_token(s, "+cd", "weird2"); s = add_token(s, "+cd", "weird2");
scan_success("ab+cd", s); env = update_token_table(env, s);
check("ab+cd", {tk::Identifier, tk::Keyword}, s); scan_success("ab+cd", env);
scan_success("ab+cde", s); check("ab+cd", {tk::Identifier, tk::Keyword}, env);
check("ab+cde", {tk::Keyword}, s); scan_success("ab+cde", env);
check("ab+cde", {tk::Keyword}, env);
scan_success("Type.{0}"); scan_success("Type.{0}");
scan_success("0.a a"); scan_success("0.a a");
scan_success("0."); scan_success("0.");
@ -115,11 +119,11 @@ static void tst1() {
scan_success(".."); scan_success("..");
scan_success("...."); scan_success("....");
scan_success("....\n.."); scan_success("....\n..");
scan_success("a", s); scan_success("a", env);
scan_success("a. b.c.."); scan_success("a. b.c..");
scan_success(".. .."); scan_success(".. ..");
scan_success("....\n.."); scan_success("....\n..");
scan_success("fun(x: forall A : Type, A -> A), x+1 = 2.0 λvalue.foo. . . a", s); scan_success("fun(x: forall A : Type, A -> A), x+1 = 2.0 λvalue.foo. . . a", env);
} }
static void tst2() { static void tst2() {
@ -133,11 +137,14 @@ static void tst2() {
check_name("x.bla", name({"x", "bla"})); check_name("x.bla", name({"x", "bla"}));
scan_error("+++"); scan_error("+++");
environment env;
token_table s = mk_default_token_table(); token_table s = mk_default_token_table();
s = add_token(s, "+++", "tplus"); s = add_token(s, "+++", "tplus");
check_keyword("+++", "tplus", s); env = update_token_table(env, s);
check_keyword("+++", "tplus", env);
s = add_token(s, "+", "plus"); s = add_token(s, "+", "plus");
check("x+y", {tk::Identifier, tk::Keyword, tk::Identifier}, s); env = update_token_table(env, s);
check("x+y", {tk::Identifier, tk::Keyword, tk::Identifier}, env);
check("-- testing", {}); check("-- testing", {});
check("(-- testing --)", {}); check("(-- testing --)", {});
check("(-- (-- testing\n --) --)", {}); check("(-- (-- testing\n --) --)", {});
@ -148,7 +155,8 @@ static void tst2() {
check("int->int", {tk::Identifier, tk::Keyword, tk::Identifier}); check("int->int", {tk::Identifier, tk::Keyword, tk::Identifier});
check_keyword("->", "->"); check_keyword("->", "->");
s = add_token(s, "-+->", "arrow"); s = add_token(s, "-+->", "arrow");
check("Int -+-> Int", {tk::Identifier, tk::Keyword, tk::Identifier}, s); env = update_token_table(env, s);
check("Int -+-> Int", {tk::Identifier, tk::Keyword, tk::Identifier}, env);
check("x := 10", {tk::Identifier, tk::Keyword, tk::Numeral}); check("x := 10", {tk::Identifier, tk::Keyword, tk::Numeral});
check("{x}", {tk::Keyword, tk::Identifier, tk::Keyword}); check("{x}", {tk::Keyword, tk::Identifier, tk::Keyword});
check("\u03BB \u2200 \u2192", {tk::Keyword, tk::Keyword, tk::Keyword}); check("\u03BB \u2200 \u2192", {tk::Keyword, tk::Keyword, tk::Keyword});
@ -174,10 +182,28 @@ static void tst3() {
scan("{ } . forall exists let in \u2200 := _"); scan("{ } . forall exists let in \u2200 := _");
} }
static void tst4(unsigned N) {
std::string big;
for (unsigned i = 0; i < N; i++)
big += "aaa ";
std::istringstream in(big);
environment env;
scanner s(in, "[string]");
unsigned i = 0;
while (true) {
tk k = s.scan(env);
if (k == tk::Eof)
break;
i++;
}
std::cout << i << "\n";
}
int main() { int main() {
save_stack_info(); save_stack_info();
tst1(); tst1();
tst2(); tst2();
tst3(); tst3();
tst4(100000);
return has_violations() ? 1 : 0; return has_violations() ? 1 : 0;
} }