refactor(frontends/lean/scanner): use the parser configuration in the environment
Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
parent
e2adb101d5
commit
d3e3301208
4 changed files with 78 additions and 45 deletions
|
@ -12,6 +12,9 @@ struct parser_ext : public environment_extension {
|
||||||
token_table m_tokens;
|
token_table m_tokens;
|
||||||
parse_table m_nud;
|
parse_table m_nud;
|
||||||
parse_table m_led;
|
parse_table m_led;
|
||||||
|
parser_ext() {
|
||||||
|
m_tokens = mk_default_token_table();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct parser_ext_reg {
|
struct parser_ext_reg {
|
||||||
|
|
|
@ -8,6 +8,7 @@ Author: Leonardo de Moura
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "util/exception.h"
|
#include "util/exception.h"
|
||||||
#include "frontends/lean/scanner.h"
|
#include "frontends/lean/scanner.h"
|
||||||
|
#include "frontends/lean/parser_config.h"
|
||||||
|
|
||||||
namespace lean {
|
namespace lean {
|
||||||
void scanner::next() {
|
void scanner::next() {
|
||||||
|
@ -211,7 +212,7 @@ auto scanner::read_key_cmd_id() -> token_kind {
|
||||||
static char const * error_msg = "unexpected token";
|
static char const * error_msg = "unexpected token";
|
||||||
char c = curr();
|
char c = curr();
|
||||||
unsigned num_cs = 1; // number of characters read
|
unsigned num_cs = 1; // number of characters read
|
||||||
token_table const * it = find(m_tokens, c);
|
token_table const * it = find(*m_tokens, c);
|
||||||
token_info const * info = nullptr;
|
token_info const * info = nullptr;
|
||||||
unsigned key_size = 0;
|
unsigned key_size = 0;
|
||||||
if (it) {
|
if (it) {
|
||||||
|
@ -278,7 +279,8 @@ static name g_begin_script_tk("(*");
|
||||||
static name g_begin_comment_tk("--");
|
static name g_begin_comment_tk("--");
|
||||||
static name g_begin_comment_block_tk("(--");
|
static name g_begin_comment_block_tk("(--");
|
||||||
|
|
||||||
auto scanner::scan() -> token_kind {
|
auto scanner::scan(environment const & env) -> token_kind {
|
||||||
|
m_tokens = &get_token_table(env);
|
||||||
while (true) {
|
while (true) {
|
||||||
char c = curr();
|
char c = curr();
|
||||||
m_pos = m_spos;
|
m_pos = m_spos;
|
||||||
|
@ -318,8 +320,8 @@ auto scanner::scan() -> token_kind {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
scanner::scanner(token_table const & tks, std::istream & strm, char const * strm_name):
|
scanner::scanner(std::istream & strm, char const * strm_name):
|
||||||
m_tokens(tks), m_stream(strm), m_spos(0), m_sline(1), m_curr(0), m_pos(0), m_line(1),
|
m_tokens(nullptr), m_stream(strm), m_spos(0), m_sline(1), m_curr(0), m_pos(0), m_line(1),
|
||||||
m_token_info(nullptr) {
|
m_token_info(nullptr) {
|
||||||
m_stream_name = strm_name ? strm_name : "[unknown]";
|
m_stream_name = strm_name ? strm_name : "[unknown]";
|
||||||
next();
|
next();
|
||||||
|
|
|
@ -8,8 +8,10 @@ Author: Leonardo de Moura
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "util/name.h"
|
#include "util/name.h"
|
||||||
#include "util/numerics/mpq.h"
|
#include "util/numerics/mpq.h"
|
||||||
|
#include "kernel/environment.h"
|
||||||
#include "frontends/lean/token_table.h"
|
#include "frontends/lean/token_table.h"
|
||||||
|
|
||||||
|
|
||||||
namespace lean {
|
namespace lean {
|
||||||
/**
|
/**
|
||||||
\brief Scanner. The behavior of the scanner is controlled using a token set.
|
\brief Scanner. The behavior of the scanner is controlled using a token set.
|
||||||
|
@ -22,22 +24,22 @@ class scanner {
|
||||||
public:
|
public:
|
||||||
enum class token_kind {Keyword, CommandKeyword, ScriptBlock, Identifier, Numeral, Decimal, String, Eof};
|
enum class token_kind {Keyword, CommandKeyword, ScriptBlock, Identifier, Numeral, Decimal, String, Eof};
|
||||||
protected:
|
protected:
|
||||||
token_table m_tokens;
|
token_table const * m_tokens;
|
||||||
std::istream & m_stream;
|
std::istream & m_stream;
|
||||||
std::string m_stream_name;
|
std::string m_stream_name;
|
||||||
|
|
||||||
int m_spos; // current position
|
int m_spos; // current position
|
||||||
int m_sline; // current line
|
int m_sline; // current line
|
||||||
char m_curr; // current char;
|
char m_curr; // current char;
|
||||||
|
|
||||||
int m_pos; // start position of the token
|
int m_pos; // start position of the token
|
||||||
int m_line; // line of the token
|
int m_line; // line of the token
|
||||||
|
|
||||||
name m_name_val;
|
name m_name_val;
|
||||||
token_info const * m_token_info;
|
token_info const * m_token_info;
|
||||||
mpq m_num_val;
|
mpq m_num_val;
|
||||||
std::string m_buffer;
|
std::string m_buffer;
|
||||||
std::string m_aux_buffer;
|
std::string m_aux_buffer;
|
||||||
|
|
||||||
[[ noreturn ]] void throw_exception(char const * msg) const;
|
[[ noreturn ]] void throw_exception(char const * msg) const;
|
||||||
void next();
|
void next();
|
||||||
|
@ -60,11 +62,11 @@ protected:
|
||||||
token_kind read_key_cmd_id();
|
token_kind read_key_cmd_id();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
scanner(token_table const & tks, std::istream & strm, char const * strm_name = nullptr);
|
scanner(std::istream & strm, char const * strm_name = nullptr);
|
||||||
|
|
||||||
int get_line() const { return m_line; }
|
int get_line() const { return m_line; }
|
||||||
int get_pos() const { return m_pos; }
|
int get_pos() const { return m_pos; }
|
||||||
token_kind scan();
|
token_kind scan(environment const & env);
|
||||||
|
|
||||||
mpq const & get_num_val() const { return m_num_val; }
|
mpq const & get_num_val() const { return m_num_val; }
|
||||||
name const & get_name_val() const { return m_name_val; }
|
name const & get_name_val() const { return m_name_val; }
|
||||||
|
|
|
@ -9,15 +9,16 @@ Author: Leonardo de Moura
|
||||||
#include "util/escaped.h"
|
#include "util/escaped.h"
|
||||||
#include "util/exception.h"
|
#include "util/exception.h"
|
||||||
#include "frontends/lean/scanner.h"
|
#include "frontends/lean/scanner.h"
|
||||||
|
#include "frontends/lean/parser_config.h"
|
||||||
using namespace lean;
|
using namespace lean;
|
||||||
|
|
||||||
#define tk scanner::token_kind
|
#define tk scanner::token_kind
|
||||||
|
|
||||||
static void scan(char const * str, token_table set = mk_default_token_table()) {
|
static void scan(char const * str, environment const & env = environment()) {
|
||||||
std::istringstream in(str);
|
std::istringstream in(str);
|
||||||
scanner s(set, in, "[string]");
|
scanner s(in, "[string]");
|
||||||
while (true) {
|
while (true) {
|
||||||
tk k = s.scan();
|
tk k = s.scan(env);
|
||||||
if (k == tk::Eof)
|
if (k == tk::Eof)
|
||||||
break;
|
break;
|
||||||
if (k == tk::Identifier)
|
if (k == tk::Identifier)
|
||||||
|
@ -35,9 +36,9 @@ static void scan(char const * str, token_table set = mk_default_token_table()) {
|
||||||
std::cout << "\n";
|
std::cout << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
static void scan_success(char const * str, token_table set = mk_default_token_table()) {
|
static void scan_success(char const * str, environment const & env = environment()) {
|
||||||
try {
|
try {
|
||||||
scan(str, set);
|
scan(str, env);
|
||||||
} catch (exception & ex) {
|
} catch (exception & ex) {
|
||||||
std::cout << "ERROR: " << ex.what() << "\n";
|
std::cout << "ERROR: " << ex.what() << "\n";
|
||||||
lean_unreachable();
|
lean_unreachable();
|
||||||
|
@ -54,13 +55,13 @@ static void scan_error(char const * str) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check(char const * str, std::initializer_list<tk> const & l,
|
static void check(char const * str, std::initializer_list<tk> const & l,
|
||||||
token_table set = mk_default_token_table()) {
|
environment const & env = environment()) {
|
||||||
try {
|
try {
|
||||||
auto it = l.begin();
|
auto it = l.begin();
|
||||||
std::istringstream in(str);
|
std::istringstream in(str);
|
||||||
scanner s(set, in, "[string]");
|
scanner s(in, "[string]");
|
||||||
while (true) {
|
while (true) {
|
||||||
tk k = s.scan();
|
tk k = s.scan(env);
|
||||||
if (k == tk::Eof) {
|
if (k == tk::Eof) {
|
||||||
lean_assert(it == l.end());
|
lean_assert(it == l.end());
|
||||||
return;
|
return;
|
||||||
|
@ -75,37 +76,40 @@ static void check(char const * str, std::initializer_list<tk> const & l,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_name(char const * str, name const & expected, token_table set = mk_default_token_table()) {
|
static void check_name(char const * str, name const & expected, environment const & env = environment()) {
|
||||||
std::istringstream in(str);
|
std::istringstream in(str);
|
||||||
scanner s(set, in, "[string]");
|
scanner s(in, "[string]");
|
||||||
tk k = s.scan();
|
tk k = s.scan(env);
|
||||||
lean_assert(k == tk::Identifier);
|
lean_assert(k == tk::Identifier);
|
||||||
lean_assert(s.get_name_val() == expected);
|
lean_assert(s.get_name_val() == expected);
|
||||||
lean_assert(s.scan() == tk::Eof);
|
lean_assert(s.scan(env) == tk::Eof);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_keyword(char const * str, name const & expected, token_table set = mk_default_token_table()) {
|
static void check_keyword(char const * str, name const & expected, environment const & env = environment()) {
|
||||||
std::istringstream in(str);
|
std::istringstream in(str);
|
||||||
scanner s(set, in, "[string]");
|
scanner s(in, "[string]");
|
||||||
tk k = s.scan();
|
tk k = s.scan(env);
|
||||||
lean_assert(k == tk::Keyword);
|
lean_assert(k == tk::Keyword);
|
||||||
lean_assert(s.get_token_info().value() == expected);
|
lean_assert(s.get_token_info().value() == expected);
|
||||||
lean_assert(s.scan() == tk::Eof);
|
lean_assert(s.scan(env) == tk::Eof);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tst1() {
|
static void tst1() {
|
||||||
token_table s = mk_default_token_table();
|
environment env;
|
||||||
|
token_table s = get_token_table(env);
|
||||||
s = add_token(s, "+", "plus");
|
s = add_token(s, "+", "plus");
|
||||||
s = add_token(s, "=", "eq");
|
s = add_token(s, "=", "eq");
|
||||||
|
env = update_token_table(env, s);
|
||||||
scan_success("a..a");
|
scan_success("a..a");
|
||||||
check("a..a", {tk::Identifier, tk::Keyword, tk::Keyword, tk::Identifier});
|
check("a..a", {tk::Identifier, tk::Keyword, tk::Keyword, tk::Identifier});
|
||||||
check("Type.{0}", {tk::Keyword, tk::Keyword, tk::Numeral, tk::Keyword});
|
check("Type.{0}", {tk::Keyword, tk::Keyword, tk::Numeral, tk::Keyword});
|
||||||
s = add_token(s, "ab+cde", "weird1");
|
s = add_token(s, "ab+cde", "weird1");
|
||||||
s = add_token(s, "+cd", "weird2");
|
s = add_token(s, "+cd", "weird2");
|
||||||
scan_success("ab+cd", s);
|
env = update_token_table(env, s);
|
||||||
check("ab+cd", {tk::Identifier, tk::Keyword}, s);
|
scan_success("ab+cd", env);
|
||||||
scan_success("ab+cde", s);
|
check("ab+cd", {tk::Identifier, tk::Keyword}, env);
|
||||||
check("ab+cde", {tk::Keyword}, s);
|
scan_success("ab+cde", env);
|
||||||
|
check("ab+cde", {tk::Keyword}, env);
|
||||||
scan_success("Type.{0}");
|
scan_success("Type.{0}");
|
||||||
scan_success("0.a a");
|
scan_success("0.a a");
|
||||||
scan_success("0.");
|
scan_success("0.");
|
||||||
|
@ -115,11 +119,11 @@ static void tst1() {
|
||||||
scan_success("..");
|
scan_success("..");
|
||||||
scan_success("....");
|
scan_success("....");
|
||||||
scan_success("....\n..");
|
scan_success("....\n..");
|
||||||
scan_success("a", s);
|
scan_success("a", env);
|
||||||
scan_success("a. b.c..");
|
scan_success("a. b.c..");
|
||||||
scan_success(".. ..");
|
scan_success(".. ..");
|
||||||
scan_success("....\n..");
|
scan_success("....\n..");
|
||||||
scan_success("fun(x: forall A : Type, A -> A), x+1 = 2.0 λvalue.foo. . . a", s);
|
scan_success("fun(x: forall A : Type, A -> A), x+1 = 2.0 λvalue.foo. . . a", env);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tst2() {
|
static void tst2() {
|
||||||
|
@ -133,11 +137,14 @@ static void tst2() {
|
||||||
check_name("x.bla", name({"x", "bla"}));
|
check_name("x.bla", name({"x", "bla"}));
|
||||||
|
|
||||||
scan_error("+++");
|
scan_error("+++");
|
||||||
|
environment env;
|
||||||
token_table s = mk_default_token_table();
|
token_table s = mk_default_token_table();
|
||||||
s = add_token(s, "+++", "tplus");
|
s = add_token(s, "+++", "tplus");
|
||||||
check_keyword("+++", "tplus", s);
|
env = update_token_table(env, s);
|
||||||
|
check_keyword("+++", "tplus", env);
|
||||||
s = add_token(s, "+", "plus");
|
s = add_token(s, "+", "plus");
|
||||||
check("x+y", {tk::Identifier, tk::Keyword, tk::Identifier}, s);
|
env = update_token_table(env, s);
|
||||||
|
check("x+y", {tk::Identifier, tk::Keyword, tk::Identifier}, env);
|
||||||
check("-- testing", {});
|
check("-- testing", {});
|
||||||
check("(-- testing --)", {});
|
check("(-- testing --)", {});
|
||||||
check("(-- (-- testing\n --) --)", {});
|
check("(-- (-- testing\n --) --)", {});
|
||||||
|
@ -148,7 +155,8 @@ static void tst2() {
|
||||||
check("int->int", {tk::Identifier, tk::Keyword, tk::Identifier});
|
check("int->int", {tk::Identifier, tk::Keyword, tk::Identifier});
|
||||||
check_keyword("->", "->");
|
check_keyword("->", "->");
|
||||||
s = add_token(s, "-+->", "arrow");
|
s = add_token(s, "-+->", "arrow");
|
||||||
check("Int -+-> Int", {tk::Identifier, tk::Keyword, tk::Identifier}, s);
|
env = update_token_table(env, s);
|
||||||
|
check("Int -+-> Int", {tk::Identifier, tk::Keyword, tk::Identifier}, env);
|
||||||
check("x := 10", {tk::Identifier, tk::Keyword, tk::Numeral});
|
check("x := 10", {tk::Identifier, tk::Keyword, tk::Numeral});
|
||||||
check("{x}", {tk::Keyword, tk::Identifier, tk::Keyword});
|
check("{x}", {tk::Keyword, tk::Identifier, tk::Keyword});
|
||||||
check("\u03BB \u2200 \u2192", {tk::Keyword, tk::Keyword, tk::Keyword});
|
check("\u03BB \u2200 \u2192", {tk::Keyword, tk::Keyword, tk::Keyword});
|
||||||
|
@ -174,10 +182,28 @@ static void tst3() {
|
||||||
scan("{ } . forall exists let in \u2200 := _");
|
scan("{ } . forall exists let in \u2200 := _");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void tst4(unsigned N) {
|
||||||
|
std::string big;
|
||||||
|
for (unsigned i = 0; i < N; i++)
|
||||||
|
big += "aaa ";
|
||||||
|
std::istringstream in(big);
|
||||||
|
environment env;
|
||||||
|
scanner s(in, "[string]");
|
||||||
|
unsigned i = 0;
|
||||||
|
while (true) {
|
||||||
|
tk k = s.scan(env);
|
||||||
|
if (k == tk::Eof)
|
||||||
|
break;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
std::cout << i << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
save_stack_info();
|
save_stack_info();
|
||||||
tst1();
|
tst1();
|
||||||
tst2();
|
tst2();
|
||||||
tst3();
|
tst3();
|
||||||
|
tst4(100000);
|
||||||
return has_violations() ? 1 : 0;
|
return has_violations() ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue