Fix scanner. Add scanner tests. Add itera to list::iterator. Add parser_exce.
Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
parent
0a4e03efc5
commit
00c06839a4
6 changed files with 235 additions and 47 deletions
|
@ -5,6 +5,8 @@ Released under Apache 2.0 license as described in the file LICENSE.
|
|||
Author: Leonardo de Moura
|
||||
*/
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "scanner.h"
|
||||
#include "debug.h"
|
||||
#include "exception.h"
|
||||
|
@ -16,7 +18,7 @@ static name g_pi_unicode("\u03A0");
|
|||
static name g_arrow_unicode("\u2192");
|
||||
static name g_lambda_name("fun");
|
||||
static name g_type_name("Type");
|
||||
static name g_pi_name("pi");
|
||||
static name g_pi_name("Pi");
|
||||
static name g_arrow_name("->");
|
||||
static name g_eq_name("=");
|
||||
|
||||
|
@ -63,6 +65,9 @@ public:
|
|||
// new line
|
||||
set('\n', '\n');
|
||||
|
||||
// double quotes for strings
|
||||
set('\"', '\"');
|
||||
|
||||
// eof
|
||||
set(255, -1);
|
||||
}
|
||||
|
@ -86,6 +91,14 @@ scanner::scanner(std::istream& stream):
|
|||
scanner::~scanner() {
|
||||
}
|
||||
|
||||
void scanner::add_command_keyword(name const & n) {
|
||||
m_commands = cons(n, m_commands);
|
||||
}
|
||||
|
||||
void scanner::throw_exception(char const * msg) {
|
||||
throw parser_exception(msg, m_line, m_spos);
|
||||
}
|
||||
|
||||
void scanner::next() {
|
||||
lean_assert(m_curr != EOF);
|
||||
m_curr = m_stream.get();
|
||||
|
@ -122,7 +135,7 @@ void scanner::read_comment() {
|
|||
new_line();
|
||||
next();
|
||||
} else if (curr() == EOF) {
|
||||
throw exception("unexpected end of comment");
|
||||
throw_exception("unexpected end of comment");
|
||||
} else {
|
||||
next();
|
||||
}
|
||||
|
@ -130,7 +143,22 @@ void scanner::read_comment() {
|
|||
}
|
||||
|
||||
bool scanner::is_command(name const & n) const {
|
||||
return false;
|
||||
return std::any_of(m_commands.begin(), m_commands.end(), [&](name const & c) { return c == n; });
|
||||
}
|
||||
|
||||
/** \brief Auxiliary function for #read_a_symbol */
|
||||
name scanner::mk_name(name const & curr, std::string const & buf, bool only_digits) {
|
||||
if (curr.is_anonymous()) {
|
||||
lean_assert(!only_digits);
|
||||
return name(buf.c_str());
|
||||
} else if (only_digits) {
|
||||
mpz val(buf.c_str());
|
||||
if (!val.is_unsigned_int())
|
||||
throw_exception("invalid hierarchical name, numeral is too big");
|
||||
return name(curr, val.get_unsigned_int());
|
||||
} else {
|
||||
return name(curr, buf.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
scanner::token scanner::read_a_symbol() {
|
||||
|
@ -139,23 +167,25 @@ scanner::token scanner::read_a_symbol() {
|
|||
m_buffer += curr();
|
||||
m_name_val = name();
|
||||
next();
|
||||
bool only_digits = false;
|
||||
while (true) {
|
||||
if (normalize(curr()) == 'a') {
|
||||
if (only_digits)
|
||||
throw_exception("invalid hierarchical name, digit expected");
|
||||
m_buffer += curr();
|
||||
next();
|
||||
} else if (normalize(curr()) == '0') {
|
||||
m_buffer += curr();
|
||||
next();
|
||||
} else if (curr() == ':' && check_next(':')) {
|
||||
next();
|
||||
lean_assert(curr() == ':');
|
||||
next();
|
||||
if (m_name_val.is_anonymous())
|
||||
m_name_val = name(m_buffer.c_str());
|
||||
else
|
||||
m_name_val = name(m_name_val, m_buffer.c_str());
|
||||
m_name_val = mk_name(m_name_val, m_buffer, only_digits);
|
||||
m_buffer.clear();
|
||||
only_digits = (normalize(curr()) == '0');
|
||||
} else {
|
||||
if (m_name_val.is_anonymous())
|
||||
m_name_val = name(m_buffer.c_str());
|
||||
else
|
||||
m_name_val = name(m_name_val, m_buffer.c_str());
|
||||
m_name_val = mk_name(m_name_val, m_buffer, only_digits);
|
||||
if (m_name_val == g_lambda_name)
|
||||
return token::Lambda;
|
||||
else if (m_name_val == g_pi_name)
|
||||
|
@ -240,6 +270,32 @@ scanner::token scanner::read_number() {
|
|||
return is_decimal ? token::Decimal : token::Int;
|
||||
}
|
||||
|
||||
scanner::token scanner::read_string() {
|
||||
lean_assert(curr() == '\"');
|
||||
next();
|
||||
m_buffer.clear();
|
||||
while (true) {
|
||||
char c = curr();
|
||||
if (c == EOF) {
|
||||
throw_exception("unexpected end of string");
|
||||
} else if (c == '\"') {
|
||||
next();
|
||||
return token::String;
|
||||
} else if (c == '\n') {
|
||||
new_line();
|
||||
} else if (c == '\\') {
|
||||
next();
|
||||
c = curr();
|
||||
if (c == EOF)
|
||||
throw_exception("unexpected end of string");
|
||||
if (c != '\\' && c != '\"')
|
||||
throw_exception("invalid escape sequence");
|
||||
}
|
||||
m_buffer += c;
|
||||
next();
|
||||
}
|
||||
}
|
||||
|
||||
scanner::token scanner::scan() {
|
||||
while (true) {
|
||||
char c = curr();
|
||||
|
@ -271,11 +327,13 @@ scanner::token scanner::scan() {
|
|||
case 'b': return read_b_symbol();
|
||||
case 'c': return read_c_symbol();
|
||||
case '0': return read_number();
|
||||
case '\"': return read_string();
|
||||
case -1: return token::Eof;
|
||||
default: lean_unreachable();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & out, scanner::token const & t) {
|
||||
switch (t) {
|
||||
case scanner::token::LeftParen: out << "("; break;
|
||||
|
@ -292,6 +350,7 @@ std::ostream & operator<<(std::ostream & out, scanner::token const & t) {
|
|||
case scanner::token::CommandId: out << "CId"; break;
|
||||
case scanner::token::Int: out << "Int"; break;
|
||||
case scanner::token::Decimal: out << "Dec"; break;
|
||||
case scanner::token::String: out << "String"; break;
|
||||
case scanner::token::Eq: out << "="; break;
|
||||
case scanner::token::Assign: out << ":="; break;
|
||||
case scanner::token::Type: out << "Type"; break;
|
||||
|
|
|
@ -9,6 +9,7 @@ Author: Leonardo de Moura
|
|||
#include <vector>
|
||||
#include "mpq.h"
|
||||
#include "name.h"
|
||||
#include "list.h"
|
||||
|
||||
namespace lean {
|
||||
/**
|
||||
|
@ -18,7 +19,7 @@ class scanner {
|
|||
public:
|
||||
enum class token {
|
||||
LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow,
|
||||
Id, CommandId, Int, Decimal, Eq, Assign, Type, Eof
|
||||
Id, CommandId, Int, Decimal, String, Eq, Assign, Type, Eof
|
||||
};
|
||||
protected:
|
||||
int m_spos; // position in the current line of the stream
|
||||
|
@ -32,27 +33,36 @@ protected:
|
|||
name m_name_val;
|
||||
std::string m_buffer;
|
||||
|
||||
list<name> m_commands;
|
||||
|
||||
void throw_exception(char const * msg);
|
||||
char curr() const { return m_curr; }
|
||||
void new_line() { m_line++; m_spos = 0; }
|
||||
void next();
|
||||
bool check_next(char c);
|
||||
void read_comment();
|
||||
name mk_name(name const & curr, std::string const & buf, bool only_digits);
|
||||
token read_a_symbol();
|
||||
token read_b_symbol();
|
||||
token read_c_symbol();
|
||||
token read_number();
|
||||
token read_string();
|
||||
bool is_command(name const & n) const;
|
||||
|
||||
public:
|
||||
scanner(std::istream& stream);
|
||||
~scanner();
|
||||
|
||||
/** \brief Register a new command keyword. */
|
||||
void add_command_keyword(name const & n);
|
||||
|
||||
int get_line() const { return m_line; }
|
||||
int get_pos() const { return m_pos; }
|
||||
token scan();
|
||||
|
||||
name const & get_name_val() const { return m_name_val; }
|
||||
mpq const & get_num_val() const { return m_num_val; }
|
||||
std::string const & get_str_val() const { return m_buffer; }
|
||||
};
|
||||
std::ostream & operator<<(std::ostream & out, scanner::token const & t);
|
||||
}
|
||||
|
|
|
@ -6,24 +6,115 @@ Author: Leonardo de Moura
|
|||
*/
|
||||
#include <sstream>
|
||||
#include "scanner.h"
|
||||
#include "exception.h"
|
||||
#include "escaped.h"
|
||||
#include "test.h"
|
||||
using namespace lean;
|
||||
|
||||
static void tst1() {
|
||||
char tst[] = "fun(x: pi A : Type, A -> A), (* (* test *) *) x+1 = 2.0 λ";
|
||||
std::istringstream in(tst);
|
||||
#define st scanner::token
|
||||
|
||||
static void scan(char const * str, list<name> const & cmds = list<name>()) {
|
||||
std::istringstream in(str);
|
||||
scanner s(in);
|
||||
for (name const & n : cmds) s.add_command_keyword(n);
|
||||
while (true) {
|
||||
scanner::token t = s.scan();
|
||||
if (t == scanner::token::Eof)
|
||||
st t = s.scan();
|
||||
if (t == st::Eof)
|
||||
break;
|
||||
std::cout << t << " ";
|
||||
std::cout << t;
|
||||
if (t == st::Id || t == st::CommandId)
|
||||
std::cout << "[" << s.get_name_val() << "]";
|
||||
else if (t == st::Int || t == st::Decimal)
|
||||
std::cout << "[" << s.get_num_val() << "]";
|
||||
else if (t == st::String)
|
||||
std::cout << "[\"" << escaped(s.get_str_val().c_str()) << "\"]";
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
static void check(char const * str, std::initializer_list<scanner::token> const & l, list<name> const & cmds = list<name>()) {
|
||||
auto it = l.begin();
|
||||
std::istringstream in(str);
|
||||
scanner s(in);
|
||||
for (name const & n : cmds) s.add_command_keyword(n);
|
||||
while (true) {
|
||||
st t = s.scan();
|
||||
if (t == st::Eof) {
|
||||
lean_assert(it == l.end());
|
||||
return;
|
||||
}
|
||||
lean_assert(it != l.end());
|
||||
lean_assert(t == *it);
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
static void check_name(char const * str, name const & expected) {
|
||||
std::istringstream in(str);
|
||||
scanner s(in);
|
||||
st t = s.scan();
|
||||
lean_assert(t == st::Id);
|
||||
lean_assert(s.get_name_val() == expected);
|
||||
lean_assert(s.scan() == st::Eof);
|
||||
}
|
||||
|
||||
static void tst1() {
|
||||
scan("fun(x: Pi A : Type, A -> A), (* (* test *) *) x+1 = 2.0 λ");
|
||||
try {
|
||||
scan("(* (* foo *)");
|
||||
lean_unreachable();
|
||||
} catch (exception const & ex) {
|
||||
std::cout << "expected error: " << ex.what() << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
static void tst2() {
|
||||
scan("x::name");
|
||||
scan("x::10::foo");
|
||||
check("x::name", {st::Id});
|
||||
check("fun (x : Bool), x", {st::Lambda, st::LeftParen, st::Id, st::Colon, st::Id, st::RightParen, st::Comma, st::Id});
|
||||
check("+++", {st::Id});
|
||||
check("x+y", {st::Id, st::Id, st::Id});
|
||||
check("(* testing *)", {});
|
||||
check(" 2.31 ", {st::Decimal});
|
||||
check(" 333 22", {st::Int, st::Int});
|
||||
check("Int -> Int", {st::Id, st::Arrow, st::Id});
|
||||
check("Int --> Int", {st::Id, st::Id, st::Id});
|
||||
check("x := 10", {st::Id, st::Assign, st::Int});
|
||||
check("(x+1):Int", {st::LeftParen, st::Id, st::Id, st::Int, st::RightParen, st::Colon, st::Id});
|
||||
check("{x}", {st::LeftCurlyBracket, st::Id, st::RightCurlyBracket});
|
||||
check("\u03BB \u03A0 \u2192", {st::Lambda, st::Pi, st::Arrow});
|
||||
scan("++\u2295++x\u2296\u2296");
|
||||
check("++\u2295++x\u2296\u2296", {st::Id, st::Id, st::Id, st::Id, st::Id});
|
||||
scan("x10");
|
||||
check_name("x10", name("x10"));
|
||||
check_name("x::10", name(name("x"), 10));
|
||||
check_name("x::10::bla::0", name(name(name(name("x"), 10), "bla"), 0u));
|
||||
check("0::1", {st::Int, st::Colon, st::Colon, st::Int});
|
||||
check_name("\u2296\u2296", name("\u2296\u2296"));
|
||||
try {
|
||||
scan("x::1000000000000000000");
|
||||
lean_unreachable();
|
||||
} catch (exception const & ex) {
|
||||
std::cout << "expected error: " << ex.what() << "\n";
|
||||
}
|
||||
scan("Theorem a : Bool Axiom b : Int", list<name>({"Theorem", "Axiom"}));
|
||||
check("Theorem a : Bool Axiom b : Int", {st::CommandId, st::Id, st::Colon, st::Id, st::CommandId, st::Id, st::Colon, st::Id}, list<name>({"Theorem", "Axiom"}));
|
||||
scan("foo \"tst\\\"\" : Int");
|
||||
check("foo \"tst\\\"\" : Int", {st::Id, st::String, st::Colon, st::Id});
|
||||
try {
|
||||
scan("\"foo");
|
||||
lean_unreachable();
|
||||
} catch (exception const & ex) {
|
||||
std::cout << "expected error: " << ex.what() << "\n";
|
||||
}
|
||||
scan("2.13 1.2 0.5");
|
||||
}
|
||||
|
||||
int main() {
|
||||
continue_on_violation(true);
|
||||
tst1();
|
||||
tst2();
|
||||
return has_violations() ? 1 : 0;
|
||||
}
|
||||
|
|
|
@ -4,24 +4,31 @@ Released under Apache 2.0 license as described in the file LICENSE.
|
|||
|
||||
Author: Leonardo de Moura
|
||||
*/
|
||||
#include <sstream>
|
||||
#include "exception.h"
|
||||
|
||||
namespace lean {
|
||||
|
||||
exception::exception(char const * msg):m_msg(msg) {
|
||||
}
|
||||
exception::exception(char const * msg):m_msg(msg) {}
|
||||
exception::exception(std::string const & msg):m_msg(msg) {}
|
||||
exception::exception(exception const & e):m_msg(e.m_msg) {}
|
||||
exception::~exception() noexcept {}
|
||||
char const * exception::what() const noexcept { return m_msg.c_str(); }
|
||||
|
||||
exception::exception(std::string const & msg):m_msg(msg) {
|
||||
}
|
||||
|
||||
exception::exception(exception const & e):m_msg(e.m_msg) {
|
||||
}
|
||||
|
||||
exception::~exception() noexcept {
|
||||
}
|
||||
|
||||
char const * exception::what() const noexcept {
|
||||
parser_exception::parser_exception(char const * msg, unsigned l, unsigned p):exception(msg), m_line(l), m_pos(p) {}
|
||||
parser_exception::parser_exception(std::string const & msg, unsigned l, unsigned p):exception(msg), m_line(l), m_pos(p) {}
|
||||
parser_exception::parser_exception(parser_exception const & e):exception(e), m_line(e.m_line), m_pos(e.m_pos) {}
|
||||
parser_exception::~parser_exception() noexcept {}
|
||||
char const * parser_exception::what() const noexcept {
|
||||
try {
|
||||
static thread_local std::string buffer;
|
||||
std::ostringstream s;
|
||||
s << "(line: " << m_line << ", pos: " << m_pos << ") " << m_msg;
|
||||
buffer = s.str();
|
||||
return buffer.c_str();
|
||||
} catch (std::exception ex) {
|
||||
// failed to generate extended message
|
||||
return m_msg.c_str();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ Author: Leonardo de Moura
|
|||
namespace lean {
|
||||
|
||||
class exception : public std::exception {
|
||||
protected:
|
||||
std::string m_msg;
|
||||
public:
|
||||
exception(char const * msg);
|
||||
|
@ -20,4 +21,17 @@ public:
|
|||
virtual char const * what() const noexcept;
|
||||
};
|
||||
|
||||
class parser_exception : public exception {
|
||||
protected:
|
||||
unsigned m_line;
|
||||
unsigned m_pos;
|
||||
public:
|
||||
parser_exception(char const * msg, unsigned l, unsigned p);
|
||||
parser_exception(std::string const & msg, unsigned l, unsigned p);
|
||||
parser_exception(parser_exception const & ex);
|
||||
virtual ~parser_exception() noexcept;
|
||||
virtual char const * what() const noexcept;
|
||||
unsigned get_line() const { return m_line; }
|
||||
unsigned get_pos() const { return m_pos; }
|
||||
};
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ Author: Leonardo de Moura
|
|||
*/
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include "rc.h"
|
||||
#include "debug.h"
|
||||
|
||||
|
@ -56,6 +57,12 @@ public:
|
|||
cell const * m_it;
|
||||
iterator(cell const * it):m_it(it) {}
|
||||
public:
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
typedef T value_type;
|
||||
typedef unsigned difference_type;
|
||||
typedef T const * pointer;
|
||||
typedef T const & reference;
|
||||
|
||||
iterator(iterator const & s):m_it(s.m_it) {}
|
||||
iterator & operator++() { m_it = m_it->m_tail.m_ptr; return *this; }
|
||||
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
|
||||
|
|
Loading…
Reference in a new issue