Fix scanner. Add scanner tests. Add itera to list::iterator. Add parser_exce.

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
Leonardo de Moura 2013-08-13 10:55:41 -07:00
parent 0a4e03efc5
commit 00c06839a4
6 changed files with 235 additions and 47 deletions

View file

@ -5,6 +5,8 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura Author: Leonardo de Moura
*/ */
#include <cstdio> #include <cstdio>
#include <string>
#include <algorithm>
#include "scanner.h" #include "scanner.h"
#include "debug.h" #include "debug.h"
#include "exception.h" #include "exception.h"
@ -16,7 +18,7 @@ static name g_pi_unicode("\u03A0");
static name g_arrow_unicode("\u2192"); static name g_arrow_unicode("\u2192");
static name g_lambda_name("fun"); static name g_lambda_name("fun");
static name g_type_name("Type"); static name g_type_name("Type");
static name g_pi_name("pi"); static name g_pi_name("Pi");
static name g_arrow_name("->"); static name g_arrow_name("->");
static name g_eq_name("="); static name g_eq_name("=");
@ -63,6 +65,9 @@ public:
// new line // new line
set('\n', '\n'); set('\n', '\n');
// double quotes for strings
set('\"', '\"');
// eof // eof
set(255, -1); set(255, -1);
} }
@ -86,6 +91,14 @@ scanner::scanner(std::istream& stream):
scanner::~scanner() { scanner::~scanner() {
} }
void scanner::add_command_keyword(name const & n) {
m_commands = cons(n, m_commands);
}
void scanner::throw_exception(char const * msg) {
throw parser_exception(msg, m_line, m_spos);
}
void scanner::next() { void scanner::next() {
lean_assert(m_curr != EOF); lean_assert(m_curr != EOF);
m_curr = m_stream.get(); m_curr = m_stream.get();
@ -122,7 +135,7 @@ void scanner::read_comment() {
new_line(); new_line();
next(); next();
} else if (curr() == EOF) { } else if (curr() == EOF) {
throw exception("unexpected end of comment"); throw_exception("unexpected end of comment");
} else { } else {
next(); next();
} }
@ -130,7 +143,22 @@ void scanner::read_comment() {
} }
bool scanner::is_command(name const & n) const { bool scanner::is_command(name const & n) const {
return false; return std::any_of(m_commands.begin(), m_commands.end(), [&](name const & c) { return c == n; });
}
/** \brief Auxiliary function for #read_a_symbol */
name scanner::mk_name(name const & curr, std::string const & buf, bool only_digits) {
if (curr.is_anonymous()) {
lean_assert(!only_digits);
return name(buf.c_str());
} else if (only_digits) {
mpz val(buf.c_str());
if (!val.is_unsigned_int())
throw_exception("invalid hierarchical name, numeral is too big");
return name(curr, val.get_unsigned_int());
} else {
return name(curr, buf.c_str());
}
} }
scanner::token scanner::read_a_symbol() { scanner::token scanner::read_a_symbol() {
@ -139,23 +167,25 @@ scanner::token scanner::read_a_symbol() {
m_buffer += curr(); m_buffer += curr();
m_name_val = name(); m_name_val = name();
next(); next();
bool only_digits = false;
while (true) { while (true) {
if (normalize(curr()) == 'a') { if (normalize(curr()) == 'a') {
if (only_digits)
throw_exception("invalid hierarchical name, digit expected");
m_buffer += curr();
next();
} else if (normalize(curr()) == '0') {
m_buffer += curr(); m_buffer += curr();
next(); next();
} else if (curr() == ':' && check_next(':')) { } else if (curr() == ':' && check_next(':')) {
next(); next();
lean_assert(curr() == ':'); lean_assert(curr() == ':');
next(); next();
if (m_name_val.is_anonymous()) m_name_val = mk_name(m_name_val, m_buffer, only_digits);
m_name_val = name(m_buffer.c_str()); m_buffer.clear();
else only_digits = (normalize(curr()) == '0');
m_name_val = name(m_name_val, m_buffer.c_str());
} else { } else {
if (m_name_val.is_anonymous()) m_name_val = mk_name(m_name_val, m_buffer, only_digits);
m_name_val = name(m_buffer.c_str());
else
m_name_val = name(m_name_val, m_buffer.c_str());
if (m_name_val == g_lambda_name) if (m_name_val == g_lambda_name)
return token::Lambda; return token::Lambda;
else if (m_name_val == g_pi_name) else if (m_name_val == g_pi_name)
@ -240,6 +270,32 @@ scanner::token scanner::read_number() {
return is_decimal ? token::Decimal : token::Int; return is_decimal ? token::Decimal : token::Int;
} }
scanner::token scanner::read_string() {
lean_assert(curr() == '\"');
next();
m_buffer.clear();
while (true) {
char c = curr();
if (c == EOF) {
throw_exception("unexpected end of string");
} else if (c == '\"') {
next();
return token::String;
} else if (c == '\n') {
new_line();
} else if (c == '\\') {
next();
c = curr();
if (c == EOF)
throw_exception("unexpected end of string");
if (c != '\\' && c != '\"')
throw_exception("invalid escape sequence");
}
m_buffer += c;
next();
}
}
scanner::token scanner::scan() { scanner::token scanner::scan() {
while (true) { while (true) {
char c = curr(); char c = curr();
@ -264,18 +320,20 @@ scanner::token scanner::scan() {
} else { } else {
return token::LeftParen; return token::LeftParen;
} }
case ')': next(); return token::RightParen; case ')': next(); return token::RightParen;
case '{': next(); return token::LeftCurlyBracket; case '{': next(); return token::LeftCurlyBracket;
case '}': next(); return token::RightCurlyBracket; case '}': next(); return token::RightCurlyBracket;
case 'a': return read_a_symbol(); case 'a': return read_a_symbol();
case 'b': return read_b_symbol(); case 'b': return read_b_symbol();
case 'c': return read_c_symbol(); case 'c': return read_c_symbol();
case '0': return read_number(); case '0': return read_number();
case -1: return token::Eof; case '\"': return read_string();
default: lean_unreachable(); case -1: return token::Eof;
default: lean_unreachable();
} }
} }
} }
std::ostream & operator<<(std::ostream & out, scanner::token const & t) { std::ostream & operator<<(std::ostream & out, scanner::token const & t) {
switch (t) { switch (t) {
case scanner::token::LeftParen: out << "("; break; case scanner::token::LeftParen: out << "("; break;
@ -292,6 +350,7 @@ std::ostream & operator<<(std::ostream & out, scanner::token const & t) {
case scanner::token::CommandId: out << "CId"; break; case scanner::token::CommandId: out << "CId"; break;
case scanner::token::Int: out << "Int"; break; case scanner::token::Int: out << "Int"; break;
case scanner::token::Decimal: out << "Dec"; break; case scanner::token::Decimal: out << "Dec"; break;
case scanner::token::String: out << "String"; break;
case scanner::token::Eq: out << "="; break; case scanner::token::Eq: out << "="; break;
case scanner::token::Assign: out << ":="; break; case scanner::token::Assign: out << ":="; break;
case scanner::token::Type: out << "Type"; break; case scanner::token::Type: out << "Type"; break;

View file

@ -9,6 +9,7 @@ Author: Leonardo de Moura
#include <vector> #include <vector>
#include "mpq.h" #include "mpq.h"
#include "name.h" #include "name.h"
#include "list.h"
namespace lean { namespace lean {
/** /**
@ -18,7 +19,7 @@ class scanner {
public: public:
enum class token { enum class token {
LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow, LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow,
Id, CommandId, Int, Decimal, Eq, Assign, Type, Eof Id, CommandId, Int, Decimal, String, Eq, Assign, Type, Eof
}; };
protected: protected:
int m_spos; // position in the current line of the stream int m_spos; // position in the current line of the stream
@ -32,27 +33,36 @@ protected:
name m_name_val; name m_name_val;
std::string m_buffer; std::string m_buffer;
char curr() const { return m_curr; } list<name> m_commands;
void new_line() { m_line++; m_spos = 0; }
void next(); void throw_exception(char const * msg);
bool check_next(char c); char curr() const { return m_curr; }
void new_line() { m_line++; m_spos = 0; }
void next();
bool check_next(char c);
void read_comment(); void read_comment();
name mk_name(name const & curr, std::string const & buf, bool only_digits);
token read_a_symbol(); token read_a_symbol();
token read_b_symbol(); token read_b_symbol();
token read_c_symbol(); token read_c_symbol();
token read_number(); token read_number();
bool is_command(name const & n) const; token read_string();
bool is_command(name const & n) const;
public: public:
scanner(std::istream& stream); scanner(std::istream& stream);
~scanner(); ~scanner();
/** \brief Register a new command keyword. */
void add_command_keyword(name const & n);
int get_line() const { return m_line; } int get_line() const { return m_line; }
int get_pos() const { return m_pos; } int get_pos() const { return m_pos; }
token scan(); token scan();
name const & get_name_val() const { return m_name_val; } name const & get_name_val() const { return m_name_val; }
mpq const & get_num_val() const { return m_num_val; } mpq const & get_num_val() const { return m_num_val; }
std::string const & get_str_val() const { return m_buffer; }
}; };
std::ostream & operator<<(std::ostream & out, scanner::token const & t); std::ostream & operator<<(std::ostream & out, scanner::token const & t);
} }

View file

@ -6,24 +6,115 @@ Author: Leonardo de Moura
*/ */
#include <sstream> #include <sstream>
#include "scanner.h" #include "scanner.h"
#include "exception.h"
#include "escaped.h"
#include "test.h" #include "test.h"
using namespace lean; using namespace lean;
static void tst1() { #define st scanner::token
char tst[] = "fun(x: pi A : Type, A -> A), (* (* test *) *) x+1 = 2.0 λ";
std::istringstream in(tst); static void scan(char const * str, list<name> const & cmds = list<name>()) {
std::istringstream in(str);
scanner s(in); scanner s(in);
for (name const & n : cmds) s.add_command_keyword(n);
while (true) { while (true) {
scanner::token t = s.scan(); st t = s.scan();
if (t == scanner::token::Eof) if (t == st::Eof)
break; break;
std::cout << t << " "; std::cout << t;
if (t == st::Id || t == st::CommandId)
std::cout << "[" << s.get_name_val() << "]";
else if (t == st::Int || t == st::Decimal)
std::cout << "[" << s.get_num_val() << "]";
else if (t == st::String)
std::cout << "[\"" << escaped(s.get_str_val().c_str()) << "\"]";
std::cout << " ";
} }
std::cout << "\n"; std::cout << "\n";
} }
static void check(char const * str, std::initializer_list<scanner::token> const & l, list<name> const & cmds = list<name>()) {
auto it = l.begin();
std::istringstream in(str);
scanner s(in);
for (name const & n : cmds) s.add_command_keyword(n);
while (true) {
st t = s.scan();
if (t == st::Eof) {
lean_assert(it == l.end());
return;
}
lean_assert(it != l.end());
lean_assert(t == *it);
++it;
}
}
static void check_name(char const * str, name const & expected) {
std::istringstream in(str);
scanner s(in);
st t = s.scan();
lean_assert(t == st::Id);
lean_assert(s.get_name_val() == expected);
lean_assert(s.scan() == st::Eof);
}
static void tst1() {
scan("fun(x: Pi A : Type, A -> A), (* (* test *) *) x+1 = 2.0 λ");
try {
scan("(* (* foo *)");
lean_unreachable();
} catch (exception const & ex) {
std::cout << "expected error: " << ex.what() << "\n";
}
}
static void tst2() {
scan("x::name");
scan("x::10::foo");
check("x::name", {st::Id});
check("fun (x : Bool), x", {st::Lambda, st::LeftParen, st::Id, st::Colon, st::Id, st::RightParen, st::Comma, st::Id});
check("+++", {st::Id});
check("x+y", {st::Id, st::Id, st::Id});
check("(* testing *)", {});
check(" 2.31 ", {st::Decimal});
check(" 333 22", {st::Int, st::Int});
check("Int -> Int", {st::Id, st::Arrow, st::Id});
check("Int --> Int", {st::Id, st::Id, st::Id});
check("x := 10", {st::Id, st::Assign, st::Int});
check("(x+1):Int", {st::LeftParen, st::Id, st::Id, st::Int, st::RightParen, st::Colon, st::Id});
check("{x}", {st::LeftCurlyBracket, st::Id, st::RightCurlyBracket});
check("\u03BB \u03A0 \u2192", {st::Lambda, st::Pi, st::Arrow});
scan("++\u2295++x\u2296\u2296");
check("++\u2295++x\u2296\u2296", {st::Id, st::Id, st::Id, st::Id, st::Id});
scan("x10");
check_name("x10", name("x10"));
check_name("x::10", name(name("x"), 10));
check_name("x::10::bla::0", name(name(name(name("x"), 10), "bla"), 0u));
check("0::1", {st::Int, st::Colon, st::Colon, st::Int});
check_name("\u2296\u2296", name("\u2296\u2296"));
try {
scan("x::1000000000000000000");
lean_unreachable();
} catch (exception const & ex) {
std::cout << "expected error: " << ex.what() << "\n";
}
scan("Theorem a : Bool Axiom b : Int", list<name>({"Theorem", "Axiom"}));
check("Theorem a : Bool Axiom b : Int", {st::CommandId, st::Id, st::Colon, st::Id, st::CommandId, st::Id, st::Colon, st::Id}, list<name>({"Theorem", "Axiom"}));
scan("foo \"tst\\\"\" : Int");
check("foo \"tst\\\"\" : Int", {st::Id, st::String, st::Colon, st::Id});
try {
scan("\"foo");
lean_unreachable();
} catch (exception const & ex) {
std::cout << "expected error: " << ex.what() << "\n";
}
scan("2.13 1.2 0.5");
}
int main() { int main() {
continue_on_violation(true); continue_on_violation(true);
tst1(); tst1();
tst2();
return has_violations() ? 1 : 0; return has_violations() ? 1 : 0;
} }

View file

@ -4,24 +4,31 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura Author: Leonardo de Moura
*/ */
#include <sstream>
#include "exception.h" #include "exception.h"
namespace lean { namespace lean {
exception::exception(char const * msg):m_msg(msg) { exception::exception(char const * msg):m_msg(msg) {}
} exception::exception(std::string const & msg):m_msg(msg) {}
exception::exception(exception const & e):m_msg(e.m_msg) {}
exception::~exception() noexcept {}
char const * exception::what() const noexcept { return m_msg.c_str(); }
exception::exception(std::string const & msg):m_msg(msg) { parser_exception::parser_exception(char const * msg, unsigned l, unsigned p):exception(msg), m_line(l), m_pos(p) {}
parser_exception::parser_exception(std::string const & msg, unsigned l, unsigned p):exception(msg), m_line(l), m_pos(p) {}
parser_exception::parser_exception(parser_exception const & e):exception(e), m_line(e.m_line), m_pos(e.m_pos) {}
parser_exception::~parser_exception() noexcept {}
char const * parser_exception::what() const noexcept {
try {
static thread_local std::string buffer;
std::ostringstream s;
s << "(line: " << m_line << ", pos: " << m_pos << ") " << m_msg;
buffer = s.str();
return buffer.c_str();
} catch (std::exception ex) {
// failed to generate extended message
return m_msg.c_str();
}
} }
exception::exception(exception const & e):m_msg(e.m_msg) {
}
exception::~exception() noexcept {
}
char const * exception::what() const noexcept {
return m_msg.c_str();
}
} }

View file

@ -11,6 +11,7 @@ Author: Leonardo de Moura
namespace lean { namespace lean {
class exception : public std::exception { class exception : public std::exception {
protected:
std::string m_msg; std::string m_msg;
public: public:
exception(char const * msg); exception(char const * msg);
@ -20,4 +21,17 @@ public:
virtual char const * what() const noexcept; virtual char const * what() const noexcept;
}; };
class parser_exception : public exception {
protected:
unsigned m_line;
unsigned m_pos;
public:
parser_exception(char const * msg, unsigned l, unsigned p);
parser_exception(std::string const & msg, unsigned l, unsigned p);
parser_exception(parser_exception const & ex);
virtual ~parser_exception() noexcept;
virtual char const * what() const noexcept;
unsigned get_line() const { return m_line; }
unsigned get_pos() const { return m_pos; }
};
} }

View file

@ -6,6 +6,7 @@ Author: Leonardo de Moura
*/ */
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <iterator>
#include "rc.h" #include "rc.h"
#include "debug.h" #include "debug.h"
@ -56,6 +57,12 @@ public:
cell const * m_it; cell const * m_it;
iterator(cell const * it):m_it(it) {} iterator(cell const * it):m_it(it) {}
public: public:
typedef std::forward_iterator_tag iterator_category;
typedef T value_type;
typedef unsigned difference_type;
typedef T const * pointer;
typedef T const & reference;
iterator(iterator const & s):m_it(s.m_it) {} iterator(iterator const & s):m_it(s.m_it) {}
iterator & operator++() { m_it = m_it->m_tail.m_ptr; return *this; } iterator & operator++() { m_it = m_it->m_tail.m_ptr; return *this; }
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; } iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }