From 00c06839a45620218aa22e2d9b0313e73e1c9c1e Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Tue, 13 Aug 2013 10:55:41 -0700 Subject: [PATCH] Fix scanner. Add scanner tests. Add itera to list::iterator. Add parser_exce. Signed-off-by: Leonardo de Moura --- src/frontend/scanner.cpp | 99 ++++++++++++++++++++++++------- src/frontend/scanner.h | 24 +++++--- src/tests/frontend/scanner.cpp | 103 +++++++++++++++++++++++++++++++-- src/util/exception.cpp | 35 ++++++----- src/util/exception.h | 14 +++++ src/util/list.h | 7 +++ 6 files changed, 235 insertions(+), 47 deletions(-) diff --git a/src/frontend/scanner.cpp b/src/frontend/scanner.cpp index 0e6d32bbc..d735d5636 100644 --- a/src/frontend/scanner.cpp +++ b/src/frontend/scanner.cpp @@ -5,6 +5,8 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ #include +#include +#include #include "scanner.h" #include "debug.h" #include "exception.h" @@ -16,7 +18,7 @@ static name g_pi_unicode("\u03A0"); static name g_arrow_unicode("\u2192"); static name g_lambda_name("fun"); static name g_type_name("Type"); -static name g_pi_name("pi"); +static name g_pi_name("Pi"); static name g_arrow_name("->"); static name g_eq_name("="); @@ -63,6 +65,9 @@ public: // new line set('\n', '\n'); + // double quotes for strings + set('\"', '\"'); + // eof set(255, -1); } @@ -86,6 +91,14 @@ scanner::scanner(std::istream& stream): scanner::~scanner() { } +void scanner::add_command_keyword(name const & n) { + m_commands = cons(n, m_commands); +} + +void scanner::throw_exception(char const * msg) { + throw parser_exception(msg, m_line, m_spos); +} + void scanner::next() { lean_assert(m_curr != EOF); m_curr = m_stream.get(); @@ -122,7 +135,7 @@ void scanner::read_comment() { new_line(); next(); } else if (curr() == EOF) { - throw exception("unexpected end of comment"); + throw_exception("unexpected end of comment"); } else { next(); } @@ -130,7 +143,22 @@ void scanner::read_comment() { } bool scanner::is_command(name const & n) const { - return false; + return std::any_of(m_commands.begin(), m_commands.end(), [&](name const & c) { return c == n; }); +} + +/** \brief Auxiliary function for #read_a_symbol */ +name scanner::mk_name(name const & curr, std::string const & buf, bool only_digits) { + if (curr.is_anonymous()) { + lean_assert(!only_digits); + return name(buf.c_str()); + } else if (only_digits) { + mpz val(buf.c_str()); + if (!val.is_unsigned_int()) + throw_exception("invalid hierarchical name, numeral is too big"); + return name(curr, val.get_unsigned_int()); + } else { + return name(curr, buf.c_str()); + } } scanner::token scanner::read_a_symbol() { @@ -139,23 +167,25 @@ scanner::token scanner::read_a_symbol() { m_buffer += curr(); m_name_val = name(); next(); + bool only_digits = false; while (true) { if (normalize(curr()) == 'a') { + if (only_digits) + throw_exception("invalid hierarchical name, digit expected"); + m_buffer += curr(); + next(); + } else if (normalize(curr()) == '0') { m_buffer += curr(); next(); } else if (curr() == ':' && check_next(':')) { next(); lean_assert(curr() == ':'); next(); - if (m_name_val.is_anonymous()) - m_name_val = name(m_buffer.c_str()); - else - m_name_val = name(m_name_val, m_buffer.c_str()); + m_name_val = mk_name(m_name_val, m_buffer, only_digits); + m_buffer.clear(); + only_digits = (normalize(curr()) == '0'); } else { - if (m_name_val.is_anonymous()) - m_name_val = name(m_buffer.c_str()); - else - m_name_val = name(m_name_val, m_buffer.c_str()); + m_name_val = mk_name(m_name_val, m_buffer, only_digits); if (m_name_val == g_lambda_name) return token::Lambda; else if (m_name_val == g_pi_name) @@ -240,6 +270,32 @@ scanner::token scanner::read_number() { return is_decimal ? token::Decimal : token::Int; } +scanner::token scanner::read_string() { + lean_assert(curr() == '\"'); + next(); + m_buffer.clear(); + while (true) { + char c = curr(); + if (c == EOF) { + throw_exception("unexpected end of string"); + } else if (c == '\"') { + next(); + return token::String; + } else if (c == '\n') { + new_line(); + } else if (c == '\\') { + next(); + c = curr(); + if (c == EOF) + throw_exception("unexpected end of string"); + if (c != '\\' && c != '\"') + throw_exception("invalid escape sequence"); + } + m_buffer += c; + next(); + } +} + scanner::token scanner::scan() { while (true) { char c = curr(); @@ -264,18 +320,20 @@ scanner::token scanner::scan() { } else { return token::LeftParen; } - case ')': next(); return token::RightParen; - case '{': next(); return token::LeftCurlyBracket; - case '}': next(); return token::RightCurlyBracket; - case 'a': return read_a_symbol(); - case 'b': return read_b_symbol(); - case 'c': return read_c_symbol(); - case '0': return read_number(); - case -1: return token::Eof; - default: lean_unreachable(); + case ')': next(); return token::RightParen; + case '{': next(); return token::LeftCurlyBracket; + case '}': next(); return token::RightCurlyBracket; + case 'a': return read_a_symbol(); + case 'b': return read_b_symbol(); + case 'c': return read_c_symbol(); + case '0': return read_number(); + case '\"': return read_string(); + case -1: return token::Eof; + default: lean_unreachable(); } } } + std::ostream & operator<<(std::ostream & out, scanner::token const & t) { switch (t) { case scanner::token::LeftParen: out << "("; break; @@ -292,6 +350,7 @@ std::ostream & operator<<(std::ostream & out, scanner::token const & t) { case scanner::token::CommandId: out << "CId"; break; case scanner::token::Int: out << "Int"; break; case scanner::token::Decimal: out << "Dec"; break; + case scanner::token::String: out << "String"; break; case scanner::token::Eq: out << "="; break; case scanner::token::Assign: out << ":="; break; case scanner::token::Type: out << "Type"; break; diff --git a/src/frontend/scanner.h b/src/frontend/scanner.h index 628c55027..43cdbb567 100644 --- a/src/frontend/scanner.h +++ b/src/frontend/scanner.h @@ -9,6 +9,7 @@ Author: Leonardo de Moura #include #include "mpq.h" #include "name.h" +#include "list.h" namespace lean { /** @@ -18,7 +19,7 @@ class scanner { public: enum class token { LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow, - Id, CommandId, Int, Decimal, Eq, Assign, Type, Eof + Id, CommandId, Int, Decimal, String, Eq, Assign, Type, Eof }; protected: int m_spos; // position in the current line of the stream @@ -32,27 +33,36 @@ protected: name m_name_val; std::string m_buffer; - char curr() const { return m_curr; } - void new_line() { m_line++; m_spos = 0; } - void next(); - bool check_next(char c); + list m_commands; + + void throw_exception(char const * msg); + char curr() const { return m_curr; } + void new_line() { m_line++; m_spos = 0; } + void next(); + bool check_next(char c); void read_comment(); + name mk_name(name const & curr, std::string const & buf, bool only_digits); token read_a_symbol(); token read_b_symbol(); token read_c_symbol(); token read_number(); - bool is_command(name const & n) const; + token read_string(); + bool is_command(name const & n) const; public: scanner(std::istream& stream); ~scanner(); + /** \brief Register a new command keyword. */ + void add_command_keyword(name const & n); + int get_line() const { return m_line; } int get_pos() const { return m_pos; } token scan(); name const & get_name_val() const { return m_name_val; } - mpq const & get_num_val() const { return m_num_val; } + mpq const & get_num_val() const { return m_num_val; } + std::string const & get_str_val() const { return m_buffer; } }; std::ostream & operator<<(std::ostream & out, scanner::token const & t); } diff --git a/src/tests/frontend/scanner.cpp b/src/tests/frontend/scanner.cpp index a686f9110..6a79d15f8 100644 --- a/src/tests/frontend/scanner.cpp +++ b/src/tests/frontend/scanner.cpp @@ -6,24 +6,115 @@ Author: Leonardo de Moura */ #include #include "scanner.h" +#include "exception.h" +#include "escaped.h" #include "test.h" using namespace lean; -static void tst1() { - char tst[] = "fun(x: pi A : Type, A -> A), (* (* test *) *) x+1 = 2.0 λ"; - std::istringstream in(tst); +#define st scanner::token + +static void scan(char const * str, list const & cmds = list()) { + std::istringstream in(str); scanner s(in); + for (name const & n : cmds) s.add_command_keyword(n); while (true) { - scanner::token t = s.scan(); - if (t == scanner::token::Eof) + st t = s.scan(); + if (t == st::Eof) break; - std::cout << t << " "; + std::cout << t; + if (t == st::Id || t == st::CommandId) + std::cout << "[" << s.get_name_val() << "]"; + else if (t == st::Int || t == st::Decimal) + std::cout << "[" << s.get_num_val() << "]"; + else if (t == st::String) + std::cout << "[\"" << escaped(s.get_str_val().c_str()) << "\"]"; + std::cout << " "; } std::cout << "\n"; } +static void check(char const * str, std::initializer_list const & l, list const & cmds = list()) { + auto it = l.begin(); + std::istringstream in(str); + scanner s(in); + for (name const & n : cmds) s.add_command_keyword(n); + while (true) { + st t = s.scan(); + if (t == st::Eof) { + lean_assert(it == l.end()); + return; + } + lean_assert(it != l.end()); + lean_assert(t == *it); + ++it; + } +} + +static void check_name(char const * str, name const & expected) { + std::istringstream in(str); + scanner s(in); + st t = s.scan(); + lean_assert(t == st::Id); + lean_assert(s.get_name_val() == expected); + lean_assert(s.scan() == st::Eof); +} + +static void tst1() { + scan("fun(x: Pi A : Type, A -> A), (* (* test *) *) x+1 = 2.0 λ"); + try { + scan("(* (* foo *)"); + lean_unreachable(); + } catch (exception const & ex) { + std::cout << "expected error: " << ex.what() << "\n"; + } +} + +static void tst2() { + scan("x::name"); + scan("x::10::foo"); + check("x::name", {st::Id}); + check("fun (x : Bool), x", {st::Lambda, st::LeftParen, st::Id, st::Colon, st::Id, st::RightParen, st::Comma, st::Id}); + check("+++", {st::Id}); + check("x+y", {st::Id, st::Id, st::Id}); + check("(* testing *)", {}); + check(" 2.31 ", {st::Decimal}); + check(" 333 22", {st::Int, st::Int}); + check("Int -> Int", {st::Id, st::Arrow, st::Id}); + check("Int --> Int", {st::Id, st::Id, st::Id}); + check("x := 10", {st::Id, st::Assign, st::Int}); + check("(x+1):Int", {st::LeftParen, st::Id, st::Id, st::Int, st::RightParen, st::Colon, st::Id}); + check("{x}", {st::LeftCurlyBracket, st::Id, st::RightCurlyBracket}); + check("\u03BB \u03A0 \u2192", {st::Lambda, st::Pi, st::Arrow}); + scan("++\u2295++x\u2296\u2296"); + check("++\u2295++x\u2296\u2296", {st::Id, st::Id, st::Id, st::Id, st::Id}); + scan("x10"); + check_name("x10", name("x10")); + check_name("x::10", name(name("x"), 10)); + check_name("x::10::bla::0", name(name(name(name("x"), 10), "bla"), 0u)); + check("0::1", {st::Int, st::Colon, st::Colon, st::Int}); + check_name("\u2296\u2296", name("\u2296\u2296")); + try { + scan("x::1000000000000000000"); + lean_unreachable(); + } catch (exception const & ex) { + std::cout << "expected error: " << ex.what() << "\n"; + } + scan("Theorem a : Bool Axiom b : Int", list({"Theorem", "Axiom"})); + check("Theorem a : Bool Axiom b : Int", {st::CommandId, st::Id, st::Colon, st::Id, st::CommandId, st::Id, st::Colon, st::Id}, list({"Theorem", "Axiom"})); + scan("foo \"tst\\\"\" : Int"); + check("foo \"tst\\\"\" : Int", {st::Id, st::String, st::Colon, st::Id}); + try { + scan("\"foo"); + lean_unreachable(); + } catch (exception const & ex) { + std::cout << "expected error: " << ex.what() << "\n"; + } + scan("2.13 1.2 0.5"); +} + int main() { continue_on_violation(true); tst1(); + tst2(); return has_violations() ? 1 : 0; } diff --git a/src/util/exception.cpp b/src/util/exception.cpp index d8950d613..3674b3f91 100644 --- a/src/util/exception.cpp +++ b/src/util/exception.cpp @@ -4,24 +4,31 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ +#include #include "exception.h" namespace lean { -exception::exception(char const * msg):m_msg(msg) { -} +exception::exception(char const * msg):m_msg(msg) {} +exception::exception(std::string const & msg):m_msg(msg) {} +exception::exception(exception const & e):m_msg(e.m_msg) {} +exception::~exception() noexcept {} +char const * exception::what() const noexcept { return m_msg.c_str(); } -exception::exception(std::string const & msg):m_msg(msg) { +parser_exception::parser_exception(char const * msg, unsigned l, unsigned p):exception(msg), m_line(l), m_pos(p) {} +parser_exception::parser_exception(std::string const & msg, unsigned l, unsigned p):exception(msg), m_line(l), m_pos(p) {} +parser_exception::parser_exception(parser_exception const & e):exception(e), m_line(e.m_line), m_pos(e.m_pos) {} +parser_exception::~parser_exception() noexcept {} +char const * parser_exception::what() const noexcept { + try { + static thread_local std::string buffer; + std::ostringstream s; + s << "(line: " << m_line << ", pos: " << m_pos << ") " << m_msg; + buffer = s.str(); + return buffer.c_str(); + } catch (std::exception ex) { + // failed to generate extended message + return m_msg.c_str(); + } } - -exception::exception(exception const & e):m_msg(e.m_msg) { -} - -exception::~exception() noexcept { -} - -char const * exception::what() const noexcept { - return m_msg.c_str(); -} - } diff --git a/src/util/exception.h b/src/util/exception.h index e596ce956..6951dabe2 100644 --- a/src/util/exception.h +++ b/src/util/exception.h @@ -11,6 +11,7 @@ Author: Leonardo de Moura namespace lean { class exception : public std::exception { +protected: std::string m_msg; public: exception(char const * msg); @@ -20,4 +21,17 @@ public: virtual char const * what() const noexcept; }; +class parser_exception : public exception { +protected: + unsigned m_line; + unsigned m_pos; +public: + parser_exception(char const * msg, unsigned l, unsigned p); + parser_exception(std::string const & msg, unsigned l, unsigned p); + parser_exception(parser_exception const & ex); + virtual ~parser_exception() noexcept; + virtual char const * what() const noexcept; + unsigned get_line() const { return m_line; } + unsigned get_pos() const { return m_pos; } +}; } diff --git a/src/util/list.h b/src/util/list.h index 1a11d63d1..ce54ff260 100644 --- a/src/util/list.h +++ b/src/util/list.h @@ -6,6 +6,7 @@ Author: Leonardo de Moura */ #pragma once #include +#include #include "rc.h" #include "debug.h" @@ -56,6 +57,12 @@ public: cell const * m_it; iterator(cell const * it):m_it(it) {} public: + typedef std::forward_iterator_tag iterator_category; + typedef T value_type; + typedef unsigned difference_type; + typedef T const * pointer; + typedef T const & reference; + iterator(iterator const & s):m_it(s.m_it) {} iterator & operator++() { m_it = m_it->m_tail.m_ptr; return *this; } iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }