From 2ea7479ee953232cee7c1aa8a2210ee6a4886612 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Tue, 13 Aug 2013 03:40:51 -0700 Subject: [PATCH] Move sexpr/format/options to util/sexpr (reason: circular dependency between util and numerics lib), now numerics depend on util, and sexpr depends on numerics and util. Add scanner to frontend. Signed-off-by: Leonardo de Moura --- src/CMakeLists.txt | 3 + src/frontend/CMakeLists.txt | 2 +- src/frontend/scanner.cpp | 295 ++++++++++++++++++++++----- src/frontend/scanner.h | 45 ++-- src/kernel/level.h | 2 +- src/kernel/pp.cpp | 2 +- src/tests/frontend/CMakeLists.txt | 3 + src/tests/util/CMakeLists.txt | 6 +- src/util/CMakeLists.txt | 3 +- src/util/name.cpp | 18 +- src/util/name.h | 11 +- src/util/{ => sexpr}/format.cpp | 0 src/util/{ => sexpr}/format.h | 0 src/util/{ => sexpr}/options.cpp | 4 + src/util/{ => sexpr}/options.h | 2 + src/util/{ => sexpr}/sexpr.cpp | 0 src/util/{ => sexpr}/sexpr.h | 0 src/util/{ => sexpr}/sexpr_funcs.cpp | 0 src/util/{ => sexpr}/sexpr_funcs.h | 0 19 files changed, 299 insertions(+), 97 deletions(-) rename src/util/{ => sexpr}/format.cpp (100%) rename src/util/{ => sexpr}/format.h (100%) rename src/util/{ => sexpr}/options.cpp (96%) rename src/util/{ => sexpr}/options.h (95%) rename src/util/{ => sexpr}/sexpr.cpp (100%) rename src/util/{ => sexpr}/sexpr.h (100%) rename src/util/{ => sexpr}/sexpr_funcs.cpp (100%) rename src/util/{ => sexpr}/sexpr_funcs.h (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f0c09d2e..662f2084c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -61,6 +61,7 @@ endif() include_directories(${LEAN_SOURCE_DIR}/util) include_directories(${LEAN_SOURCE_DIR}/util/numerics) +include_directories(${LEAN_SOURCE_DIR}/util/sexpr) include_directories(${LEAN_SOURCE_DIR}/interval) include_directories(${LEAN_SOURCE_DIR}/kernel) include_directories(${LEAN_SOURCE_DIR}/kernel/arith) @@ -71,6 +72,8 @@ add_subdirectory(util) set(LEAN_LIBS ${LEAN_LIBS} util) add_subdirectory(util/numerics) set(LEAN_LIBS ${LEAN_LIBS} numerics) +add_subdirectory(util/sexpr) +set(LEAN_LIBS ${LEAN_LIBS} sexpr) add_subdirectory(interval) set(LEAN_LIBS ${LEAN_LIBS} interval) add_subdirectory(kernel) diff --git a/src/frontend/CMakeLists.txt b/src/frontend/CMakeLists.txt index f602416c7..b44b9dc3b 100644 --- a/src/frontend/CMakeLists.txt +++ b/src/frontend/CMakeLists.txt @@ -1,2 +1,2 @@ -add_library(frontend frontend.cpp operator_info) +add_library(frontend frontend.cpp operator_info.cpp scanner.cpp) target_link_libraries(frontend ${LEAN_LIBS}) diff --git a/src/frontend/scanner.cpp b/src/frontend/scanner.cpp index 62b9531e4..6543e93a8 100644 --- a/src/frontend/scanner.cpp +++ b/src/frontend/scanner.cpp @@ -7,77 +7,280 @@ Author: Leonardo de Moura #include #include "scanner.h" #include "debug.h" +#include "exception.h" namespace lean { -scanner::scanner(std::istream& stream, bool interactive): - m_interactive(interactive), +static name g_lambda_unicode("\u03BB"); +static name g_pi_unicode("\u03A0"); +static name g_arrow_unicode("\u2192"); +static char g_normalized[255]; +static name g_lambda_name("fun"); +static name g_pi_name("pi"); +static name g_arrow_name("->"); + +class init_normalized_table { +public: + init_normalized_table() { + // by default all characters are in group c, + for (int i = 0; i <= 255; i++) + g_normalized[i] = 'c'; + + // digits normalize to '0' + for (int i = '0'; i <= '9'; i++) + g_normalized[i] = '0'; + + // characters that can be used to create ids of group a + for (int i = 'a'; i <= 'z'; i++) + g_normalized[i] = 'a'; + for (int i = 'A'; i <= 'Z'; i++) + g_normalized[i] = 'a'; + g_normalized['_'] = 'a'; + + // characters that can be used to create ids of group b + for (unsigned char b : {'=', '<', '>', '@', '^', '|', '&', '~', '+', '-', '*', '/', '$', '%', '?', ';'}) + g_normalized[b] = 'b'; + + // punctuation + g_normalized['('] = '('; + g_normalized[')'] = ')'; + g_normalized['{'] = '{'; + g_normalized['}'] = '}'; + g_normalized[':'] = ':'; + g_normalized['.'] = '.'; + g_normalized[','] = ','; + + // spaces + g_normalized[' '] = ' '; + g_normalized['\t'] = ' '; + g_normalized['\r'] = ' '; + + // new line + g_normalized['\n'] = '\n'; + + // eof + g_normalized[255] = -1; + } +}; + +static init_normalized_table g_init_normalized_table; + +char normalize(char c) { + return g_normalized[static_cast(c)]; +} + +scanner::scanner(std::istream& stream): m_spos(0), m_curr(0), m_line(1), m_pos(0), - m_bpos(0), - m_bend(0), - m_stream(stream), - m_cache_input(false) { + m_stream(stream) { + next(); } scanner::~scanner() { } void scanner::next() { - if (m_cache_input) - m_cache.push_back(m_curr); lean_assert(m_curr != EOF); - if (m_interactive) { - m_curr = m_stream.get(); - } - else if (m_bpos < m_bend) { - m_curr = m_buffer[m_bpos]; - m_bpos++; - } - else { - m_stream.read(m_buffer, scanner_buffer_size); - m_bend = static_cast(m_stream.gcount()); - m_bpos = 0; - if (m_bpos == m_bend) { - m_curr = EOF; - } - else { - m_curr = m_buffer[m_bpos]; - m_bpos++; - } - } + m_curr = m_stream.get(); m_spos++; } -void scanner::start_caching() { - m_cache_input = true; - m_cache.clear(); +bool scanner::check_next(char c) { + lean_assert(m_curr != EOF); + bool r = m_stream.get() == c; + m_stream.unget(); + return r; } -void scanner::stop_caching() { - m_cache_input = false; +void scanner::read_comment() { + lean_assert(curr() == '*'); + next(); + int nest = 1; + while (true) { + if (curr() == '*') { + next(); + if (curr() == ')') { + next(); + nest--; + if (nest == 0) + return; + } + } else if (curr() == '(') { + next(); + if (curr() == '*') { + next(); + nest++; + } + } else if (curr() == '\n') { + new_line(); + next(); + } else if (curr() == EOF) { + throw exception("unexpected end of comment"); + } else { + next(); + } + } } -unsigned scanner::cache_size() const { - return m_cache.size(); +bool scanner::is_command(name const & n) const { + return false; } -void scanner::reset_cache() { - m_cache.clear(); +scanner::token scanner::read_a_symbol() { + lean_assert(normalize(curr()) == 'a'); + m_buffer.clear(); + m_buffer += curr(); + m_name_val = name(); + next(); + while (true) { + if (normalize(curr()) == 'a') { + m_buffer += curr(); + next(); + } else if (curr() == ':' && check_next(':')) { + next(); + lean_assert(curr() == ':'); + next(); + if (m_name_val.is_anonymous()) + m_name_val = name(m_buffer.c_str()); + else + m_name_val = name(m_name_val, m_buffer.c_str()); + } else { + if (m_name_val.is_anonymous()) + m_name_val = name(m_buffer.c_str()); + else + m_name_val = name(m_name_val, m_buffer.c_str()); + if (m_name_val == g_lambda_name) + return token::Lambda; + else if (m_name_val == g_pi_name) + return token::Pi; + else + return is_command(m_name_val) ? token::CommandId : token::Id; + } + } } -char const * scanner::cached_str(unsigned begin, unsigned end) { - m_cache_result.clear(); - while (isspace(m_cache[begin]) && begin < end) - begin++; - while (begin < end && isspace(m_cache[end-1])) - end--; - for (unsigned i = begin; i < end; i++) - m_cache_result.push_back(m_cache[i]); - m_cache_result.push_back(0); - return m_cache_result.data(); +scanner::token scanner::read_b_symbol() { + lean_assert(normalize(curr()) == 'b'); + m_buffer.clear(); + m_buffer += curr(); + next(); + while (true) { + if (normalize(curr()) == 'b') { + m_buffer += curr(); + next(); + } else { + m_name_val = name(m_buffer.c_str()); + if (m_name_val == g_arrow_name) + return token::Arrow; + else + return token::Id; + } + } } +scanner::token scanner::read_c_symbol() { + lean_assert(normalize(curr()) == 'c'); + m_buffer.clear(); + m_buffer += curr(); + next(); + while (true) { + if (normalize(curr()) == 'c') { + m_buffer += curr(); + next(); + } else { + m_name_val = name(m_buffer.c_str()); + if (m_name_val == g_arrow_unicode) + return token::Arrow; + else if (m_name_val == g_lambda_unicode) + return token::Lambda; + else if (m_name_val == g_pi_unicode) + return token::Pi; + else + return token::Id; + } + } +} + +scanner::token scanner::read_number() { + lean_assert('0' <= curr() && curr() <= '9'); + mpq q(1); + m_num_val = curr() - '0'; + next(); + bool is_decimal = false; + + while (true) { + char c = curr(); + if ('0' <= c && c <= '9') { + m_num_val = 10*m_num_val + (c - '0'); + if (is_decimal) + q *= 10; + next(); + } else if (c == '.') { + if (is_decimal) + break; + is_decimal = true; + next(); + } else { + break; + } + } + if (is_decimal) + m_num_val /= q; + return is_decimal ? token::Decimal : token::Int; +} + +scanner::token scanner::scan() { + while (true) { + char c = curr(); + m_pos = m_spos; + switch (normalize(c)) { + case ' ': next(); break; + case '\n': next(); new_line(); break; + case ':': next(); return token::Colon; + case ',': next(); return token::Comma; + case '.': next(); return token::Period; + case '(': + next(); + if (curr() == '*') { + read_comment(); + break; + } else { + return token::LeftParen; + } + case ')': next(); return token::RightParen; + case '{': next(); return token::LeftCurlyBracket; + case '}': next(); return token::RightCurlyBracket; + case 'a': return read_a_symbol(); + case 'b': return read_b_symbol(); + case 'c': return read_c_symbol(); + case '0': return read_number(); + case -1: return token::Eof; + default: lean_unreachable(); + } + } +} +std::ostream & operator<<(std::ostream & out, scanner::token const & t) { + switch (t) { + case scanner::token::LeftParen: out << "("; break; + case scanner::token::RightParen: out << ")"; break; + case scanner::token::LeftCurlyBracket: out << "{"; break; + case scanner::token::RightCurlyBracket: out << "}"; break; + case scanner::token::Colon: out << ":"; break; + case scanner::token::Comma: out << ","; break; + case scanner::token::Period: out << "."; break; + case scanner::token::Lambda: out << g_lambda_unicode; break; + case scanner::token::Pi: out << g_pi_unicode; break; + case scanner::token::Arrow: out << g_arrow_unicode; break; + case scanner::token::Id: out << "Id"; break; + case scanner::token::CommandId: out << "CId"; break; + case scanner::token::Int: out << "Int"; break; + case scanner::token::Decimal: out << "Dec"; break; + case scanner::token::Eq: out << "="; break; + case scanner::token::Assign: out << ":="; break; + case scanner::token::Eof: out << "EOF"; break; + } + return out; +} } diff --git a/src/frontend/scanner.h b/src/frontend/scanner.h index 879cf75ba..0d50e80eb 100644 --- a/src/frontend/scanner.h +++ b/src/frontend/scanner.h @@ -7,49 +7,52 @@ Author: Leonardo de Moura #pragma once #include #include +#include "mpq.h" +#include "name.h" namespace lean { -constexpr unsigned scanner_buffer_size = 1024; - /** - \brief Base class for all scanners in Lean - - It provides basic support for reading streams, buffering, and caching the contents of the input stream. + \brief Lean scanner. */ class scanner { +public: + enum class token { + LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow, + Id, CommandId, Int, Decimal, Eq, Assign, Eof + }; protected: - bool m_interactive; int m_spos; // position in the current line of the stream char m_curr; // current char; int m_line; // line int m_pos; // start position of the token + std::istream & m_stream; - char m_buffer[scanner_buffer_size]; - unsigned m_bpos; - unsigned m_bend; - std::istream& m_stream; - - bool m_cache_input; - std::vector m_cache; - std::vector m_cache_result; + mpq m_num_val; + name m_name_val; + std::string m_buffer; char curr() const { return m_curr; } void new_line() { m_line++; m_spos = 0; } void next(); + bool check_next(char c); + void read_comment(); + token read_a_symbol(); + token read_b_symbol(); + token read_c_symbol(); + token read_number(); + bool is_command(name const & n) const; public: - scanner(std::istream& stream, bool interactive = false); + scanner(std::istream& stream); ~scanner(); int get_line() const { return m_line; } int get_pos() const { return m_pos; } + token scan(); - void start_caching(); - void stop_caching(); - unsigned cache_size() const; - void reset_cache(); - char const * cached_str(unsigned begin, unsigned end); + name const & get_name_val() const { return m_name_val; } + mpq const & get_num_val() const { return m_num_val; } }; - +std::ostream & operator<<(std::ostream & out, scanner::token const & t); } diff --git a/src/kernel/level.h b/src/kernel/level.h index 7fe5d2e29..7e8e57cd2 100644 --- a/src/kernel/level.h +++ b/src/kernel/level.h @@ -65,5 +65,5 @@ inline bool is_max (level const & l) { return kind(l) == level_kind::Max; } inline level const * max_begin_levels(level const & l) { return &max_level(l, 0); } inline level const * max_end_levels(level const & l) { return max_begin_levels(l) + max_size(l); } -format pp(level const & l, char const * sep = default_name_separator); +format pp(level const & l, char const * sep = LEAN_NAME_SEPARATOR); } diff --git a/src/kernel/pp.cpp b/src/kernel/pp.cpp index bd5f47a04..5b5ff5aff 100644 --- a/src/kernel/pp.cpp +++ b/src/kernel/pp.cpp @@ -17,7 +17,7 @@ struct pp_fn { pp_fn(environment const * env):m_env(env) {} unsigned indent() const { return 2; } - char const * sep() const { return default_name_separator; } + char const * sep() const { return LEAN_NAME_SEPARATOR; } format pp_keyword(char const * k) { return highlight(format(k), format::format_color::ORANGE); } format pp_type_kwd() { return highlight(format("Type"), format::format_color::PINK); } format pp_eq_kwd() { return highlight(format(" = "), format::format_color::BLUE); } diff --git a/src/tests/frontend/CMakeLists.txt b/src/tests/frontend/CMakeLists.txt index 45fa0bed0..55f0bb014 100644 --- a/src/tests/frontend/CMakeLists.txt +++ b/src/tests/frontend/CMakeLists.txt @@ -1,3 +1,6 @@ add_executable(frontend_tst frontend.cpp) target_link_libraries(frontend_tst ${EXTRA_LIBS}) add_test(frontend ${CMAKE_CURRENT_BINARY_DIR}/frontend_tst) +add_executable(scanner scanner.cpp) +target_link_libraries(scanner ${EXTRA_LIBS}) +add_test(scanner ${CMAKE_CURRENT_BINARY_DIR}/scanner) diff --git a/src/tests/util/CMakeLists.txt b/src/tests/util/CMakeLists.txt index b1322d6d9..1100290f4 100644 --- a/src/tests/util/CMakeLists.txt +++ b/src/tests/util/CMakeLists.txt @@ -4,9 +4,9 @@ add_test(interrupt ${CMAKE_CURRENT_BINARY_DIR}/interrupt) add_executable(name name.cpp) target_link_libraries(name ${EXTRA_LIBS}) add_test(name ${CMAKE_CURRENT_BINARY_DIR}/name) -add_executable(sexpr sexpr.cpp) -target_link_libraries(sexpr ${EXTRA_LIBS}) -add_test(sexpr ${CMAKE_CURRENT_BINARY_DIR}/sexpr) +add_executable(sexpr_tst sexpr.cpp) +target_link_libraries(sexpr_tst ${EXTRA_LIBS}) +add_test(sexpr ${CMAKE_CURRENT_BINARY_DIR}/sexpr_tst) add_executable(format format.cpp) target_link_libraries(format ${EXTRA_LIBS}) add_test(format ${CMAKE_CURRENT_BINARY_DIR}/format) diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 3fc0f47a7..ffeb8257b 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,4 +1,3 @@ add_library(util trace.cpp debug.cpp name.cpp exception.cpp verbosity.cpp interrupt.cpp hash.cpp escaped.cpp bit_tricks.cpp - sexpr.cpp sexpr_funcs.cpp format.cpp safe_arith.cpp options.cpp - ) + safe_arith.cpp) diff --git a/src/util/name.cpp b/src/util/name.cpp index 906286230..33c95cd8f 100644 --- a/src/util/name.cpp +++ b/src/util/name.cpp @@ -13,14 +13,8 @@ Author: Leonardo de Moura #include "rc.h" #include "hash.h" #include "trace.h" -#include "options.h" - -#ifndef LEAN_DEFAULT_NAME_SEPARATOR -#define LEAN_DEFAULT_NAME_SEPARATOR "::" -#endif namespace lean { - constexpr char const * anonymous_str = "[anonymous]"; /** @@ -269,7 +263,7 @@ size_t name::size(char const * sep) const { } size_t name::size() const { - return size(LEAN_DEFAULT_NAME_SEPARATOR); + return size(LEAN_NAME_SEPARATOR); } unsigned name::hash() const { @@ -283,11 +277,11 @@ std::string name::to_string(char const * sep) const { } std::string name::to_string() const { - return to_string(LEAN_DEFAULT_NAME_SEPARATOR); + return to_string(LEAN_NAME_SEPARATOR); } std::ostream & operator<<(std::ostream & out, name const & n) { - name::imp::display(out, LEAN_DEFAULT_NAME_SEPARATOR, n.m_ptr); + name::imp::display(out, LEAN_NAME_SEPARATOR, n.m_ptr); return out; } @@ -297,12 +291,6 @@ std::ostream & operator<<(std::ostream & out, name::sep const & s) { name::imp::display(out, s.m_sep, s.m_name.m_ptr); return out; } - -static name g_name_separator{"name", "separator"}; - -char const * get_name_separator(options const & o) { - return o.get_string(g_name_separator, LEAN_DEFAULT_NAME_SEPARATOR); -} } void print(lean::name const & n) { std::cout << n << std::endl; } diff --git a/src/util/name.h b/src/util/name.h index a8ff08382..86933e68d 100644 --- a/src/util/name.h +++ b/src/util/name.h @@ -7,11 +7,12 @@ Author: Leonardo de Moura #pragma once #include +#ifndef LEAN_NAME_SEPARATOR +#define LEAN_NAME_SEPARATOR "::" +#endif + namespace lean { - -constexpr char const * default_name_separator = "::"; enum class name_kind { ANONYMOUS, STRING, NUMERAL }; - /** \brief Hierarchical names. */ @@ -73,8 +74,4 @@ public: }; struct name_hash { unsigned operator()(name const & n) const { return n.hash(); } }; struct name_eq { bool operator()(name const & n1, name const & n2) const { return n1 == n2; } }; - -class options; -/** \brief Return the separator for hierarchical names */ -char const * get_name_separator(options const & o); } diff --git a/src/util/format.cpp b/src/util/sexpr/format.cpp similarity index 100% rename from src/util/format.cpp rename to src/util/sexpr/format.cpp diff --git a/src/util/format.h b/src/util/sexpr/format.h similarity index 100% rename from src/util/format.h rename to src/util/sexpr/format.h diff --git a/src/util/options.cpp b/src/util/sexpr/options.cpp similarity index 96% rename from src/util/options.cpp rename to src/util/sexpr/options.cpp index 5806b29bc..51d859eb6 100644 --- a/src/util/options.cpp +++ b/src/util/sexpr/options.cpp @@ -106,4 +106,8 @@ std::ostream & operator<<(std::ostream & out, options const & o) { out << g_right_angle_bracket; return out; } +static name g_name_separator{"name", "separator"}; +char const * get_name_separator(options const & o) { + return o.get_string(g_name_separator, "::"); +} } diff --git a/src/util/options.h b/src/util/sexpr/options.h similarity index 95% rename from src/util/options.h rename to src/util/sexpr/options.h index a9f428816..339f38747 100644 --- a/src/util/options.h +++ b/src/util/sexpr/options.h @@ -51,4 +51,6 @@ public: }; template options update(options const & o, name const & n, T const & v) { return o.update(n, sexpr(v)); } template options update(options const & o, char const * n, T const & v) { return o.update(name(n), sexpr(v)); } +/** \brief Return the separator for hierarchical names */ +char const * get_name_separator(options const & o); } diff --git a/src/util/sexpr.cpp b/src/util/sexpr/sexpr.cpp similarity index 100% rename from src/util/sexpr.cpp rename to src/util/sexpr/sexpr.cpp diff --git a/src/util/sexpr.h b/src/util/sexpr/sexpr.h similarity index 100% rename from src/util/sexpr.h rename to src/util/sexpr/sexpr.h diff --git a/src/util/sexpr_funcs.cpp b/src/util/sexpr/sexpr_funcs.cpp similarity index 100% rename from src/util/sexpr_funcs.cpp rename to src/util/sexpr/sexpr_funcs.cpp diff --git a/src/util/sexpr_funcs.h b/src/util/sexpr/sexpr_funcs.h similarity index 100% rename from src/util/sexpr_funcs.h rename to src/util/sexpr/sexpr_funcs.h