Move sexpr/format/options to util/sexpr (reason: circular dependency between util and numerics lib), now numerics depend on util, and sexpr depends on numerics and util. Add scanner to frontend.
Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
parent
ac38937241
commit
2ea7479ee9
19 changed files with 299 additions and 97 deletions
|
@ -61,6 +61,7 @@ endif()
|
|||
|
||||
include_directories(${LEAN_SOURCE_DIR}/util)
|
||||
include_directories(${LEAN_SOURCE_DIR}/util/numerics)
|
||||
include_directories(${LEAN_SOURCE_DIR}/util/sexpr)
|
||||
include_directories(${LEAN_SOURCE_DIR}/interval)
|
||||
include_directories(${LEAN_SOURCE_DIR}/kernel)
|
||||
include_directories(${LEAN_SOURCE_DIR}/kernel/arith)
|
||||
|
@ -71,6 +72,8 @@ add_subdirectory(util)
|
|||
set(LEAN_LIBS ${LEAN_LIBS} util)
|
||||
add_subdirectory(util/numerics)
|
||||
set(LEAN_LIBS ${LEAN_LIBS} numerics)
|
||||
add_subdirectory(util/sexpr)
|
||||
set(LEAN_LIBS ${LEAN_LIBS} sexpr)
|
||||
add_subdirectory(interval)
|
||||
set(LEAN_LIBS ${LEAN_LIBS} interval)
|
||||
add_subdirectory(kernel)
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
add_library(frontend frontend.cpp operator_info)
|
||||
add_library(frontend frontend.cpp operator_info.cpp scanner.cpp)
|
||||
target_link_libraries(frontend ${LEAN_LIBS})
|
||||
|
|
|
@ -7,77 +7,280 @@ Author: Leonardo de Moura
|
|||
#include <cstdio>
|
||||
#include "scanner.h"
|
||||
#include "debug.h"
|
||||
#include "exception.h"
|
||||
|
||||
namespace lean {
|
||||
|
||||
scanner::scanner(std::istream& stream, bool interactive):
|
||||
m_interactive(interactive),
|
||||
static name g_lambda_unicode("\u03BB");
|
||||
static name g_pi_unicode("\u03A0");
|
||||
static name g_arrow_unicode("\u2192");
|
||||
static char g_normalized[255];
|
||||
static name g_lambda_name("fun");
|
||||
static name g_pi_name("pi");
|
||||
static name g_arrow_name("->");
|
||||
|
||||
class init_normalized_table {
|
||||
public:
|
||||
init_normalized_table() {
|
||||
// by default all characters are in group c,
|
||||
for (int i = 0; i <= 255; i++)
|
||||
g_normalized[i] = 'c';
|
||||
|
||||
// digits normalize to '0'
|
||||
for (int i = '0'; i <= '9'; i++)
|
||||
g_normalized[i] = '0';
|
||||
|
||||
// characters that can be used to create ids of group a
|
||||
for (int i = 'a'; i <= 'z'; i++)
|
||||
g_normalized[i] = 'a';
|
||||
for (int i = 'A'; i <= 'Z'; i++)
|
||||
g_normalized[i] = 'a';
|
||||
g_normalized['_'] = 'a';
|
||||
|
||||
// characters that can be used to create ids of group b
|
||||
for (unsigned char b : {'=', '<', '>', '@', '^', '|', '&', '~', '+', '-', '*', '/', '$', '%', '?', ';'})
|
||||
g_normalized[b] = 'b';
|
||||
|
||||
// punctuation
|
||||
g_normalized['('] = '(';
|
||||
g_normalized[')'] = ')';
|
||||
g_normalized['{'] = '{';
|
||||
g_normalized['}'] = '}';
|
||||
g_normalized[':'] = ':';
|
||||
g_normalized['.'] = '.';
|
||||
g_normalized[','] = ',';
|
||||
|
||||
// spaces
|
||||
g_normalized[' '] = ' ';
|
||||
g_normalized['\t'] = ' ';
|
||||
g_normalized['\r'] = ' ';
|
||||
|
||||
// new line
|
||||
g_normalized['\n'] = '\n';
|
||||
|
||||
// eof
|
||||
g_normalized[255] = -1;
|
||||
}
|
||||
};
|
||||
|
||||
static init_normalized_table g_init_normalized_table;
|
||||
|
||||
char normalize(char c) {
|
||||
return g_normalized[static_cast<unsigned char>(c)];
|
||||
}
|
||||
|
||||
scanner::scanner(std::istream& stream):
|
||||
m_spos(0),
|
||||
m_curr(0),
|
||||
m_line(1),
|
||||
m_pos(0),
|
||||
m_bpos(0),
|
||||
m_bend(0),
|
||||
m_stream(stream),
|
||||
m_cache_input(false) {
|
||||
m_stream(stream) {
|
||||
next();
|
||||
}
|
||||
|
||||
scanner::~scanner() {
|
||||
}
|
||||
|
||||
void scanner::next() {
|
||||
if (m_cache_input)
|
||||
m_cache.push_back(m_curr);
|
||||
lean_assert(m_curr != EOF);
|
||||
if (m_interactive) {
|
||||
m_curr = m_stream.get();
|
||||
}
|
||||
else if (m_bpos < m_bend) {
|
||||
m_curr = m_buffer[m_bpos];
|
||||
m_bpos++;
|
||||
}
|
||||
else {
|
||||
m_stream.read(m_buffer, scanner_buffer_size);
|
||||
m_bend = static_cast<unsigned>(m_stream.gcount());
|
||||
m_bpos = 0;
|
||||
if (m_bpos == m_bend) {
|
||||
m_curr = EOF;
|
||||
}
|
||||
else {
|
||||
m_curr = m_buffer[m_bpos];
|
||||
m_bpos++;
|
||||
}
|
||||
}
|
||||
m_spos++;
|
||||
}
|
||||
|
||||
void scanner::start_caching() {
|
||||
m_cache_input = true;
|
||||
m_cache.clear();
|
||||
bool scanner::check_next(char c) {
|
||||
lean_assert(m_curr != EOF);
|
||||
bool r = m_stream.get() == c;
|
||||
m_stream.unget();
|
||||
return r;
|
||||
}
|
||||
|
||||
void scanner::stop_caching() {
|
||||
m_cache_input = false;
|
||||
void scanner::read_comment() {
|
||||
lean_assert(curr() == '*');
|
||||
next();
|
||||
int nest = 1;
|
||||
while (true) {
|
||||
if (curr() == '*') {
|
||||
next();
|
||||
if (curr() == ')') {
|
||||
next();
|
||||
nest--;
|
||||
if (nest == 0)
|
||||
return;
|
||||
}
|
||||
} else if (curr() == '(') {
|
||||
next();
|
||||
if (curr() == '*') {
|
||||
next();
|
||||
nest++;
|
||||
}
|
||||
} else if (curr() == '\n') {
|
||||
new_line();
|
||||
next();
|
||||
} else if (curr() == EOF) {
|
||||
throw exception("unexpected end of comment");
|
||||
} else {
|
||||
next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned scanner::cache_size() const {
|
||||
return m_cache.size();
|
||||
bool scanner::is_command(name const & n) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
void scanner::reset_cache() {
|
||||
m_cache.clear();
|
||||
scanner::token scanner::read_a_symbol() {
|
||||
lean_assert(normalize(curr()) == 'a');
|
||||
m_buffer.clear();
|
||||
m_buffer += curr();
|
||||
m_name_val = name();
|
||||
next();
|
||||
while (true) {
|
||||
if (normalize(curr()) == 'a') {
|
||||
m_buffer += curr();
|
||||
next();
|
||||
} else if (curr() == ':' && check_next(':')) {
|
||||
next();
|
||||
lean_assert(curr() == ':');
|
||||
next();
|
||||
if (m_name_val.is_anonymous())
|
||||
m_name_val = name(m_buffer.c_str());
|
||||
else
|
||||
m_name_val = name(m_name_val, m_buffer.c_str());
|
||||
} else {
|
||||
if (m_name_val.is_anonymous())
|
||||
m_name_val = name(m_buffer.c_str());
|
||||
else
|
||||
m_name_val = name(m_name_val, m_buffer.c_str());
|
||||
if (m_name_val == g_lambda_name)
|
||||
return token::Lambda;
|
||||
else if (m_name_val == g_pi_name)
|
||||
return token::Pi;
|
||||
else
|
||||
return is_command(m_name_val) ? token::CommandId : token::Id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
char const * scanner::cached_str(unsigned begin, unsigned end) {
|
||||
m_cache_result.clear();
|
||||
while (isspace(m_cache[begin]) && begin < end)
|
||||
begin++;
|
||||
while (begin < end && isspace(m_cache[end-1]))
|
||||
end--;
|
||||
for (unsigned i = begin; i < end; i++)
|
||||
m_cache_result.push_back(m_cache[i]);
|
||||
m_cache_result.push_back(0);
|
||||
return m_cache_result.data();
|
||||
scanner::token scanner::read_b_symbol() {
|
||||
lean_assert(normalize(curr()) == 'b');
|
||||
m_buffer.clear();
|
||||
m_buffer += curr();
|
||||
next();
|
||||
while (true) {
|
||||
if (normalize(curr()) == 'b') {
|
||||
m_buffer += curr();
|
||||
next();
|
||||
} else {
|
||||
m_name_val = name(m_buffer.c_str());
|
||||
if (m_name_val == g_arrow_name)
|
||||
return token::Arrow;
|
||||
else
|
||||
return token::Id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
scanner::token scanner::read_c_symbol() {
|
||||
lean_assert(normalize(curr()) == 'c');
|
||||
m_buffer.clear();
|
||||
m_buffer += curr();
|
||||
next();
|
||||
while (true) {
|
||||
if (normalize(curr()) == 'c') {
|
||||
m_buffer += curr();
|
||||
next();
|
||||
} else {
|
||||
m_name_val = name(m_buffer.c_str());
|
||||
if (m_name_val == g_arrow_unicode)
|
||||
return token::Arrow;
|
||||
else if (m_name_val == g_lambda_unicode)
|
||||
return token::Lambda;
|
||||
else if (m_name_val == g_pi_unicode)
|
||||
return token::Pi;
|
||||
else
|
||||
return token::Id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
scanner::token scanner::read_number() {
|
||||
lean_assert('0' <= curr() && curr() <= '9');
|
||||
mpq q(1);
|
||||
m_num_val = curr() - '0';
|
||||
next();
|
||||
bool is_decimal = false;
|
||||
|
||||
while (true) {
|
||||
char c = curr();
|
||||
if ('0' <= c && c <= '9') {
|
||||
m_num_val = 10*m_num_val + (c - '0');
|
||||
if (is_decimal)
|
||||
q *= 10;
|
||||
next();
|
||||
} else if (c == '.') {
|
||||
if (is_decimal)
|
||||
break;
|
||||
is_decimal = true;
|
||||
next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (is_decimal)
|
||||
m_num_val /= q;
|
||||
return is_decimal ? token::Decimal : token::Int;
|
||||
}
|
||||
|
||||
scanner::token scanner::scan() {
|
||||
while (true) {
|
||||
char c = curr();
|
||||
m_pos = m_spos;
|
||||
switch (normalize(c)) {
|
||||
case ' ': next(); break;
|
||||
case '\n': next(); new_line(); break;
|
||||
case ':': next(); return token::Colon;
|
||||
case ',': next(); return token::Comma;
|
||||
case '.': next(); return token::Period;
|
||||
case '(':
|
||||
next();
|
||||
if (curr() == '*') {
|
||||
read_comment();
|
||||
break;
|
||||
} else {
|
||||
return token::LeftParen;
|
||||
}
|
||||
case ')': next(); return token::RightParen;
|
||||
case '{': next(); return token::LeftCurlyBracket;
|
||||
case '}': next(); return token::RightCurlyBracket;
|
||||
case 'a': return read_a_symbol();
|
||||
case 'b': return read_b_symbol();
|
||||
case 'c': return read_c_symbol();
|
||||
case '0': return read_number();
|
||||
case -1: return token::Eof;
|
||||
default: lean_unreachable();
|
||||
}
|
||||
}
|
||||
}
|
||||
std::ostream & operator<<(std::ostream & out, scanner::token const & t) {
|
||||
switch (t) {
|
||||
case scanner::token::LeftParen: out << "("; break;
|
||||
case scanner::token::RightParen: out << ")"; break;
|
||||
case scanner::token::LeftCurlyBracket: out << "{"; break;
|
||||
case scanner::token::RightCurlyBracket: out << "}"; break;
|
||||
case scanner::token::Colon: out << ":"; break;
|
||||
case scanner::token::Comma: out << ","; break;
|
||||
case scanner::token::Period: out << "."; break;
|
||||
case scanner::token::Lambda: out << g_lambda_unicode; break;
|
||||
case scanner::token::Pi: out << g_pi_unicode; break;
|
||||
case scanner::token::Arrow: out << g_arrow_unicode; break;
|
||||
case scanner::token::Id: out << "Id"; break;
|
||||
case scanner::token::CommandId: out << "CId"; break;
|
||||
case scanner::token::Int: out << "Int"; break;
|
||||
case scanner::token::Decimal: out << "Dec"; break;
|
||||
case scanner::token::Eq: out << "="; break;
|
||||
case scanner::token::Assign: out << ":="; break;
|
||||
case scanner::token::Eof: out << "EOF"; break;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,49 +7,52 @@ Author: Leonardo de Moura
|
|||
#pragma once
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "mpq.h"
|
||||
#include "name.h"
|
||||
|
||||
namespace lean {
|
||||
constexpr unsigned scanner_buffer_size = 1024;
|
||||
|
||||
/**
|
||||
\brief Base class for all scanners in Lean
|
||||
|
||||
It provides basic support for reading streams, buffering, and caching the contents of the input stream.
|
||||
\brief Lean scanner.
|
||||
*/
|
||||
class scanner {
|
||||
public:
|
||||
enum class token {
|
||||
LeftParen, RightParen, LeftCurlyBracket, RightCurlyBracket, Colon, Comma, Period, Lambda, Pi, Arrow,
|
||||
Id, CommandId, Int, Decimal, Eq, Assign, Eof
|
||||
};
|
||||
protected:
|
||||
bool m_interactive;
|
||||
int m_spos; // position in the current line of the stream
|
||||
char m_curr; // current char;
|
||||
|
||||
int m_line; // line
|
||||
int m_pos; // start position of the token
|
||||
std::istream & m_stream;
|
||||
|
||||
char m_buffer[scanner_buffer_size];
|
||||
unsigned m_bpos;
|
||||
unsigned m_bend;
|
||||
std::istream& m_stream;
|
||||
|
||||
bool m_cache_input;
|
||||
std::vector<char> m_cache;
|
||||
std::vector<char> m_cache_result;
|
||||
mpq m_num_val;
|
||||
name m_name_val;
|
||||
std::string m_buffer;
|
||||
|
||||
char curr() const { return m_curr; }
|
||||
void new_line() { m_line++; m_spos = 0; }
|
||||
void next();
|
||||
bool check_next(char c);
|
||||
void read_comment();
|
||||
token read_a_symbol();
|
||||
token read_b_symbol();
|
||||
token read_c_symbol();
|
||||
token read_number();
|
||||
bool is_command(name const & n) const;
|
||||
|
||||
public:
|
||||
scanner(std::istream& stream, bool interactive = false);
|
||||
scanner(std::istream& stream);
|
||||
~scanner();
|
||||
|
||||
int get_line() const { return m_line; }
|
||||
int get_pos() const { return m_pos; }
|
||||
token scan();
|
||||
|
||||
void start_caching();
|
||||
void stop_caching();
|
||||
unsigned cache_size() const;
|
||||
void reset_cache();
|
||||
char const * cached_str(unsigned begin, unsigned end);
|
||||
name const & get_name_val() const { return m_name_val; }
|
||||
mpq const & get_num_val() const { return m_num_val; }
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & out, scanner::token const & t);
|
||||
}
|
||||
|
|
|
@ -65,5 +65,5 @@ inline bool is_max (level const & l) { return kind(l) == level_kind::Max; }
|
|||
inline level const * max_begin_levels(level const & l) { return &max_level(l, 0); }
|
||||
inline level const * max_end_levels(level const & l) { return max_begin_levels(l) + max_size(l); }
|
||||
|
||||
format pp(level const & l, char const * sep = default_name_separator);
|
||||
format pp(level const & l, char const * sep = LEAN_NAME_SEPARATOR);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ struct pp_fn {
|
|||
pp_fn(environment const * env):m_env(env) {}
|
||||
|
||||
unsigned indent() const { return 2; }
|
||||
char const * sep() const { return default_name_separator; }
|
||||
char const * sep() const { return LEAN_NAME_SEPARATOR; }
|
||||
format pp_keyword(char const * k) { return highlight(format(k), format::format_color::ORANGE); }
|
||||
format pp_type_kwd() { return highlight(format("Type"), format::format_color::PINK); }
|
||||
format pp_eq_kwd() { return highlight(format(" = "), format::format_color::BLUE); }
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
add_executable(frontend_tst frontend.cpp)
|
||||
target_link_libraries(frontend_tst ${EXTRA_LIBS})
|
||||
add_test(frontend ${CMAKE_CURRENT_BINARY_DIR}/frontend_tst)
|
||||
add_executable(scanner scanner.cpp)
|
||||
target_link_libraries(scanner ${EXTRA_LIBS})
|
||||
add_test(scanner ${CMAKE_CURRENT_BINARY_DIR}/scanner)
|
||||
|
|
|
@ -4,9 +4,9 @@ add_test(interrupt ${CMAKE_CURRENT_BINARY_DIR}/interrupt)
|
|||
add_executable(name name.cpp)
|
||||
target_link_libraries(name ${EXTRA_LIBS})
|
||||
add_test(name ${CMAKE_CURRENT_BINARY_DIR}/name)
|
||||
add_executable(sexpr sexpr.cpp)
|
||||
target_link_libraries(sexpr ${EXTRA_LIBS})
|
||||
add_test(sexpr ${CMAKE_CURRENT_BINARY_DIR}/sexpr)
|
||||
add_executable(sexpr_tst sexpr.cpp)
|
||||
target_link_libraries(sexpr_tst ${EXTRA_LIBS})
|
||||
add_test(sexpr ${CMAKE_CURRENT_BINARY_DIR}/sexpr_tst)
|
||||
add_executable(format format.cpp)
|
||||
target_link_libraries(format ${EXTRA_LIBS})
|
||||
add_test(format ${CMAKE_CURRENT_BINARY_DIR}/format)
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
add_library(util trace.cpp debug.cpp name.cpp exception.cpp
|
||||
verbosity.cpp interrupt.cpp hash.cpp escaped.cpp bit_tricks.cpp
|
||||
sexpr.cpp sexpr_funcs.cpp format.cpp safe_arith.cpp options.cpp
|
||||
)
|
||||
safe_arith.cpp)
|
||||
|
|
|
@ -13,14 +13,8 @@ Author: Leonardo de Moura
|
|||
#include "rc.h"
|
||||
#include "hash.h"
|
||||
#include "trace.h"
|
||||
#include "options.h"
|
||||
|
||||
#ifndef LEAN_DEFAULT_NAME_SEPARATOR
|
||||
#define LEAN_DEFAULT_NAME_SEPARATOR "::"
|
||||
#endif
|
||||
|
||||
namespace lean {
|
||||
|
||||
constexpr char const * anonymous_str = "[anonymous]";
|
||||
|
||||
/**
|
||||
|
@ -269,7 +263,7 @@ size_t name::size(char const * sep) const {
|
|||
}
|
||||
|
||||
size_t name::size() const {
|
||||
return size(LEAN_DEFAULT_NAME_SEPARATOR);
|
||||
return size(LEAN_NAME_SEPARATOR);
|
||||
}
|
||||
|
||||
unsigned name::hash() const {
|
||||
|
@ -283,11 +277,11 @@ std::string name::to_string(char const * sep) const {
|
|||
}
|
||||
|
||||
std::string name::to_string() const {
|
||||
return to_string(LEAN_DEFAULT_NAME_SEPARATOR);
|
||||
return to_string(LEAN_NAME_SEPARATOR);
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & out, name const & n) {
|
||||
name::imp::display(out, LEAN_DEFAULT_NAME_SEPARATOR, n.m_ptr);
|
||||
name::imp::display(out, LEAN_NAME_SEPARATOR, n.m_ptr);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -297,12 +291,6 @@ std::ostream & operator<<(std::ostream & out, name::sep const & s) {
|
|||
name::imp::display(out, s.m_sep, s.m_name.m_ptr);
|
||||
return out;
|
||||
}
|
||||
|
||||
static name g_name_separator{"name", "separator"};
|
||||
|
||||
char const * get_name_separator(options const & o) {
|
||||
return o.get_string(g_name_separator, LEAN_DEFAULT_NAME_SEPARATOR);
|
||||
}
|
||||
}
|
||||
|
||||
void print(lean::name const & n) { std::cout << n << std::endl; }
|
||||
|
|
|
@ -7,11 +7,12 @@ Author: Leonardo de Moura
|
|||
#pragma once
|
||||
#include <iostream>
|
||||
|
||||
#ifndef LEAN_NAME_SEPARATOR
|
||||
#define LEAN_NAME_SEPARATOR "::"
|
||||
#endif
|
||||
|
||||
namespace lean {
|
||||
|
||||
constexpr char const * default_name_separator = "::";
|
||||
enum class name_kind { ANONYMOUS, STRING, NUMERAL };
|
||||
|
||||
/**
|
||||
\brief Hierarchical names.
|
||||
*/
|
||||
|
@ -73,8 +74,4 @@ public:
|
|||
};
|
||||
struct name_hash { unsigned operator()(name const & n) const { return n.hash(); } };
|
||||
struct name_eq { bool operator()(name const & n1, name const & n2) const { return n1 == n2; } };
|
||||
|
||||
class options;
|
||||
/** \brief Return the separator for hierarchical names */
|
||||
char const * get_name_separator(options const & o);
|
||||
}
|
||||
|
|
|
@ -106,4 +106,8 @@ std::ostream & operator<<(std::ostream & out, options const & o) {
|
|||
out << g_right_angle_bracket;
|
||||
return out;
|
||||
}
|
||||
static name g_name_separator{"name", "separator"};
|
||||
char const * get_name_separator(options const & o) {
|
||||
return o.get_string(g_name_separator, "::");
|
||||
}
|
||||
}
|
|
@ -51,4 +51,6 @@ public:
|
|||
};
|
||||
template<typename T> options update(options const & o, name const & n, T const & v) { return o.update(n, sexpr(v)); }
|
||||
template<typename T> options update(options const & o, char const * n, T const & v) { return o.update(name(n), sexpr(v)); }
|
||||
/** \brief Return the separator for hierarchical names */
|
||||
char const * get_name_separator(options const & o);
|
||||
}
|
Loading…
Reference in a new issue