feat(frontends/lean): parse strings as expressions of type 'string.string'

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
Leonardo de Moura 2014-07-02 10:00:55 -07:00
parent 8b8881deae
commit a66a08c89e
7 changed files with 243 additions and 4 deletions

View file

@ -26,6 +26,7 @@ Author: Leonardo de Moura
#include "library/module.h"
#include "library/scoped_ext.h"
#include "library/num.h"
#include "library/string.h"
#include "library/error_handling/error_handling.h"
#include "frontends/lean/parser.h"
#include "frontends/lean/parser_bindings.h"
@ -865,9 +866,15 @@ expr parser::parse_decimal_expr() {
}
expr parser::parse_string_expr() {
// TODO(Leo)
next(); // to avoid loop
return expr();
auto p = pos();
std::string v = get_str_val();
next();
if (!m_has_string)
m_has_string = has_string_decls(m_env);
if (!*m_has_string)
throw parser_error("string cannot be encoded as expression, environment does not contain the type 'string' "
"(solution: use 'import string')", p);
return from_string(v);
}
expr parser::parse_nud() {

View file

@ -65,6 +65,7 @@ class parser {
// This flag is when we are trying to parse mutually recursive declarations.
bool m_no_undef_id_error;
optional<bool> m_has_num;
optional<bool> m_has_string;
void display_error_pos(unsigned line, unsigned pos);
void display_error_pos(pos_info p);

View file

@ -360,6 +360,7 @@ public:
bool is_atomic_pp(bool unicode, bool coercion) const { return m_ptr->is_atomic_pp(unicode, coercion); }
unsigned hash() const { return m_ptr->hash(); }
void write(serializer & s) const { return m_ptr->write(s); }
macro_definition_cell const * raw() const { return m_ptr; }
};
/** \brief Macro attachments */

View file

@ -4,7 +4,7 @@ add_library(library deep_copy.cpp expr_lt.cpp io_state.cpp occurs.cpp
normalize.cpp shared_environment.cpp module.cpp coercion.cpp
private.cpp placeholder.cpp aliases.cpp level_names.cpp
update_declaration.cpp choice.cpp scoped_ext.cpp locals.cpp
unifier.cpp explicit.cpp num.cpp)
unifier.cpp explicit.cpp num.cpp string.cpp)
# hop_match.cpp)
target_link_libraries(library ${LEAN_LIBS})

188
src/library/string.cpp Normal file
View file

@ -0,0 +1,188 @@
/*
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <string>
#include <algorithm>
#include "kernel/type_checker.h"
#include "library/kernel_serializer.h"
#include "library/string.h"
namespace lean {
static name g_string_macro("string_macro");
static std::string g_string_opcode("Str");
static expr g_bit(Const(name("bit", "bit")));
static expr g_b0(Const(name("bit", "b0")));
static expr g_b1(Const(name("bit", "b1")));
static expr g_char(Const(name("string", "char")));
static expr g_ascii(Const(name("string", "ascii")));
static expr g_string(Const(name("string", "string")));
static expr g_empty(Const(name("string", "empty")));
static expr g_str(Const(name("string", "str")));
expr from_string_core(unsigned i, std::string const & s);
/** \brief The string macro is a compact way of encoding strings inside Lean expressions. */
class string_macro : public macro_definition_cell {
std::string m_value;
public:
string_macro(std::string const & v):m_value(v) {}
virtual bool lt(macro_definition_cell const & d) const {
return m_value < static_cast<string_macro const &>(d).m_value;
}
virtual name get_name() const { return g_string_macro; }
virtual expr get_type(expr const &, expr const *, extension_context &) const {
return g_string;
}
virtual optional<expr> expand(expr const &, extension_context &) const {
return some_expr(from_string_core(0, m_value));
}
virtual unsigned trust_level() const { return 0; }
virtual bool operator==(macro_definition_cell const & other) const {
string_macro const * other_ptr = dynamic_cast<string_macro const *>(&other);
return other_ptr && m_value == other_ptr->m_value;
}
virtual void display(std::ostream & out) const {
out << "\"";
for (unsigned i = 0; i < m_value.size(); i++) {
char c = m_value[i];
if (c == '\n')
out << "\\n";
else if (c == '\t')
out << "\\t";
else if (c == '\r')
out << "\\r";
else if (c == 0)
out << "\\0";
else if (c == '\"')
out << "\\\"";
else
out << c;
}
out << "\"";
}
virtual format pp(formatter const &, options const &) const {
std::ostringstream out;
display(out);
return format(out.str());
}
virtual bool is_atomic_pp(bool, bool) const { return true; }
virtual unsigned hash() const { return std::hash<std::string>()(m_value); }
virtual void write(serializer & s) const { s << g_string_opcode << m_value; }
std::string const & get_value() const { return m_value; }
};
expr mk_string_macro(std::string const & v) {
return mk_macro(macro_definition(new string_macro(v)));
}
bool is_string_macro(expr const & e) {
return is_macro(e) && dynamic_cast<string_macro const *>(macro_def(e).raw()) != nullptr;
}
string_macro const & to_string_macro(expr const & e) {
lean_assert(is_string_macro(e));
return *static_cast<string_macro const *>(macro_def(e).raw());
}
static register_macro_deserializer_fn
string_macro_des_fn(g_string_opcode,
[](deserializer & d, unsigned num, expr const *) {
if (num != 0)
throw_corrupted_file();
std::string v = d.read_string();
return mk_string_macro(v);
});
bool has_string_decls(environment const & env) {
try {
type_checker tc(env);
return
tc.infer(g_b0) == g_bit &&
tc.infer(g_b1) == g_bit &&
tc.infer(g_ascii) == g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> g_char))))))) &&
tc.infer(g_empty) == g_string &&
tc.infer(g_str) == g_char >> (g_string >> g_string);
} catch (...) {
return false;
}
}
expr from_char(unsigned char c) {
buffer<expr> bits;
while (c != 0) {
if (c % 2 == 1)
bits.push_back(g_b1);
else
bits.push_back(g_b0);
c /= 2;
}
while (bits.size() < 8)
bits.push_back(g_b0);
return mk_rev_app(g_ascii, bits.size(), bits.data());
}
expr from_string_core(unsigned i, std::string const & s) {
if (i == s.size())
return g_empty;
else
return mk_app(g_str, from_char(s[i]), from_string_core(i+1, s));
}
expr from_string(std::string const & s) {
return mk_string_macro(s);
}
bool to_char_core(expr const & e, buffer<char> & tmp) {
buffer<expr> args;
if (get_app_rev_args(e, args) == g_ascii && args.size() == 8) {
unsigned v = 0;
for (unsigned i = 0; i < args.size(); i++) {
v *= 2;
if (args[i] == g_b1)
v++;
else if (args[i] != g_b0)
return false;
}
tmp.push_back(v);
return true;
} else {
return false;
}
}
bool to_string_core(expr const & e, buffer<char> & tmp) {
if (e == g_empty) {
return true;
} else {
buffer<expr> args;
return
get_app_args(e, args) == g_str &&
args.size() == 2 &&
to_char_core(args[0], tmp) &&
to_string_core(args[1], tmp);
}
}
optional<std::string> to_string(expr const & e) {
if (is_string_macro(e)) {
return optional<std::string>(to_string_macro(e).get_value());
} else {
buffer<char> tmp;
if (to_string_core(e, tmp)) {
std::string r;
unsigned i = tmp.size();
while (i > 0) {
--i;
r += tmp[i];
}
return optional<std::string>(r);
} else {
return optional<std::string>();
}
}
}
}

39
src/library/string.h Normal file
View file

@ -0,0 +1,39 @@
/*
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <string>
#include "kernel/environment.h"
namespace lean {
/**
\brief Return true iff the environment \c env contains the following declarations
in the namespace 'bit'
b0 : bit
b1 : bit
and the following ones in the namespace 'string'
ascii : bit -> bit -> bit -> bit -> bit -> bit -> bit -> bit -> char
empty : string
str : char -> string -> string
*/
bool has_string_decls(environment const & env);
/**
\brief Return an expression that encodes the given string using the declarations
ascii, empty and str.
\see has_string_decls
\post *to_string(from_string(s)) == s
*/
expr from_string(std::string const & s);
/**
\brief If the given expression encodes a string, then convert it back to a string.
\see from_string
*/
optional<std::string> to_string(expr const & e);
}

View file

@ -0,0 +1,3 @@
import string
check "aaa"
check "B"