feat(frontends/lean): parse strings as expressions of type 'string.string'
Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
parent
8b8881deae
commit
a66a08c89e
7 changed files with 243 additions and 4 deletions
|
@ -26,6 +26,7 @@ Author: Leonardo de Moura
|
||||||
#include "library/module.h"
|
#include "library/module.h"
|
||||||
#include "library/scoped_ext.h"
|
#include "library/scoped_ext.h"
|
||||||
#include "library/num.h"
|
#include "library/num.h"
|
||||||
|
#include "library/string.h"
|
||||||
#include "library/error_handling/error_handling.h"
|
#include "library/error_handling/error_handling.h"
|
||||||
#include "frontends/lean/parser.h"
|
#include "frontends/lean/parser.h"
|
||||||
#include "frontends/lean/parser_bindings.h"
|
#include "frontends/lean/parser_bindings.h"
|
||||||
|
@ -865,9 +866,15 @@ expr parser::parse_decimal_expr() {
|
||||||
}
|
}
|
||||||
|
|
||||||
expr parser::parse_string_expr() {
|
expr parser::parse_string_expr() {
|
||||||
// TODO(Leo)
|
auto p = pos();
|
||||||
next(); // to avoid loop
|
std::string v = get_str_val();
|
||||||
return expr();
|
next();
|
||||||
|
if (!m_has_string)
|
||||||
|
m_has_string = has_string_decls(m_env);
|
||||||
|
if (!*m_has_string)
|
||||||
|
throw parser_error("string cannot be encoded as expression, environment does not contain the type 'string' "
|
||||||
|
"(solution: use 'import string')", p);
|
||||||
|
return from_string(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
expr parser::parse_nud() {
|
expr parser::parse_nud() {
|
||||||
|
|
|
@ -65,6 +65,7 @@ class parser {
|
||||||
// This flag is when we are trying to parse mutually recursive declarations.
|
// This flag is when we are trying to parse mutually recursive declarations.
|
||||||
bool m_no_undef_id_error;
|
bool m_no_undef_id_error;
|
||||||
optional<bool> m_has_num;
|
optional<bool> m_has_num;
|
||||||
|
optional<bool> m_has_string;
|
||||||
|
|
||||||
void display_error_pos(unsigned line, unsigned pos);
|
void display_error_pos(unsigned line, unsigned pos);
|
||||||
void display_error_pos(pos_info p);
|
void display_error_pos(pos_info p);
|
||||||
|
|
|
@ -360,6 +360,7 @@ public:
|
||||||
bool is_atomic_pp(bool unicode, bool coercion) const { return m_ptr->is_atomic_pp(unicode, coercion); }
|
bool is_atomic_pp(bool unicode, bool coercion) const { return m_ptr->is_atomic_pp(unicode, coercion); }
|
||||||
unsigned hash() const { return m_ptr->hash(); }
|
unsigned hash() const { return m_ptr->hash(); }
|
||||||
void write(serializer & s) const { return m_ptr->write(s); }
|
void write(serializer & s) const { return m_ptr->write(s); }
|
||||||
|
macro_definition_cell const * raw() const { return m_ptr; }
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \brief Macro attachments */
|
/** \brief Macro attachments */
|
||||||
|
|
|
@ -4,7 +4,7 @@ add_library(library deep_copy.cpp expr_lt.cpp io_state.cpp occurs.cpp
|
||||||
normalize.cpp shared_environment.cpp module.cpp coercion.cpp
|
normalize.cpp shared_environment.cpp module.cpp coercion.cpp
|
||||||
private.cpp placeholder.cpp aliases.cpp level_names.cpp
|
private.cpp placeholder.cpp aliases.cpp level_names.cpp
|
||||||
update_declaration.cpp choice.cpp scoped_ext.cpp locals.cpp
|
update_declaration.cpp choice.cpp scoped_ext.cpp locals.cpp
|
||||||
unifier.cpp explicit.cpp num.cpp)
|
unifier.cpp explicit.cpp num.cpp string.cpp)
|
||||||
# hop_match.cpp)
|
# hop_match.cpp)
|
||||||
|
|
||||||
target_link_libraries(library ${LEAN_LIBS})
|
target_link_libraries(library ${LEAN_LIBS})
|
||||||
|
|
188
src/library/string.cpp
Normal file
188
src/library/string.cpp
Normal file
|
@ -0,0 +1,188 @@
|
||||||
|
/*
|
||||||
|
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
|
||||||
|
Released under Apache 2.0 license as described in the file LICENSE.
|
||||||
|
|
||||||
|
Author: Leonardo de Moura
|
||||||
|
*/
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "kernel/type_checker.h"
|
||||||
|
#include "library/kernel_serializer.h"
|
||||||
|
#include "library/string.h"
|
||||||
|
|
||||||
|
namespace lean {
|
||||||
|
static name g_string_macro("string_macro");
|
||||||
|
static std::string g_string_opcode("Str");
|
||||||
|
|
||||||
|
static expr g_bit(Const(name("bit", "bit")));
|
||||||
|
static expr g_b0(Const(name("bit", "b0")));
|
||||||
|
static expr g_b1(Const(name("bit", "b1")));
|
||||||
|
static expr g_char(Const(name("string", "char")));
|
||||||
|
static expr g_ascii(Const(name("string", "ascii")));
|
||||||
|
static expr g_string(Const(name("string", "string")));
|
||||||
|
static expr g_empty(Const(name("string", "empty")));
|
||||||
|
static expr g_str(Const(name("string", "str")));
|
||||||
|
|
||||||
|
expr from_string_core(unsigned i, std::string const & s);
|
||||||
|
|
||||||
|
/** \brief The string macro is a compact way of encoding strings inside Lean expressions. */
|
||||||
|
class string_macro : public macro_definition_cell {
|
||||||
|
std::string m_value;
|
||||||
|
public:
|
||||||
|
string_macro(std::string const & v):m_value(v) {}
|
||||||
|
virtual bool lt(macro_definition_cell const & d) const {
|
||||||
|
return m_value < static_cast<string_macro const &>(d).m_value;
|
||||||
|
}
|
||||||
|
virtual name get_name() const { return g_string_macro; }
|
||||||
|
virtual expr get_type(expr const &, expr const *, extension_context &) const {
|
||||||
|
return g_string;
|
||||||
|
}
|
||||||
|
virtual optional<expr> expand(expr const &, extension_context &) const {
|
||||||
|
return some_expr(from_string_core(0, m_value));
|
||||||
|
}
|
||||||
|
virtual unsigned trust_level() const { return 0; }
|
||||||
|
virtual bool operator==(macro_definition_cell const & other) const {
|
||||||
|
string_macro const * other_ptr = dynamic_cast<string_macro const *>(&other);
|
||||||
|
return other_ptr && m_value == other_ptr->m_value;
|
||||||
|
}
|
||||||
|
virtual void display(std::ostream & out) const {
|
||||||
|
out << "\"";
|
||||||
|
for (unsigned i = 0; i < m_value.size(); i++) {
|
||||||
|
char c = m_value[i];
|
||||||
|
if (c == '\n')
|
||||||
|
out << "\\n";
|
||||||
|
else if (c == '\t')
|
||||||
|
out << "\\t";
|
||||||
|
else if (c == '\r')
|
||||||
|
out << "\\r";
|
||||||
|
else if (c == 0)
|
||||||
|
out << "\\0";
|
||||||
|
else if (c == '\"')
|
||||||
|
out << "\\\"";
|
||||||
|
else
|
||||||
|
out << c;
|
||||||
|
}
|
||||||
|
out << "\"";
|
||||||
|
}
|
||||||
|
virtual format pp(formatter const &, options const &) const {
|
||||||
|
std::ostringstream out;
|
||||||
|
display(out);
|
||||||
|
return format(out.str());
|
||||||
|
}
|
||||||
|
virtual bool is_atomic_pp(bool, bool) const { return true; }
|
||||||
|
virtual unsigned hash() const { return std::hash<std::string>()(m_value); }
|
||||||
|
virtual void write(serializer & s) const { s << g_string_opcode << m_value; }
|
||||||
|
std::string const & get_value() const { return m_value; }
|
||||||
|
};
|
||||||
|
|
||||||
|
expr mk_string_macro(std::string const & v) {
|
||||||
|
return mk_macro(macro_definition(new string_macro(v)));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_string_macro(expr const & e) {
|
||||||
|
return is_macro(e) && dynamic_cast<string_macro const *>(macro_def(e).raw()) != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
string_macro const & to_string_macro(expr const & e) {
|
||||||
|
lean_assert(is_string_macro(e));
|
||||||
|
return *static_cast<string_macro const *>(macro_def(e).raw());
|
||||||
|
}
|
||||||
|
|
||||||
|
static register_macro_deserializer_fn
|
||||||
|
string_macro_des_fn(g_string_opcode,
|
||||||
|
[](deserializer & d, unsigned num, expr const *) {
|
||||||
|
if (num != 0)
|
||||||
|
throw_corrupted_file();
|
||||||
|
std::string v = d.read_string();
|
||||||
|
return mk_string_macro(v);
|
||||||
|
});
|
||||||
|
|
||||||
|
bool has_string_decls(environment const & env) {
|
||||||
|
try {
|
||||||
|
type_checker tc(env);
|
||||||
|
return
|
||||||
|
tc.infer(g_b0) == g_bit &&
|
||||||
|
tc.infer(g_b1) == g_bit &&
|
||||||
|
tc.infer(g_ascii) == g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> g_char))))))) &&
|
||||||
|
tc.infer(g_empty) == g_string &&
|
||||||
|
tc.infer(g_str) == g_char >> (g_string >> g_string);
|
||||||
|
} catch (...) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expr from_char(unsigned char c) {
|
||||||
|
buffer<expr> bits;
|
||||||
|
while (c != 0) {
|
||||||
|
if (c % 2 == 1)
|
||||||
|
bits.push_back(g_b1);
|
||||||
|
else
|
||||||
|
bits.push_back(g_b0);
|
||||||
|
c /= 2;
|
||||||
|
}
|
||||||
|
while (bits.size() < 8)
|
||||||
|
bits.push_back(g_b0);
|
||||||
|
return mk_rev_app(g_ascii, bits.size(), bits.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
expr from_string_core(unsigned i, std::string const & s) {
|
||||||
|
if (i == s.size())
|
||||||
|
return g_empty;
|
||||||
|
else
|
||||||
|
return mk_app(g_str, from_char(s[i]), from_string_core(i+1, s));
|
||||||
|
}
|
||||||
|
|
||||||
|
expr from_string(std::string const & s) {
|
||||||
|
return mk_string_macro(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool to_char_core(expr const & e, buffer<char> & tmp) {
|
||||||
|
buffer<expr> args;
|
||||||
|
if (get_app_rev_args(e, args) == g_ascii && args.size() == 8) {
|
||||||
|
unsigned v = 0;
|
||||||
|
for (unsigned i = 0; i < args.size(); i++) {
|
||||||
|
v *= 2;
|
||||||
|
if (args[i] == g_b1)
|
||||||
|
v++;
|
||||||
|
else if (args[i] != g_b0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
tmp.push_back(v);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool to_string_core(expr const & e, buffer<char> & tmp) {
|
||||||
|
if (e == g_empty) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
buffer<expr> args;
|
||||||
|
return
|
||||||
|
get_app_args(e, args) == g_str &&
|
||||||
|
args.size() == 2 &&
|
||||||
|
to_char_core(args[0], tmp) &&
|
||||||
|
to_string_core(args[1], tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
optional<std::string> to_string(expr const & e) {
|
||||||
|
if (is_string_macro(e)) {
|
||||||
|
return optional<std::string>(to_string_macro(e).get_value());
|
||||||
|
} else {
|
||||||
|
buffer<char> tmp;
|
||||||
|
if (to_string_core(e, tmp)) {
|
||||||
|
std::string r;
|
||||||
|
unsigned i = tmp.size();
|
||||||
|
while (i > 0) {
|
||||||
|
--i;
|
||||||
|
r += tmp[i];
|
||||||
|
}
|
||||||
|
return optional<std::string>(r);
|
||||||
|
} else {
|
||||||
|
return optional<std::string>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
39
src/library/string.h
Normal file
39
src/library/string.h
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
|
||||||
|
Released under Apache 2.0 license as described in the file LICENSE.
|
||||||
|
|
||||||
|
Author: Leonardo de Moura
|
||||||
|
*/
|
||||||
|
#include <string>
|
||||||
|
#include "kernel/environment.h"
|
||||||
|
|
||||||
|
namespace lean {
|
||||||
|
/**
|
||||||
|
\brief Return true iff the environment \c env contains the following declarations
|
||||||
|
in the namespace 'bit'
|
||||||
|
b0 : bit
|
||||||
|
b1 : bit
|
||||||
|
and the following ones in the namespace 'string'
|
||||||
|
ascii : bit -> bit -> bit -> bit -> bit -> bit -> bit -> bit -> char
|
||||||
|
empty : string
|
||||||
|
str : char -> string -> string
|
||||||
|
*/
|
||||||
|
bool has_string_decls(environment const & env);
|
||||||
|
|
||||||
|
/**
|
||||||
|
\brief Return an expression that encodes the given string using the declarations
|
||||||
|
ascii, empty and str.
|
||||||
|
|
||||||
|
\see has_string_decls
|
||||||
|
|
||||||
|
\post *to_string(from_string(s)) == s
|
||||||
|
*/
|
||||||
|
expr from_string(std::string const & s);
|
||||||
|
|
||||||
|
/**
|
||||||
|
\brief If the given expression encodes a string, then convert it back to a string.
|
||||||
|
|
||||||
|
\see from_string
|
||||||
|
*/
|
||||||
|
optional<std::string> to_string(expr const & e);
|
||||||
|
}
|
3
tests/lean/run/string.lean
Normal file
3
tests/lean/run/string.lean
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
import string
|
||||||
|
check "aaa"
|
||||||
|
check "B"
|
Loading…
Reference in a new issue