diff --git a/src/frontends/lean/parser.cpp b/src/frontends/lean/parser.cpp index d433efa39..de9beb7fe 100644 --- a/src/frontends/lean/parser.cpp +++ b/src/frontends/lean/parser.cpp @@ -26,6 +26,7 @@ Author: Leonardo de Moura #include "library/module.h" #include "library/scoped_ext.h" #include "library/num.h" +#include "library/string.h" #include "library/error_handling/error_handling.h" #include "frontends/lean/parser.h" #include "frontends/lean/parser_bindings.h" @@ -865,9 +866,15 @@ expr parser::parse_decimal_expr() { } expr parser::parse_string_expr() { - // TODO(Leo) - next(); // to avoid loop - return expr(); + auto p = pos(); + std::string v = get_str_val(); + next(); + if (!m_has_string) + m_has_string = has_string_decls(m_env); + if (!*m_has_string) + throw parser_error("string cannot be encoded as expression, environment does not contain the type 'string' " + "(solution: use 'import string')", p); + return from_string(v); } expr parser::parse_nud() { diff --git a/src/frontends/lean/parser.h b/src/frontends/lean/parser.h index ba702f014..b599bd7b6 100644 --- a/src/frontends/lean/parser.h +++ b/src/frontends/lean/parser.h @@ -65,6 +65,7 @@ class parser { // This flag is when we are trying to parse mutually recursive declarations. bool m_no_undef_id_error; optional m_has_num; + optional m_has_string; void display_error_pos(unsigned line, unsigned pos); void display_error_pos(pos_info p); diff --git a/src/kernel/expr.h b/src/kernel/expr.h index 0f1f1a2cb..8c3669b01 100644 --- a/src/kernel/expr.h +++ b/src/kernel/expr.h @@ -360,6 +360,7 @@ public: bool is_atomic_pp(bool unicode, bool coercion) const { return m_ptr->is_atomic_pp(unicode, coercion); } unsigned hash() const { return m_ptr->hash(); } void write(serializer & s) const { return m_ptr->write(s); } + macro_definition_cell const * raw() const { return m_ptr; } }; /** \brief Macro attachments */ diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 79e004bd3..876b771f6 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -4,7 +4,7 @@ add_library(library deep_copy.cpp expr_lt.cpp io_state.cpp occurs.cpp normalize.cpp shared_environment.cpp module.cpp coercion.cpp private.cpp placeholder.cpp aliases.cpp level_names.cpp update_declaration.cpp choice.cpp scoped_ext.cpp locals.cpp - unifier.cpp explicit.cpp num.cpp) + unifier.cpp explicit.cpp num.cpp string.cpp) # hop_match.cpp) target_link_libraries(library ${LEAN_LIBS}) diff --git a/src/library/string.cpp b/src/library/string.cpp new file mode 100644 index 000000000..02335d169 --- /dev/null +++ b/src/library/string.cpp @@ -0,0 +1,188 @@ +/* +Copyright (c) 2014 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura +*/ +#include +#include +#include "kernel/type_checker.h" +#include "library/kernel_serializer.h" +#include "library/string.h" + +namespace lean { +static name g_string_macro("string_macro"); +static std::string g_string_opcode("Str"); + +static expr g_bit(Const(name("bit", "bit"))); +static expr g_b0(Const(name("bit", "b0"))); +static expr g_b1(Const(name("bit", "b1"))); +static expr g_char(Const(name("string", "char"))); +static expr g_ascii(Const(name("string", "ascii"))); +static expr g_string(Const(name("string", "string"))); +static expr g_empty(Const(name("string", "empty"))); +static expr g_str(Const(name("string", "str"))); + +expr from_string_core(unsigned i, std::string const & s); + +/** \brief The string macro is a compact way of encoding strings inside Lean expressions. */ +class string_macro : public macro_definition_cell { + std::string m_value; +public: + string_macro(std::string const & v):m_value(v) {} + virtual bool lt(macro_definition_cell const & d) const { + return m_value < static_cast(d).m_value; + } + virtual name get_name() const { return g_string_macro; } + virtual expr get_type(expr const &, expr const *, extension_context &) const { + return g_string; + } + virtual optional expand(expr const &, extension_context &) const { + return some_expr(from_string_core(0, m_value)); + } + virtual unsigned trust_level() const { return 0; } + virtual bool operator==(macro_definition_cell const & other) const { + string_macro const * other_ptr = dynamic_cast(&other); + return other_ptr && m_value == other_ptr->m_value; + } + virtual void display(std::ostream & out) const { + out << "\""; + for (unsigned i = 0; i < m_value.size(); i++) { + char c = m_value[i]; + if (c == '\n') + out << "\\n"; + else if (c == '\t') + out << "\\t"; + else if (c == '\r') + out << "\\r"; + else if (c == 0) + out << "\\0"; + else if (c == '\"') + out << "\\\""; + else + out << c; + } + out << "\""; + } + virtual format pp(formatter const &, options const &) const { + std::ostringstream out; + display(out); + return format(out.str()); + } + virtual bool is_atomic_pp(bool, bool) const { return true; } + virtual unsigned hash() const { return std::hash()(m_value); } + virtual void write(serializer & s) const { s << g_string_opcode << m_value; } + std::string const & get_value() const { return m_value; } +}; + +expr mk_string_macro(std::string const & v) { + return mk_macro(macro_definition(new string_macro(v))); +} + +bool is_string_macro(expr const & e) { + return is_macro(e) && dynamic_cast(macro_def(e).raw()) != nullptr; +} + +string_macro const & to_string_macro(expr const & e) { + lean_assert(is_string_macro(e)); + return *static_cast(macro_def(e).raw()); +} + +static register_macro_deserializer_fn +string_macro_des_fn(g_string_opcode, + [](deserializer & d, unsigned num, expr const *) { + if (num != 0) + throw_corrupted_file(); + std::string v = d.read_string(); + return mk_string_macro(v); + }); + +bool has_string_decls(environment const & env) { + try { + type_checker tc(env); + return + tc.infer(g_b0) == g_bit && + tc.infer(g_b1) == g_bit && + tc.infer(g_ascii) == g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> (g_bit >> g_char))))))) && + tc.infer(g_empty) == g_string && + tc.infer(g_str) == g_char >> (g_string >> g_string); + } catch (...) { + return false; + } +} + +expr from_char(unsigned char c) { + buffer bits; + while (c != 0) { + if (c % 2 == 1) + bits.push_back(g_b1); + else + bits.push_back(g_b0); + c /= 2; + } + while (bits.size() < 8) + bits.push_back(g_b0); + return mk_rev_app(g_ascii, bits.size(), bits.data()); +} + +expr from_string_core(unsigned i, std::string const & s) { + if (i == s.size()) + return g_empty; + else + return mk_app(g_str, from_char(s[i]), from_string_core(i+1, s)); +} + +expr from_string(std::string const & s) { + return mk_string_macro(s); +} + +bool to_char_core(expr const & e, buffer & tmp) { + buffer args; + if (get_app_rev_args(e, args) == g_ascii && args.size() == 8) { + unsigned v = 0; + for (unsigned i = 0; i < args.size(); i++) { + v *= 2; + if (args[i] == g_b1) + v++; + else if (args[i] != g_b0) + return false; + } + tmp.push_back(v); + return true; + } else { + return false; + } +} + +bool to_string_core(expr const & e, buffer & tmp) { + if (e == g_empty) { + return true; + } else { + buffer args; + return + get_app_args(e, args) == g_str && + args.size() == 2 && + to_char_core(args[0], tmp) && + to_string_core(args[1], tmp); + } +} + +optional to_string(expr const & e) { + if (is_string_macro(e)) { + return optional(to_string_macro(e).get_value()); + } else { + buffer tmp; + if (to_string_core(e, tmp)) { + std::string r; + unsigned i = tmp.size(); + while (i > 0) { + --i; + r += tmp[i]; + } + return optional(r); + } else { + return optional(); + } + } +} +} diff --git a/src/library/string.h b/src/library/string.h new file mode 100644 index 000000000..30fdc5c02 --- /dev/null +++ b/src/library/string.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2014 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura +*/ +#include +#include "kernel/environment.h" + +namespace lean { +/** + \brief Return true iff the environment \c env contains the following declarations + in the namespace 'bit' + b0 : bit + b1 : bit + and the following ones in the namespace 'string' + ascii : bit -> bit -> bit -> bit -> bit -> bit -> bit -> bit -> char + empty : string + str : char -> string -> string +*/ +bool has_string_decls(environment const & env); + +/** + \brief Return an expression that encodes the given string using the declarations + ascii, empty and str. + + \see has_string_decls + + \post *to_string(from_string(s)) == s +*/ +expr from_string(std::string const & s); + +/** + \brief If the given expression encodes a string, then convert it back to a string. + + \see from_string +*/ +optional to_string(expr const & e); +} diff --git a/tests/lean/run/string.lean b/tests/lean/run/string.lean new file mode 100644 index 000000000..4768ea11d --- /dev/null +++ b/tests/lean/run/string.lean @@ -0,0 +1,3 @@ +import string +check "aaa" +check "B" \ No newline at end of file