fear(frontends/lean): pretty-printing

This commit is contained in:
Sebastian Ullrich 2016-08-26 17:26:11 -04:00 committed by Leonardo de Moura
parent 692b358031
commit 7ece6ca3a8
7 changed files with 114 additions and 86 deletions

View file

@ -549,6 +549,10 @@ optional<name> pretty_fn::is_abbreviated(expr const & e) const {
return optional<name>();
}
static format escape(name const & n) {
return format(n.escape());
}
auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ_params) -> result {
if (auto it = is_abbreviated(e))
return pp_abbreviation(e, *it, false);
@ -588,11 +592,11 @@ auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ
to_buffer(const_levels(e), ls);
if (num_ref_univ_params) {
if (ls.size() <= *num_ref_univ_params)
return result(format(n));
return result(escape(n));
else
first_idx = *num_ref_univ_params;
}
format r = compose(format(n), format(".{"));
format r = compose(escape(n), format(".{"));
bool first = true;
for (unsigned i = first_idx; i < ls.size(); i++) {
level const & l = ls[i];
@ -608,7 +612,7 @@ auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ
r += format("}");
return result(group(r));
} else {
return result(format(n));
return result(escape(n));
}
}
@ -620,7 +624,7 @@ auto pretty_fn::pp_meta(expr const & e) -> result {
}
auto pretty_fn::pp_local(expr const & e) -> result {
return result(format(local_pp_name(e)));
return result(escape(local_pp_name(e)));
}
bool pretty_fn::has_implicit_args(expr const & f) {
@ -669,7 +673,7 @@ format pretty_fn::pp_binder(expr const & local) {
auto bi = local_info(local);
if (bi != binder_info())
r += format(open_binder_string(bi, m_unicode));
r += format(local_pp_name(local));
r += escape(local_pp_name(local));
if (m_binder_types) {
r += space();
r += compose(colon(), nest(m_indent, compose(line(), pp_child(mlocal_type(local), 0).fmt())));
@ -684,7 +688,7 @@ format pretty_fn::pp_binder_block(buffer<name> const & names, expr const & type,
if (m_binder_types || bi != binder_info())
r += format(open_binder_string(bi, m_unicode));
for (name const & n : names) {
r += format(n);
r += escape(n);
}
if (m_binder_types) {
r += space();
@ -784,7 +788,7 @@ auto pretty_fn::pp_have(expr const & e) -> result {
format proof_fmt = pp_child(proof, 0).fmt();
format body_fmt = pp_child(body, 0).fmt();
format head_fmt = *g_have_fmt;
format r = head_fmt + space() + format(n) + space();
format r = head_fmt + space() + escape(n) + space();
r += colon() + nest(m_indent, line() + type_fmt + comma() + space() + *g_from_fmt);
r = group(r);
r += nest(m_indent, line() + proof_fmt + comma());

View file

@ -7,6 +7,7 @@ Author: Leonardo de Moura
#include <cctype>
#include <string>
#include "util/exception.h"
#include "util/name.h"
#include "util/utf8.h"
#include "frontends/lean/scanner.h"
#include "frontends/lean/parser_config.h"
@ -19,52 +20,6 @@ unsigned scanner::get_utf8_size(unsigned char c) {
return r;
}
unsigned char to_uchar(char c) { return static_cast<unsigned char>(c); }
unsigned utf8_to_unicode(char const * begin, char const * end) {
unsigned result = 0;
if (begin == end)
return result;
auto it = begin;
unsigned c = to_uchar(*it);
++it;
if (c < 128)
return c;
unsigned mask = (1u << 6) -1;
unsigned hmask = mask;
unsigned shift = 0;
unsigned num_bits = 0;
while ((c & 0xC0) == 0xC0) {
c <<= 1;
c &= 0xff;
num_bits += 6;
hmask >>= 1;
shift++;
result <<= 6;
if (it == end)
return 0;
result |= *it & mask;
++it;
}
result |= ((c >> shift) & hmask) << num_bits;
return result;
}
bool is_greek_unicode(unsigned u) { return 0x391 <= u && u <= 0x3DD; }
bool is_letter_like_unicode(unsigned u) {
return
(0x3b1 <= u && u <= 0x3c9 && u != 0x3bb) || // Lower greek, but lambda
(0x391 <= u && u <= 0x3A9 && u != 0x3A0 && u != 0x3A3) || // Upper greek, but Pi and Sigma
(0x3ca <= u && u <= 0x3fb) || // Coptic letters
(0x1f00 <= u && u <= 0x1ffe) || // Polytonic Greek Extended Character Set
(0x2100 <= u && u <= 0x214f); // Letter like block
}
bool is_sub_script_alnum_unicode(unsigned u) {
return
(0x207f <= u && u <= 0x2089) || // n superscript and numberic subscripts
(0x2090 <= u && u <= 0x209c); // letter-like subscripts
}
void scanner::next() {
lean_assert(m_curr != EOF);
m_spos++;
@ -314,22 +269,6 @@ void scanner::next_utf(buffer<char> & cs) {
}
static char const * g_error_key_msg = "unexpected token";
constexpr char16_t id_begin_escape = u'«';
constexpr char16_t id_end_escape = u'»';
static bool is_id_first(char const * begin, char const * end) {
if (std::isalpha(*begin) || *begin == '_')
return true;
unsigned u = utf8_to_unicode(begin, end);
return u == id_begin_escape || is_letter_like_unicode(u);
}
bool is_id_rest(char const * begin, char const * end) {
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
return true;
unsigned u = utf8_to_unicode(begin, end);
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
}
auto scanner::read_key_cmd_id() -> token_kind {
buffer<char> cs;

View file

@ -91,7 +91,6 @@ public:
};
};
std::ostream & operator<<(std::ostream & out, scanner::token_kind k);
bool is_id_rest(char const * begin, char const * end);
void initialize_scanner();
void finalize_scanner();
}

View file

@ -23,23 +23,64 @@ Author: Leonardo de Moura
namespace lean {
constexpr char const * anonymous_str = "[anonymous]";
void name::imp::display_core(std::ostream & out, imp * p, char const * sep) {
lean_assert(p != nullptr);
if (p->m_prefix) {
display_core(out, p->m_prefix, sep);
out << sep;
}
if (p->m_is_string)
out << p->m_str;
else
out << p->m_k;
bool is_greek_unicode(unsigned u) { return 0x391 <= u && u <= 0x3DD; }
bool is_letter_like_unicode(unsigned u) {
return
(0x3b1 <= u && u <= 0x3c9 && u != 0x3bb) || // Lower greek, but lambda
(0x391 <= u && u <= 0x3A9 && u != 0x3A0 && u != 0x3A3) || // Upper greek, but Pi and Sigma
(0x3ca <= u && u <= 0x3fb) || // Coptic letters
(0x1f00 <= u && u <= 0x1ffe) || // Polytonic Greek Extended Character Set
(0x2100 <= u && u <= 0x214f); // Letter like block
}
bool is_sub_script_alnum_unicode(unsigned u) {
return
(0x207f <= u && u <= 0x2089) || // n superscript and numberic subscripts
(0x2090 <= u && u <= 0x209c); // letter-like subscripts
}
void name::imp::display(std::ostream & out, imp * p, char const * sep) {
bool is_id_first(char const * begin, char const * end) {
if (std::isalpha(*begin) || *begin == '_')
return true;
unsigned u = utf8_to_unicode(begin, end);
return u == id_begin_escape || is_letter_like_unicode(u);
}
bool is_id_rest(char const * begin, char const * end) {
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
return true;
unsigned u = utf8_to_unicode(begin, end);
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
}
void name::imp::display_core(std::ostream & out, imp * p, bool escape, char const * sep) {
lean_assert(p != nullptr);
if (p->m_prefix) {
display_core(out, p->m_prefix, escape, sep);
out << sep;
}
if (p->m_is_string) {
size_t sz = strlen(p->m_str);
bool must_escape = escape && *p->m_str && !is_id_first(p->m_str, p->m_str + sz);
if (escape) {
for (char * s = p->m_str; !must_escape && *s; s += get_utf8_size(*s)) {
if (!is_id_rest(s, p->m_str + sz))
must_escape = true;
}
}
if (must_escape)
out << "«" << p->m_str << "»";
else
out << p->m_str;
} else {
out << p->m_k;
}
}
void name::imp::display(std::ostream & out, imp * p, bool escape, char const * sep) {
if (p == nullptr)
out << anonymous_str;
else
display_core(out, p, sep);
display_core(out, p, escape, sep);
}
void copy_limbs(name::imp * p, buffer<name::imp *> & limbs) {
@ -292,12 +333,18 @@ bool name::is_safe_ascii() const {
std::string name::to_string(char const * sep) const {
std::ostringstream s;
imp::display(s, m_ptr, sep);
imp::display(s, m_ptr, false, sep);
return s.str();
}
std::string name::escape(char const * sep) const {
std::ostringstream s;
imp::display(s, m_ptr, true, sep);
return s.str();
}
std::ostream & operator<<(std::ostream & out, name const & n) {
name::imp::display(out, n.m_ptr);
name::imp::display(out, n.m_ptr, false);
return out;
}

View file

@ -18,6 +18,12 @@ Author: Leonardo de Moura
namespace lean {
constexpr char const * lean_name_separator = ".";
constexpr char16_t id_begin_escape = u'«';
constexpr char16_t id_end_escape = u'»';
bool is_id_first(char const * begin, char const * end);
bool is_id_rest(char const * begin, char const * end);
enum class name_kind { ANONYMOUS, STRING, NUMERAL };
/**
\brief Hierarchical names.
@ -36,8 +42,8 @@ public:
};
void dealloc();
imp(bool s, imp * p):m_rc(1), m_is_string(s), m_hash(0), m_prefix(p) { if (p) p->inc_ref(); }
static void display_core(std::ostream & out, imp * p, char const * sep);
static void display(std::ostream & out, imp * p, char const * sep = lean_name_separator);
static void display_core(std::ostream & out, imp * p, bool escape, char const * sep);
static void display(std::ostream & out, imp * p, bool escape, char const * sep = lean_name_separator);
friend void copy_limbs(imp * p, buffer<name::imp *> & limbs);
};
private:
@ -124,6 +130,7 @@ public:
name get_prefix() const { return is_atomic() ? name() : name(m_ptr->m_prefix); }
/** \brief Convert this hierarchical name into a string. */
std::string to_string(char const * sep = lean_name_separator) const;
std::string escape(char const * sep = lean_name_separator) const;
/** \brief Size of the this name (in characters). */
size_t size() const;
/** \brief Size of the this name in unicode. */

View file

@ -67,4 +67,35 @@ std::string utf8_trim(std::string const & s) {
stop = s.size();
return s.substr(start, stop - start);
}
unsigned char to_uchar(char c) { return static_cast<unsigned char>(c); }
unsigned utf8_to_unicode(char const * begin, char const * end) {
unsigned result = 0;
if (begin == end)
return result;
auto it = begin;
unsigned c = to_uchar(*it);
++it;
if (c < 128)
return c;
unsigned mask = (1u << 6) -1;
unsigned hmask = mask;
unsigned shift = 0;
unsigned num_bits = 0;
while ((c & 0xC0) == 0xC0) {
c <<= 1;
c &= 0xff;
num_bits += 6;
hmask >>= 1;
shift++;
result <<= 6;
if (it == end)
return 0;
result |= *it & mask;
++it;
}
result |= ((c >> shift) & hmask) << num_bits;
return result;
}
}

View file

@ -13,4 +13,5 @@ unsigned get_utf8_size(unsigned char c);
size_t utf8_strlen(char const * str);
char const * get_utf8_last_char(char const * str);
std::string utf8_trim(std::string const & s);
unsigned utf8_to_unicode(char const * begin, char const * end);
}