feat(frontends/lean): improve pretty printing space insertion heuristic

This commit is contained in:
Sebastian Ullrich 2015-09-28 01:17:46 +02:00 committed by Leonardo de Moura
parent 8fd8ff2773
commit 189a300b11
9 changed files with 140 additions and 60 deletions

View file

@ -6,6 +6,8 @@ Author: Leonardo de Moura
*/
#include <algorithm>
#include <limits>
#include <string>
#include <util/utf8.h>
#include "util/flet.h"
#include "kernel/replace_fn.h"
#include "kernel/free_vars.h"
@ -36,6 +38,7 @@ Author: Leonardo de Moura
#include "frontends/lean/builtin_exprs.h"
#include "frontends/lean/parser_config.h"
#include "frontends/lean/local_ref_info.h"
#include "frontends/lean/scanner.h"
namespace lean {
static format * g_ellipsis_n_fmt = nullptr;
@ -996,20 +999,6 @@ auto pretty_fn::pp_notation_child(expr const & e, unsigned lbp, unsigned rbp) ->
}
}
static bool add_extra_space_first(name const & tk) {
// TODO(Leo): this is a hard-coded temporary solution for deciding whether extra
// spaces should be added or not when pretty printing notation.
// We should implement a better solution in the future.
return tk != "(" && tk != ")" && tk != "[";
}
static bool add_extra_space(name const & tk) {
// TODO(Leo): this is a hard-coded temporary solution for deciding whether extra
// spaces should be added or not when pretty printing notation.
// We should implement a better solution in the future.
return tk != "," && tk != "(" && tk != ")" && tk != "[";
}
static bool is_atomic_notation(notation_entry const & entry) {
if (!entry.is_nud())
return false;
@ -1034,21 +1023,18 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
unsigned i = ts.size();
unsigned last_rbp = inf_bp()-1;
unsigned token_lbp = 0;
bool extra_space = false;
bool last_is_skip = false;
bool last = true;
while (i > 0) {
--i;
format curr;
notation::action const & a = ts[i].get_action();
name const & tk = ts[i].get_token();
format tk_fmt = format(tk);
switch (a.kind()) {
case notation::action_kind::Skip:
curr = format(tk);
if (last) {
last_rbp = inf_bp();
last_is_skip = true;
}
curr = tk_fmt;
if (last)
last_rbp = inf_bp();
break;
case notation::action_kind::Expr:
if (args.empty() || !args.back()) {
@ -1057,14 +1043,7 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
expr e = *args.back();
args.pop_back();
result e_r = pp_notation_child(e, token_lbp, a.rbp());
format e_fmt = e_r.fmt();
curr = format(tk);
// we add space after the token only when
// 1- add_extra_space(tk) is true AND
// 2- tk is the first token in a nud notation
if (add_extra_space(tk) && (!entry.is_nud() || i != 0 || m_extra_spaces))
curr = curr + space();
curr = curr + e_fmt;
curr = tk_fmt + e_r.fmt();
if (last)
last_rbp = a.rbp();
break;
@ -1109,8 +1088,6 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
unsigned curr_lbp = token_lbp;
if (auto t = a.get_terminator()) {
curr = format(*t);
if (add_extra_space(*t) && m_extra_spaces)
curr = space() + curr;
curr_lbp = get_some_precedence(m_token_table, *t);
}
unsigned j = rec_args.size();
@ -1120,15 +1097,10 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
--j;
result arg_res = pp_notation_child(rec_args[j], curr_lbp, a.rbp());
if (j == 0) {
if (add_extra_space_first(tk) && (!entry.is_nud() || i != 0 || m_extra_spaces))
curr = format(tk) + space() + arg_res.fmt() + curr;
else
curr = format(tk) + arg_res.fmt() + curr;
curr = tk_fmt + arg_res.fmt() + curr;
} else {
curr = sep_fmt + space() + arg_res.fmt() + curr;
curr = sep_fmt + arg_res.fmt() + curr;
}
if (j > 0 && add_extra_space(a.get_sep()))
curr = space() + curr;
curr_lbp = sep_lbp;
}
break;
@ -1136,12 +1108,10 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
case notation::action_kind::Binder:
if (locals.size() != 1)
return optional<result>();
curr = format(tk) + pp_binder(locals[0]);
if (!last)
curr = curr + space();
curr = tk_fmt + pp_binder(locals[0]);
break;
case notation::action_kind::Binders:
curr = format(tk) + pp_binders(locals);
curr = tk_fmt + pp_binders(locals);
break;
case notation::action_kind::ScopedExpr:
if (args.empty() || !args.back()) {
@ -1177,8 +1147,7 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
if (locals.empty())
return optional<result>();
result e_r = pp_notation_child(e, token_lbp, a.rbp());
format e_fmt = e_r.fmt();
curr = format(tk) + space() + e_fmt;
curr = tk_fmt + e_r.fmt();
if (last)
last_rbp = a.rbp();
break;
@ -1192,12 +1161,8 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
fmt = curr;
last = false;
} else {
if (extra_space)
curr = curr + space();
fmt = curr + fmt;
}
if (m_extra_spaces || !last_is_skip)
extra_space = add_extra_space(tk);
}
unsigned first_lbp = inf_bp();
if (!entry.is_nud()) {
@ -1208,10 +1173,7 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
expr e = *args.back();
args.pop_back();
format e_fmt = pp_notation_child(e, token_lbp, 0).fmt();
if (m_extra_spaces || !last_is_skip)
fmt = e_fmt + space() + fmt;
else
fmt = e_fmt + fmt;
fmt = e_fmt + fmt;
}
return optional<result>(result(first_lbp, last_rbp, fmt));
}
@ -1221,7 +1183,7 @@ auto pretty_fn::pp_notation(expr const & e) -> optional<result> {
if (!m_notation || is_var(e))
return optional<result>();
for (notation_entry const & entry : get_notation_entries(m_env, head_index(e))) {
if (entry.group() != notation_entry_group::Main)
if (entry.group() == notation_entry_group::Reserve)
continue;
if (!m_unicode && !entry.is_safe_ascii())
continue; // ignore this notation declaration since unicode support is not enabled
@ -1338,12 +1300,50 @@ class pp_beta_reduce_fn : public replace_visitor {
}
};
std::string sexpr_to_string(sexpr const & s) {
if (is_string(s))
return to_string(s);
std::stringstream ss;
ss << s;
return ss.str();
}
// check whether a space must be inserted between the strings so that lexing them would
// produce separate tokens
bool pretty_fn::needs_space_sep(std::string const & s1, std::string const & s2) const {
if (is_id_rest(get_utf8_last_char(s1.data()), s1.data() + s1.size()) && is_id_rest(s2.data(), s2.data() + s2.size()))
return true; // would be lexed as a single identifier without space
// check whether s1 + s2 has a longer prefix in the token table than s1
token_table const * t = &m_token_table;
for (char c : s1) {
t = t->find(c);
if (!t)
return false; // s1 must be an identifier, and we know s2 does not start with is_id_rest
}
for (char c : s2) {
t = t->find(c);
if (!t)
return false;
if (t->value())
return true;
}
return true; // the next identifier may expand s1 + s2 to a token
}
format pretty_fn::operator()(expr const & e) {
m_depth = 0; m_num_steps = 0;
result r;
if (m_beta)
return pp_child(purify(pp_beta_reduce_fn()(e)), 0).fmt();
r = pp_child(purify(pp_beta_reduce_fn()(e)), 0);
else
return pp_child(purify(e), 0).fmt();
r = pp_child(purify(e), 0);
// insert spaces so that lexing the result round-trips
std::function<bool(sexpr const &, sexpr const &)> sep; // NOLINT
sep = [&](sexpr const & s1, sexpr const & s2) { return needs_space_sep(sexpr_to_string(s1), sexpr_to_string(s2)); };
return r.fmt().separate_tokens(sep);
}
formatter_factory mk_pretty_formatter_factory() {

View file

@ -93,6 +93,7 @@ private:
optional<result> pp_notation(expr const & e);
result add_paren_if_needed(result const & r, unsigned bp);
bool needs_space_sep(std::string const &s1, std::string const &s2) const;
result pp_overriden_local_ref(expr const & e);
bool ignore_local_ref(expr const & e);

View file

@ -314,10 +314,10 @@ static bool is_id_first(buffer<char> const & cs, unsigned i) {
return is_letter_like_unicode(u);
}
static bool is_id_rest(buffer<char> const & cs, unsigned i) {
if (std::isalnum(cs[i]) || cs[i] == '_' || cs[i] == '\'')
bool is_id_rest(char const * begin, char const * end) {
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
return true;
unsigned u = utf8_to_unicode(cs.begin() + i, cs.end());
unsigned u = utf8_to_unicode(begin, end);
return is_letter_like_unicode(u) || is_super_sub_script_alnum_unicode(u);
}
@ -337,7 +337,7 @@ auto scanner::read_key_cmd_id() -> token_kind {
unsigned i = id_sz;
next_utf(cs);
num_utfs++;
if (is_id_rest(cs, i)) {
if (is_id_rest(&cs[i], cs.end())) {
} else if (cs[i] == '.') {
next_utf(cs);
num_utfs++;

View file

@ -92,6 +92,7 @@ public:
};
};
std::ostream & operator<<(std::ostream & out, scanner::token_kind k);
bool is_id_rest(char const * begin, char const * end);
void initialize_scanner();
void finalize_scanner();
}

View file

@ -177,6 +177,64 @@ format wrap(format const & f1, format const & f2) {
return f1 + choice(format(" "), line()) + f2;
}
std::tuple<sexpr, sexpr const *> format::separate_tokens(sexpr const & s, sexpr const * last,
std::function<bool(sexpr const &, sexpr const &)> sep // NOLINT
) const {
switch (sexpr_kind(s)) {
case format_kind::NIL:
case format_kind::LINE:
case format_kind::COLOR_BEGIN:
case format_kind::COLOR_END:
return std::make_tuple(s, last);
case format_kind::COMPOSE:
case format_kind::FLAT_COMPOSE:
{
sexpr list = sexpr_compose_list(s);
list = map(list, [&](sexpr const & s) {
sexpr t;
std::tie(t, last) = separate_tokens(s, last, sep);
return t;
});
sexpr t = sexpr_kind(m_value) == format_kind::COMPOSE ? sexpr_compose(list) : sexpr_flat_compose(list);
return std::make_tuple(t, last);
}
case format_kind::NEST:
{
sexpr t;
std::tie(t, last) = separate_tokens(sexpr_nest_s(s), last, sep);
return std::make_tuple(sexpr_nest(sexpr_nest_i(s), t), last);
}
case format_kind::TEXT:
{
sexpr const & text = sexpr_text_t(s);
if (last && sep(*last, text))
return std::make_tuple(sexpr_compose({*g_sexpr_space, s}), &text);
else
return std::make_tuple(s, &text);
}
case format_kind::CHOICE:
{
// we assume that choices only differ in spacing and thus share their last token
sexpr s1, s2; sexpr const * last1, * last2;
std::tie(s1, last1) = separate_tokens(sexpr_choice_1(s), last, sep);
std::tie(s2, last2) = separate_tokens(sexpr_choice_2(s), last, sep);
lean_assert(last1 == last2 || (last1 && last2 && *last1 == *last2));
return std::make_tuple(sexpr_choice(s1, s2), last1);
}
}
lean_unreachable(); // LCOV_EXCL_LINE
}
/**
\brief Replaces every text sepxr \c t with <tt>compose(" ", t)</tt> if there is a preceding
text sexpr \c s and <tt>sep(s, t)</tt> is true
*/
format format::separate_tokens(
std::function<bool(sexpr const &, sexpr const &)> sep // NOLINT
) const {
return format(std::get<0>(separate_tokens(m_value, nullptr, sep)));
}
/**
\brief Auxiliary exception used to sign that the amount of
available space was exhausted. It is used in \c space_upto_line_break and

View file

@ -120,6 +120,10 @@ private:
return sexpr(sexpr(format::format_kind::LINE), sexpr());
}
std::tuple<sexpr, sexpr const *> separate_tokens(sexpr const & s, sexpr const * last,
std::function<bool(sexpr const &, sexpr const &)> sep //NOLINT
) const;
// Functions used inside of pretty printing
static bool space_upto_line_break_list_exceeded(sexpr const & s, int available, std::vector<pair<sexpr, unsigned>> const & todo);
static int space_upto_line_break(sexpr const & s, int available, bool & found);
@ -169,6 +173,8 @@ public:
bool is_nil_fmt() const { return kind() == format_kind::NIL; }
unsigned hash() const { return m_value.hash(); }
format separate_tokens(std::function<bool(sexpr const &, sexpr const &)> sep) const; // NOLINT
friend format compose(format const & f1, format const & f2);
friend format nest(int i, format const & f);
friend format highlight(format const & f, format::format_color const c);

View file

@ -32,7 +32,8 @@ sexpr map(sexpr const & l, F f) {
return l;
} else {
lean_assert(is_cons(l));
return sexpr(f(head(l)), map(tail(l), f));
auto x = f(head(l)); // force left-to-right evaluation order
return sexpr(x, map(tail(l), f));
}
}

View file

@ -5,6 +5,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <cstdlib>
#include "util/debug.h"
namespace lean {
bool is_utf8_next(unsigned char c) { return (c & 0xC0) == 0x80; }
@ -37,4 +38,15 @@ size_t utf8_strlen(char const * str) {
}
return r;
}
char const * get_utf8_last_char(char const * str) {
char const * r;
lean_assert(*str != 0);
do {
r = str;
unsigned sz = get_utf8_size(*str);
str += sz;
} while (*str != 0);
return r;
}
}

View file

@ -9,4 +9,5 @@ namespace lean {
bool is_utf8_next(unsigned char c);
unsigned get_utf8_size(unsigned char c);
size_t utf8_strlen(char const * str);
char const * get_utf8_last_char(char const * str);
}