From da08079af9737184aca2846290f0b5daf9e5cc97 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Wed, 30 Sep 2015 16:15:07 +0200 Subject: [PATCH] feat(frontends/lean): allow specifying notation spacing via quoted symbols Unquoted tokens inherit their spacing from the respective reserved definition. --- src/frontends/lean/notation_cmd.cpp | 77 ++++++++++++++++------------ src/frontends/lean/parse_table.cpp | 2 +- src/frontends/lean/parse_table.h | 6 ++- src/frontends/lean/parser.cpp | 4 +- src/frontends/lean/parser_config.cpp | 9 ++-- src/frontends/lean/pp.cpp | 2 +- src/frontends/lean/scanner.cpp | 3 +- src/util/sexpr/format.h | 3 +- src/util/utf8.cpp | 18 +++++++ src/util/utf8.h | 3 ++ 10 files changed, 83 insertions(+), 44 deletions(-) diff --git a/src/frontends/lean/notation_cmd.cpp b/src/frontends/lean/notation_cmd.cpp index 2fd36512f..10bfbcd7e 100644 --- a/src/frontends/lean/notation_cmd.cpp +++ b/src/frontends/lean/notation_cmd.cpp @@ -122,7 +122,9 @@ static token_entry mk_token_entry(std::string const & tk, unsigned prec, notatio static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, notation_entry_group grp, bool parse_only, unsigned priority) -> pair> { - std::string tk = parse_symbol(p, "invalid notation declaration, quoted symbol or identifier expected"); + bool explicit_pp = p.curr_is_quoted_symbol(); + std::string pp_tk = parse_symbol(p, "invalid notation declaration, quoted symbol or identifier expected"); + std::string tk = utf8_trim(pp_tk); char const * tks = tk.c_str(); check_not_forbidden(tks); environment const & env = p.env(); @@ -130,19 +132,22 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota optional prec; optional reserved_pt; + optional reserved_transition; optional reserved_action; if (grp == notation_entry_group::Main) { if (k == mixfix_kind::prefix) { if (auto ls = get_reserved_nud_table(p.env()).find(tks)) { // Remark: we are ignoring multiple actions in the reserved notation table reserved_pt = head(ls).second; - reserved_action = head(ls).first.get_action(); + reserved_transition = head(ls).first; + reserved_action = reserved_transition->get_action(); } } else { if (auto ls = get_reserved_led_table(p.env()).find(tks)) { // Remark: we are ignoring multiple actions in the reserved notation table reserved_pt = head(ls).second; - reserved_action = head(ls).first.get_action(); + reserved_transition = head(ls).first; + reserved_action = reserved_transition->get_action(); } } } @@ -211,6 +216,9 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota } } + if (reserved_action && !explicit_pp) + pp_tk = reserved_transition->get_pp_token().to_string(); + if (grp == notation_entry_group::Reserve) { // reserve notation commands do not have a denotation expr dummy = mk_Prop(); @@ -218,16 +226,16 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota throw parser_error("invalid reserve notation, found `:=`", p.pos()); switch (k) { case mixfix_kind::infixl: - return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), + return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)), dummy, overload, priority, grp, parse_only), new_token); case mixfix_kind::infixr: - return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), + return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)), dummy, overload, priority, grp, parse_only), new_token); case mixfix_kind::postfix: - return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action())), + return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action(), pp_tk)), dummy, overload, priority, grp, parse_only), new_token); case mixfix_kind::prefix: - return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec))), + return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec), pp_tk)), dummy, overload, priority, grp, parse_only), new_token); } } else { @@ -239,17 +247,17 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota case mixfix_kind::infixl: #if defined(__GNUC__) && !defined(__CLANG__) #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" - return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), + return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)), mk_app(f, Var(1), Var(0)), overload, priority, grp, parse_only), new_token); #endif case mixfix_kind::infixr: - return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), + return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)), mk_app(f, Var(1), Var(0)), overload, priority, grp, parse_only), new_token); case mixfix_kind::postfix: - return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action())), + return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action(), pp_tk)), mk_app(f, Var(0)), overload, priority, grp, parse_only), new_token); case mixfix_kind::prefix: - return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec))), + return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec), pp_tk)), mk_app(f, Var(0)), overload, priority, grp, parse_only), new_token); } } @@ -268,9 +276,9 @@ static name parse_quoted_symbol_or_token(parser & p, buffer & new_t used_default = false; if (p.curr_is_quoted_symbol()) { environment const & env = p.env(); - auto tk = p.get_name_val(); - auto tks = tk.to_string(); - auto tkcs = tks.c_str(); + auto pp_tk = p.get_name_val(); + auto tks = utf8_trim(pp_tk.to_string()); + auto tkcs = tks.c_str(); check_not_forbidden(tkcs); p.next(); if (p.curr_is_token(get_colon_tk())) { @@ -281,7 +289,7 @@ static name parse_quoted_symbol_or_token(parser & p, buffer & new_t new_tokens.push_back(mk_token_entry(tkcs, LEAN_DEFAULT_PRECEDENCE, grp)); used_default = true; } - return tk; + return pp_tk; } else if (p.curr_is_keyword()) { auto tk = p.get_token_info().token(); check_not_forbidden(tk.to_string().c_str()); @@ -452,15 +460,16 @@ static unsigned parse_binders_rbp(parser & p) { } static transition parse_transition(parser & p, optional const & pt, name const & tk, - buffer & locals, buffer & new_tokens, notation_entry_group grp) { + buffer & locals, buffer & new_tokens, notation_entry_group grp, + name const & pp_tk) { if (p.curr_is_token_or_id(get_binder_tk())) { p.next(); unsigned rbp = parse_binders_rbp(p); - return transition(tk, mk_binder_action(rbp)); + return transition(tk, mk_binder_action(rbp), pp_tk); } else if (p.curr_is_token_or_id(get_binders_tk())) { p.next(); unsigned rbp = parse_binders_rbp(p); - return transition(tk, mk_binders_action(rbp)); + return transition(tk, mk_binders_action(rbp), pp_tk); } else if (p.curr_is_identifier()) { unsigned default_prec = get_default_prec(pt, tk); name n = p.get_name_val(); @@ -470,10 +479,10 @@ static transition parse_transition(parser & p, optional const & pt, expr l = mk_local(n, local_type); p.add_local(l); locals.push_back(l); - return transition(tk, a); + return transition(tk, a, pp_tk); } else if (p.curr_is_quoted_symbol() || p.curr_is_keyword() || p.curr_is_token(get_assign_tk()) || p.curr_is_command() || p.curr_is_eof()) { - return transition(tk, mk_skip_action()); + return transition(tk, mk_skip_action(), pp_tk); } else { throw parser_error("invalid notation declaration, quoted-symbol, identifier, " "'binder', 'binders' expected", p.pos()); @@ -511,48 +520,52 @@ static notation_entry parse_notation_core(parser & p, bool overload, notation_en bool used_default = false; while ((grp != notation_entry_group::Reserve && !p.curr_is_token(get_assign_tk())) || (grp == notation_entry_group::Reserve && !p.curr_is_command() && !p.curr_is_eof())) { - name tk = parse_quoted_symbol_or_token(p, new_tokens, used_default, grp); + name pp_tk = parse_quoted_symbol_or_token(p, new_tokens, used_default, grp).to_string(); + name tk = utf8_trim(pp_tk.to_string()); if (auto at = find_next(reserved_pt, tk)) { // Remark: we are ignoring multiple actions in the reserved notation table - action const & a = head(at).first.get_action(); + transition const & trans = head(at).first; + action const & a = trans.get_action(); reserved_pt = head(at).second; + if (!p.curr_is_quoted_symbol()) + pp_tk = trans.get_pp_token(); switch (a.kind()) { case notation::action_kind::Skip: if (!p.curr_is_quoted_symbol() && !p.curr_is_keyword() && !p.curr_is_token(get_assign_tk())) { if (g_allow_local && !p.curr_is_token_or_id(get_binders_tk())) { - ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); + ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk)); break; } p.check_token_or_id_next(get_binders_tk(), "invalid notation declaration, quoted-symbol, keyword or `:=` expected " "(declaration prefix matches reserved notation)"); } - ts.push_back(transition(tk, a)); + ts.push_back(transition(tk, a, pp_tk)); break; case notation::action_kind::Binder: if (g_allow_local && !p.curr_is_token_or_id(get_binder_tk())) { - ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); + ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk)); break; } p.check_token_or_id_next(get_binder_tk(), "invalid notation declaration, 'binder' expected " "(declaration prefix matches reserved notation)"); - ts.push_back(transition(tk, a)); + ts.push_back(transition(tk, a, pp_tk)); break; case notation::action_kind::Binders: if (g_allow_local && !p.curr_is_token_or_id(get_binders_tk())) { - ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); + ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk)); break; } p.check_token_or_id_next(get_binders_tk(), "invalid notation declaration, 'binders' expected " "(declaration prefix matches reserved notation)"); - ts.push_back(transition(tk, a)); + ts.push_back(transition(tk, a, pp_tk)); break; case notation::action_kind::Expr: case notation::action_kind::Exprs: case notation::action_kind::ScopedExpr: case notation::action_kind::Ext: case notation::action_kind::LuaExt: { if (g_allow_local && !p.curr_is_identifier()) { - ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); + ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk)); break; } name n = p.check_id_next("invalid notation declaration, identifier expected " @@ -565,7 +578,7 @@ static notation_entry parse_notation_core(parser & p, bool overload, notation_en expr l = mk_local(n, local_type); p.add_local(l); locals.push_back(l); - ts.push_back(transition(tk, a)); + ts.push_back(transition(tk, a, pp_tk)); break; } else { throw parser_error("invalid notation declaration, invalid ':' occurrence " @@ -576,13 +589,13 @@ static notation_entry parse_notation_core(parser & p, bool overload, notation_en expr l = mk_local(n, local_type); p.add_local(l); locals.push_back(l); - ts.push_back(transition(tk, a)); + ts.push_back(transition(tk, a, pp_tk)); break; } }} } else { reserved_pt = optional(); - ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); + ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk)); } pt = find_match(pt, ts.back()); } diff --git a/src/frontends/lean/parse_table.cpp b/src/frontends/lean/parse_table.cpp index e998ad815..e958293cd 100644 --- a/src/frontends/lean/parse_table.cpp +++ b/src/frontends/lean/parse_table.cpp @@ -327,7 +327,7 @@ action replace(action const & a, std::function const & f) { } transition replace(transition const & t, std::function const & f) { - return transition(t.get_token(), replace(t.get_action(), f)); + return transition(t.get_token(), replace(t.get_action(), f), t.get_pp_token()); } struct parse_table::cell { diff --git a/src/frontends/lean/parse_table.h b/src/frontends/lean/parse_table.h index 236930122..3b6785775 100644 --- a/src/frontends/lean/parse_table.h +++ b/src/frontends/lean/parse_table.h @@ -114,11 +114,13 @@ action replace(action const & a, std::function const & f); class transition { name m_token; + name m_pp_token; action m_action; public: - transition(name const & t, action const & a): - m_token(t), m_action(a) {} + transition(name const & t, action const & a, name pp_token = name::anonymous()): + m_token(t), m_pp_token(pp_token ? pp_token : t), m_action(a) {} name const & get_token() const { return m_token; } + name const & get_pp_token() const { return m_pp_token; } action const & get_action() const { return m_action; } bool is_simple() const { return m_action.is_simple(); } bool is_safe_ascii() const { return m_token.is_safe_ascii(); } diff --git a/src/frontends/lean/parser.cpp b/src/frontends/lean/parser.cpp index 63741541e..218473d73 100644 --- a/src/frontends/lean/parser.cpp +++ b/src/frontends/lean/parser.cpp @@ -8,6 +8,7 @@ Author: Leonardo de Moura #include #include #include +#include #include "util/interrupt.h" #include "util/script_exception.h" #include "util/sstream.h" @@ -1266,7 +1267,8 @@ expr parser::parse_notation_core(parse_table t, expr * left, bool as_tactic) { auto terminator = a.get_terminator(); if (!terminator || !curr_is_token(*terminator)) { r_args.push_back(parse_expr_or_tactic(a.rbp(), as_tactic)); - while (curr_is_token(a.get_sep())) { + name sep = utf8_trim(a.get_sep().to_string()); + while (curr_is_token(sep)) { next(); r_args.push_back(parse_expr_or_tactic(a.rbp(), as_tactic)); } diff --git a/src/frontends/lean/parser_config.cpp b/src/frontends/lean/parser_config.cpp index b18efc4f3..f6376d12a 100644 --- a/src/frontends/lean/parser_config.cpp +++ b/src/frontends/lean/parser_config.cpp @@ -204,14 +204,15 @@ action read_action(deserializer & d) { } serializer & operator<<(serializer & s, transition const & t) { - s << t.get_token() << t.get_action(); + s << t.get_token() << t.get_pp_token() << t.get_action(); return s; } transition read_transition(deserializer & d) { - name n = read_name(d); - action a = read_action(d); - return transition(n, a); + name n = read_name(d); + name pp = read_name(d); + action a = read_action(d); + return transition(n, a, pp); } struct notation_state { diff --git a/src/frontends/lean/pp.cpp b/src/frontends/lean/pp.cpp index 38705b72f..583619466 100644 --- a/src/frontends/lean/pp.cpp +++ b/src/frontends/lean/pp.cpp @@ -1029,7 +1029,7 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer> format curr; notation::action const & a = ts[i].get_action(); name const & tk = ts[i].get_token(); - format tk_fmt = format(tk); + format tk_fmt = format(ts[i].get_pp_token()); switch (a.kind()) { case notation::action_kind::Skip: curr = tk_fmt; diff --git a/src/frontends/lean/scanner.cpp b/src/frontends/lean/scanner.cpp index 87ebd3903..c63446f84 100644 --- a/src/frontends/lean/scanner.cpp +++ b/src/frontends/lean/scanner.cpp @@ -157,9 +157,10 @@ auto scanner::read_quoted_symbol() -> token_kind { if (c == '`') { m_name_val = name(m_buffer.c_str()); return token_kind::QuotedSymbol; - } else if (c != ' ' && c != '\"' && c != '\n' && c != '\t') { + } else if (c != '\"' && c != '\n' && c != '\t') { m_buffer += c; } else { + // TODO: intra-token space throw_exception("invalid quoted symbol, invalid character"); } } diff --git a/src/util/sexpr/format.h b/src/util/sexpr/format.h index fffc0587f..2f468bbb4 100644 --- a/src/util/sexpr/format.h +++ b/src/util/sexpr/format.h @@ -121,8 +121,7 @@ private: } std::tuple separate_tokens(sexpr const & s, sexpr const * last, - std::function sep //NOLINT - ) const; + std::function sep) const; //NOLINT // Functions used inside of pretty printing static bool space_upto_line_break_list_exceeded(sexpr const & s, int available, std::vector> const & todo); diff --git a/src/util/utf8.cpp b/src/util/utf8.cpp index 5cbd8d9af..3110538b4 100644 --- a/src/util/utf8.cpp +++ b/src/util/utf8.cpp @@ -5,6 +5,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ #include +#include #include "util/debug.h" namespace lean { @@ -49,4 +50,21 @@ char const * get_utf8_last_char(char const * str) { } while (*str != 0); return r; } + +std::string utf8_trim(std::string const & s) { + int start = -1, stop = -1; + for (unsigned i = 0; i < s.size(); i += get_utf8_size(s[i])) { + if (s[i] == ' ') { + if (stop == -1) + stop = i; + } else { + if (start == -1) + start = i; + stop = -1; + } + } + if (stop == -1) + stop = s.size(); + return s.substr(start, stop - start); +} } diff --git a/src/util/utf8.h b/src/util/utf8.h index 75b2d4854..455a14955 100644 --- a/src/util/utf8.h +++ b/src/util/utf8.h @@ -5,9 +5,12 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ #pragma once +#include + namespace lean { bool is_utf8_next(unsigned char c); unsigned get_utf8_size(unsigned char c); size_t utf8_strlen(char const * str); char const * get_utf8_last_char(char const * str); +std::string utf8_trim(std::string const & s); }