feat(frontends/lean): allow specifying notation spacing via quoted symbols

Unquoted tokens inherit their spacing from the respective reserved definition.
This commit is contained in:
Sebastian Ullrich 2015-09-30 16:15:07 +02:00 committed by Leonardo de Moura
parent 8f96b725e3
commit da08079af9
10 changed files with 83 additions and 44 deletions

View file

@ -122,7 +122,9 @@ static token_entry mk_token_entry(std::string const & tk, unsigned prec, notatio
static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, notation_entry_group grp, bool parse_only, static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, notation_entry_group grp, bool parse_only,
unsigned priority) unsigned priority)
-> pair<notation_entry, optional<token_entry>> { -> pair<notation_entry, optional<token_entry>> {
std::string tk = parse_symbol(p, "invalid notation declaration, quoted symbol or identifier expected"); bool explicit_pp = p.curr_is_quoted_symbol();
std::string pp_tk = parse_symbol(p, "invalid notation declaration, quoted symbol or identifier expected");
std::string tk = utf8_trim(pp_tk);
char const * tks = tk.c_str(); char const * tks = tk.c_str();
check_not_forbidden(tks); check_not_forbidden(tks);
environment const & env = p.env(); environment const & env = p.env();
@ -130,19 +132,22 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota
optional<unsigned> prec; optional<unsigned> prec;
optional<parse_table> reserved_pt; optional<parse_table> reserved_pt;
optional<transition> reserved_transition;
optional<action> reserved_action; optional<action> reserved_action;
if (grp == notation_entry_group::Main) { if (grp == notation_entry_group::Main) {
if (k == mixfix_kind::prefix) { if (k == mixfix_kind::prefix) {
if (auto ls = get_reserved_nud_table(p.env()).find(tks)) { if (auto ls = get_reserved_nud_table(p.env()).find(tks)) {
// Remark: we are ignoring multiple actions in the reserved notation table // Remark: we are ignoring multiple actions in the reserved notation table
reserved_pt = head(ls).second; reserved_pt = head(ls).second;
reserved_action = head(ls).first.get_action(); reserved_transition = head(ls).first;
reserved_action = reserved_transition->get_action();
} }
} else { } else {
if (auto ls = get_reserved_led_table(p.env()).find(tks)) { if (auto ls = get_reserved_led_table(p.env()).find(tks)) {
// Remark: we are ignoring multiple actions in the reserved notation table // Remark: we are ignoring multiple actions in the reserved notation table
reserved_pt = head(ls).second; reserved_pt = head(ls).second;
reserved_action = head(ls).first.get_action(); reserved_transition = head(ls).first;
reserved_action = reserved_transition->get_action();
} }
} }
} }
@ -211,6 +216,9 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota
} }
} }
if (reserved_action && !explicit_pp)
pp_tk = reserved_transition->get_pp_token().to_string();
if (grp == notation_entry_group::Reserve) { if (grp == notation_entry_group::Reserve) {
// reserve notation commands do not have a denotation // reserve notation commands do not have a denotation
expr dummy = mk_Prop(); expr dummy = mk_Prop();
@ -218,16 +226,16 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota
throw parser_error("invalid reserve notation, found `:=`", p.pos()); throw parser_error("invalid reserve notation, found `:=`", p.pos());
switch (k) { switch (k) {
case mixfix_kind::infixl: case mixfix_kind::infixl:
return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)),
dummy, overload, priority, grp, parse_only), new_token); dummy, overload, priority, grp, parse_only), new_token);
case mixfix_kind::infixr: case mixfix_kind::infixr:
return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)),
dummy, overload, priority, grp, parse_only), new_token); dummy, overload, priority, grp, parse_only), new_token);
case mixfix_kind::postfix: case mixfix_kind::postfix:
return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action())), return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action(), pp_tk)),
dummy, overload, priority, grp, parse_only), new_token); dummy, overload, priority, grp, parse_only), new_token);
case mixfix_kind::prefix: case mixfix_kind::prefix:
return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec))), return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec), pp_tk)),
dummy, overload, priority, grp, parse_only), new_token); dummy, overload, priority, grp, parse_only), new_token);
} }
} else { } else {
@ -239,17 +247,17 @@ static auto parse_mixfix_notation(parser & p, mixfix_kind k, bool overload, nota
case mixfix_kind::infixl: case mixfix_kind::infixl:
#if defined(__GNUC__) && !defined(__CLANG__) #if defined(__GNUC__) && !defined(__CLANG__)
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)),
mk_app(f, Var(1), Var(0)), overload, priority, grp, parse_only), new_token); mk_app(f, Var(1), Var(0)), overload, priority, grp, parse_only), new_token);
#endif #endif
case mixfix_kind::infixr: case mixfix_kind::infixr:
return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec))), return mk_pair(notation_entry(false, to_list(transition(tks, mk_expr_action(*prec), pp_tk)),
mk_app(f, Var(1), Var(0)), overload, priority, grp, parse_only), new_token); mk_app(f, Var(1), Var(0)), overload, priority, grp, parse_only), new_token);
case mixfix_kind::postfix: case mixfix_kind::postfix:
return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action())), return mk_pair(notation_entry(false, to_list(transition(tks, mk_skip_action(), pp_tk)),
mk_app(f, Var(0)), overload, priority, grp, parse_only), new_token); mk_app(f, Var(0)), overload, priority, grp, parse_only), new_token);
case mixfix_kind::prefix: case mixfix_kind::prefix:
return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec))), return mk_pair(notation_entry(true, to_list(transition(tks, mk_expr_action(*prec), pp_tk)),
mk_app(f, Var(0)), overload, priority, grp, parse_only), new_token); mk_app(f, Var(0)), overload, priority, grp, parse_only), new_token);
} }
} }
@ -268,9 +276,9 @@ static name parse_quoted_symbol_or_token(parser & p, buffer<token_entry> & new_t
used_default = false; used_default = false;
if (p.curr_is_quoted_symbol()) { if (p.curr_is_quoted_symbol()) {
environment const & env = p.env(); environment const & env = p.env();
auto tk = p.get_name_val(); auto pp_tk = p.get_name_val();
auto tks = tk.to_string(); auto tks = utf8_trim(pp_tk.to_string());
auto tkcs = tks.c_str(); auto tkcs = tks.c_str();
check_not_forbidden(tkcs); check_not_forbidden(tkcs);
p.next(); p.next();
if (p.curr_is_token(get_colon_tk())) { if (p.curr_is_token(get_colon_tk())) {
@ -281,7 +289,7 @@ static name parse_quoted_symbol_or_token(parser & p, buffer<token_entry> & new_t
new_tokens.push_back(mk_token_entry(tkcs, LEAN_DEFAULT_PRECEDENCE, grp)); new_tokens.push_back(mk_token_entry(tkcs, LEAN_DEFAULT_PRECEDENCE, grp));
used_default = true; used_default = true;
} }
return tk; return pp_tk;
} else if (p.curr_is_keyword()) { } else if (p.curr_is_keyword()) {
auto tk = p.get_token_info().token(); auto tk = p.get_token_info().token();
check_not_forbidden(tk.to_string().c_str()); check_not_forbidden(tk.to_string().c_str());
@ -452,15 +460,16 @@ static unsigned parse_binders_rbp(parser & p) {
} }
static transition parse_transition(parser & p, optional<parse_table> const & pt, name const & tk, static transition parse_transition(parser & p, optional<parse_table> const & pt, name const & tk,
buffer<expr> & locals, buffer<token_entry> & new_tokens, notation_entry_group grp) { buffer<expr> & locals, buffer<token_entry> & new_tokens, notation_entry_group grp,
name const & pp_tk) {
if (p.curr_is_token_or_id(get_binder_tk())) { if (p.curr_is_token_or_id(get_binder_tk())) {
p.next(); p.next();
unsigned rbp = parse_binders_rbp(p); unsigned rbp = parse_binders_rbp(p);
return transition(tk, mk_binder_action(rbp)); return transition(tk, mk_binder_action(rbp), pp_tk);
} else if (p.curr_is_token_or_id(get_binders_tk())) { } else if (p.curr_is_token_or_id(get_binders_tk())) {
p.next(); p.next();
unsigned rbp = parse_binders_rbp(p); unsigned rbp = parse_binders_rbp(p);
return transition(tk, mk_binders_action(rbp)); return transition(tk, mk_binders_action(rbp), pp_tk);
} else if (p.curr_is_identifier()) { } else if (p.curr_is_identifier()) {
unsigned default_prec = get_default_prec(pt, tk); unsigned default_prec = get_default_prec(pt, tk);
name n = p.get_name_val(); name n = p.get_name_val();
@ -470,10 +479,10 @@ static transition parse_transition(parser & p, optional<parse_table> const & pt,
expr l = mk_local(n, local_type); expr l = mk_local(n, local_type);
p.add_local(l); p.add_local(l);
locals.push_back(l); locals.push_back(l);
return transition(tk, a); return transition(tk, a, pp_tk);
} else if (p.curr_is_quoted_symbol() || p.curr_is_keyword() || } else if (p.curr_is_quoted_symbol() || p.curr_is_keyword() ||
p.curr_is_token(get_assign_tk()) || p.curr_is_command() || p.curr_is_eof()) { p.curr_is_token(get_assign_tk()) || p.curr_is_command() || p.curr_is_eof()) {
return transition(tk, mk_skip_action()); return transition(tk, mk_skip_action(), pp_tk);
} else { } else {
throw parser_error("invalid notation declaration, quoted-symbol, identifier, " throw parser_error("invalid notation declaration, quoted-symbol, identifier, "
"'binder', 'binders' expected", p.pos()); "'binder', 'binders' expected", p.pos());
@ -511,48 +520,52 @@ static notation_entry parse_notation_core(parser & p, bool overload, notation_en
bool used_default = false; bool used_default = false;
while ((grp != notation_entry_group::Reserve && !p.curr_is_token(get_assign_tk())) || while ((grp != notation_entry_group::Reserve && !p.curr_is_token(get_assign_tk())) ||
(grp == notation_entry_group::Reserve && !p.curr_is_command() && !p.curr_is_eof())) { (grp == notation_entry_group::Reserve && !p.curr_is_command() && !p.curr_is_eof())) {
name tk = parse_quoted_symbol_or_token(p, new_tokens, used_default, grp); name pp_tk = parse_quoted_symbol_or_token(p, new_tokens, used_default, grp).to_string();
name tk = utf8_trim(pp_tk.to_string());
if (auto at = find_next(reserved_pt, tk)) { if (auto at = find_next(reserved_pt, tk)) {
// Remark: we are ignoring multiple actions in the reserved notation table // Remark: we are ignoring multiple actions in the reserved notation table
action const & a = head(at).first.get_action(); transition const & trans = head(at).first;
action const & a = trans.get_action();
reserved_pt = head(at).second; reserved_pt = head(at).second;
if (!p.curr_is_quoted_symbol())
pp_tk = trans.get_pp_token();
switch (a.kind()) { switch (a.kind()) {
case notation::action_kind::Skip: case notation::action_kind::Skip:
if (!p.curr_is_quoted_symbol() && !p.curr_is_keyword() && !p.curr_is_token(get_assign_tk())) { if (!p.curr_is_quoted_symbol() && !p.curr_is_keyword() && !p.curr_is_token(get_assign_tk())) {
if (g_allow_local && !p.curr_is_token_or_id(get_binders_tk())) { if (g_allow_local && !p.curr_is_token_or_id(get_binders_tk())) {
ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk));
break; break;
} }
p.check_token_or_id_next(get_binders_tk(), p.check_token_or_id_next(get_binders_tk(),
"invalid notation declaration, quoted-symbol, keyword or `:=` expected " "invalid notation declaration, quoted-symbol, keyword or `:=` expected "
"(declaration prefix matches reserved notation)"); "(declaration prefix matches reserved notation)");
} }
ts.push_back(transition(tk, a)); ts.push_back(transition(tk, a, pp_tk));
break; break;
case notation::action_kind::Binder: case notation::action_kind::Binder:
if (g_allow_local && !p.curr_is_token_or_id(get_binder_tk())) { if (g_allow_local && !p.curr_is_token_or_id(get_binder_tk())) {
ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk));
break; break;
} }
p.check_token_or_id_next(get_binder_tk(), p.check_token_or_id_next(get_binder_tk(),
"invalid notation declaration, 'binder' expected " "invalid notation declaration, 'binder' expected "
"(declaration prefix matches reserved notation)"); "(declaration prefix matches reserved notation)");
ts.push_back(transition(tk, a)); ts.push_back(transition(tk, a, pp_tk));
break; break;
case notation::action_kind::Binders: case notation::action_kind::Binders:
if (g_allow_local && !p.curr_is_token_or_id(get_binders_tk())) { if (g_allow_local && !p.curr_is_token_or_id(get_binders_tk())) {
ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk));
break; break;
} }
p.check_token_or_id_next(get_binders_tk(), p.check_token_or_id_next(get_binders_tk(),
"invalid notation declaration, 'binders' expected " "invalid notation declaration, 'binders' expected "
"(declaration prefix matches reserved notation)"); "(declaration prefix matches reserved notation)");
ts.push_back(transition(tk, a)); ts.push_back(transition(tk, a, pp_tk));
break; break;
case notation::action_kind::Expr: case notation::action_kind::Exprs: case notation::action_kind::ScopedExpr: case notation::action_kind::Expr: case notation::action_kind::Exprs: case notation::action_kind::ScopedExpr:
case notation::action_kind::Ext: case notation::action_kind::LuaExt: { case notation::action_kind::Ext: case notation::action_kind::LuaExt: {
if (g_allow_local && !p.curr_is_identifier()) { if (g_allow_local && !p.curr_is_identifier()) {
ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk));
break; break;
} }
name n = p.check_id_next("invalid notation declaration, identifier expected " name n = p.check_id_next("invalid notation declaration, identifier expected "
@ -565,7 +578,7 @@ static notation_entry parse_notation_core(parser & p, bool overload, notation_en
expr l = mk_local(n, local_type); expr l = mk_local(n, local_type);
p.add_local(l); p.add_local(l);
locals.push_back(l); locals.push_back(l);
ts.push_back(transition(tk, a)); ts.push_back(transition(tk, a, pp_tk));
break; break;
} else { } else {
throw parser_error("invalid notation declaration, invalid ':' occurrence " throw parser_error("invalid notation declaration, invalid ':' occurrence "
@ -576,13 +589,13 @@ static notation_entry parse_notation_core(parser & p, bool overload, notation_en
expr l = mk_local(n, local_type); expr l = mk_local(n, local_type);
p.add_local(l); p.add_local(l);
locals.push_back(l); locals.push_back(l);
ts.push_back(transition(tk, a)); ts.push_back(transition(tk, a, pp_tk));
break; break;
} }
}} }}
} else { } else {
reserved_pt = optional<parse_table>(); reserved_pt = optional<parse_table>();
ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp)); ts.push_back(parse_transition(p, pt, tk, locals, new_tokens, grp, pp_tk));
} }
pt = find_match(pt, ts.back()); pt = find_match(pt, ts.back());
} }

View file

@ -327,7 +327,7 @@ action replace(action const & a, std::function<expr(expr const &)> const & f) {
} }
transition replace(transition const & t, std::function<expr(expr const &)> const & f) { transition replace(transition const & t, std::function<expr(expr const &)> const & f) {
return transition(t.get_token(), replace(t.get_action(), f)); return transition(t.get_token(), replace(t.get_action(), f), t.get_pp_token());
} }
struct parse_table::cell { struct parse_table::cell {

View file

@ -114,11 +114,13 @@ action replace(action const & a, std::function<expr(expr const &)> const & f);
class transition { class transition {
name m_token; name m_token;
name m_pp_token;
action m_action; action m_action;
public: public:
transition(name const & t, action const & a): transition(name const & t, action const & a, name pp_token = name::anonymous()):
m_token(t), m_action(a) {} m_token(t), m_pp_token(pp_token ? pp_token : t), m_action(a) {}
name const & get_token() const { return m_token; } name const & get_token() const { return m_token; }
name const & get_pp_token() const { return m_pp_token; }
action const & get_action() const { return m_action; } action const & get_action() const { return m_action; }
bool is_simple() const { return m_action.is_simple(); } bool is_simple() const { return m_action.is_simple(); }
bool is_safe_ascii() const { return m_token.is_safe_ascii(); } bool is_safe_ascii() const { return m_token.is_safe_ascii(); }

View file

@ -8,6 +8,7 @@ Author: Leonardo de Moura
#include <string> #include <string>
#include <limits> #include <limits>
#include <vector> #include <vector>
#include <util/utf8.h>
#include "util/interrupt.h" #include "util/interrupt.h"
#include "util/script_exception.h" #include "util/script_exception.h"
#include "util/sstream.h" #include "util/sstream.h"
@ -1266,7 +1267,8 @@ expr parser::parse_notation_core(parse_table t, expr * left, bool as_tactic) {
auto terminator = a.get_terminator(); auto terminator = a.get_terminator();
if (!terminator || !curr_is_token(*terminator)) { if (!terminator || !curr_is_token(*terminator)) {
r_args.push_back(parse_expr_or_tactic(a.rbp(), as_tactic)); r_args.push_back(parse_expr_or_tactic(a.rbp(), as_tactic));
while (curr_is_token(a.get_sep())) { name sep = utf8_trim(a.get_sep().to_string());
while (curr_is_token(sep)) {
next(); next();
r_args.push_back(parse_expr_or_tactic(a.rbp(), as_tactic)); r_args.push_back(parse_expr_or_tactic(a.rbp(), as_tactic));
} }

View file

@ -204,14 +204,15 @@ action read_action(deserializer & d) {
} }
serializer & operator<<(serializer & s, transition const & t) { serializer & operator<<(serializer & s, transition const & t) {
s << t.get_token() << t.get_action(); s << t.get_token() << t.get_pp_token() << t.get_action();
return s; return s;
} }
transition read_transition(deserializer & d) { transition read_transition(deserializer & d) {
name n = read_name(d); name n = read_name(d);
action a = read_action(d); name pp = read_name(d);
return transition(n, a); action a = read_action(d);
return transition(n, a, pp);
} }
struct notation_state { struct notation_state {

View file

@ -1029,7 +1029,7 @@ auto pretty_fn::pp_notation(notation_entry const & entry, buffer<optional<expr>>
format curr; format curr;
notation::action const & a = ts[i].get_action(); notation::action const & a = ts[i].get_action();
name const & tk = ts[i].get_token(); name const & tk = ts[i].get_token();
format tk_fmt = format(tk); format tk_fmt = format(ts[i].get_pp_token());
switch (a.kind()) { switch (a.kind()) {
case notation::action_kind::Skip: case notation::action_kind::Skip:
curr = tk_fmt; curr = tk_fmt;

View file

@ -157,9 +157,10 @@ auto scanner::read_quoted_symbol() -> token_kind {
if (c == '`') { if (c == '`') {
m_name_val = name(m_buffer.c_str()); m_name_val = name(m_buffer.c_str());
return token_kind::QuotedSymbol; return token_kind::QuotedSymbol;
} else if (c != ' ' && c != '\"' && c != '\n' && c != '\t') { } else if (c != '\"' && c != '\n' && c != '\t') {
m_buffer += c; m_buffer += c;
} else { } else {
// TODO: intra-token space
throw_exception("invalid quoted symbol, invalid character"); throw_exception("invalid quoted symbol, invalid character");
} }
} }

View file

@ -121,8 +121,7 @@ private:
} }
std::tuple<sexpr, sexpr const *> separate_tokens(sexpr const & s, sexpr const * last, std::tuple<sexpr, sexpr const *> separate_tokens(sexpr const & s, sexpr const * last,
std::function<bool(sexpr const &, sexpr const &)> sep //NOLINT std::function<bool(sexpr const &, sexpr const &)> sep) const; //NOLINT
) const;
// Functions used inside of pretty printing // Functions used inside of pretty printing
static bool space_upto_line_break_list_exceeded(sexpr const & s, int available, std::vector<pair<sexpr, unsigned>> const & todo); static bool space_upto_line_break_list_exceeded(sexpr const & s, int available, std::vector<pair<sexpr, unsigned>> const & todo);

View file

@ -5,6 +5,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura Author: Leonardo de Moura
*/ */
#include <cstdlib> #include <cstdlib>
#include <string>
#include "util/debug.h" #include "util/debug.h"
namespace lean { namespace lean {
@ -49,4 +50,21 @@ char const * get_utf8_last_char(char const * str) {
} while (*str != 0); } while (*str != 0);
return r; return r;
} }
/**
   \brief Return a copy of \c s without its leading and trailing space characters (' ').

   Only the ASCII space (0x20) is stripped; tabs, newlines and spaces in the
   interior of the string are preserved. If \c s is empty or consists solely of
   spaces, the result is the empty string.

   The scan is performed byte-wise. This is safe for UTF-8 input because every
   byte of a multi-byte sequence has its high bit set, so the byte 0x20 can only
   ever be a real space character.
*/
std::string utf8_trim(std::string const & s) {
    std::string::size_type const first = s.find_first_not_of(' ');
    // No non-space byte at all: nothing survives trimming. Handling this
    // explicitly avoids calling substr with an out-of-range start position.
    if (first == std::string::npos)
        return std::string();
    // A non-space byte exists, so this search cannot fail and last >= first.
    std::string::size_type const last = s.find_last_not_of(' ');
    return s.substr(first, last - first + 1);
}
} }

View file

@ -5,9 +5,12 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura Author: Leonardo de Moura
*/ */
#pragma once #pragma once
#include <string>
namespace lean { namespace lean {
bool is_utf8_next(unsigned char c); bool is_utf8_next(unsigned char c);
unsigned get_utf8_size(unsigned char c); unsigned get_utf8_size(unsigned char c);
size_t utf8_strlen(char const * str); size_t utf8_strlen(char const * str);
char const * get_utf8_last_char(char const * str); char const * get_utf8_last_char(char const * str);
std::string utf8_trim(std::string const & s);
} }