diff --git a/src/frontends/lean/parser.cpp b/src/frontends/lean/parser.cpp index 6add4bbc7..b8e84586b 100644 --- a/src/frontends/lean/parser.cpp +++ b/src/frontends/lean/parser.cpp @@ -174,7 +174,7 @@ void parser::scan() { if (p.first == m_info_at_line) { if (curr_is_identifier()) { name const & id = get_name_val(); - if (p.second <= m_info_at_col && m_info_at_col < p.second + id.size()) { + if (p.second <= m_info_at_col && m_info_at_col < p.second + id.utf8_size()) { print_lean_info_header(regular_stream().get_stream()); bool ok = true; try { @@ -190,7 +190,7 @@ void parser::scan() { } } else if (curr_is_keyword()) { name const & tk = get_token_info().token(); - if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.size()) { + if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.utf8_size()) { print_lean_info_header(regular_stream().get_stream()); try { print_token_info(*this, tk); @@ -200,7 +200,7 @@ void parser::scan() { } } else if (curr_is_command()) { name const & tk = get_token_info().token(); - if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.size()) { + if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.utf8_size()) { print_lean_info_header(regular_stream().get_stream()); regular_stream() << "'" << tk << "' is a command\n"; print_lean_info_footer(regular_stream().get_stream()); diff --git a/src/util/name.cpp b/src/util/name.cpp index 67ffbc5c3..938e92d5e 100644 --- a/src/util/name.cpp +++ b/src/util/name.cpp @@ -19,6 +19,7 @@ Author: Leonardo de Moura #include "util/hash.h" #include "util/trace.h" #include "util/ascii.h" +#include "util/utf8.h" #include "util/object_serializer.h" #include "util/lua_list.h" @@ -294,7 +295,7 @@ static unsigned num_digits(unsigned k) { return r; } -size_t name::size() const { +size_t name::size_core(bool unicode) const { if (m_ptr == nullptr) { return strlen(anonymous_str); } else { @@ -303,7 +304,7 @@ size_t name::size() const { size_t r = 0; while (true) { if (i->m_is_string) { - r += strlen(i->m_str); + r += unicode ? utf8_strlen(i->m_str) : strlen(i->m_str); } else { r += num_digits(i->m_k); } @@ -318,6 +319,14 @@ size_t name::size() const { } } +size_t name::size() const { + return size_core(false); +} + +size_t name::utf8_size() const { + return size_core(true); +} + unsigned name::hash() const { return m_ptr ? m_ptr->m_hash : 11; } diff --git a/src/util/name.h b/src/util/name.h index 731223252..2f5f562a0 100644 --- a/src/util/name.h +++ b/src/util/name.h @@ -33,6 +33,8 @@ private: explicit name(unsigned k); // the parameter bool is only used to distinguish this constructor from the public one. name(name const & prefix, unsigned k, bool); + + size_t size_core(bool unicode) const; public: name(); name(char const * name); @@ -100,6 +102,8 @@ public: std::string to_string(char const * sep = lean_name_separator) const; /** \brief Size of the this name (in characters). */ size_t size() const; + /** \brief Size of the this name in unicode. */ + size_t utf8_size() const; unsigned hash() const; /** \brief Return true iff the name contains only safe ASCII chars */ bool is_safe_ascii() const; diff --git a/src/util/utf8.cpp b/src/util/utf8.cpp index c508ef1cc..11dd3e66b 100644 --- a/src/util/utf8.cpp +++ b/src/util/utf8.cpp @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ +#include namespace lean { bool is_utf8_next(unsigned char c) { return (c & 0xC0) == 0x80; } @@ -26,4 +27,14 @@ unsigned get_utf8_size(unsigned char c) { else return 0; } + +size_t utf8_strlen(char const * str) { + size_t r = 0; + while (*str != 0) { + unsigned sz = get_utf8_size(*str); + r++; + str += sz; + } + return r; +} } diff --git a/src/util/utf8.h b/src/util/utf8.h index 19518dc4b..3641952cf 100644 --- a/src/util/utf8.h +++ b/src/util/utf8.h @@ -8,4 +8,5 @@ Author: Leonardo de Moura namespace lean { bool is_utf8_next(unsigned char c); unsigned get_utf8_size(unsigned char c); +size_t utf8_strlen(char const * str); } diff --git a/tests/lean/extra/print_info.21.0.expected.out b/tests/lean/extra/print_info.21.0.expected.out new file mode 100644 index 000000000..381410b0e --- /dev/null +++ b/tests/lean/extra/print_info.21.0.expected.out @@ -0,0 +1,4 @@ +print_info.lean:13:8: warning: using 'sorry' +LEAN_INFORMATION +parameter a₁ : ℕ +END_LEAN_INFORMATION diff --git a/tests/lean/extra/print_info.21.1.expected.out b/tests/lean/extra/print_info.21.1.expected.out new file mode 100644 index 000000000..381410b0e --- /dev/null +++ b/tests/lean/extra/print_info.21.1.expected.out @@ -0,0 +1,4 @@ +print_info.lean:13:8: warning: using 'sorry' +LEAN_INFORMATION +parameter a₁ : ℕ +END_LEAN_INFORMATION diff --git a/tests/lean/extra/print_info.21.3.expected.out b/tests/lean/extra/print_info.21.3.expected.out new file mode 100644 index 000000000..7f2a803ee --- /dev/null +++ b/tests/lean/extra/print_info.21.3.expected.out @@ -0,0 +1,6 @@ +print_info.lean:13:8: warning: using 'sorry' +LEAN_INFORMATION +_ `+`:65 _:65 := + | nat.add #1 #0 + | [priority 999] int.add #1 #0 +END_LEAN_INFORMATION diff --git a/tests/lean/extra/print_info.lean b/tests/lean/extra/print_info.lean index 6f9646dad..ea87922b7 100644 --- a/tests/lean/extra/print_info.lean +++ b/tests/lean/extra/print_info.lean @@ -15,3 +15,9 @@ end example (n : nat) : n + n = n + n := rfl + +example (a₁ a₂ a₃ : nat) : a₁ = 0 → a₂ = 0 → a₃ = 0 → a₁ + a₂ + a₃ = 0 := +assume h₁ h₂ h₃, calc +a₁ + a₂ + a₃ = 0 + a₂ + a₃ : h₁ + ... = 0 + 0 + a₃ : h₂ + ... = 0 + 0 + 0 : h₃ diff --git a/tests/lean/extra/print_info.sh b/tests/lean/extra/print_info.sh index 7a8172548..24af9dafd 100755 --- a/tests/lean/extra/print_info.sh +++ b/tests/lean/extra/print_info.sh @@ -7,8 +7,8 @@ fi LEAN=$1 export LEAN_PATH=../../../library:. -lines=('4' '7' '8' '12' '12' '12' '17' '17'); -cols=('16' '18' '19' '19' '20' '30' '0' '2'); +lines=('4' '7' '8' '12' '12' '12' '17' '17' '21' '21' '21'); +cols=('16' '18' '19' '19' '20' '30' '0' '2' '0' '1' '3'); size=${#lines[@]} i=0