fix(frontends/lean): identifier size when using unicode

see issue #756
This commit is contained in:
Leonardo de Moura 2015-07-30 11:21:45 -07:00
parent a39cac4fad
commit 656b642c4a
10 changed files with 52 additions and 7 deletions

View file

@ -174,7 +174,7 @@ void parser::scan() {
if (p.first == m_info_at_line) { if (p.first == m_info_at_line) {
if (curr_is_identifier()) { if (curr_is_identifier()) {
name const & id = get_name_val(); name const & id = get_name_val();
if (p.second <= m_info_at_col && m_info_at_col < p.second + id.size()) { if (p.second <= m_info_at_col && m_info_at_col < p.second + id.utf8_size()) {
print_lean_info_header(regular_stream().get_stream()); print_lean_info_header(regular_stream().get_stream());
bool ok = true; bool ok = true;
try { try {
@ -190,7 +190,7 @@ void parser::scan() {
} }
} else if (curr_is_keyword()) { } else if (curr_is_keyword()) {
name const & tk = get_token_info().token(); name const & tk = get_token_info().token();
if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.size()) { if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.utf8_size()) {
print_lean_info_header(regular_stream().get_stream()); print_lean_info_header(regular_stream().get_stream());
try { try {
print_token_info(*this, tk); print_token_info(*this, tk);
@ -200,7 +200,7 @@ void parser::scan() {
} }
} else if (curr_is_command()) { } else if (curr_is_command()) {
name const & tk = get_token_info().token(); name const & tk = get_token_info().token();
if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.size()) { if (p.second <= m_info_at_col && m_info_at_col < p.second + tk.utf8_size()) {
print_lean_info_header(regular_stream().get_stream()); print_lean_info_header(regular_stream().get_stream());
regular_stream() << "'" << tk << "' is a command\n"; regular_stream() << "'" << tk << "' is a command\n";
print_lean_info_footer(regular_stream().get_stream()); print_lean_info_footer(regular_stream().get_stream());

View file

@ -19,6 +19,7 @@ Author: Leonardo de Moura
#include "util/hash.h" #include "util/hash.h"
#include "util/trace.h" #include "util/trace.h"
#include "util/ascii.h" #include "util/ascii.h"
#include "util/utf8.h"
#include "util/object_serializer.h" #include "util/object_serializer.h"
#include "util/lua_list.h" #include "util/lua_list.h"
@ -294,7 +295,7 @@ static unsigned num_digits(unsigned k) {
return r; return r;
} }
size_t name::size() const { size_t name::size_core(bool unicode) const {
if (m_ptr == nullptr) { if (m_ptr == nullptr) {
return strlen(anonymous_str); return strlen(anonymous_str);
} else { } else {
@ -303,7 +304,7 @@ size_t name::size() const {
size_t r = 0; size_t r = 0;
while (true) { while (true) {
if (i->m_is_string) { if (i->m_is_string) {
r += strlen(i->m_str); r += unicode ? utf8_strlen(i->m_str) : strlen(i->m_str);
} else { } else {
r += num_digits(i->m_k); r += num_digits(i->m_k);
} }
@ -318,6 +319,14 @@ size_t name::size() const {
} }
} }
size_t name::size() const {
return size_core(false);
}
size_t name::utf8_size() const {
return size_core(true);
}
unsigned name::hash() const { unsigned name::hash() const {
return m_ptr ? m_ptr->m_hash : 11; return m_ptr ? m_ptr->m_hash : 11;
} }

View file

@ -33,6 +33,8 @@ private:
explicit name(unsigned k); explicit name(unsigned k);
// the parameter bool is only used to distinguish this constructor from the public one. // the parameter bool is only used to distinguish this constructor from the public one.
name(name const & prefix, unsigned k, bool); name(name const & prefix, unsigned k, bool);
size_t size_core(bool unicode) const;
public: public:
name(); name();
name(char const * name); name(char const * name);
@ -100,6 +102,8 @@ public:
std::string to_string(char const * sep = lean_name_separator) const; std::string to_string(char const * sep = lean_name_separator) const;
/** \brief Size of the this name (in characters). */ /** \brief Size of the this name (in characters). */
size_t size() const; size_t size() const;
/** \brief Size of the this name in unicode. */
size_t utf8_size() const;
unsigned hash() const; unsigned hash() const;
/** \brief Return true iff the name contains only safe ASCII chars */ /** \brief Return true iff the name contains only safe ASCII chars */
bool is_safe_ascii() const; bool is_safe_ascii() const;

View file

@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura Author: Leonardo de Moura
*/ */
#include <cstdlib>
namespace lean { namespace lean {
bool is_utf8_next(unsigned char c) { return (c & 0xC0) == 0x80; } bool is_utf8_next(unsigned char c) { return (c & 0xC0) == 0x80; }
@ -26,4 +27,14 @@ unsigned get_utf8_size(unsigned char c) {
else else
return 0; return 0;
} }
size_t utf8_strlen(char const * str) {
size_t r = 0;
while (*str != 0) {
unsigned sz = get_utf8_size(*str);
r++;
str += sz;
}
return r;
}
} }

View file

@ -8,4 +8,5 @@ Author: Leonardo de Moura
namespace lean { namespace lean {
bool is_utf8_next(unsigned char c); bool is_utf8_next(unsigned char c);
unsigned get_utf8_size(unsigned char c); unsigned get_utf8_size(unsigned char c);
size_t utf8_strlen(char const * str);
} }

View file

@ -0,0 +1,4 @@
print_info.lean:13:8: warning: using 'sorry'
LEAN_INFORMATION
parameter a₁ :
END_LEAN_INFORMATION

View file

@ -0,0 +1,4 @@
print_info.lean:13:8: warning: using 'sorry'
LEAN_INFORMATION
parameter a₁ :
END_LEAN_INFORMATION

View file

@ -0,0 +1,6 @@
print_info.lean:13:8: warning: using 'sorry'
LEAN_INFORMATION
_ `+`:65 _:65 :=
| nat.add #1 #0
| [priority 999] int.add #1 #0
END_LEAN_INFORMATION

View file

@ -15,3 +15,9 @@ end
example (n : nat) : n + n = n + n := example (n : nat) : n + n = n + n :=
rfl rfl
example (a₁ a₂ a₃ : nat) : a₁ = 0 → a₂ = 0 → a₃ = 0 → a₁ + a₂ + a₃ = 0 :=
assume h₁ h₂ h₃, calc
a₁ + a₂ + a₃ = 0 + a₂ + a₃ : h₁
... = 0 + 0 + a₃ : h₂
... = 0 + 0 + 0 : h₃

View file

@ -7,8 +7,8 @@ fi
LEAN=$1 LEAN=$1
export LEAN_PATH=../../../library:. export LEAN_PATH=../../../library:.
lines=('4' '7' '8' '12' '12' '12' '17' '17'); lines=('4' '7' '8' '12' '12' '12' '17' '17' '21' '21' '21');
cols=('16' '18' '19' '19' '20' '30' '0' '2'); cols=('16' '18' '19' '19' '20' '30' '0' '2' '0' '1' '3');
size=${#lines[@]} size=${#lines[@]}
i=0 i=0