refactor(frontends/lean/scanner): move basic UTF8 procedures to separate module
This commit is contained in:
parent
4d4bc0551f
commit
469368f090
4 changed files with 45 additions and 18 deletions
|
@ -7,29 +7,16 @@ Author: Leonardo de Moura
|
|||
#include <cctype>
|
||||
#include <string>
|
||||
#include "util/exception.h"
|
||||
#include "util/utf8.h"
|
||||
#include "frontends/lean/scanner.h"
|
||||
#include "frontends/lean/parser_config.h"
|
||||
|
||||
namespace lean {
|
||||
/** \brief Return true iff the two most significant bits of \c c are 10 (bit pattern 10xxxxxx). */
bool is_utf8_next(unsigned char c) {
    // Isolate the top two bits and compare them with the 10xxxxxx marker.
    unsigned const marker_bits = c & 0xC0;
    return marker_bits == 0x80;
}
|
||||
|
||||
// Size lookup for the head byte `c` of a UTF-8 sequence, as a scanner member.
// NOTE(review): this is a rendered diff hunk (@ -7,29 +7,16 @), so removed and
// added lines appear interleaved without +/- markers. Presumably the if/else
// chain below is the removed pre-commit body, and the lines from
// `unsigned r = ::lean::get_utf8_size(c);` onwards (together with the
// throw_exception call) form the post-commit body — confirm against the repository.
unsigned scanner::get_utf8_size(unsigned char c) {
|
||||
if ((c & 0x80) == 0)
|
||||
return 1;
|
||||
else if ((c & 0xE0) == 0xC0)
|
||||
return 2;
|
||||
else if ((c & 0xF0) == 0xE0)
|
||||
return 3;
|
||||
else if ((c & 0xF8) == 0xF0)
|
||||
return 4;
|
||||
else if ((c & 0xFC) == 0xF8)
|
||||
return 5;
|
||||
else if ((c & 0xFE) == 0xFC)
|
||||
return 6;
|
||||
else if (c == 0xFF)
|
||||
return 1;
|
||||
else
|
||||
// Delegate the classification to the free function ::lean::get_utf8_size.
unsigned r = ::lean::get_utf8_size(c);
|
||||
// A result of 0 is reported through throw_exception instead of being returned.
if (r == 0)
|
||||
throw_exception("invalid utf-8 head character");
|
||||
return r;
|
||||
}
|
||||
|
||||
/** \brief Convert a plain \c char to \c unsigned \c char with a static_cast. */
unsigned char to_uchar(char c) {
    unsigned char const as_unsigned = static_cast<unsigned char>(c);
    return as_unsigned;
}
|
||||
|
|
|
@ -4,6 +4,6 @@ add_library(util trace.cpp debug.cpp name.cpp name_set.cpp
|
|||
realpath.cpp script_state.cpp script_exception.cpp rb_map.cpp
|
||||
lua.cpp luaref.cpp lua_named_param.cpp stackinfo.cpp lean_path.cpp
|
||||
serializer.cpp lbool.cpp thread_script_state.cpp bitap_fuzzy_search.cpp
|
||||
init_module.cpp thread.cpp memory_pool.cpp)
|
||||
init_module.cpp thread.cpp memory_pool.cpp utf8.cpp)
|
||||
|
||||
target_link_libraries(util ${LEAN_LIBS})
|
||||
|
|
29
src/util/utf8.cpp
Normal file
29
src/util/utf8.cpp
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
|
||||
Author: Leonardo de Moura
|
||||
*/
|
||||
|
||||
namespace lean {
/** \brief Return true iff \c c has the bit pattern 10xxxxxx. */
bool is_utf8_next(unsigned char c) {
    unsigned const marker_bits = c & 0xC0;
    return marker_bits == 0x80;
}

/**
   \brief Return the sequence length (in bytes) announced by the head byte \c c.

   The result is 1 for bytes below 0x80, 2 to 6 for the head patterns
   110xxxxx, 1110xxxx, 11110xxx, 111110xx and 1111110x, and 1 for 0xFF.
   Every other byte (10xxxxxx and 0xFE) yields 0.

   NOTE(review): 0xFF is reported as a one-byte unit; presumably callers use it
   as a sentinel value -- confirm against the call sites.
*/
unsigned get_utf8_size(unsigned char c) {
    // One rule per recognised head pattern: (c & mask) == bits  ==>  size.
    struct head_rule {
        unsigned mask;
        unsigned bits;
        unsigned size;
    };
    static head_rule const rules[] = {
        {0x80u, 0x00u, 1},
        {0xE0u, 0xC0u, 2},
        {0xF0u, 0xE0u, 3},
        {0xF8u, 0xF0u, 4},
        {0xFCu, 0xF8u, 5},
        {0xFEu, 0xFCu, 6},
        {0xFFu, 0xFFu, 1}
    };
    for (head_rule const & rule : rules) {
        if ((c & rule.mask) == rule.bits)
            return rule.size;
    }
    // No head pattern matched.
    return 0;
}
}
|
11
src/util/utf8.h
Normal file
11
src/util/utf8.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
|
||||
Author: Leonardo de Moura
|
||||
*/
|
||||
#pragma once
|
||||
// Basic UTF-8 helper procedures.
namespace lean {
|
||||
/** \brief Return true iff \c c has the bit pattern 10xxxxxx, i.e. (c & 0xC0) == 0x80. */
bool is_utf8_next(unsigned char c);
|
||||
/**
   \brief Return the sequence length (1 to 6) announced by the head byte \c c.
   0xFF yields 1; a byte that matches no head pattern yields 0.
*/
unsigned get_utf8_size(unsigned char c);
|
||||
}
|
Loading…
Reference in a new issue