refactor(frontends/lean/scanner): move basic UTF8 procedures to separate module

This commit is contained in:
Leonardo de Moura 2014-10-19 13:29:15 -07:00
parent 4d4bc0551f
commit 469368f090
4 changed files with 45 additions and 18 deletions

View file

@ -7,29 +7,16 @@ Author: Leonardo de Moura
#include <cctype>
#include <string>
#include "util/exception.h"
#include "util/utf8.h"
#include "frontends/lean/scanner.h"
#include "frontends/lean/parser_config.h"
namespace lean {
/** \brief Return true iff \c c has the bit pattern 10xxxxxx. */
bool is_utf8_next(unsigned char c) {
    const unsigned top_two_bits = c & 0xC0u;
    return top_two_bits == 0x80u;
}
/**
   \brief Return the number of bytes of the UTF-8 sequence whose head byte is \c c.

   The classification itself is delegated to ::lean::get_utf8_size (util/utf8.h).
   That function returns 0 for a byte it does not accept as a head byte; this
   wrapper reports that case through throw_exception instead of returning 0.
*/
unsigned scanner::get_utf8_size(unsigned char c) {
    // The ::lean:: qualification is required: an unqualified call here would
    // resolve to this member function.
    unsigned r = ::lean::get_utf8_size(c);
    if (r == 0)
        throw_exception("invalid utf-8 head character");
    return r;
}
/** \brief Convert a plain \c char to its \c unsigned \c char value. */
unsigned char to_uchar(char c) {
    const auto byte = static_cast<unsigned char>(c);
    return byte;
}

View file

@ -4,6 +4,6 @@ add_library(util trace.cpp debug.cpp name.cpp name_set.cpp
realpath.cpp script_state.cpp script_exception.cpp rb_map.cpp
lua.cpp luaref.cpp lua_named_param.cpp stackinfo.cpp lean_path.cpp
serializer.cpp lbool.cpp thread_script_state.cpp bitap_fuzzy_search.cpp
init_module.cpp thread.cpp memory_pool.cpp)
init_module.cpp thread.cpp memory_pool.cpp utf8.cpp)
target_link_libraries(util ${LEAN_LIBS})

29
src/util/utf8.cpp Normal file
View file

@ -0,0 +1,29 @@
/*
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
namespace lean {
/** \brief Return true iff \c c has the bit pattern 10xxxxxx (a UTF-8 continuation byte). */
bool is_utf8_next(unsigned char c) {
    return (c >> 6) == 0x2;
}

/**
   \brief Classify \c c as the head byte of a UTF-8 sequence and return the
   sequence length in bytes (1 to 6).

   The byte 0xFF yields 1. Any other byte that is not a head byte
   (10xxxxxx, or 0xFE) yields 0.
*/
unsigned get_utf8_size(unsigned char c) {
    if (c < 0x80) return 1;  // 0xxxxxxx
    if (c < 0xC0) return 0;  // 10xxxxxx: not a head byte
    if (c < 0xE0) return 2;  // 110xxxxx
    if (c < 0xF0) return 3;  // 1110xxxx
    if (c < 0xF8) return 4;  // 11110xxx
    if (c < 0xFC) return 5;  // 111110xx
    if (c < 0xFE) return 6;  // 1111110x
    return c == 0xFF ? 1 : 0;
}
}

11
src/util/utf8.h Normal file
View file

@ -0,0 +1,11 @@
/*
Copyright (c) 2014 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#pragma once
namespace lean {
/** \brief Return true iff the two most significant bits of \c c are 10, i.e. \c c is a UTF-8 continuation byte. */
bool is_utf8_next(unsigned char c);
/**
   \brief Given the first byte \c c of a UTF-8 encoded character, return the
   length in bytes (1 to 6) of the whole sequence.

   Returns 1 for 0xFF, and 0 when \c c is not accepted as a head byte
   (a continuation byte 10xxxxxx, or 0xFE).
*/
unsigned get_utf8_size(unsigned char c);
}