refactor(frontends/lean/scanner): remove dependency to seekg and unget

methods

It is not safe to use seekg for textual files. Here is a fragment from a
C++ manual:

seekg() and seekp()

This pair of functions serve respectively to change the position of stream pointers get and put. Both functions are overloaded with two different prototypes:

     seekg ( pos_type position );
     seekp ( pos_type position );

Using this prototype the stream pointer is changed to an absolute position from the beginning of the file. The type required is the same as that returned by functions tellg and tellp.

      seekg ( off_type offset, seekdir direction );
      seekp ( off_type offset, seekdir direction );

Using this prototype, an offset from a concrete point determined by
parameter direction can be specified. It can be:

          ios::beg	offset specified from the beginning of the stream
          ios::cur	offset specified from the current position of the stream pointer
          ios::end	offset specified from the end of the stream

The values of both stream pointers get and put are counted in different
ways for text files than for binary files, since in text mode files some
modifications to the appearance of some special characters can
occur. For that reason it is advisable to use only the first prototype
of seekg and seekp with files opened in text mode and always use
non-modified values returned by tellg or tellp. With binary files, you
can freely use all the implementations for these functions. They should
not have any unexpected behavior.
This commit is contained in:
Leonardo de Moura 2014-09-18 15:24:48 -07:00
parent 243f80231a
commit b3e05a2fe9
3 changed files with 49 additions and 28 deletions

View file

@ -80,12 +80,33 @@ bool is_super_sub_script_alnum_unicode(unsigned u) {
void scanner::next() {
lean_assert(m_curr != EOF);
m_curr = m_stream.get();
m_spos++;
if (m_uskip > 0) {
if (!is_utf8_next(m_curr)) {
throw_exception("invalid utf-8 sequence character");
while (m_spos >= static_cast<int>(m_curr_line.size())) {
if (m_last_line) {
m_curr = EOF;
return;
} else {
m_curr_line.clear();
if (std::getline(m_stream, m_curr_line)) {
m_curr_line.push_back('\n');
m_sline++;
m_spos = 0;
m_upos = 0;
m_curr = m_curr_line[m_spos];
m_uskip = get_utf8_size(m_curr);
m_uskip--;
return;
} else {
m_last_line = true;
m_curr = EOF;
return;
}
}
}
m_curr = m_curr_line[m_spos];
if (m_uskip > 0) {
if (!is_utf8_next(m_curr))
throw_exception("invalid utf-8 sequence character");
m_uskip--;
} else {
m_upos++;
@ -113,7 +134,6 @@ auto scanner::read_string() -> token_kind {
m_buffer.clear();
while (true) {
check_not_eof(end_error_msg);
update_line();
char c = curr();
if (c == '\"') {
next();
@ -155,10 +175,10 @@ auto scanner::read_quoted_symbol() -> token_kind {
bool scanner::is_next_digit() {
lean_assert(curr() != EOF);
char c = m_stream.get();
bool r = std::isdigit(c);
m_stream.unget();
return r;
if (m_spos + 1 < static_cast<int>(m_curr_line.size()))
return std::isdigit(m_curr_line[m_spos+1]);
else
return false;
}
auto scanner::read_number() -> token_kind {
@ -198,7 +218,6 @@ void scanner::read_single_line_comment() {
while (true) {
if (curr() == '\n') {
next();
new_line();
return;
} else if (curr() == EOF) {
return;
@ -218,7 +237,6 @@ bool scanner::consume(char const * str, char const * error_msg) {
if (!str[i])
return true;
check_not_eof(error_msg);
update_line();
if (curr_next() != str[i])
return false;
i++;
@ -244,7 +262,6 @@ void scanner::read_comment_block() {
return;
}
check_not_eof(end_error_msg);
update_line();
next();
}
}
@ -257,7 +274,6 @@ void scanner::read_until(char const * end_str, char const * error_msg) {
m_buffer.clear();
while (true) {
check_not_eof(error_msg);
update_line();
char c = curr_next();
if (c == end_str[0]) {
m_aux_buffer.clear();
@ -267,7 +283,6 @@ void scanner::read_until(char const * end_str, char const * error_msg) {
if (!end_str[i])
return;
check_not_eof(error_msg);
update_line();
c = curr_next();
if (c != end_str[i]) {
m_buffer += m_aux_buffer;
@ -291,14 +306,12 @@ void scanner::move_back(unsigned offset, unsigned u_offset) {
if (offset != 0) {
if (curr() == EOF) {
m_curr = 0;
m_stream.clear();
m_spos--;
m_upos--;
offset--;
u_offset--;
}
if (offset != 0) {
m_stream.seekg(-static_cast<std::streamoff>(offset), std::ios_base::cur);
m_spos -= offset;
m_upos -= u_offset;
}
@ -431,12 +444,9 @@ auto scanner::scan(environment const & env) -> token_kind {
m_pos = m_upos;
m_line = m_sline;
switch (c) {
case ' ': case '\r': case '\t':
case ' ': case '\r': case '\t': case '\n':
next();
break;
case '\n':
next(); new_line();
break;
case '\"':
return read_string();
case '`':
@ -468,12 +478,23 @@ auto scanner::scan(environment const & env) -> token_kind {
}
scanner::scanner(std::istream & strm, char const * strm_name, unsigned line):
m_tokens(nullptr), m_stream(strm), m_spos(0), m_upos(0), m_uskip(0), m_sline(line), m_curr(0), m_pos(0), m_line(line),
m_token_info(nullptr) {
m_tokens(nullptr), m_stream(strm), m_token_info(nullptr) {
m_stream_name = strm_name ? strm_name : "[unknown]";
next();
m_sline = line;
m_line = line;
m_spos = 0;
m_upos = 0;
if (std::getline(m_stream, m_curr_line)) {
m_last_line = false;
m_curr_line.push_back('\n');
m_curr = m_curr_line[m_spos];
m_uskip = get_utf8_size(m_curr);
m_uskip--;
} else {
m_last_line = true;
m_curr = EOF;
m_uskip = 0;
}
}
std::ostream & operator<<(std::ostream & out, scanner::token_kind k) {

View file

@ -27,6 +27,8 @@ protected:
token_table const * m_tokens;
std::istream & m_stream;
std::string m_stream_name;
std::string m_curr_line;
bool m_last_line;
int m_spos; // current position
int m_upos; // current position taking into account utf-8 encoding
@ -47,8 +49,6 @@ protected:
void next();
char curr() const { return m_curr; }
char curr_next() { char c = curr(); next(); return c; }
void new_line() { m_sline++; m_spos = 0; m_upos = 0; }
void update_line() { if (curr() == '\n') new_line(); }
void check_not_eof(char const * error_msg);
bool is_next_digit();
bool is_next_id_rest();

View file

@ -11,7 +11,7 @@ rfl
-- AFTER REMOVE 8&9
-- BEGININFO STALE NAY
-- ENDINFO
-- BEGININFO NAY
-- BEGININFO STALE NAY
-- ENDINFO
-- BEGINWAIT
-- ENDWAIT