Reviewers: dcarney,
Message:
Committed patchset #1 manually as r17857.
Description:
Experimental lexer generator: Misc fixes.
- handle eof inside a multiline comment
- other eof fixes (check the cursor against the buffer end)
- rename eof -> eos
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17857
Please review this at https://codereview.chromium.org/64023004/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+27, -21 lines):
M src/lexer/experimental-scanner.cc
M src/lexer/lexer_py.re
M tools/lexer_generator/code_generator.jinja
M tools/lexer_generator/transition_keys.py
Index: src/lexer/experimental-scanner.cc
diff --git a/src/lexer/experimental-scanner.cc b/src/lexer/experimental-scanner.cc
index 312bb7b3839fe37aa215d72975a3b4eb6ac3985a..0e797b2a11c30f2fcc492a78879d7fc95b5f30fb 100644
--- a/src/lexer/experimental-scanner.cc
+++ b/src/lexer/experimental-scanner.cc
@@ -51,7 +51,7 @@ const byte* ReadFile(const char* name, Isolate* isolate,
*size = file_size * repeat;
- byte* chars = new byte[*size + 1];
+ byte* chars = new byte[*size];
for (int i = 0; i < file_size;) {
int read = static_cast<int>(fread(&chars[i], 1, file_size - i, file));
i += read;
@@ -61,7 +61,6 @@ const byte* ReadFile(const char* name, Isolate* isolate,
for (int i = file_size; i < *size; i++) {
chars[i] = chars[i - file_size];
}
- chars[*size] = 0;
return chars;
}
Index: src/lexer/lexer_py.re
diff --git a/src/lexer/lexer_py.re b/src/lexer/lexer_py.re
index f5a0713bfcedcf42accf852492a4211e71474ccb..195d2fb9464243dba3261ac126c7fac27fe3be8e 100644
--- a/src/lexer/lexer_py.re
+++ b/src/lexer/lexer_py.re
@@ -40,7 +40,7 @@ number =
/[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
# TODO this is incomplete/incorrect
line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
-eof = [:eof:];
+eos = [:eos:];
# grammar is
# regex <action_on_state_entry|action_on_match|transition>
@@ -191,7 +191,7 @@ identifier_start <|push_token(IDENTIFIER)|Identifier>
}
}|push_token(IDENTIFIER)|Identifier>
-eof <|terminate|>
+eos <|terminate|>
default_action <push_token_and_go_forward(ILLEGAL)>
<<DoubleQuoteString>>
@@ -202,7 +202,7 @@ default_action <push_token_and_go_forward(ILLEGAL)>
"\\" <|push_token(ILLEGAL)|>
/\n|\r/ <|push_token(ILLEGAL)|>
"\"" <|push_token(STRING)|>
-eof <|terminate_illegal|>
+eos <|terminate_illegal|>
catch_all <||continue>
<<SingleQuoteString>>
@@ -214,7 +214,7 @@ catch_all <||continue>
"\\" <|push_token(ILLEGAL)|>
/\n|\r/ <|push_token(ILLEGAL)|>
"'" <|push_token(STRING)|>
-eof <|terminate_illegal|>
+eos <|terminate_illegal|>
catch_all <||continue>
<<Identifier>>
@@ -227,7 +227,7 @@ identifier_char <|push_token(IDENTIFIER)|continue>
<<SingleLineComment>>
line_terminator <|push_line_terminator|>
-eof <|skip_and_terminate|>
+eos <|skip_and_terminate|>
catch_all <||continue>
<<MultiLineComment>>
@@ -235,5 +235,5 @@ catch_all <||continue>
# TODO find a way to generate the below rule
/\*+[^\/*]/ <||continue>
line_terminator <push_line_terminator||continue>
-eof <|{start_ = marker_; BACKWARD(); PUSH_TOKEN(Token::ILLEGAL);}|>
+eos <|{start_ = marker_; PUSH_TOKEN(Token::ILLEGAL);}|>
catch_all <||continue>
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja
index acc2262b0c513ac435f2ed3af685142a96b79a71..ed780f98d4b5d696a876b19c52b90e1dccf1e688 100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -1,6 +1,5 @@
#include "lexer/even-more-experimental-scanner.h"
-
{# TODO implement CLASS checks #}
{%- macro do_key(key) -%}
{%- for r in key -%}
@@ -16,8 +15,10 @@
({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
{%- endif -%}
{%- elif r[0] == 'CLASS' -%}
- {%- if r[1] == 'eof' -%}
- (yych == 0 && cursor_ == buffer_end_)
+ {%- if r[1] == 'eos' -%}
+ (yych == 0 && cursor_ >= buffer_end_)
+ {%- elif r[1] == 'zero' -%}
+ (yych == 0 && cursor_ < buffer_end_)
{%- else -%}
false
{%- endif -%}
@@ -142,7 +143,8 @@
}
#define PUSH_EOS() { \
- --cursor_; \
+ --start_; \
+ cursor_ -= 2; \
PUSH_TOKEN(Token::EOS); \
}
@@ -152,11 +154,13 @@
}
#define FORWARD() { \
- yych = *(++cursor_); \
+ if (++cursor_ >= buffer_end_) yych = 0; \
+ else yych = *(cursor_); \
}
#define BACKWARD() { \
- yych = *(--cursor_); \
+ if (--cursor_ >= buffer_end_) yych = 0; \
+ else yych = *(cursor_); \
}
#define SKIP() { \
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index 4e650969f1ae8d866c6b99c32d433d46b6d14b92..92d50b96334301c3cce92a65ebc66fc5fb3efc88 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -30,13 +30,14 @@ from string import printable
class TransitionKey:
__class_bounds = {
- "latin_1" : (0, 255),
+ "latin_1" : (1, 255),
# These are not "real" ranges; they just need to be separate.
"whitespace" : (256, 256),
"literal" : (257, 257),
- "eof" : (258, 258),
+ "eos" : (258, 258),
+ "zero" : (259, 259),
}
- __lower_bound = 0
+ __lower_bound = 1
__upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]),
__class_bounds.items(), 0)
__cached_keys = {}
@@ -135,11 +136,13 @@ class TransitionKey:
elif key == 'CHARACTER_CLASS':
class_name = graph[1]
if class_name == 'ws':
- ranges.append(TransitionKey.__class_bounds["whitespace"])
+ ranges.append(TransitionKey.__class_bounds['whitespace'])
elif class_name == 'lit':
- ranges.append(TransitionKey.__class_bounds["literal"])
- elif class_name == 'eof':
- ranges.append(TransitionKey.__class_bounds["eof"])
+ ranges.append(TransitionKey.__class_bounds['literal'])
+ elif class_name == 'eos':
+ ranges.append(TransitionKey.__class_bounds['eos'])
+ elif class_name == 'zero':
+ ranges.append(TransitionKey.__class_bounds['zero'])
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.