Reviewers: marja
Message:
Committed patchset #1 manually as r17819 (presubmit successful).
Description:
Experimental parser: make string rules look more like ecma spec
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17819
Please review this at https://codereview.chromium.org/75143002/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+26, -21 lines):
M src/lexer/lexer_py.re
M tools/lexer_generator/regex_lexer.py
Index: src/lexer/lexer_py.re
diff --git a/src/lexer/lexer_py.re b/src/lexer/lexer_py.re
index 8afbf9ab8507a538b97419e8dc6f347ae569ce80..f34b575fcc62ddea68a7db72c6df26b2baa166e3 100644
--- a/src/lexer/lexer_py.re
+++ b/src/lexer/lexer_py.re
@@ -32,11 +32,14 @@ identifier_char = [0-9:identifier_start:];
line_terminator = [\n\r];
digit = [0-9];
hex_digit = [0-9a-fA-F];
+single_escape_char = ['"\\bfnrtva];
maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
number =
/0[xX][:hex_digit:]+/ | (
/\.[:digit:]+/ maybe_exponent |
/[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
+# TODO this is incomplete/incorrect
+line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
# grammar is
# regex <action_on_state_entry|action_on_match|transition>
@@ -104,7 +107,6 @@ number =
">" <|push_token(GT)|>
number <|push_token(NUMBER)|>
-# is this necessary?
number identifier_char <|push_token(ILLEGAL)|>
"(" <|push_token(LPAREN)|>
@@ -188,7 +190,7 @@ whitespace <|skip|>
"yield" <|push_token(YIELD)|>
identifier_start <|push_token(IDENTIFIER)|Identifier>
-/\\u[0-9a-fA-F]{4}/ <{
+/\\u[:hex_digit:]{4}/ <{
if (V8_UNLIKELY(!ValidIdentifierStart())) {
goto default_action;
}
@@ -198,28 +200,31 @@ eof <|terminate|>
default_action <push_token(ILLEGAL)>
<<DoubleQuoteString>>
-/\\\n\r?/ <||continue>
-/\\\r\n?/ <||continue>
-/\\[xX][:hex_digit:]{2}/ <||continue>
-/\\[^xX\r\n]/ <||continue>
-/\n|\r/ <|push_token(ILLEGAL)|>
-"\"" <|push_token(STRING)|>
-eof <|terminate_illegal|>
-catch_all <||continue>
+"\\" line_terminator_sequence <||continue>
+/\\[xX][:hex_digit:]{2}/ <||continue>
+/\\[u][:hex_digit:]{4}/ <||continue>
+/\\[^xXu\r\n]/ <||continue>
+"\\" <|push_token(ILLEGAL)|>
+/\n|\r/ <|push_token(ILLEGAL)|>
+"\"" <|push_token(STRING)|>
+eof <|terminate_illegal|>
+catch_all <||continue>
<<SingleQuoteString>>
-/\\\n\r?/ <||continue>
-/\\\r\n?/ <||continue>
-/\\[xX][:hex_digit:]{2}/ <||continue>
-/\\[^xX\r\n]/ <||continue>
-/\n|\r/ <|push_token(ILLEGAL)|>
-"'" <|push_token(STRING)|>
-eof <|terminate_illegal|>
-catch_all <||continue>
+# TODO subgraph for '\'
+"\\" line_terminator_sequence <||continue>
+/\\[xX][:hex_digit:]{2}/ <||continue>
+/\\[u][:hex_digit:]{4}/ <||continue>
+/\\[^xXu\r\n]/ <||continue>
+"\\" <|push_token(ILLEGAL)|>
+/\n|\r/ <|push_token(ILLEGAL)|>
+"'" <|push_token(STRING)|>
+eof <|terminate_illegal|>
+catch_all <||continue>
<<Identifier>>
identifier_char <|push_token(IDENTIFIER)|continue>
-/\\u[0-9a-fA-F]{4}/ <{
+/\\u[:hex_digit:]{4}/ <{
if (V8_UNLIKELY(!ValidIdentifierPart())) {
goto default_action;
}
Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index b653fdd752054deb3f3e920a0f15ef1f4e6b2afa..ceab99dc79a7ed19fd9435d4941fa6e7fae2ce60 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -69,7 +69,7 @@ class RegexLexer:
('repeat','exclusive'),
)
- __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+ __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
def t_ESCAPED_LITERAL(self, t):
r'\\.'
@@ -115,7 +115,7 @@ class RegexLexer:
t.value = RegexLexer.__escaped_class_literals[t.value]
return t
- t_class_CLASS_LITERAL = r'[\w *$_+\'/]'
+ t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
def t_REPEAT_BEGIN(self, t):
r'\{'
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.