Revision: 17819
Author: [email protected]
Date: Mon Nov 18 10:01:13 2013 UTC
Log: Experimental parser: make string rules look more like ecma spec
[email protected]
BUG=
Review URL: https://codereview.chromium.org/75143002
http://code.google.com/p/v8/source/detail?r=17819
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 08:26:36 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 10:01:13 2013 UTC
@@ -32,11 +32,14 @@
line_terminator = [\n\r];
digit = [0-9];
hex_digit = [0-9a-fA-F];
+single_escape_char = ['"\\bfnrtva];
maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
number =
/0[xX][:hex_digit:]+/ | (
/\.[:digit:]+/ maybe_exponent |
/[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
+# TODO this is incomplete/incorrect
+line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
# grammar is
# regex <action_on_state_entry|action_on_match|transition>
@@ -104,7 +107,6 @@
">" <|push_token(GT)|>
number <|push_token(NUMBER)|>
-# is this necessary?
number identifier_char <|push_token(ILLEGAL)|>
"(" <|push_token(LPAREN)|>
@@ -188,7 +190,7 @@
"yield" <|push_token(YIELD)|>
identifier_start <|push_token(IDENTIFIER)|Identifier>
-/\\u[0-9a-fA-F]{4}/ <{
+/\\u[:hex_digit:]{4}/ <{
if (V8_UNLIKELY(!ValidIdentifierStart())) {
goto default_action;
}
@@ -198,28 +200,31 @@
default_action <push_token(ILLEGAL)>
<<DoubleQuoteString>>
-/\\\n\r?/ <||continue>
-/\\\r\n?/ <||continue>
-/\\[xX][:hex_digit:]{2}/ <||continue>
-/\\[^xX\r\n]/ <||continue>
-/\n|\r/ <|push_token(ILLEGAL)|>
-"\"" <|push_token(STRING)|>
-eof <|terminate_illegal|>
-catch_all <||continue>
+"\\" line_terminator_sequence <||continue>
+/\\[xX][:hex_digit:]{2}/ <||continue>
+/\\[u][:hex_digit:]{4}/ <||continue>
+/\\[^xXu\r\n]/ <||continue>
+"\\" <|push_token(ILLEGAL)|>
+/\n|\r/ <|push_token(ILLEGAL)|>
+"\"" <|push_token(STRING)|>
+eof <|terminate_illegal|>
+catch_all <||continue>
<<SingleQuoteString>>
-/\\\n\r?/ <||continue>
-/\\\r\n?/ <||continue>
-/\\[xX][:hex_digit:]{2}/ <||continue>
-/\\[^xX\r\n]/ <||continue>
-/\n|\r/ <|push_token(ILLEGAL)|>
-"'" <|push_token(STRING)|>
-eof <|terminate_illegal|>
-catch_all <||continue>
+# TODO subgraph for '\'
+"\\" line_terminator_sequence <||continue>
+/\\[xX][:hex_digit:]{2}/ <||continue>
+/\\[u][:hex_digit:]{4}/ <||continue>
+/\\[^xXu\r\n]/ <||continue>
+"\\" <|push_token(ILLEGAL)|>
+/\n|\r/ <|push_token(ILLEGAL)|>
+"'" <|push_token(STRING)|>
+eof <|terminate_illegal|>
+catch_all <||continue>
<<Identifier>>
identifier_char <|push_token(IDENTIFIER)|continue>
-/\\u[0-9a-fA-F]{4}/ <{
+/\\u[:hex_digit:]{4}/ <{
if (V8_UNLIKELY(!ValidIdentifierPart())) {
goto default_action;
}
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Fri Nov 15 15:32:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Mon Nov 18 10:01:13 2013 UTC
@@ -69,7 +69,7 @@
('repeat','exclusive'),
)
- __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+ __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
def t_ESCAPED_LITERAL(self, t):
r'\\.'
@@ -115,7 +115,7 @@
t.value = RegexLexer.__escaped_class_literals[t.value]
return t
- t_class_CLASS_LITERAL = r'[\w *$_+\'/]'
+ t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
def t_REPEAT_BEGIN(self, t):
r'\{'
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.