[v8-dev] Experimental parser: make string rules look more like ecma spec (issue 75143002)

dcarney Mon, 18 Nov 2013 02:02:03 -0800

Reviewers: marja,

Message:
Committed patchset #1 manually as r17819 (presubmit successful).


Description:
Experimental parser: make string rules look more like ecma spec

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17819

Please review this at https://codereview.chromium.org/75143002/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+26, -21 lines):
  M src/lexer/lexer_py.re
  M tools/lexer_generator/regex_lexer.py


Index: src/lexer/lexer_py.re
diff --git a/src/lexer/lexer_py.re b/src/lexer/lexer_py.re

index 8afbf9ab8507a538b97419e8dc6f347ae569ce80..f34b575fcc62ddea68a7db72c6df26b2baa166e3 100644

--- a/src/lexer/lexer_py.re
+++ b/src/lexer/lexer_py.re
@@ -32,11 +32,14 @@ identifier_char = [0-9:identifier_start:];
 line_terminator = [\n\r];
 digit = [0-9];
 hex_digit = [0-9a-fA-F];
+single_escape_char = ['"\\bfnrtva];
 maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
 number =
   /0[xX][:hex_digit:]+/ | (
   /\.[:digit:]+/ maybe_exponent |
   /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
+# TODO this is incomplete/incorrect
+line_terminator_sequence = (/\n\r?/)|(/\r\n?/);

 # grammar is
 #   regex <action_on_state_entry|action_on_match|transition>
@@ -104,7 +107,6 @@ number =
 ">"           <|push_token(GT)|>

 number        <|push_token(NUMBER)|>
-# is this necessary?
 number identifier_char   <|push_token(ILLEGAL)|>

 "("           <|push_token(LPAREN)|>
@@ -188,7 +190,7 @@ whitespace        <|skip|>
 "yield"       <|push_token(YIELD)|>

 identifier_start <|push_token(IDENTIFIER)|Identifier>
-/\\u[0-9a-fA-F]{4}/ <{
+/\\u[:hex_digit:]{4}/ <{
   if (V8_UNLIKELY(!ValidIdentifierStart())) {
     goto default_action;
   }
@@ -198,28 +200,31 @@ eof             <|terminate|>
 default_action  <push_token(ILLEGAL)>

 <<DoubleQuoteString>>
-/\\\n\r?/ <||continue>
-/\\\r\n?/ <||continue>
-/\\[xX][:hex_digit:]{2}/ <||continue>
-/\\[^xX\r\n]/     <||continue>
-/\n|\r/   <|push_token(ILLEGAL)|>
-"\""      <|push_token(STRING)|>
-eof       <|terminate_illegal|>
-catch_all <||continue>
+"\\" line_terminator_sequence <||continue>
+/\\[xX][:hex_digit:]{2}/      <||continue>
+/\\[u][:hex_digit:]{4}/       <||continue>
+/\\[^xXu\r\n]/                <||continue>
+"\\"                          <|push_token(ILLEGAL)|>
+/\n|\r/                       <|push_token(ILLEGAL)|>
+"\""                          <|push_token(STRING)|>
+eof                           <|terminate_illegal|>
+catch_all                     <||continue>

 <<SingleQuoteString>>
-/\\\n\r?/ <||continue>
-/\\\r\n?/ <||continue>
-/\\[xX][:hex_digit:]{2}/ <||continue>
-/\\[^xX\r\n]/     <||continue>
-/\n|\r/   <|push_token(ILLEGAL)|>
-"'"       <|push_token(STRING)|>
-eof       <|terminate_illegal|>
-catch_all <||continue>
+# TODO subgraph for '\'
+"\\" line_terminator_sequence <||continue>
+/\\[xX][:hex_digit:]{2}/      <||continue>
+/\\[u][:hex_digit:]{4}/       <||continue>
+/\\[^xXu\r\n]/                <||continue>
+"\\"                          <|push_token(ILLEGAL)|>
+/\n|\r/                       <|push_token(ILLEGAL)|>
+"'"                           <|push_token(STRING)|>
+eof                           <|terminate_illegal|>
+catch_all                     <||continue>

 <<Identifier>>
 identifier_char <|push_token(IDENTIFIER)|continue>
-/\\u[0-9a-fA-F]{4}/ <{
+/\\u[:hex_digit:]{4}/ <{
   if (V8_UNLIKELY(!ValidIdentifierPart())) {
     goto default_action;
   }
Index: tools/lexer_generator/regex_lexer.py

diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py

index b653fdd752054deb3f3e920a0f15ef1f4e6b2afa..ceab99dc79a7ed19fd9435d4941fa6e7fae2ce60 100644

--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -69,7 +69,7 @@ class RegexLexer:
     ('repeat','exclusive'),
   )

-  __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+  __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")

   def t_ESCAPED_LITERAL(self, t):
     r'\\.'
@@ -115,7 +115,7 @@ class RegexLexer:
     t.value = RegexLexer.__escaped_class_literals[t.value]
     return t

-  t_class_CLASS_LITERAL = r'[\w *$_+\'/]'
+  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'

   def t_REPEAT_BEGIN(self, t):
     r'\{'


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

---
You received this message because you are subscribed to the Google Groups "v8-dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

[v8-dev] Experimental parser: make string rules look more like ecma spec (issue 75143002)

Reply via email to