Revision: 17261
Author:   [email protected]
Date:     Fri Oct 18 09:01:34 2013 UTC
Log:      Experimental parser: Small fixes.

- Endless loops with erroneous strings (we still don't recover like
the baseline scanner does).
- Identifier cannot start right after a number.
- Efficiency: reserve a reasonably big buffer for tokens.
- Style fixes

BUG=
[email protected]

Review URL: https://codereview.chromium.org/28643002
http://code.google.com/p/v8/source/detail?r=17261

Modified:
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/src/lexer/lexer.re

=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Thu Oct 17 13:56:53 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Oct 18 09:01:34 2013 UTC
@@ -109,6 +109,13 @@
   scanner_ = new PushScanner(this);
   if (read_all_at_once_) {
     source_ = ReadFile(fname, NULL, &length_);
+    token_.resize(1500);
+    beg_.resize(1500);
+    end_.resize(1500);
+  } else {
+    token_.resize(BUFFER_SIZE);
+    beg_.resize(BUFFER_SIZE);
+    end_.resize(BUFFER_SIZE);
   }
 }

@@ -134,16 +141,13 @@


 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
-  while (current_ == fetched_) {
+  while (current_ == fetched_)
     FillTokens();
-  }
   *beg_pos = beg_[current_];
   *end_pos = end_[current_];
   Token::Value res = token_[current_];
-  if (token_[current_] != Token::Token::EOS &&
-      token_[current_] != Token::ILLEGAL) {
+  if (res != Token::Token::EOS)
     current_++;
-  }
   return res;
 }

@@ -151,14 +155,13 @@
 void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
   if (token == Token::EOS) end--;
   if (fetched_ >= token_.size()) {
-    token_.push_back(token);
-    beg_.push_back(beg);
-    end_.push_back(end);
-  } else {
-    token_[fetched_] = token;
-    beg_[fetched_] = beg;
-    end_[fetched_] = end;
+    token_.resize(token_.size() * 2);
+    beg_.resize(beg_.size() * 2);
+    end_.resize(end_.size() * 2);
   }
+  token_[fetched_] = token;
+  beg_[fetched_] = beg;
+  end_[fetched_] = end;
   fetched_++;
 }

@@ -226,6 +229,7 @@
           return 1;
         }
       }
+      printf("No of tokens: %d\n", experimental_tokens.size());
       printf("Baseline: %f ms\nExperimental %f ms\n",
              baseline_time.InMillisecondsF(),
              experimental_time.InMillisecondsF());
=======================================
--- /branches/experimental/parser/src/lexer/lexer.re Thu Oct 17 13:56:53 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.re Fri Oct 18 09:01:34 2013 UTC
@@ -27,18 +27,17 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-#include <fcntl.h>
 #include <stdio.h>
-#include <stddef.h>
 #include <stdlib.h>
 #include <string.h>

 // TODO:
 // - SpiderMonkey compatibility hack: "  --> something" is treated
 //   as a single line comment.
-// - An identifier cannot start immediately after a number.
 // - Run-time lexing modifications: harmony number literals, keywords depending
 //   on harmony_modules, harmony_scoping
+// - Escaping the string literals (like the baseline does)
+// - Error recovery after illegal tokens.

 enum Condition {
   kConditionNormal,
@@ -79,7 +78,7 @@
 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
 #define PUSH_LINE_TERMINATOR() { SKIP(); }
-#define TERMINATE_ILLEGAL() { return 1; }
+#define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }

 PushScanner::PushScanner(ExperimentalScanner* sink)
 : eof_(false),
@@ -167,20 +166,14 @@
   memcpy(limit_, input, input_size);
   limit_ += input_size;

-  // The scanner starts here
-#define YYLIMIT     limit_
-#define YYCURSOR    cursor_
-#define YYMARKER    marker_
-#define YYCTYPE     uint8_t
+#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
+#define YYFILL(n)             { goto fill;        }

-#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
-#define YYFILL(n)    { goto fill;        }
+#define YYGETSTATE()          state_
+#define YYSETSTATE(x)         { state_ = (x); }

-#define YYGETSTATE()  state_
-#define YYSETSTATE(x)  { state_ = (x); }
-
-#define YYGETCONDITION() condition_
-#define YYSETCONDITION(x) { condition_ = (x); }
+#define YYGETCONDITION()      condition_
+#define YYSETCONDITION(x)     { condition_ = (x); }

 start_:
   if (FLAG_trace_lexer) {
@@ -188,10 +181,14 @@
   }

   /*!re2c
-    re2c:indent:top   = 1;
+    re2c:indent:top = 1;
     re2c:yych:conversion = 0;
-    re2c:condenumprefix     = kCondition;
-    re2c:define:YYCONDTYPE    = Condition;
+    re2c:condenumprefix = kCondition;
+    re2c:define:YYCONDTYPE = Condition;
+    re2c:define:YYCURSOR = cursor_;
+    re2c:define:YYCTYPE = uint8_t;
+    re2c:define:YYLIMIT = limit_;
+    re2c:define:YYMARKER = marker_;

     eof = "\000";
     any = [\000-\377];
@@ -281,9 +278,9 @@
     <Normal> "<"           { PUSH_TOKEN(Token::LT); }
     <Normal> ">"           { PUSH_TOKEN(Token::GT); }

-    <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); }
-    <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
-    <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
+    <Normal> '0x' hex_digit+ not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
+    <Normal> "." digit+ maybe_exponent not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
+    <Normal> digit+ ("." digit+)? maybe_exponent not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }

     <Normal> "("           { PUSH_TOKEN(Token::LPAREN); }
     <Normal> ")"           { PUSH_TOKEN(Token::RPAREN); }
@@ -312,7 +309,7 @@
     <Normal> "~"           { PUSH_TOKEN(Token::BIT_NOT); }
     <Normal> ","           { PUSH_TOKEN(Token::COMMA); }

-    <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
+    <Normal> line_terminator  { PUSH_LINE_TERMINATOR(); }
     <Normal> whitespace       { SKIP(); }

     <Normal> ["]           :=> DoubleQuoteString
@@ -325,10 +322,14 @@

     <DoubleQuoteString> "\\\""  { goto yy0; }
     <DoubleQuoteString> '"'     { PUSH_TOKEN(Token::STRING);}
+    <DoubleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
+    <DoubleQuoteString> eof     { TERMINATE_ILLEGAL(); }
     <DoubleQuoteString> any     { goto yy0; }

     <SingleQuoteString> "\\'"   { goto yy0; }
     <SingleQuoteString> "'"     { PUSH_TOKEN(Token::STRING);}
+    <SingleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
+    <SingleQuoteString> eof     { TERMINATE_ILLEGAL(); }
     <SingleQuoteString> any     { goto yy0; }

     <Identifier> identifier_char+  { goto yy0; }
@@ -356,7 +357,7 @@
         "  unfinished token size = %d\n",
         state_,
         unfinished_size);
-    if(0 < unfinished_size && start_ < limit_) {
+    if (0 < unfinished_size && start_ < limit_) {
       printf("  unfinished token is: ");
       fwrite(start_, 1, cursor_ - start_, stdout);
       putchar('\n');

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to