Revision: 17200
Author:   [email protected]
Date:     Mon Oct 14 14:35:38 2013 UTC
Log: Invoke generated lexer along with baseline lexer to compare results.

BUG=
[email protected]

Review URL: https://chromiumcodereview.appspot.com/26764004
http://code.google.com/p/v8/source/detail?r=17200

Added:
 /branches/experimental/parser/src/lexer/lexer.h
Modified:
 /branches/experimental/parser/src/flag-definitions.h
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/src/lexer/lexer.gyp
 /branches/experimental/parser/src/lexer/lexer.re

=======================================
--- /dev/null
+++ /branches/experimental/parser/src/lexer/lexer.h Mon Oct 14 14:35:38 2013 UTC
@@ -0,0 +1,54 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_LEXER_LEXER_H
+#define V8_LEXER_LEXER_H
+
+#include "token.h"
+#include "flags.h"
+
+class PushScanner;
+
+class ExperimentalScanner {
+ public:
+  explicit ExperimentalScanner(const char* fname);
+  ~ExperimentalScanner();
+  v8::internal::Token::Value Next(int* beg_pos, int* end_pos);
+  void Record(v8::internal::Token::Value token, int beg_pos, int end_pos);
+ private:
+  void FillTokens();
+  static const int BUFFER_SIZE = 256;
+  v8::internal::Token::Value token_[BUFFER_SIZE];
+  int beg_[BUFFER_SIZE];
+  int end_[BUFFER_SIZE];
+  int current_;
+  int fetched_;
+  FILE* file_;
+  PushScanner* scanner_;
+};
+
+#endif  // V8_LEXER_LEXER_H
=======================================
--- /branches/experimental/parser/src/flag-definitions.h Fri Oct 4 07:50:36 2013 UTC
+++ /branches/experimental/parser/src/flag-definitions.h Mon Oct 14 14:35:38 2013 UTC
@@ -572,6 +572,8 @@
 DEFINE_bool(allow_natives_syntax, false, "allow natives syntax")
 DEFINE_bool(trace_parse, false, "trace parsing and preparsing")

+DEFINE_bool(trace_lexer, false, "trace lexing")
+
 // simulator-arm.cc and simulator-mips.cc
 DEFINE_bool(trace_sim, false, "Trace simulator execution")
 DEFINE_bool(check_icache, false,
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 14 11:35:31 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 14 14:35:38 2013 UTC
@@ -42,6 +42,7 @@
 #include "scopeinfo.h"
 #include "string-stream.h"
 #include "scanner.h"
+#include "lexer.h"

 using namespace v8::internal;

@@ -84,12 +85,11 @@
     delete unicode_cache_;
   }

-  Token::Value Next() {
-    return scanner_->Next();
-  }
-
-  Scanner::Location Location() {
-    return scanner_->location();
+  Token::Value Next(int* beg_pos, int* end_pos) {
+    Token::Value res = scanner_->Next();
+    *beg_pos = scanner_->location().beg_pos;
+    *end_pos = scanner_->location().end_pos;
+    return res;
   }

  private:
@@ -99,14 +99,6 @@
 };


-class ExperimentalScanner {
-  explicit ExperimentalScanner(const char* fname);
-  ~ExperimentalScanner();
-  Token::Value Next();
-  Scanner::Location Location();
-};
-
-
 int main(int argc, char* argv[]) {
   v8::V8::InitializeICU();
   v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
@@ -121,13 +113,28 @@
       Isolate* isolate = Isolate::Current();
       HandleScope handle_scope(isolate);
       BaselineScanner baseline(argv[1], isolate);
-      Token::Value current;
-      while ((current = baseline.Next()) != Token::EOS) {
-        printf("%11s => (%d, %d)\n",
-               Token::Name(current),
-               baseline.Location().beg_pos,
-               baseline.Location().end_pos);
-      }
+      ExperimentalScanner experimental(argv[1]);
+      Token::Value expected_token, actual_token;
+      int expected_beg, expected_end, actual_beg, actual_end;
+      do {
+        expected_token = baseline.Next(&expected_beg, &expected_end);
+        actual_token = experimental.Next(&actual_beg, &actual_end);
+        printf("=> %11s at (%d, %d)\n",
+               Token::Name(actual_token),
+               actual_beg, actual_end);
+        if (expected_token != actual_token ||
+            expected_beg != actual_beg ||
+            expected_end != actual_end) {
+          printf("MISMATCH:\n");
+          printf("Expected: %s at (%d, %d)\n",
+                 Token::Name(expected_token),
+                 expected_beg, expected_end);
+          printf("Actual:   %s at (%d, %d)\n",
+                 Token::Name(actual_token),
+                 actual_beg, actual_end);
+          return 1;
+        }
+      } while (actual_token != Token::EOS);
     }
   }
   v8::V8::Dispose();
=======================================
--- /branches/experimental/parser/src/lexer/lexer.gyp Mon Oct 14 11:35:31 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.gyp Mon Oct 14 14:35:38 2013 UTC
@@ -41,10 +41,12 @@
       # Generated source files need this explicitly:
       'include_dirs+': [
         '../../src',
+        '../../src/lexer',
       ],
       'sources': [
         'lexer-shell.cc',
-# TODO: fix compiler errors and add '<(SHARED_INTERMEDIATE_DIR)/lexer.cc',
+        'lexer.h',
+        '<(SHARED_INTERMEDIATE_DIR)/lexer.cc',
       ],
       'conditions': [
         ['v8_enable_i18n_support==1', {
=======================================
--- /branches/experimental/parser/src/lexer/lexer.re Mon Oct 14 12:15:07 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.re Mon Oct 14 14:35:38 2013 UTC
@@ -4,7 +4,6 @@
 #include <stdlib.h>
 #include <string.h>

-
 /*
 TODO:
 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment.
@@ -13,7 +12,16 @@
 */


-/*!types:re2c */
+
+enum Condition {
+    EConditionNormal,
+    EConditionDoubleQuoteString,
+    EConditionSingleQuoteString,
+    EConditionIdentifier,
+    EConditionSingleLineComment,
+    EConditionMultiLineComment,
+    EConditionHtmlComment
+};

 #if defined(WIN32)

@@ -36,102 +44,17 @@

 #endif

+#include "lexer.h"
+using namespace v8::internal;
+
 // ----------------------------------------------------------------------
-#define PUSH_EOS(T) { printf("got eos\n"); }
-#define PUSH_TOKEN(T) { \
-        printf("got token %s (%d)\n", tokenNames[T], T); \
-         SKIP(); }
-#define PUSH_STRING() { \
-        printf("got string\n"); \
-        size_t tokenSize = cursor-start; \
-        fwrite(start, tokenSize, 1, stdout); \
-        printf("\n"); \
-        SKIP(); }
-#define PUSH_NUMBER() { \
-        printf("got number\n"); \
-        size_t tokenSize = cursor-start; \
-        fwrite(start, tokenSize, 1, stdout); \
-        printf("\n"); \
-        SKIP(); }
-#define PUSH_IDENTIFIER() { \
-        --cursor; \
-        printf("got identifier: "); \
-        size_t tokenSize = cursor-start; \
-        fwrite(start, tokenSize, 1, stdout); \
-        printf("\n"); \
-        SKIP(); }
-#define PUSH_LINE_TERMINATOR() { printf("got line terminator\n"); SKIP();}
+#define PUSH_TOKEN(T) { send(T); SKIP(); }
+#define PUSH_LINE_TERMINATOR() { SKIP(); }
 #define TERMINATE_ILLEGAL() { return 1; }
-
-#define TOKENS \
-        TOK(EOS) \
-        TOK(LPAREN) \
-        TOK(RPAREN) \
-        TOK(LBRACK) \
-        TOK(RBRACK) \
-        TOK(LBRACE) \
-        TOK(RBRACE) \
-        TOK(COLON) \
-        TOK(SEMICOLON) \
-        TOK(PERIOD) \
-        TOK(CONDITIONAL) \
-        TOK(INC) \
-        TOK(DEC) \
-        TOK(ASSIGN) \
-        TOK(ASSIGN_BIT_OR) \
-        TOK(ASSIGN_BIT_XOR) \
-        TOK(ASSIGN_BIT_AND) \
-        TOK(ASSIGN_SHL) \
-        TOK(ASSIGN_SAR) \
-        TOK(ASSIGN_SHR) \
-        TOK(ASSIGN_ADD) \
-        TOK(ASSIGN_SUB) \
-        TOK(ASSIGN_MUL) \
-        TOK(ASSIGN_DIV) \
-        TOK(ASSIGN_MOD) \
-        TOK(COMMA) \
-        TOK(OR) \
-        TOK(AND) \
-        TOK(BIT_OR) \
-        TOK(BIT_XOR) \
-        TOK(BIT_AND) \
-        TOK(SHL) \
-        TOK(SAR) \
-        TOK(ADD) \
-        TOK(SUB) \
-        TOK(MUL) \
-        TOK(DIV) \
-        TOK(MOD) \
-        TOK(EQ) \
-        TOK(NE) \
-        TOK(EQ_STRICT) \
-        TOK(NE_STRICT) \
-        TOK(LT) \
-        TOK(GT) \
-        TOK(LTE) \
-        TOK(GTE) \
-        TOK(NOT) \
-        TOK(BIT_NOT) \
-
-// ----------------------------------------------------------------------
-static const char *tokenNames[] =
-{
-    #define TOK(x) #x,
-        TOKENS
-    #undef TOK
-};

 // ----------------------------------------------------------------------
 class PushScanner
 {
-public:
-
-    enum Token
-    {
-        #define TOK(x) x,
-            TOKENS
-        #undef TOK
-    };

 private:

@@ -143,6 +66,7 @@
     uint8_t     *start;
     uint8_t     *cursor;
     uint8_t     *marker;
+    int real_start;

     uint8_t     *buffer;
     uint8_t     *bufferEnd;
@@ -150,10 +74,12 @@
     uint8_t     yych;
     uint32_t    yyaccept;

+    ExperimentalScanner* sink_;
+
 public:

     // ----------------------------------------------------------------------
-    PushScanner()
+    PushScanner(ExperimentalScanner* sink)
     {
         limit = 0;
         start = 0;
@@ -164,6 +90,8 @@
         buffer = 0;
         eof = false;
         bufferEnd = 0;
+        sink_ = sink;
+        real_start = 0;
     }

     // ----------------------------------------------------------------------
@@ -172,49 +100,27 @@
     }

     // ----------------------------------------------------------------------
-    void send(
-        Token token
-    )
-    {
-        size_t tokenSize = cursor-start;
-        const char *tokenName = tokenNames[token];
-        printf(
-            "scanner is pushing out a token of type %d (%s)",
-            token,
-            tokenName
-        );
-
-        if(token==EOS) putchar('\n');
-        else
-        {
-            size_t tokenNameSize = strlen(tokenNames[token]);
-            size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
-            for(size_t i=0; i<padSize; ++i) putchar(' ');
-            printf(" : ---->");
-
-            fwrite(
-                start,
-                tokenSize,
-                1,
-                stdout
-            );
-
-            printf("<----\n");
+    void send(Token::Value token) {
+        int beg = (start - buffer) + real_start;
+        int end = (cursor - buffer) + real_start;
+        if (FLAG_trace_lexer) {
+            printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
+            for (uint8_t* s = start; s != cursor; s++) printf("%c", (char)*s);
+            printf(".\n");
         }
+        sink_->Record(token, beg, end);
     }

     // ----------------------------------------------------------------------
-    uint32_t push(
-        const void  *input,
-        ssize_t     inputSize
-    )
-    {
-        printf(
-            "scanner is receiving a new data batch of length %ld\n"
-            "scanner continues with saved state = %d\n",
-            inputSize,
-            state
-        );
+    uint32_t push(const void *input, int input_size) {
+        if (FLAG_trace_lexer) {
+            printf(
+                "scanner is receiving a new data batch of length %d\n"
+                "scanner continues with saved state = %d\n",
+                input_size,
+                state
+            );
+        }

         /*
          * Data source is signaling end of file when batch size
@@ -224,12 +130,12 @@
          * the longest keyword, so given our grammar, 32 is a safe bet.
          */
         uint8_t null[64];
-        const ssize_t maxFill = 32;
-        if(inputSize<maxFill) // FIXME: do something about this!!!
+        const int maxFill = 32;
+        if(input_size<maxFill) // FIXME: do something about this!!!
         {
             eof = true;
             input = null;
-            inputSize = sizeof(null);
+            input_size = sizeof(null);
             memset(null, 0, sizeof(null));
         }

@@ -246,7 +152,7 @@
          *
          */
         size_t used = limit-buffer;
-        size_t needed = used+inputSize;
+        size_t needed = used+input_size;
         size_t allocated = bufferEnd-buffer;
         if(allocated<needed)
         {
@@ -263,8 +169,8 @@
             start = buffer + startOffset;
             limit = limitOffset + buffer;
         }
-        memcpy(limit, input, inputSize);
-        limit += inputSize;
+        memcpy(limit, input, input_size);
+        limit += input_size;

         // The scanner starts here
         #define YYLIMIT         limit
@@ -283,7 +189,9 @@

     start:

-        printf("Starting a round; state: %d, condition: %d\n", state, condition);
+        if (FLAG_trace_lexer) {
+            printf("Starting a round; state: %d, condition: %d\n", state, condition);
+        }

         /*!re2c
         re2c:indent:top      = 1;
@@ -302,66 +210,66 @@
         hex_digit = [0-9a-fA-F];
         maybe_exponent = ('e' [-+]? digit+)?;

-        <Normal> "|="                    { PUSH_TOKEN(ASSIGN_BIT_OR); }
-        <Normal> "^="                    { PUSH_TOKEN(ASSIGN_BIT_XOR); }
-        <Normal> "&="                    { PUSH_TOKEN(ASSIGN_BIT_AND); }
-        <Normal> "+="                    { PUSH_TOKEN(ASSIGN_ADD); }
-        <Normal> "-="                    { PUSH_TOKEN(ASSIGN_SUB); }
-        <Normal> "*="                    { PUSH_TOKEN(ASSIGN_MUL); }
-        <Normal> "/="                    { PUSH_TOKEN(ASSIGN_DIV); }
-        <Normal> "%="                    { PUSH_TOKEN(ASSIGN_MOD); }
+        <Normal> "|="                    { PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
+        <Normal> "^="                    { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
+        <Normal> "&="                    { PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
+        <Normal> "+="                    { PUSH_TOKEN(Token::ASSIGN_ADD); }
+        <Normal> "-="                    { PUSH_TOKEN(Token::ASSIGN_SUB); }
+        <Normal> "*="                    { PUSH_TOKEN(Token::ASSIGN_MUL); }
+        <Normal> "/="                    { PUSH_TOKEN(Token::ASSIGN_DIV); }
+        <Normal> "%="                    { PUSH_TOKEN(Token::ASSIGN_MOD); }

-        <Normal> "==="                   { PUSH_TOKEN(EQ_STRICT); }
-        <Normal> "=="                    { PUSH_TOKEN(EQ); }
-        <Normal> "="                     { PUSH_TOKEN(ASSIGN); }
-        <Normal> "!=="                   { PUSH_TOKEN(NE_STRICT); }
-        <Normal> "!="                    { PUSH_TOKEN(NE); }
-        <Normal> "!"                     { PUSH_TOKEN(NOT); }
+        <Normal> "==="                   { PUSH_TOKEN(Token::EQ_STRICT); }
+        <Normal> "=="                    { PUSH_TOKEN(Token::EQ); }
+        <Normal> "="                     { PUSH_TOKEN(Token::ASSIGN); }
+        <Normal> "!=="                   { PUSH_TOKEN(Token::NE_STRICT); }
+        <Normal> "!="                    { PUSH_TOKEN(Token::NE); }
+        <Normal> "!"                     { PUSH_TOKEN(Token::NOT); }

         <Normal> "//"                    :=> SingleLineComment
         <Normal> "/*"                    :=> MultiLineComment
         <Normal> "<!--"                  :=> HtmlComment

-        <Normal> ">>>="                  { PUSH_TOKEN(ASSIGN_SHR); }
-        <Normal> "<<="                   { PUSH_TOKEN(ASSIGN_SHL); }
-        <Normal> ">>="                   { PUSH_TOKEN(ASSIGN_SAR); }
-        <Normal> "<="                    { PUSH_TOKEN(LTE); }
-        <Normal> ">="                    { PUSH_TOKEN(GTE); }
-        <Normal> "<<"                    { PUSH_TOKEN(SHL); }
-        <Normal> ">>"                    { PUSH_TOKEN(SAR); }
-        <Normal> "<"                     { PUSH_TOKEN(LT); }
-        <Normal> ">"                     { PUSH_TOKEN(GT); }
+        <Normal> ">>>="                  { PUSH_TOKEN(Token::ASSIGN_SHR); }
+        <Normal> "<<="                   { PUSH_TOKEN(Token::ASSIGN_SHL); }
+        <Normal> ">>="                   { PUSH_TOKEN(Token::ASSIGN_SAR); }
+        <Normal> "<="                    { PUSH_TOKEN(Token::LTE); }
+        <Normal> ">="                    { PUSH_TOKEN(Token::GTE); }
+        <Normal> "<<"                    { PUSH_TOKEN(Token::SHL); }
+        <Normal> ">>"                    { PUSH_TOKEN(Token::SAR); }
+        <Normal> "<"                     { PUSH_TOKEN(Token::LT); }
+        <Normal> ">"                     { PUSH_TOKEN(Token::GT); }

-        <Normal> '0x' hex_digit+         { PUSH_NUMBER(); }
-        <Normal> "." digit+ maybe_exponent { PUSH_NUMBER(); }
-        <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_NUMBER(); }
+        <Normal> '0x' hex_digit+         { PUSH_TOKEN(Token::NUMBER); }
+        <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
+        <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); }

-        <Normal> "("                     { PUSH_TOKEN(LPAREN); }
-        <Normal> ")"                     { PUSH_TOKEN(RPAREN); }
-        <Normal> "["                     { PUSH_TOKEN(LBRACK); }
-        <Normal> "]"                     { PUSH_TOKEN(RBRACK); }
-        <Normal> "{"                     { PUSH_TOKEN(LBRACE); }
-        <Normal> "}"                     { PUSH_TOKEN(RBRACE); }
-        <Normal> ":"                     { PUSH_TOKEN(COLON); }
-        <Normal> ";"                     { PUSH_TOKEN(SEMICOLON); }
-        <Normal> "."                     { PUSH_TOKEN(PERIOD); }
-        <Normal> "?"                     { PUSH_TOKEN(CONDITIONAL); }
-        <Normal> "++"                    { PUSH_TOKEN(INC); }
-        <Normal> "--"                    { PUSH_TOKEN(DEC); }
+        <Normal> "("                     { PUSH_TOKEN(Token::LPAREN); }
+        <Normal> ")"                     { PUSH_TOKEN(Token::RPAREN); }
+        <Normal> "["                     { PUSH_TOKEN(Token::LBRACK); }
+        <Normal> "]"                     { PUSH_TOKEN(Token::RBRACK); }
+        <Normal> "{"                     { PUSH_TOKEN(Token::LBRACE); }
+        <Normal> "}"                     { PUSH_TOKEN(Token::RBRACE); }
+        <Normal> ":"                     { PUSH_TOKEN(Token::COLON); }
+        <Normal> ";"                     { PUSH_TOKEN(Token::SEMICOLON); }
+        <Normal> "."                     { PUSH_TOKEN(Token::PERIOD); }
+        <Normal> "?"                     { PUSH_TOKEN(Token::CONDITIONAL); }
+        <Normal> "++"                    { PUSH_TOKEN(Token::INC); }
+        <Normal> "--"                    { PUSH_TOKEN(Token::DEC); }

-        <Normal> "||"                    { PUSH_TOKEN(OR); }
-        <Normal> "&&"                    { PUSH_TOKEN(AND); }
+        <Normal> "||"                    { PUSH_TOKEN(Token::OR); }
+        <Normal> "&&"                    { PUSH_TOKEN(Token::AND); }

-        <Normal> "|"                     { PUSH_TOKEN(BIT_OR); }
-        <Normal> "^"                     { PUSH_TOKEN(BIT_XOR); }
-        <Normal> "&"                     { PUSH_TOKEN(BIT_AND); }
-        <Normal> "+"                     { PUSH_TOKEN(ADD); }
-        <Normal> "-"                     { PUSH_TOKEN(SUB); }
-        <Normal> "*"                     { PUSH_TOKEN(MUL); }
-        <Normal> "/"                     { PUSH_TOKEN(DIV); }
-        <Normal> "%"                     { PUSH_TOKEN(MOD); }
-        <Normal> "~"                     { PUSH_TOKEN(BIT_NOT); }
-        <Normal> ","                     { PUSH_TOKEN(COMMA); }
+        <Normal> "|"                     { PUSH_TOKEN(Token::BIT_OR); }
+        <Normal> "^"                     { PUSH_TOKEN(Token::BIT_XOR); }
+        <Normal> "&"                     { PUSH_TOKEN(Token::BIT_AND); }
+        <Normal> "+"                     { PUSH_TOKEN(Token::ADD); }
+        <Normal> "-"                     { PUSH_TOKEN(Token::SUB); }
+        <Normal> "*"                     { PUSH_TOKEN(Token::MUL); }
+        <Normal> "/"                     { PUSH_TOKEN(Token::DIV); }
+        <Normal> "%"                     { PUSH_TOKEN(Token::MOD); }
+        <Normal> "~"                     { PUSH_TOKEN(Token::BIT_NOT); }
+        <Normal> ","                     { PUSH_TOKEN(Token::COMMA); }

         <Normal> line_terminator+        { PUSH_LINE_TERMINATOR(); }
         <Normal> whitespace              { SKIP(); }
@@ -371,19 +279,19 @@

         <Normal> identifier_start        :=> Identifier

-        <Normal> eof                     { PUSH_EOS(); return 1; }
+        <Normal> eof                     { PUSH_TOKEN(Token::EOS); return 1; }
         <Normal> any                     { TERMINATE_ILLEGAL(); }

         <DoubleQuoteString> "\\\""       { goto yy0; }
-        <DoubleQuoteString> '"'          { PUSH_STRING();}
+        <DoubleQuoteString> '"'          { PUSH_TOKEN(Token::STRING);}
         <DoubleQuoteString> any          { goto yy0; }

         <SingleQuoteString> "\\'"        { goto yy0; }
-        <SingleQuoteString> "'"          { PUSH_STRING();}
+        <SingleQuoteString> "'"          { PUSH_TOKEN(Token::STRING);}
         <SingleQuoteString> any          { goto yy0; }

         <Identifier> identifier_char+    { goto yy0; }
-        <Identifier> any                 { PUSH_IDENTIFIER(); }
+        <Identifier> any                 { cursor--; PUSH_TOKEN(Token::IDENTIFIER); }

         <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
         <SingleLineComment> eof          { PUSH_LINE_TERMINATOR();}
@@ -399,82 +307,80 @@
         */

     fill:
-        ssize_t unfinishedSize = cursor-start;
-        printf(
-            "scanner needs a refill. Exiting for now with:\n"
-            "    saved fill state = %d\n"
-            "    unfinished token size = %ld\n",
-            state,
-            unfinishedSize
-        );
-
-        if(0<unfinishedSize && start<limit)
-        {
-            printf("    unfinished token is: ");
-            fwrite(start, 1, cursor-start, stdout);
+        int unfinishedSize = cursor-start;
+        if (FLAG_trace_lexer) {
+            printf(
+                "scanner needs a refill. Exiting for now with:\n"
+                "    saved fill state = %d\n"
+                "    unfinished token size = %d\n",
+                state,
+                unfinishedSize
+            );
+            if(0 < unfinishedSize && start < limit) {
+                printf("    unfinished token is: ");
+                fwrite(start, 1, cursor-start, stdout);
+                putchar('\n');
+            }
             putchar('\n');
         }
-        putchar('\n');

         /*
          * Once we get here, we can get rid of
          * everything before start and after limit.
          */
-        if(eof==true) goto start;
-        if(buffer<start)
-        {
-            size_t startOffset = start-buffer;
-            memmove(buffer, start, limit-start);
-            marker -= startOffset;
-            cursor -= startOffset;
-            limit -= startOffset;
-            start -= startOffset;
+        if (eof == true) goto start;
+        if (buffer < start) {
+            size_t start_offset = start - buffer;
+            memmove(buffer, start, limit - start);
+            marker -= start_offset;
+            cursor -= start_offset;
+            limit -= start_offset;
+            start -= start_offset;
+            real_start += start_offset;
         }
         return 0;
     }
 };

-// ----------------------------------------------------------------------
-int main(
-    int     argc,
-    char    **argv
-)
-{
-    // Parse cmd line
-    int input = 0;
-    if(1<argc)
-    {
-        input = open(argv[1], O_RDONLY | O_BINARY);
-        if(input<0)
-        {
-            fprintf(
-                stderr,
-                "could not open file %s\n",
-                argv[1]
-            );
-            exit(1);
-        }
-    }
+
+ExperimentalScanner::ExperimentalScanner(const char* fname) :
+    current_(0), fetched_(0) {
+  file_ = fopen(fname, "rb");
+  scanner_ = new PushScanner(this);
+}
+
+
+ExperimentalScanner::~ExperimentalScanner() {
+  fclose(file_);
+}
+
+
+void ExperimentalScanner::FillTokens() {
+  current_ = 0;
+  fetched_ = 0;
+  uint8_t chars[BUFFER_SIZE];
+  int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_));
+  scanner_->push(chars, n);
+}
+
+
+Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
+  if (current_ == fetched_) {
+    FillTokens();
+  }
+  *beg_pos = beg_[current_];
+  *end_pos = end_[current_];
+  Token::Value res = token_[current_];
+  if (token_[current_] != Token::Token::EOS &&
+      token_[current_] != Token::ILLEGAL) current_++;
+  return res;
+}

-    /*
-     * Tokenize input file by pushing batches
-     * of data one by one into the scanner.
-     */
-    const size_t batchSize = 256;
-    uint8_t buffer[batchSize];
-    PushScanner scanner;
-    while(1)
-    {
-        ssize_t n = read(input, buffer, batchSize);
-        if (scanner.push(buffer, n)) {
-          printf("Scanner: illegal data\n");
-          return 1;
-       }
-        if(n<batchSize) break;
-    }
-    scanner.push(0, -1);
-    close(input);

-    // Done
-    return 0;
+void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
+  if (token == Token::EOS) end--;
+  token_[fetched_] = token;
+  beg_[fetched_] = beg;
+  end_[fetched_] = end;
+  fetched_++;
 }

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to