Revision: 17200
Author: [email protected]
Date: Mon Oct 14 14:35:38 2013 UTC
Log: Invoke generated lexer along with baseline lexer to compare
results.
BUG=
[email protected]
Review URL: https://chromiumcodereview.appspot.com/26764004
http://code.google.com/p/v8/source/detail?r=17200
Added:
/branches/experimental/parser/src/lexer/lexer.h
Modified:
/branches/experimental/parser/src/flag-definitions.h
/branches/experimental/parser/src/lexer/lexer-shell.cc
/branches/experimental/parser/src/lexer/lexer.gyp
/branches/experimental/parser/src/lexer/lexer.re
=======================================
--- /dev/null
+++ /branches/experimental/parser/src/lexer/lexer.h Mon Oct 14 14:35:38
2013 UTC
@@ -0,0 +1,54 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_LEXER_LEXER_H
+#define V8_LEXER_LEXER_H
+
+#include "token.h"
+#include "flags.h"
+
+class PushScanner;
+
+// Pull-style front end for the generated lexer: tokens are buffered through
+// Record() and handed out one at a time by Next().
+class ExperimentalScanner {
+ public:
+  // Opens |fname| and creates the PushScanner that feeds this object.
+  explicit ExperimentalScanner(const char* fname);
+  ~ExperimentalScanner();
+
+  // Returns the next buffered token and stores its source range in
+  // |*beg_pos| and |*end_pos|.
+  v8::internal::Token::Value Next(int* beg_pos, int* end_pos);
+
+  // Appends one token and its source range to the buffer.
+  void Record(v8::internal::Token::Value token, int beg_pos, int end_pos);
+
+ private:
+  // Not copyable: the object holds a FILE* that its destructor closes, so a
+  // copy would close the same stream twice. Declared but intentionally left
+  // undefined.
+  ExperimentalScanner(const ExperimentalScanner&);
+  void operator=(const ExperimentalScanner&);
+
+  // Reads the next chunk of the file and pushes it into the scanner.
+  void FillTokens();
+
+  // Capacity of the token buffer and size of one file read.
+  static const int BUFFER_SIZE = 256;
+
+  // Parallel arrays: slot i holds a token and its begin/end positions.
+  v8::internal::Token::Value token_[BUFFER_SIZE];
+  int beg_[BUFFER_SIZE];
+  int end_[BUFFER_SIZE];
+  int current_;  // Index of the next token Next() will return.
+  int fetched_;  // Number of tokens recorded in the current batch.
+  FILE* file_;
+  PushScanner* scanner_;
+};
+
+#endif // V8_LEXER_LEXER_H
=======================================
--- /branches/experimental/parser/src/flag-definitions.h Fri Oct 4
07:50:36 2013 UTC
+++ /branches/experimental/parser/src/flag-definitions.h Mon Oct 14
14:35:38 2013 UTC
@@ -572,6 +572,8 @@
DEFINE_bool(allow_natives_syntax, false, "allow natives syntax")
DEFINE_bool(trace_parse, false, "trace parsing and preparsing")
+DEFINE_bool(trace_lexer, false, "trace lexing")
+
// simulator-arm.cc and simulator-mips.cc
DEFINE_bool(trace_sim, false, "Trace simulator execution")
DEFINE_bool(check_icache, false,
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 14
11:35:31 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 14
14:35:38 2013 UTC
@@ -42,6 +42,7 @@
#include "scopeinfo.h"
#include "string-stream.h"
#include "scanner.h"
+#include "lexer.h"
using namespace v8::internal;
@@ -84,12 +85,11 @@
delete unicode_cache_;
}
- Token::Value Next() {
- return scanner_->Next();
- }
-
- Scanner::Location Location() {
- return scanner_->location();
+  // Advances the baseline scanner by one token and reports the token's
+  // source range through |beg_pos| and |end_pos|.
+  Token::Value Next(int* beg_pos, int* end_pos) {
+    const Token::Value token = scanner_->Next();
+    const Scanner::Location where = scanner_->location();
+    *beg_pos = where.beg_pos;
+    *end_pos = where.end_pos;
+    return token;
+  }
private:
@@ -99,14 +99,6 @@
};
-class ExperimentalScanner {
- explicit ExperimentalScanner(const char* fname);
- ~ExperimentalScanner();
- Token::Value Next();
- Scanner::Location Location();
-};
-
-
int main(int argc, char* argv[]) {
v8::V8::InitializeICU();
v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
@@ -121,13 +113,28 @@
Isolate* isolate = Isolate::Current();
HandleScope handle_scope(isolate);
BaselineScanner baseline(argv[1], isolate);
- Token::Value current;
- while ((current = baseline.Next()) != Token::EOS) {
- printf("%11s => (%d, %d)\n",
- Token::Name(current),
- baseline.Location().beg_pos,
- baseline.Location().end_pos);
- }
+ ExperimentalScanner experimental(argv[1]);
+ Token::Value expected_token, actual_token;
+ int expected_beg, expected_end, actual_beg, actual_end;
+ do {
+ expected_token = baseline.Next(&expected_beg, &expected_end);
+ actual_token = experimental.Next(&actual_beg, &actual_end);
+ printf("=> %11s at (%d, %d)\n",
+ Token::Name(actual_token),
+ actual_beg, actual_end);
+ if (expected_token != actual_token ||
+ expected_beg != actual_beg ||
+ expected_end != actual_end) {
+ printf("MISMATCH:\n");
+ printf("Expected: %s at (%d, %d)\n",
+ Token::Name(expected_token),
+ expected_beg, expected_end);
+ printf("Actual: %s at (%d, %d)\n",
+ Token::Name(actual_token),
+ actual_beg, actual_end);
+ return 1;
+ }
+ } while (actual_token != Token::EOS);
}
}
v8::V8::Dispose();
=======================================
--- /branches/experimental/parser/src/lexer/lexer.gyp Mon Oct 14 11:35:31
2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.gyp Mon Oct 14 14:35:38
2013 UTC
@@ -41,10 +41,12 @@
# Generated source files need this explicitly:
'include_dirs+': [
'../../src',
+ '../../src/lexer',
],
'sources': [
'lexer-shell.cc',
-# TODO: fix compiler errors and add '<(SHARED_INTERMEDIATE_DIR)/lexer.cc',
+ 'lexer.h',
+ '<(SHARED_INTERMEDIATE_DIR)/lexer.cc',
],
'conditions': [
['v8_enable_i18n_support==1', {
=======================================
--- /branches/experimental/parser/src/lexer/lexer.re Mon Oct 14 12:15:07
2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.re Mon Oct 14 14:35:38
2013 UTC
@@ -4,7 +4,6 @@
#include <stdlib.h>
#include <string.h>
-
/*
TODO:
- SpiderMonkey compatibility hack: " --> something" is treated as a
single line comment.
@@ -13,7 +12,16 @@
*/
-/*!types:re2c */
+
+// Start conditions of the lexer. The names mirror the <...> conditions used
+// by the re2c rules in this file (<Normal>, <DoubleQuoteString>, ...).
+// NOTE(review): this looks like a hand-written stand-in for the enum that
+// the removed "/*!types:re2c */" directive used to generate; confirm that
+// names and order match what the generated code expects.
+enum Condition {
+ EConditionNormal,
+ EConditionDoubleQuoteString,
+ EConditionSingleQuoteString,
+ EConditionIdentifier,
+ EConditionSingleLineComment,
+ EConditionMultiLineComment,
+ EConditionHtmlComment
+};
#if defined(WIN32)
@@ -36,102 +44,17 @@
#endif
+#include "lexer.h"
+using namespace v8::internal;
+
// ----------------------------------------------------------------------
-#define PUSH_EOS(T) { printf("got eos\n"); }
-#define PUSH_TOKEN(T) { \
- printf("got token %s (%d)\n", tokenNames[T], T); \
- SKIP(); }
-#define PUSH_STRING() { \
- printf("got string\n"); \
- size_t tokenSize = cursor-start; \
- fwrite(start, tokenSize, 1, stdout); \
- printf("\n"); \
- SKIP(); }
-#define PUSH_NUMBER() { \
- printf("got number\n"); \
- size_t tokenSize = cursor-start; \
- fwrite(start, tokenSize, 1, stdout); \
- printf("\n"); \
- SKIP(); }
-#define PUSH_IDENTIFIER() { \
- --cursor; \
- printf("got identifier: "); \
- size_t tokenSize = cursor-start; \
- fwrite(start, tokenSize, 1, stdout); \
- printf("\n"); \
- SKIP(); }
-#define PUSH_LINE_TERMINATOR() { printf("got line terminator\n"); SKIP();}
+#define PUSH_TOKEN(T) { send(T); SKIP(); }
+#define PUSH_LINE_TERMINATOR() { SKIP(); }
#define TERMINATE_ILLEGAL() { return 1; }
-
-#define TOKENS \
- TOK(EOS) \
- TOK(LPAREN) \
- TOK(RPAREN) \
- TOK(LBRACK) \
- TOK(RBRACK) \
- TOK(LBRACE) \
- TOK(RBRACE) \
- TOK(COLON) \
- TOK(SEMICOLON) \
- TOK(PERIOD) \
- TOK(CONDITIONAL) \
- TOK(INC) \
- TOK(DEC) \
- TOK(ASSIGN) \
- TOK(ASSIGN_BIT_OR) \
- TOK(ASSIGN_BIT_XOR) \
- TOK(ASSIGN_BIT_AND) \
- TOK(ASSIGN_SHL) \
- TOK(ASSIGN_SAR) \
- TOK(ASSIGN_SHR) \
- TOK(ASSIGN_ADD) \
- TOK(ASSIGN_SUB) \
- TOK(ASSIGN_MUL) \
- TOK(ASSIGN_DIV) \
- TOK(ASSIGN_MOD) \
- TOK(COMMA) \
- TOK(OR) \
- TOK(AND) \
- TOK(BIT_OR) \
- TOK(BIT_XOR) \
- TOK(BIT_AND) \
- TOK(SHL) \
- TOK(SAR) \
- TOK(ADD) \
- TOK(SUB) \
- TOK(MUL) \
- TOK(DIV) \
- TOK(MOD) \
- TOK(EQ) \
- TOK(NE) \
- TOK(EQ_STRICT) \
- TOK(NE_STRICT) \
- TOK(LT) \
- TOK(GT) \
- TOK(LTE) \
- TOK(GTE) \
- TOK(NOT) \
- TOK(BIT_NOT) \
-
-// ----------------------------------------------------------------------
-static const char *tokenNames[] =
-{
- #define TOK(x) #x,
- TOKENS
- #undef TOK
-};
// ----------------------------------------------------------------------
class PushScanner
{
-public:
-
- enum Token
- {
- #define TOK(x) x,
- TOKENS
- #undef TOK
- };
private:
@@ -143,6 +66,7 @@
uint8_t *start;
uint8_t *cursor;
uint8_t *marker;
+ int real_start;
uint8_t *buffer;
uint8_t *bufferEnd;
@@ -150,10 +74,12 @@
uint8_t yych;
uint32_t yyaccept;
+ ExperimentalScanner* sink_;
+
public:
//
----------------------------------------------------------------------
- PushScanner()
+ PushScanner(ExperimentalScanner* sink)
{
limit = 0;
start = 0;
@@ -164,6 +90,8 @@
buffer = 0;
eof = false;
bufferEnd = 0;
+ sink_ = sink;
+ real_start = 0;
}
//
----------------------------------------------------------------------
@@ -172,49 +100,27 @@
}
//
----------------------------------------------------------------------
- void send(
- Token token
- )
- {
- size_t tokenSize = cursor-start;
- const char *tokenName = tokenNames[token];
- printf(
- "scanner is pushing out a token of type %d (%s)",
- token,
- tokenName
- );
-
- if(token==EOS) putchar('\n');
- else
- {
- size_t tokenNameSize = strlen(tokenNames[token]);
- size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
- for(size_t i=0; i<padSize; ++i) putchar(' ');
- printf(" : ---->");
-
- fwrite(
- start,
- tokenSize,
- 1,
- stdout
- );
-
- printf("<----\n");
+  // Reports the token spanning [start, cursor) to the sink. Positions are
+  // buffer offsets plus real_start, which grows whenever the fill: section
+  // shifts consumed data out of the buffer.
+  void send(Token::Value token) {
+    const int token_beg = (start - buffer) + real_start;
+    const int token_end = (cursor - buffer) + real_start;
+    if (FLAG_trace_lexer) {
+      printf("got %s at (%d, %d): ", Token::Name(token), token_beg, token_end);
+      // Dump the raw bytes of the token.
+      fwrite(start, 1, cursor - start, stdout);
+      printf(".\n");
+    }
+    sink_->Record(token, token_beg, token_end);
+  }
//
----------------------------------------------------------------------
- uint32_t push(
- const void *input,
- ssize_t inputSize
- )
- {
- printf(
- "scanner is receiving a new data batch of length %ld\n"
- "scanner continues with saved state = %d\n",
- inputSize,
- state
- );
+ uint32_t push(const void *input, int input_size) {
+ if (FLAG_trace_lexer) {
+ printf(
+ "scanner is receiving a new data batch of length %d\n"
+ "scanner continues with saved state = %d\n",
+ input_size,
+ state
+ );
+ }
/*
* Data source is signaling end of file when batch size
@@ -224,12 +130,12 @@
* the longest keyword, so given our grammar, 32 is a safe bet.
*/
uint8_t null[64];
- const ssize_t maxFill = 32;
- if(inputSize<maxFill) // FIXME: do something about this!!!
+ const int maxFill = 32;
+ if(input_size<maxFill) // FIXME: do something about this!!!
{
eof = true;
input = null;
- inputSize = sizeof(null);
+ input_size = sizeof(null);
memset(null, 0, sizeof(null));
}
@@ -246,7 +152,7 @@
*
*/
size_t used = limit-buffer;
- size_t needed = used+inputSize;
+ size_t needed = used+input_size;
size_t allocated = bufferEnd-buffer;
if(allocated<needed)
{
@@ -263,8 +169,8 @@
start = buffer + startOffset;
limit = limitOffset + buffer;
}
- memcpy(limit, input, inputSize);
- limit += inputSize;
+ memcpy(limit, input, input_size);
+ limit += input_size;
// The scanner starts here
#define YYLIMIT limit
@@ -283,7 +189,9 @@
start:
- printf("Starting a round; state: %d, condition: %d\n", state,
condition);
+ if (FLAG_trace_lexer) {
+ printf("Starting a round; state: %d, condition: %d\n", state,
condition);
+ }
/*!re2c
re2c:indent:top = 1;
@@ -302,66 +210,66 @@
hex_digit = [0-9a-fA-F];
maybe_exponent = ('e' [-+]? digit+)?;
- <Normal> "|=" { PUSH_TOKEN(ASSIGN_BIT_OR); }
- <Normal> "^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); }
- <Normal> "&=" { PUSH_TOKEN(ASSIGN_BIT_AND); }
- <Normal> "+=" { PUSH_TOKEN(ASSIGN_ADD); }
- <Normal> "-=" { PUSH_TOKEN(ASSIGN_SUB); }
- <Normal> "*=" { PUSH_TOKEN(ASSIGN_MUL); }
- <Normal> "/=" { PUSH_TOKEN(ASSIGN_DIV); }
- <Normal> "%=" { PUSH_TOKEN(ASSIGN_MOD); }
+ <Normal> "|=" {
PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
+ <Normal> "^=" {
PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
+ <Normal> "&=" {
PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
+ <Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); }
+ <Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); }
+ <Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); }
+ <Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); }
+ <Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); }
- <Normal> "===" { PUSH_TOKEN(EQ_STRICT); }
- <Normal> "==" { PUSH_TOKEN(EQ); }
- <Normal> "=" { PUSH_TOKEN(ASSIGN); }
- <Normal> "!==" { PUSH_TOKEN(NE_STRICT); }
- <Normal> "!=" { PUSH_TOKEN(NE); }
- <Normal> "!" { PUSH_TOKEN(NOT); }
+ <Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); }
+ <Normal> "==" { PUSH_TOKEN(Token::EQ); }
+ <Normal> "=" { PUSH_TOKEN(Token::ASSIGN); }
+ <Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); }
+ <Normal> "!=" { PUSH_TOKEN(Token::NE); }
+ <Normal> "!" { PUSH_TOKEN(Token::NOT); }
<Normal> "//" :=> SingleLineComment
<Normal> "/*" :=> MultiLineComment
<Normal> "<!--" :=> HtmlComment
- <Normal> ">>>=" { PUSH_TOKEN(ASSIGN_SHR); }
- <Normal> "<<=" { PUSH_TOKEN(ASSIGN_SHL); }
- <Normal> ">>=" { PUSH_TOKEN(ASSIGN_SAR); }
- <Normal> "<=" { PUSH_TOKEN(LTE); }
- <Normal> ">=" { PUSH_TOKEN(GTE); }
- <Normal> "<<" { PUSH_TOKEN(SHL); }
- <Normal> ">>" { PUSH_TOKEN(SAR); }
- <Normal> "<" { PUSH_TOKEN(LT); }
- <Normal> ">" { PUSH_TOKEN(GT); }
+ <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); }
+ <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); }
+ <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); }
+ <Normal> "<=" { PUSH_TOKEN(Token::LTE); }
+ <Normal> ">=" { PUSH_TOKEN(Token::GTE); }
+ <Normal> "<<" { PUSH_TOKEN(Token::SHL); }
+ <Normal> ">>" { PUSH_TOKEN(Token::SAR); }
+ <Normal> "<" { PUSH_TOKEN(Token::LT); }
+ <Normal> ">" { PUSH_TOKEN(Token::GT); }
- <Normal> '0x' hex_digit+ { PUSH_NUMBER(); }
- <Normal> "." digit+ maybe_exponent { PUSH_NUMBER(); }
- <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_NUMBER(); }
+ <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); }
+ <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
+ <Normal> digit+ ("." digit+)? maybe_exponent {
PUSH_TOKEN(Token::NUMBER); }
- <Normal> "(" { PUSH_TOKEN(LPAREN); }
- <Normal> ")" { PUSH_TOKEN(RPAREN); }
- <Normal> "[" { PUSH_TOKEN(LBRACK); }
- <Normal> "]" { PUSH_TOKEN(RBRACK); }
- <Normal> "{" { PUSH_TOKEN(LBRACE); }
- <Normal> "}" { PUSH_TOKEN(RBRACE); }
- <Normal> ":" { PUSH_TOKEN(COLON); }
- <Normal> ";" { PUSH_TOKEN(SEMICOLON); }
- <Normal> "." { PUSH_TOKEN(PERIOD); }
- <Normal> "?" { PUSH_TOKEN(CONDITIONAL); }
- <Normal> "++" { PUSH_TOKEN(INC); }
- <Normal> "--" { PUSH_TOKEN(DEC); }
+ <Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
+ <Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
+ <Normal> "[" { PUSH_TOKEN(Token::LBRACK); }
+ <Normal> "]" { PUSH_TOKEN(Token::RBRACK); }
+ <Normal> "{" { PUSH_TOKEN(Token::LBRACE); }
+ <Normal> "}" { PUSH_TOKEN(Token::RBRACE); }
+ <Normal> ":" { PUSH_TOKEN(Token::COLON); }
+ <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); }
+ <Normal> "." { PUSH_TOKEN(Token::PERIOD); }
+ <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL);
}
+ <Normal> "++" { PUSH_TOKEN(Token::INC); }
+ <Normal> "--" { PUSH_TOKEN(Token::DEC); }
- <Normal> "||" { PUSH_TOKEN(OR); }
- <Normal> "&&" { PUSH_TOKEN(AND); }
+ <Normal> "||" { PUSH_TOKEN(Token::OR); }
+ <Normal> "&&" { PUSH_TOKEN(Token::AND); }
- <Normal> "|" { PUSH_TOKEN(BIT_OR); }
- <Normal> "^" { PUSH_TOKEN(BIT_XOR); }
- <Normal> "&" { PUSH_TOKEN(BIT_AND); }
- <Normal> "+" { PUSH_TOKEN(ADD); }
- <Normal> "-" { PUSH_TOKEN(SUB); }
- <Normal> "*" { PUSH_TOKEN(MUL); }
- <Normal> "/" { PUSH_TOKEN(DIV); }
- <Normal> "%" { PUSH_TOKEN(MOD); }
- <Normal> "~" { PUSH_TOKEN(BIT_NOT); }
- <Normal> "," { PUSH_TOKEN(COMMA); }
+ <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); }
+ <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); }
+ <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); }
+ <Normal> "+" { PUSH_TOKEN(Token::ADD); }
+ <Normal> "-" { PUSH_TOKEN(Token::SUB); }
+ <Normal> "*" { PUSH_TOKEN(Token::MUL); }
+ <Normal> "/" { PUSH_TOKEN(Token::DIV); }
+ <Normal> "%" { PUSH_TOKEN(Token::MOD); }
+ <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
+ <Normal> "," { PUSH_TOKEN(Token::COMMA); }
<Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
<Normal> whitespace { SKIP(); }
@@ -371,19 +279,19 @@
<Normal> identifier_start :=> Identifier
- <Normal> eof { PUSH_EOS(); return 1; }
+ <Normal> eof { PUSH_TOKEN(Token::EOS); return
1; }
<Normal> any { TERMINATE_ILLEGAL(); }
<DoubleQuoteString> "\\\"" { goto yy0; }
- <DoubleQuoteString> '"' { PUSH_STRING();}
+ <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
<DoubleQuoteString> any { goto yy0; }
<SingleQuoteString> "\\'" { goto yy0; }
- <SingleQuoteString> "'" { PUSH_STRING();}
+ <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
<SingleQuoteString> any { goto yy0; }
<Identifier> identifier_char+ { goto yy0; }
- <Identifier> any { PUSH_IDENTIFIER(); }
+ <Identifier> any { cursor--;
PUSH_TOKEN(Token::IDENTIFIER); }
<SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
<SingleLineComment> eof { PUSH_LINE_TERMINATOR();}
@@ -399,82 +307,80 @@
*/
fill:
- ssize_t unfinishedSize = cursor-start;
- printf(
- "scanner needs a refill. Exiting for now with:\n"
- " saved fill state = %d\n"
- " unfinished token size = %ld\n",
- state,
- unfinishedSize
- );
-
- if(0<unfinishedSize && start<limit)
- {
- printf(" unfinished token is: ");
- fwrite(start, 1, cursor-start, stdout);
+ int unfinishedSize = cursor-start;
+ if (FLAG_trace_lexer) {
+ printf(
+ "scanner needs a refill. Exiting for now with:\n"
+ " saved fill state = %d\n"
+ " unfinished token size = %d\n",
+ state,
+ unfinishedSize
+ );
+ if(0 < unfinishedSize && start < limit) {
+ printf(" unfinished token is: ");
+ fwrite(start, 1, cursor-start, stdout);
+ putchar('\n');
+ }
putchar('\n');
}
- putchar('\n');
/*
* Once we get here, we can get rid of
* everything before start and after limit.
*/
- if(eof==true) goto start;
- if(buffer<start)
- {
- size_t startOffset = start-buffer;
- memmove(buffer, start, limit-start);
- marker -= startOffset;
- cursor -= startOffset;
- limit -= startOffset;
- start -= startOffset;
+ if (eof == true) goto start;
+ if (buffer < start) {
+ size_t start_offset = start - buffer;
+ memmove(buffer, start, limit - start);
+ marker -= start_offset;
+ cursor -= start_offset;
+ limit -= start_offset;
+ start -= start_offset;
+ real_start += start_offset;
}
return 0;
}
};
-// ----------------------------------------------------------------------
-int main(
- int argc,
- char **argv
-)
-{
- // Parse cmd line
- int input = 0;
- if(1<argc)
- {
- input = open(argv[1], O_RDONLY | O_BINARY);
- if(input<0)
- {
- fprintf(
- stderr,
- "could not open file %s\n",
- argv[1]
- );
- exit(1);
- }
- }
+
+// Opens |fname| in binary mode and creates the PushScanner that reports
+// tokens back to this object. Exits with status 1 if the file cannot be
+// opened.
+ExperimentalScanner::ExperimentalScanner(const char* fname)
+    : current_(0),
+      fetched_(0),
+      file_(fopen(fname, "rb")),
+      scanner_(NULL) {
+  // Fail loudly here rather than handing a NULL FILE* to fread()/fclose()
+  // later on.
+  if (file_ == NULL) {
+    fprintf(stderr, "could not open file %s\n", fname);
+    exit(1);
+  }
+  scanner_ = new PushScanner(this);
+}
+
+
+// Releases the input file and the scanner created by the constructor.
+ExperimentalScanner::~ExperimentalScanner() {
+  // fopen() may have failed in the constructor; fclose(NULL) is undefined.
+  if (file_ != NULL) fclose(file_);
+  // scanner_ was allocated with new in the constructor and is owned here.
+  delete scanner_;
+}
+
+
+// Starts a new token batch: reads up to BUFFER_SIZE bytes from the file and
+// pushes them into the scanner, which reports tokens back via Record().
+// The return value of push() is not inspected.
+void ExperimentalScanner::FillTokens() {
+  // Record() appends from slot 0 again, and Next() reads from slot 0.
+  fetched_ = 0;
+  current_ = 0;
+  uint8_t input[BUFFER_SIZE];
+  const size_t bytes_read = fread(input, sizeof(input[0]), BUFFER_SIZE, file_);
+  scanner_->push(input, static_cast<int>(bytes_read));
+}
+
+
+// Returns the token at the read position and stores its source range in
+// |*beg_pos| / |*end_pos|. The read position is not advanced past EOS or
+// ILLEGAL.
+// NOTE(review): if FillTokens() records no token at all, slot 0 is returned
+// as-is; confirm whether an input chunk can produce an empty batch.
+Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
+  // Every buffered token has been handed out: fetch the next batch.
+  if (current_ == fetched_) FillTokens();
+
+  const int slot = current_;
+  const Token::Value result = token_[slot];
+  *beg_pos = beg_[slot];
+  *end_pos = end_[slot];
+
+  const bool is_terminal =
+      (result == Token::EOS) || (result == Token::ILLEGAL);
+  if (!is_terminal) current_++;
+  return result;
+}
- /*
- * Tokenize input file by pushing batches
- * of data one by one into the scanner.
- */
- const size_t batchSize = 256;
- uint8_t buffer[batchSize];
- PushScanner scanner;
- while(1)
- {
- ssize_t n = read(input, buffer, batchSize);
- if (scanner.push(buffer, n)) {
- printf("Scanner: illegal data\n");
- return 1;
- }
- if(n<batchSize) break;
- }
- scanner.push(0, -1);
- close(input);
- // Done
- return 0;
+// Appends |token| with its source range [beg, end) to the current batch.
+// Aborts if the batch already holds BUFFER_SIZE tokens, instead of writing
+// past the end of the token arrays.
+void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
+  if (fetched_ >= BUFFER_SIZE) {
+    fprintf(stderr, "ExperimentalScanner: token buffer overflow\n");
+    abort();
+  }
+  // NOTE(review): the end position of EOS is pulled back by one, presumably
+  // to line up with the position the baseline scanner reports; confirm.
+  if (token == Token::EOS) end--;
+  token_[fetched_] = token;
+  beg_[fetched_] = beg;
+  end_[fetched_] = end;
+  fetched_++;
+}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.