Revision: 17261
Author: [email protected]
Date: Fri Oct 18 09:01:34 2013 UTC
Log: Experimental parser: Small fixes.
- Fix endless loops on erroneous (e.g. unterminated) strings; we still don't
recover from such errors like the baseline scanner does.
- An identifier cannot start right after a number.
- Efficiency: reserve a reasonably big buffer for tokens.
- Style fixes.
BUG=
[email protected]
Review URL: https://codereview.chromium.org/28643002
http://code.google.com/p/v8/source/detail?r=17261
Modified:
/branches/experimental/parser/src/lexer/lexer-shell.cc
/branches/experimental/parser/src/lexer/lexer.re
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Thu Oct 17 13:56:53 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Oct 18 09:01:34 2013 UTC
@@ -109,6 +109,13 @@
scanner_ = new PushScanner(this);
if (read_all_at_once_) {
source_ = ReadFile(fname, NULL, &length_);
+ token_.resize(1500);
+ beg_.resize(1500);
+ end_.resize(1500);
+ } else {
+ token_.resize(BUFFER_SIZE);
+ beg_.resize(BUFFER_SIZE);
+ end_.resize(BUFFER_SIZE);
}
}
@@ -134,16 +141,13 @@
Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
- while (current_ == fetched_) {
+ while (current_ == fetched_)
FillTokens();
- }
*beg_pos = beg_[current_];
*end_pos = end_[current_];
Token::Value res = token_[current_];
- if (token_[current_] != Token::Token::EOS &&
- token_[current_] != Token::ILLEGAL) {
+ if (res != Token::Token::EOS)
current_++;
- }
return res;
}
@@ -151,14 +155,13 @@
void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
if (token == Token::EOS) end--;
if (fetched_ >= token_.size()) {
- token_.push_back(token);
- beg_.push_back(beg);
- end_.push_back(end);
- } else {
- token_[fetched_] = token;
- beg_[fetched_] = beg;
- end_[fetched_] = end;
+ token_.resize(token_.size() * 2);
+ beg_.resize(beg_.size() * 2);
+ end_.resize(end_.size() * 2);
}
+ token_[fetched_] = token;
+ beg_[fetched_] = beg;
+ end_[fetched_] = end;
fetched_++;
}
@@ -226,6 +229,7 @@
return 1;
}
}
+ printf("No of tokens: %d\n", experimental_tokens.size());
printf("Baseline: %f ms\nExperimental %f ms\n",
baseline_time.InMillisecondsF(),
experimental_time.InMillisecondsF());
=======================================
--- /branches/experimental/parser/src/lexer/lexer.re Thu Oct 17 13:56:53 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.re Fri Oct 18 09:01:34 2013 UTC
@@ -27,18 +27,17 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include <fcntl.h>
#include <stdio.h>
-#include <stddef.h>
#include <stdlib.h>
#include <string.h>
// TODO:
// - SpiderMonkey compatibility hack: " --> something" is treated
// as a single line comment.
-// - An identifier cannot start immediately after a number.
// - Run-time lexing modifications: harmony number literals, keywords depending
// on harmony_modules, harmony_scoping
+// - Escaping the string literals (like the baseline does)
+// - Error recovery after illegal tokens.
enum Condition {
kConditionNormal,
@@ -79,7 +78,7 @@
#define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
#define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
#define PUSH_LINE_TERMINATOR() { SKIP(); }
-#define TERMINATE_ILLEGAL() { return 1; }
+#define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS);
return 1; }
PushScanner::PushScanner(ExperimentalScanner* sink)
: eof_(false),
@@ -167,20 +166,14 @@
memcpy(limit_, input, input_size);
limit_ += input_size;
- // The scanner starts here
-#define YYLIMIT limit_
-#define YYCURSOR cursor_
-#define YYMARKER marker_
-#define YYCTYPE uint8_t
+#define SKIP() { start_ = cursor_;
YYSETCONDITION(kConditionNormal); goto yy0; }
+#define YYFILL(n) { goto fill; }
-#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal);
goto yy0; }
-#define YYFILL(n) { goto fill; }
+#define YYGETSTATE() state_
+#define YYSETSTATE(x) { state_ = (x); }
-#define YYGETSTATE() state_
-#define YYSETSTATE(x) { state_ = (x); }
-
-#define YYGETCONDITION() condition_
-#define YYSETCONDITION(x) { condition_ = (x); }
+#define YYGETCONDITION() condition_
+#define YYSETCONDITION(x) { condition_ = (x); }
start_:
if (FLAG_trace_lexer) {
@@ -188,10 +181,14 @@
}
/*!re2c
- re2c:indent:top = 1;
+ re2c:indent:top = 1;
re2c:yych:conversion = 0;
- re2c:condenumprefix = kCondition;
- re2c:define:YYCONDTYPE = Condition;
+ re2c:condenumprefix = kCondition;
+ re2c:define:YYCONDTYPE = Condition;
+ re2c:define:YYCURSOR = cursor_;
+ re2c:define:YYCTYPE = uint8_t;
+ re2c:define:YYLIMIT = limit_;
+ re2c:define:YYMARKER = marker_;
eof = "\000";
any = [\000-\377];
@@ -281,9 +278,9 @@
<Normal> "<" { PUSH_TOKEN(Token::LT); }
<Normal> ">" { PUSH_TOKEN(Token::GT); }
- <Normal> '0x' hex_digit+ {
PUSH_TOKEN(Token::NUMBER); }
- <Normal> "." digit+ maybe_exponent {
PUSH_TOKEN(Token::NUMBER); }
- <Normal> digit+ ("." digit+)? maybe_exponent {
PUSH_TOKEN(Token::NUMBER); }
+ <Normal> '0x' hex_digit+ not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
+ <Normal> "." digit+ maybe_exponent not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
+ <Normal> digit+ ("." digit+)? maybe_exponent not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
<Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
<Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
@@ -312,7 +309,7 @@
<Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
<Normal> "," { PUSH_TOKEN(Token::COMMA); }
- <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
+ <Normal> line_terminator { PUSH_LINE_TERMINATOR(); }
<Normal> whitespace { SKIP(); }
<Normal> ["] :=> DoubleQuoteString
@@ -325,10 +322,14 @@
<DoubleQuoteString> "\\\"" { goto yy0; }
<DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
+ <DoubleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
+ <DoubleQuoteString> eof { TERMINATE_ILLEGAL(); }
<DoubleQuoteString> any { goto yy0; }
<SingleQuoteString> "\\'" { goto yy0; }
<SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
+ <SingleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
+ <SingleQuoteString> eof { TERMINATE_ILLEGAL(); }
<SingleQuoteString> any { goto yy0; }
<Identifier> identifier_char+ { goto yy0; }
@@ -356,7 +357,7 @@
" unfinished token size = %d\n",
state_,
unfinished_size);
- if(0 < unfinished_size && start_ < limit_) {
+ if (0 < unfinished_size && start_ < limit_) {
printf(" unfinished token is: ");
fwrite(start_, 1, cursor_ - start_, stdout);
putchar('\n');
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.