Reviewers: ulan,
Message:
Committed patchset #4 manually as r17261 (presubmit successful).
Description:
Experimental parser: Small fixes.
- Endless loops with erroneous strings (we still don't recover like
the baseline scanner does).
- Identifier cannot start right after a number.
- Efficiency: reserve a reasonably big buffer for tokens.
- Style fixes
BUG=
[email protected]
Committed: https://code.google.com/p/v8/source/detail?r=17261
Please review this at https://codereview.chromium.org/28643002/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+41, -36 lines):
M src/lexer/lexer-shell.cc
M src/lexer/lexer.re
Index: src/lexer/lexer-shell.cc
diff --git a/src/lexer/lexer-shell.cc b/src/lexer/lexer-shell.cc
index 20a603818e7b97da9523d1754e587595a5ae22e9..a3a02bb3ffa935fd03ab03b28f17e02424b2b5cb 100644
--- a/src/lexer/lexer-shell.cc
+++ b/src/lexer/lexer-shell.cc
@@ -109,6 +109,13 @@ ExperimentalScanner::ExperimentalScanner(const char* fname,
scanner_ = new PushScanner(this);
if (read_all_at_once_) {
source_ = ReadFile(fname, NULL, &length_);
+ token_.resize(1500);
+ beg_.resize(1500);
+ end_.resize(1500);
+ } else {
+ token_.resize(BUFFER_SIZE);
+ beg_.resize(BUFFER_SIZE);
+ end_.resize(BUFFER_SIZE);
}
}
@@ -134,16 +141,13 @@ void ExperimentalScanner::FillTokens() {
Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
- while (current_ == fetched_) {
+ while (current_ == fetched_)
FillTokens();
- }
*beg_pos = beg_[current_];
*end_pos = end_[current_];
Token::Value res = token_[current_];
- if (token_[current_] != Token::Token::EOS &&
- token_[current_] != Token::ILLEGAL) {
+ if (res != Token::Token::EOS)
current_++;
- }
return res;
}
@@ -151,14 +155,13 @@ Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
if (token == Token::EOS) end--;
if (fetched_ >= token_.size()) {
- token_.push_back(token);
- beg_.push_back(beg);
- end_.push_back(end);
- } else {
- token_[fetched_] = token;
- beg_[fetched_] = beg;
- end_[fetched_] = end;
+ token_.resize(token_.size() * 2);
+ beg_.resize(beg_.size() * 2);
+ end_.resize(end_.size() * 2);
}
+ token_[fetched_] = token;
+ beg_[fetched_] = beg;
+ end_[fetched_] = end;
fetched_++;
}
@@ -226,6 +229,7 @@ int main(int argc, char* argv[]) {
return 1;
}
}
+ printf("No of tokens: %d\n", experimental_tokens.size());
printf("Baseline: %f ms\nExperimental %f ms\n",
baseline_time.InMillisecondsF(),
experimental_time.InMillisecondsF());
Index: src/lexer/lexer.re
diff --git a/src/lexer/lexer.re b/src/lexer/lexer.re
index cddcda2816128741de0d88e20189c5e3bcab28ca..6a48880c550a2e705bd7ce567578b733db889218 100644
--- a/src/lexer/lexer.re
+++ b/src/lexer/lexer.re
@@ -27,18 +27,17 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include <fcntl.h>
#include <stdio.h>
-#include <stddef.h>
#include <stdlib.h>
#include <string.h>
// TODO:
// - SpiderMonkey compatibility hack: " --> something" is treated
// as a single line comment.
-// - An identifier cannot start immediately after a number.
// - Run-time lexing modifications: harmony number literals, keywords depending
// on harmony_modules, harmony_scoping
+// - Escaping the string literals (like the baseline does)
+// - Error recovery after illegal tokens.
enum Condition {
kConditionNormal,
@@ -79,7 +78,7 @@ using namespace v8::internal;
#define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
#define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
#define PUSH_LINE_TERMINATOR() { SKIP(); }
-#define TERMINATE_ILLEGAL() { return 1; }
+#define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }
PushScanner::PushScanner(ExperimentalScanner* sink)
: eof_(false),
@@ -167,20 +166,14 @@ uint32_t PushScanner::push(const void *input, int input_size) {
memcpy(limit_, input, input_size);
limit_ += input_size;
- // The scanner starts here
-#define YYLIMIT limit_
-#define YYCURSOR cursor_
-#define YYMARKER marker_
-#define YYCTYPE uint8_t
+#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
+#define YYFILL(n) { goto fill; }
-#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
-#define YYFILL(n) { goto fill; }
+#define YYGETSTATE() state_
+#define YYSETSTATE(x) { state_ = (x); }
-#define YYGETSTATE() state_
-#define YYSETSTATE(x) { state_ = (x); }
-
-#define YYGETCONDITION() condition_
-#define YYSETCONDITION(x) { condition_ = (x); }
+#define YYGETCONDITION() condition_
+#define YYSETCONDITION(x) { condition_ = (x); }
start_:
if (FLAG_trace_lexer) {
@@ -188,10 +181,14 @@ start_:
}
/*!re2c
- re2c:indent:top = 1;
+ re2c:indent:top = 1;
re2c:yych:conversion = 0;
- re2c:condenumprefix = kCondition;
- re2c:define:YYCONDTYPE = Condition;
+ re2c:condenumprefix = kCondition;
+ re2c:define:YYCONDTYPE = Condition;
+ re2c:define:YYCURSOR = cursor_;
+ re2c:define:YYCTYPE = uint8_t;
+ re2c:define:YYLIMIT = limit_;
+ re2c:define:YYMARKER = marker_;
eof = "\000";
any = [\000-\377];
@@ -281,9 +278,9 @@ start_:
<Normal> "<" { PUSH_TOKEN(Token::LT); }
<Normal> ">" { PUSH_TOKEN(Token::GT); }
- <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); }
- <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
- <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); }
+ <Normal> '0x' hex_digit+ not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
+ <Normal> "." digit+ maybe_exponent not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
+ <Normal> digit+ ("." digit+)? maybe_exponent not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
<Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
<Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
@@ -312,7 +309,7 @@ start_:
<Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
<Normal> "," { PUSH_TOKEN(Token::COMMA); }
- <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); }
+ <Normal> line_terminator { PUSH_LINE_TERMINATOR(); }
<Normal> whitespace { SKIP(); }
<Normal> ["] :=> DoubleQuoteString
@@ -325,10 +322,14 @@ start_:
<DoubleQuoteString> "\\\"" { goto yy0; }
<DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
+ <DoubleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
+ <DoubleQuoteString> eof { TERMINATE_ILLEGAL(); }
<DoubleQuoteString> any { goto yy0; }
<SingleQuoteString> "\\'" { goto yy0; }
<SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
+ <SingleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
+ <SingleQuoteString> eof { TERMINATE_ILLEGAL(); }
<SingleQuoteString> any { goto yy0; }
<Identifier> identifier_char+ { goto yy0; }
@@ -356,7 +357,7 @@ fill:
" unfinished token size = %d\n",
state_,
unfinished_size);
- if(0 < unfinished_size && start_ < limit_) {
+ if (0 < unfinished_size && start_ < limit_) {
printf(" unfinished token is: ");
fwrite(start_, 1, cursor_ - start_, stdout);
putchar('\n');
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.