Revision: 17302
Author: [email protected]
Date: Mon Oct 21 15:30:42 2013 UTC
Log: Experimental parser: add UnicodeCache.
BUG=
[email protected]
Review URL: https://codereview.chromium.org/32573003
http://code.google.com/p/v8/source/detail?r=17302
Modified:
/branches/experimental/parser/src/lexer/experimental-scanner.cc
/branches/experimental/parser/src/lexer/experimental-scanner.h
/branches/experimental/parser/src/lexer/lexer-shell.cc
/branches/experimental/parser/src/lexer/lexer.h
/branches/experimental/parser/src/lexer/lexer.re
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.cc Fri Oct 18 14:33:27 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.cc Mon Oct 21 15:30:42 2013 UTC
@@ -27,6 +27,12 @@
#include "experimental-scanner.h"
+#include "v8.h"
+
+#include "objects.h"
+#include "objects-inl.h"
+#include "spaces-inl.h"
+#include "isolate.h"
#include "lexer.h"
namespace v8 {
@@ -57,16 +63,17 @@
}
ExperimentalScanner::ExperimentalScanner(const char* fname,
- bool read_all_at_once)
+ bool read_all_at_once,
+ Isolate* isolate)
: current_(0),
fetched_(0),
read_all_at_once_(read_all_at_once),
source_(0),
length_(0) {
file_ = fopen(fname, "rb");
- scanner_ = new PushScanner(this);
+ scanner_ = new PushScanner(this, isolate->unicode_cache());
if (read_all_at_once_) {
- source_ = ReadFile(fname, NULL, &length_);
+ source_ = ReadFile(fname, isolate, &length_);
token_.resize(1500);
beg_.resize(1500);
end_.resize(1500);
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.h Fri Oct 18 14:33:27 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Mon Oct 21 15:30:42 2013 UTC
@@ -54,7 +54,9 @@
int end_pos;
};
- ExperimentalScanner(const char* fname, bool read_all_at_once);
+ ExperimentalScanner(const char* fname,
+ bool read_all_at_once,
+ Isolate* isolate);
~ExperimentalScanner();
Token::Value Next();
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 21 11:10:55 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 21 15:30:42 2013 UTC
@@ -115,7 +115,7 @@
Isolate* isolate = Isolate::Current();
HandleScope handle_scope(isolate);
BaselineScanner baseline(argv[1], isolate);
- ExperimentalScanner experimental(argv[1], true);
+ ExperimentalScanner experimental(argv[1], true, isolate);
std::vector<Token::Value> baseline_tokens, experimental_tokens;
std::vector<size_t> baseline_beg, baseline_end, experimental_beg,
=======================================
--- /branches/experimental/parser/src/lexer/lexer.h Fri Oct 18 14:33:27 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.h Mon Oct 21 15:30:42 2013 UTC
@@ -37,10 +37,11 @@
namespace internal {
class ExperimentalScanner;
+class UnicodeCache;
class PushScanner {
public:
- explicit PushScanner(ExperimentalScanner* sink);
+ explicit PushScanner(ExperimentalScanner* sink, UnicodeCache*
unicode_cache);
~PushScanner();
@@ -48,6 +49,12 @@
uint32_t push(const void *input, int input_size);
private:
+ bool ValidIdentifierStart();
+ bool ValidIdentifierPart();
+ uc32 ScanHexNumber(int length);
+
+ UnicodeCache* unicode_cache_;
+
bool eof_;
int32_t state_;
int32_t condition_;
=======================================
--- /branches/experimental/parser/src/lexer/lexer.re Mon Oct 21 14:09:28 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.re Mon Oct 21 15:30:42 2013 UTC
@@ -31,6 +31,30 @@
#include <stdlib.h>
#include <string.h>
+// FIXME: some of this is probably not needed.
+#include "allocation.h"
+#include "ast.h"
+#include "preparse-data-format.h"
+#include "preparse-data.h"
+#include "scopes.h"
+#include "preparser.h"
+#include "api.h"
+#include "ast.h"
+#include "bootstrapper.h"
+#include "char-predicates-inl.h"
+#include "codegen.h"
+#include "compiler.h"
+#include "func-name-inferrer.h"
+#include "messages.h"
+#include "parser.h"
+#include "platform.h"
+#include "preparser.h"
+#include "runtime.h"
+#include "scanner-character-streams.h"
+#include "scopeinfo.h"
+#include "string-stream.h"
+
+
// TODO:
// - SpiderMonkey compatibility hack: " --> something" is treated
// as a single line comment.
@@ -76,14 +100,29 @@
using namespace v8::internal;
+namespace {
+
+// Maps a hexadecimal digit character to its numeric value.
+// Yields 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; yields -1 for anything else.
+inline int HexValue(uc32 c) {
+  const uc32 decimal_offset = c - '0';
+  if (static_cast<unsigned>(decimal_offset) < 10) return decimal_offset;
+  // Setting bit 0x20 folds the upper-case offsets (0x11..0x16) onto the
+  // lower-case ones (0x31..0x36), so a single range test covers both cases.
+  const uc32 letter_offset = (decimal_offset | 0x20) - ('a' - '0');
+  return static_cast<unsigned>(letter_offset) < 6 ? letter_offset + 10 : -1;
+}
+
+}
+
// Token-emission helpers. Each expands to a braced statement block.
// Emit token T, then SKIP().
#define PUSH_TOKEN(T) { send(T); SKIP(); }
// Step cursor_ back by one position first, then emit T and SKIP().
#define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
// Emit EOS, set eof_, and return 1 from the enclosing function.
#define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
// A line terminator emits no token; only SKIP() runs.
#define PUSH_LINE_TERMINATOR() { SKIP(); }
// Emit ILLEGAL followed by EOS and return 1 from the enclosing function.
// Kept on one line: a #define ends at the newline unless it is continued with
// a backslash, so splitting it would leave "return 1; }" outside the macro.
#define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }
-PushScanner::PushScanner(ExperimentalScanner* sink)
-: eof_(false),
+#define YYCTYPE uint8_t
+
+PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache*
unicode_cache)
+: unicode_cache_(unicode_cache),
+ eof_(false),
state_(-1),
condition_(kConditionNormal),
limit_(NULL),
@@ -101,6 +140,31 @@
PushScanner::~PushScanner() {
}
+
+
+// Decodes the |length| characters immediately preceding cursor_ as a
+// hexadecimal number, most significant digit first.
+//
+// Intended to be called right after a \uXXXX escape has been matched, i.e.
+// with the hex digits ending at cursor_. Returns the decoded value, or -1 if
+// any of the characters is not a hex digit or if |length| is negative.
+//
+// FIXME: the lexer rules only lead here when all four characters are valid
+// hex digits; escapes with fewer valid digits still need to be handled.
+uc32 PushScanner::ScanHexNumber(int length) {
+  // A negative length would start the scan beyond cursor_, so the loop below
+  // would walk away from its end condition instead of towards it.
+  if (length < 0) return -1;
+  uc32 value = 0;
+  for (const YYCTYPE* p = cursor_ - length; p != cursor_; ++p) {
+    const int digit = HexValue(*p);
+    if (digit < 0) return -1;
+    value = value * 16 + digit;
+  }
+  return value;
+}
+
+
+// Returns true if the four hex digits just before cursor_ (the XXXX of a
+// \uXXXX escape) decode to a character that may continue an identifier.
+bool PushScanner::ValidIdentifierPart() {
+  const uc32 c = ScanHexNumber(4);
+  // Reject a failed decode (-1) and an escaped backslash before consulting the
+  // cache: an escape may not introduce a character that would be illegal if
+  // written literally, and a raw backslash only ever starts another escape.
+  // NOTE(review): presumably matches the baseline scanner's "no recursive
+  // escapes" rule -- confirm against it.
+  if (c < 0 || c == '\\') return false;
+  return unicode_cache_->IsIdentifierPart(c);
+}
+
+// Returns true if the four hex digits just before cursor_ (the XXXX of a
+// \uXXXX escape) decode to a character that may begin an identifier.
+bool PushScanner::ValidIdentifierStart() {
+  const uc32 c = ScanHexNumber(4);
+  // Reject a failed decode (-1) and an escaped backslash before consulting the
+  // cache: an escape may not introduce a character that would be illegal if
+  // written literally, and a raw backslash only ever starts another escape.
+  // NOTE(review): presumably matches the baseline scanner's "no recursive
+  // escapes" rule -- confirm against it.
+  if (c < 0 || c == '\\') return false;
+  return unicode_cache_->IsIdentifierStart(c);
+}
void PushScanner::send(Token::Value token) {
int beg = (start_ - buffer_) + real_start_;
@@ -188,7 +252,6 @@
re2c:condenumprefix = kCondition;
re2c:define:YYCONDTYPE = Condition;
re2c:define:YYCURSOR = cursor_;
- re2c:define:YYCTYPE = uint8_t;
re2c:define:YYLIMIT = limit_;
re2c:define:YYMARKER = marker_;
@@ -319,8 +382,7 @@
<Normal> ['] :=> SingleQuoteString
<Normal> identifier_start_ :=> Identifier
- <Normal> "\\u0000" :=> IdentifierIllegal
- <Normal> "\\u" [0-9a-fA-F]{4} :=> Identifier
+ <Normal> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierStart()) {
YYSETCONDITION(kConditionIdentifier); goto yy0; }
YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); start_ =
cursor_; goto yy0; }
<Normal> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
<Normal> eof { PUSH_EOF_AND_RETURN();}
@@ -347,13 +409,12 @@
<SingleQuoteString> any { goto yy0; }
<Identifier> identifier_char+ { goto yy0; }
- <Identifier> "\\u0000" :=> IdentifierIllegal
- <Identifier> "\\u" [0-9a-fA-F]{4} { goto yy0; }
+ <Identifier> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierPart()) goto
yy0; YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); }
<Identifier> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
<Identifier> any {
PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }
<IdentifierIllegal> identifier_char+ { goto yy0; }
- <IdentifierIllegal> "\\"+ { goto yy0; }
+ <IdentifierIllegal> "\\"+ { goto yy0; }
<IdentifierIllegal> any {
PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
<SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.