Revision: 17302
Author:   [email protected]
Date:     Mon Oct 21 15:30:42 2013 UTC
Log:      Experimental parser: add UnicodeCache.

BUG=
[email protected]

Review URL: https://codereview.chromium.org/32573003
http://code.google.com/p/v8/source/detail?r=17302

Modified:
 /branches/experimental/parser/src/lexer/experimental-scanner.cc
 /branches/experimental/parser/src/lexer/experimental-scanner.h
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/src/lexer/lexer.h
 /branches/experimental/parser/src/lexer/lexer.re

=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.cc Fri Oct 18 14:33:27 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.cc Mon Oct 21 15:30:42 2013 UTC
@@ -27,6 +27,12 @@

 #include "experimental-scanner.h"

+#include "v8.h"
+
+#include "objects.h"
+#include "objects-inl.h"
+#include "spaces-inl.h"
+#include "isolate.h"
 #include "lexer.h"

 namespace v8 {
@@ -57,16 +63,17 @@
 }

 ExperimentalScanner::ExperimentalScanner(const char* fname,
-                                         bool read_all_at_once)
+                                         bool read_all_at_once,
+                                         Isolate* isolate)
     : current_(0),
       fetched_(0),
       read_all_at_once_(read_all_at_once),
       source_(0),
       length_(0) {
   file_ = fopen(fname, "rb");
-  scanner_ = new PushScanner(this);
+  scanner_ = new PushScanner(this, isolate->unicode_cache());
   if (read_all_at_once_) {
-    source_ = ReadFile(fname, NULL, &length_);
+    source_ = ReadFile(fname, isolate, &length_);
     token_.resize(1500);
     beg_.resize(1500);
     end_.resize(1500);
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.h Fri Oct 18 14:33:27 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Mon Oct 21 15:30:42 2013 UTC
@@ -54,7 +54,9 @@
     int end_pos;
   };

-  ExperimentalScanner(const char* fname, bool read_all_at_once);
+  ExperimentalScanner(const char* fname,
+                      bool read_all_at_once,
+                      Isolate* isolate);
   ~ExperimentalScanner();

   Token::Value Next();
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 21 11:10:55 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Mon Oct 21 15:30:42 2013 UTC
@@ -115,7 +115,7 @@
       Isolate* isolate = Isolate::Current();
       HandleScope handle_scope(isolate);
       BaselineScanner baseline(argv[1], isolate);
-      ExperimentalScanner experimental(argv[1], true);
+      ExperimentalScanner experimental(argv[1], true, isolate);

       std::vector<Token::Value> baseline_tokens, experimental_tokens;
       std::vector<size_t> baseline_beg, baseline_end, experimental_beg,
=======================================
--- /branches/experimental/parser/src/lexer/lexer.h Fri Oct 18 14:33:27 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.h Mon Oct 21 15:30:42 2013 UTC
@@ -37,10 +37,11 @@
 namespace internal {

 class ExperimentalScanner;
+class UnicodeCache;

 class PushScanner {
  public:
-  explicit PushScanner(ExperimentalScanner* sink);
+  explicit PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache);

   ~PushScanner();

@@ -48,6 +49,12 @@
   uint32_t push(const void *input, int input_size);

  private:
+  bool ValidIdentifierStart();
+  bool ValidIdentifierPart();
+  uc32 ScanHexNumber(int length);
+
+  UnicodeCache* unicode_cache_;
+
   bool eof_;
   int32_t state_;
   int32_t condition_;
=======================================
--- /branches/experimental/parser/src/lexer/lexer.re Mon Oct 21 14:09:28 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.re Mon Oct 21 15:30:42 2013 UTC
@@ -31,6 +31,30 @@
 #include <stdlib.h>
 #include <string.h>

+// FIXME: some of this is probably not needed.
+#include "allocation.h"
+#include "ast.h"
+#include "preparse-data-format.h"
+#include "preparse-data.h"
+#include "scopes.h"
+#include "preparser.h"
+#include "api.h"
+#include "ast.h"
+#include "bootstrapper.h"
+#include "char-predicates-inl.h"
+#include "codegen.h"
+#include "compiler.h"
+#include "func-name-inferrer.h"
+#include "messages.h"
+#include "parser.h"
+#include "platform.h"
+#include "preparser.h"
+#include "runtime.h"
+#include "scanner-character-streams.h"
+#include "scopeinfo.h"
+#include "string-stream.h"
+
+
 // TODO:
 // - SpiderMonkey compatibility hack: "  --> something" is treated
 //   as a single line comment.
@@ -76,14 +100,29 @@

 using namespace v8::internal;

+namespace {
+
+inline int HexValue(uc32 c) {
+  c -= '0';
+  if (static_cast<unsigned>(c) <= 9) return c;
+  c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
+  if (static_cast<unsigned>(c) <= 5) return c + 10;
+  return -1;
+}
+
+}
+
 #define PUSH_TOKEN(T) { send(T); SKIP(); }
 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
 #define PUSH_LINE_TERMINATOR() { SKIP(); }
 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }

-PushScanner::PushScanner(ExperimentalScanner* sink)
-: eof_(false),
+#define YYCTYPE uint8_t
+
+PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache)
+: unicode_cache_(unicode_cache),
+  eof_(false),
   state_(-1),
   condition_(kConditionNormal),
   limit_(NULL),
@@ -101,6 +140,31 @@

 PushScanner::~PushScanner() {
 }
+
+
+uc32 PushScanner::ScanHexNumber(int length) {
+  // We have seen \uXXXX, let's see what it is.
+  // FIXME: we never end up in here if only a subset of the 4 chars are valid
+  // hex digits -> handle the case where they're not.
+  uc32 x = 0;
+  for (YYCTYPE* s = cursor_ - length; s != cursor_; ++s) {
+    int d = HexValue(*s);
+    if (d < 0) {
+      return -1;
+    }
+    x = x * 16 + d;
+  }
+  return x;
+}
+
+
+bool PushScanner::ValidIdentifierPart() {
+  return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));
+}
+
+bool PushScanner::ValidIdentifierStart() {
+  return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));
+}

 void PushScanner::send(Token::Value token) {
   int beg = (start_ - buffer_) + real_start_;
@@ -188,7 +252,6 @@
     re2c:condenumprefix = kCondition;
     re2c:define:YYCONDTYPE = Condition;
     re2c:define:YYCURSOR = cursor_;
-    re2c:define:YYCTYPE = uint8_t;
     re2c:define:YYLIMIT = limit_;
     re2c:define:YYMARKER = marker_;

@@ -319,8 +382,7 @@
     <Normal> [']           :=> SingleQuoteString

     <Normal> identifier_start_    :=> Identifier
-    <Normal> "\\u0000"            :=> IdentifierIllegal
-    <Normal> "\\u" [0-9a-fA-F]{4}    :=> Identifier
+    <Normal> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierStart()) { YYSETCONDITION(kConditionIdentifier); goto yy0; } YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); start_ = cursor_; goto yy0; }
     <Normal> "\\"                 { PUSH_TOKEN(Token::ILLEGAL); }

     <Normal> eof           { PUSH_EOF_AND_RETURN();}
@@ -347,13 +409,12 @@
     <SingleQuoteString> any     { goto yy0; }

     <Identifier> identifier_char+  { goto yy0; }
-    <Identifier> "\\u0000"         :=> IdentifierIllegal
-    <Identifier> "\\u" [0-9a-fA-F]{4} { goto yy0; }
+    <Identifier> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierPart()) goto yy0; YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); }
     <Identifier> "\\"              { PUSH_TOKEN(Token::ILLEGAL); }
     <Identifier> any               { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }

     <IdentifierIllegal> identifier_char+  { goto yy0; }
-    <IdentifierIllegal> "\\"+              { goto yy0; }
+    <IdentifierIllegal> "\\"+             { goto yy0; }
     <IdentifierIllegal> any               { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }

     <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to