Reviewers: ulan

Message:
Committed patchset #1 manually as r17427.

Description:
Experimental parser: make the baseline scanner lex UTF-8, UTF-16, ASCII and Latin-1 input.

... so that we can compare its output to that of the experimental lexer.

BUG=
[email protected]

Committed: https://code.google.com/p/v8/source/detail?r=17427

Please review this at https://codereview.chromium.org/50843002/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+62, -22 lines):
  M src/lexer/lexer-shell.cc
  M src/lexer/lexer.h


Index: src/lexer/lexer-shell.cc
diff --git a/src/lexer/lexer-shell.cc b/src/lexer/lexer-shell.cc
index 7b0fde5021451058d89cf3ccc06305fbb2e346f2..41ced7e30755c7f3aa4d8c5495bbdd6c466d2a6c 100644
--- a/src/lexer/lexer-shell.cc
+++ b/src/lexer/lexer-shell.cc
@@ -48,6 +48,14 @@

 using namespace v8::internal;

+enum Encoding {
+  ASCII,
+  LATIN1,
+  UTF8,
+  UTF16
+};
+
+
 const byte* ReadFile(const char* name, Isolate* isolate, int* size) {
   FILE* file = fopen(name, "rb");
   *size = 0;
@@ -67,15 +75,37 @@ const byte* ReadFile(const char* name, Isolate* isolate, int* size) {
   return chars;
 }

-
 class BaselineScanner {
  public:
-  BaselineScanner(const char* fname, Isolate* isolate) {
+  BaselineScanner(const char* fname, Isolate* isolate, Encoding encoding) {
     int length = 0;
     source_ = ReadFile(fname, isolate, &length);
     unicode_cache_ = new UnicodeCache();
     scanner_ = new Scanner(unicode_cache_);
-    stream_ = new Utf8ToUtf16CharacterStream(source_, length);
+    switch (encoding) {
+      case ASCII:
+      case UTF8:
+        stream_ = new Utf8ToUtf16CharacterStream(source_, length);
+        break;
+      case UTF16: {
+        Handle<String> result = isolate->factory()->NewStringFromTwoByte(
+            Vector<const uint16_t>(
+                reinterpret_cast<const uint16_t*>(source_),
+                length / 2));
+        stream_ =
+ new GenericStringUtf16CharacterStream(result, 0, result->length());
+        break;
+      }
+      case LATIN1: {
+        Handle<String> result = isolate->factory()->NewStringFromOneByte(
+            Vector<const uint8_t>(source_, length));
+        stream_ =
+ new GenericStringUtf16CharacterStream(result, 0, result->length());
+        break;
+      }
+      default:
+        break;
+    }
     scanner_->Initialize(stream_);
   }

@@ -97,13 +127,28 @@ class BaselineScanner {
   UnicodeCache* unicode_cache_;
   Scanner* scanner_;
   const byte* source_;
-  Utf8ToUtf16CharacterStream* stream_;
+  BufferedUtf16CharacterStream* stream_;
 };


 int main(int argc, char* argv[]) {
   v8::V8::InitializeICU();
   v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
+  Encoding encoding = ASCII;
+  bool print_baseline = false;
+  for (int i = 0; i < argc; ++i) {
+    if (strcmp(argv[i], "--latin1") == 0) {
+      encoding = LATIN1;
+    } else if (strcmp(argv[i], "--utf8") == 0) {
+      encoding = UTF8;
+    } else if (strcmp(argv[i], "--utf16") == 0) {
+      encoding = UTF16;
+    } else if (strcmp(argv[i], "--ascii") == 0) {
+      encoding = ASCII;
+    } else if (strcmp(argv[i], "--print-baseline") == 0) {
+      print_baseline = true;
+    }
+  }
   v8::Isolate* isolate = v8::Isolate::GetCurrent();
   {
     v8::HandleScope handle_scope(isolate);
@@ -114,7 +159,7 @@ int main(int argc, char* argv[]) {
       v8::Context::Scope scope(context);
       Isolate* isolate = Isolate::Current();
       HandleScope handle_scope(isolate);
-      BaselineScanner baseline(argv[1], isolate);
+      BaselineScanner baseline(argv[1], isolate, encoding);
       ExperimentalScanner experimental(argv[1], true, isolate);

       std::vector<Token::Value> baseline_tokens, experimental_tokens;
@@ -148,6 +193,17 @@ int main(int argc, char* argv[]) {
         experimental_time = timer.Elapsed();
       }

+      if (print_baseline) {
+        printf("Baseline:\n");
+        for (size_t i = 0; i < baseline_tokens.size(); ++i) {
+          printf("=> %11s at (%d, %d)\n",
+                 Token::Name(baseline_tokens[i]),
+                 static_cast<int>(baseline_beg[i]),
+                 static_cast<int>(baseline_end[i]));
+        }
+        printf("(Mis)matches:\n");
+      }
+
       for (size_t i = 0; i < experimental_tokens.size(); ++i) {
         printf("=> %11s at (%d, %d)\n",
                Token::Name(experimental_tokens[i]),
Index: src/lexer/lexer.h
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
index c9b762302e162d6e1f2f9699db544960a61d9668..afedb789925f27df0c503095043b8f76dfa64439 100644
--- a/src/lexer/lexer.h
+++ b/src/lexer/lexer.h
@@ -30,25 +30,9 @@
 #ifndef V8_LEXER_LEXER_H
 #define V8_LEXER_LEXER_H

-#if defined(WIN32)  // FIXME: does this work?
-
-typedef signed char   int8_t;
-typedef signed short  int16_t;
-typedef signed int   int32_t;
-
-typedef unsigned char  uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int  uint32_t;
-
-#else
-
-#include <stdint.h>
-#include <unistd.h>
-
-#endif  // defined(WIN32)
-
 #include "token.h"
 #include "flags.h"
+#include "v8stdint.h"

 #define YYCTYPE uint8_t



--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to