Reviewers: ulan,
Message:
Committed patchset #1 manually as r17427.
Description:
Experimental parser: make the baseline lex utf8, utf16, ascii and latin1.
... so we can compare to the experimental lexer.
BUG=
[email protected]
Committed: https://code.google.com/p/v8/source/detail?r=17427
Please review this at https://codereview.chromium.org/50843002/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+62, -22 lines):
M src/lexer/lexer-shell.cc
M src/lexer/lexer.h
Index: src/lexer/lexer-shell.cc
diff --git a/src/lexer/lexer-shell.cc b/src/lexer/lexer-shell.cc
index 7b0fde5021451058d89cf3ccc06305fbb2e346f2..41ced7e30755c7f3aa4d8c5495bbdd6c466d2a6c 100644
--- a/src/lexer/lexer-shell.cc
+++ b/src/lexer/lexer-shell.cc
@@ -48,6 +48,14 @@
using namespace v8::internal;
+enum Encoding {
+ ASCII,
+ LATIN1,
+ UTF8,
+ UTF16
+};
+
+
const byte* ReadFile(const char* name, Isolate* isolate, int* size) {
FILE* file = fopen(name, "rb");
*size = 0;
@@ -67,15 +75,37 @@ const byte* ReadFile(const char* name, Isolate* isolate, int* size) {
return chars;
}
-
class BaselineScanner {
public:
- BaselineScanner(const char* fname, Isolate* isolate) {
+ BaselineScanner(const char* fname, Isolate* isolate, Encoding encoding) {
int length = 0;
source_ = ReadFile(fname, isolate, &length);
unicode_cache_ = new UnicodeCache();
scanner_ = new Scanner(unicode_cache_);
- stream_ = new Utf8ToUtf16CharacterStream(source_, length);
+ switch (encoding) {
+ case ASCII:
+ case UTF8:
+ stream_ = new Utf8ToUtf16CharacterStream(source_, length);
+ break;
+ case UTF16: {
+ Handle<String> result = isolate->factory()->NewStringFromTwoByte(
+ Vector<const uint16_t>(
+ reinterpret_cast<const uint16_t*>(source_),
+ length / 2));
+ stream_ =
+ new GenericStringUtf16CharacterStream(result, 0, result->length());
+ break;
+ }
+ case LATIN1: {
+ Handle<String> result = isolate->factory()->NewStringFromOneByte(
+ Vector<const uint8_t>(source_, length));
+ stream_ =
+ new GenericStringUtf16CharacterStream(result, 0, result->length());
+ break;
+ }
+ default:
+ break;
+ }
scanner_->Initialize(stream_);
}
@@ -97,13 +127,28 @@ class BaselineScanner {
UnicodeCache* unicode_cache_;
Scanner* scanner_;
const byte* source_;
- Utf8ToUtf16CharacterStream* stream_;
+ BufferedUtf16CharacterStream* stream_;
};
int main(int argc, char* argv[]) {
v8::V8::InitializeICU();
v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
+ Encoding encoding = ASCII;
+ bool print_baseline = false;
+ for (int i = 0; i < argc; ++i) {
+ if (strcmp(argv[i], "--latin1") == 0) {
+ encoding = LATIN1;
+ } else if (strcmp(argv[i], "--utf8") == 0) {
+ encoding = UTF8;
+ } else if (strcmp(argv[i], "--utf16") == 0) {
+ encoding = UTF16;
+ } else if (strcmp(argv[i], "--ascii") == 0) {
+ encoding = ASCII;
+ } else if (strcmp(argv[i], "--print-baseline") == 0) {
+ print_baseline = true;
+ }
+ }
v8::Isolate* isolate = v8::Isolate::GetCurrent();
{
v8::HandleScope handle_scope(isolate);
@@ -114,7 +159,7 @@ int main(int argc, char* argv[]) {
v8::Context::Scope scope(context);
Isolate* isolate = Isolate::Current();
HandleScope handle_scope(isolate);
- BaselineScanner baseline(argv[1], isolate);
+ BaselineScanner baseline(argv[1], isolate, encoding);
ExperimentalScanner experimental(argv[1], true, isolate);
std::vector<Token::Value> baseline_tokens, experimental_tokens;
@@ -148,6 +193,17 @@ int main(int argc, char* argv[]) {
experimental_time = timer.Elapsed();
}
+ if (print_baseline) {
+ printf("Baseline:\n");
+ for (size_t i = 0; i < baseline_tokens.size(); ++i) {
+ printf("=> %11s at (%d, %d)\n",
+ Token::Name(baseline_tokens[i]),
+ static_cast<int>(baseline_beg[i]),
+ static_cast<int>(baseline_end[i]));
+ }
+ printf("(Mis)matches:\n");
+ }
+
for (size_t i = 0; i < experimental_tokens.size(); ++i) {
printf("=> %11s at (%d, %d)\n",
Token::Name(experimental_tokens[i]),
Index: src/lexer/lexer.h
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
index c9b762302e162d6e1f2f9699db544960a61d9668..afedb789925f27df0c503095043b8f76dfa64439 100644
--- a/src/lexer/lexer.h
+++ b/src/lexer/lexer.h
@@ -30,25 +30,9 @@
#ifndef V8_LEXER_LEXER_H
#define V8_LEXER_LEXER_H
-#if defined(WIN32) // FIXME: does this work?
-
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef signed int int32_t;
-
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-
-#else
-
-#include <stdint.h>
-#include <unistd.h>
-
-#endif // defined(WIN32)
-
#include "token.h"
#include "flags.h"
+#include "v8stdint.h"
#define YYCTYPE uint8_t
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.