Revision: 18155
Author: [email protected]
Date: Fri Nov 29 10:35:20 2013 UTC
Log: Experimental scanner fix: recognize one-byte strings inside UTF-16
files.
BUG=
[email protected]
Review URL: https://codereview.chromium.org/96173004
http://code.google.com/p/v8/source/detail?r=18155
Modified:
/branches/experimental/parser/src/lexer/experimental-scanner.cc
/branches/experimental/parser/src/lexer/experimental-scanner.h
/branches/experimental/parser/src/lexer/lexer-shell.cc
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.cc Wed Nov 27 13:01:38 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.cc Fri Nov 29 10:35:20 2013 UTC
@@ -118,9 +118,24 @@
--end;
}
if (!token.has_escapes) {
- literal->is_ascii = false; // FIXME: utf16 can contain only ascii chars.
+ // UTF-16 can also contain only one byte chars. Note that is_ascii here
+ // means is_onebyte.
+ literal->is_ascii = true;
+ literal->buffer.Reset();
+ for (const uint16_t* cursor = start; cursor != end; ++cursor) {
+ if (*cursor >= unibrow::Latin1::kMaxChar) {
+ literal->is_ascii = false;
+ break;
+ }
+ literal->buffer.AddChar(*cursor);
+ }
literal->length = end - start;
- literal->utf16_string = Vector<const uint16_t>(start, literal->length);
+ if (literal->is_ascii) {
+ literal->ascii_string = literal->buffer.ascii_literal();
+ } else {
+ literal->buffer.Reset();
+ literal->utf16_string = Vector<const uint16_t>(start,
literal->length);
+ }
return true;
}
literal->buffer.Reset();
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.h Thu Nov 28 15:29:06 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Fri Nov 29 10:35:20 2013 UTC
@@ -163,6 +163,7 @@
return current_literal_->length;
}
+ // This should be is_onebyte or is_latin1; it doesn't mean ASCII for real.
bool is_literal_ascii() {
if (!current_literal_->Valid(current_.beg_pos)) {
FillLiteral(current_, current_literal_);
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Wed Nov 27 16:07:52 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 29 10:35:20 2013 UTC
@@ -196,6 +196,7 @@
for (size_t i = 0; i < literal.size(); i++) {
printf(is_ascii ? " %02x" : " %04x", literal[i]);
}
+ printf(" (is ascii: %d)", is_ascii);
}
printf(" (last octal start: %d)\n", octal_beg);
}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.