Revision: 18155
Author:   [email protected]
Date:     Fri Nov 29 10:35:20 2013 UTC
Log: Experimental scanner fix: recognize one-byte strings inside UTF-16 files.

BUG=
[email protected]

Review URL: https://codereview.chromium.org/96173004
http://code.google.com/p/v8/source/detail?r=18155

Modified:
 /branches/experimental/parser/src/lexer/experimental-scanner.cc
 /branches/experimental/parser/src/lexer/experimental-scanner.h
 /branches/experimental/parser/src/lexer/lexer-shell.cc

=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.cc Wed Nov 27 13:01:38 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.cc Fri Nov 29 10:35:20 2013 UTC
@@ -118,9 +118,24 @@
     --end;
   }
   if (!token.has_escapes) {
- literal->is_ascii = false; // FIXME: utf16 can contain only ascii chars.
+    // UTF-16 can also contain only one byte chars. Note that is_ascii here
+    // means is_onebyte.
+    literal->is_ascii = true;
+    literal->buffer.Reset();
+    for (const uint16_t* cursor = start; cursor != end; ++cursor) {
+      if (*cursor >= unibrow::Latin1::kMaxChar) {
+        literal->is_ascii = false;
+        break;
+      }
+      literal->buffer.AddChar(*cursor);
+    }
     literal->length = end - start;
-    literal->utf16_string = Vector<const uint16_t>(start, literal->length);
+    if (literal->is_ascii) {
+      literal->ascii_string = literal->buffer.ascii_literal();
+    } else {
+      literal->buffer.Reset();
+ literal->utf16_string = Vector<const uint16_t>(start, literal->length);
+    }
     return true;
   }
   literal->buffer.Reset();
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.h Thu Nov 28 15:29:06 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Fri Nov 29 10:35:20 2013 UTC
@@ -163,6 +163,7 @@
     return current_literal_->length;
   }

+ // This should be is_onebyte or is_latin1; it doesn't mean ASCII for real.
   bool is_literal_ascii() {
     if (!current_literal_->Valid(current_.beg_pos)) {
       FillLiteral(current_, current_literal_);
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Wed Nov 27 16:07:52 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 29 10:35:20 2013 UTC
@@ -196,6 +196,7 @@
       for (size_t i = 0; i < literal.size(); i++) {
         printf(is_ascii ? " %02x" : " %04x", literal[i]);
       }
+      printf(" (is ascii: %d)", is_ascii);
     }
     printf(" (last octal start: %d)\n", octal_beg);
   }

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to