Revision: 5023
Author: [email protected]
Date: Tue Jul  6 00:31:35 2010
Log: Convert Unicode code points outside the Basic Multilingual Plane (above U+FFFF) to the replacement character.
The previous behavior was to silently truncate the value to 16 bits.

Review URL: http://codereview.chromium.org/2832050
http://code.google.com/p/v8/source/detail?r=5023

Modified:
 /branches/bleeding_edge/src/heap.cc

=======================================
--- /branches/bleeding_edge/src/heap.cc Mon Jul  5 04:45:11 2010
+++ /branches/bleeding_edge/src/heap.cc Tue Jul  6 00:31:35 2010
@@ -2868,6 +2868,8 @@

 Object* Heap::AllocateStringFromUtf8(Vector<const char> string,
                                      PretenureFlag pretenure) {
+  // V8 only supports characters in the Basic Multilingual Plane.
+  const uc32 kMaxSupportedChar = 0xFFFF;
   // Count the number of characters in the UTF-8 string and check if
   // it is an ASCII string.
   Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
@@ -2892,6 +2894,7 @@
   decoder->Reset(string.start(), string.length());
   for (int i = 0; i < chars; i++) {
     uc32 r = decoder->GetNext();
+    if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; }
     string_result->Set(i, r);
   }
   return result;

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to