Reviewers: marja,

Description:
Version 3.29.88.3 (merged r24251)

Script streaming: fix split UTF-8 character handling.

BUG=chromium:417891
LOG=N
[email protected]

Please review this at https://codereview.chromium.org/617753002/

SVN Base: https://v8.googlecode.com/svn/branches/3.29

Affected files (+27, -3 lines):
  M src/scanner-character-streams.cc
  M src/version.cc
  M test/cctest/test-api.cc


Index: src/scanner-character-streams.cc
diff --git a/src/scanner-character-streams.cc b/src/scanner-character-streams.cc
index 31b4ee47c4835077998ee189c226411d70f5e0a3..d06f479f94bef5e4d6507d0018406955bdc80360 100644
--- a/src/scanner-character-streams.cc
+++ b/src/scanner-character-streams.cc
@@ -411,13 +411,17 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(

   // Move bytes which are part of an incomplete character from the end of the
   // current chunk to utf8_split_char_buffer_. They will be converted when the
-  // next data chunk arrives.
+  // next data chunk arrives. Note that all valid UTF-8 characters are at most 4
+  // bytes long, but if the data is invalid, we can have character values bigger
+  // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes.
   while (current_data_length_ > current_data_offset_ &&
          (c = current_data_[current_data_length_ - 1]) >
-             unibrow::Utf8::kMaxOneByteChar) {
+             unibrow::Utf8::kMaxOneByteChar &&
+         utf8_split_char_buffer_length_ < 4) {
     --current_data_length_;
     ++utf8_split_char_buffer_length_;
   }
+  CHECK(utf8_split_char_buffer_length_ <= 4);
   for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
     utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
   }
Index: src/version.cc
diff --git a/src/version.cc b/src/version.cc
index 8319fa6b10c65271520f8604899bdf906f5065a8..95015548403b2b356c07ff812de096bffc03e78d 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -35,7 +35,7 @@
 #define MAJOR_VERSION     3
 #define MINOR_VERSION     29
 #define BUILD_NUMBER      88
-#define PATCH_LEVEL       2
+#define PATCH_LEVEL       3
 // Use 1 for candidates and 0 otherwise.
 // (Boolean macro values are not supported by all preprocessors.)
 #define IS_CANDIDATE_VERSION 0
Index: test/cctest/test-api.cc
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc
index 8fd87434c1b27aaeb5c31ce8af06026aa834cc62..0e80384125c260e4edd7ec94818e1cf671bb383b 100644
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -23334,3 +23334,23 @@ TEST(StreamingProducesParserCache) {
   CHECK(cached_data->data != NULL);
   CHECK_GT(cached_data->length, 0);
 }
+
+
+TEST(StreamingScriptWithInvalidUtf8) {
+  // Regression test for a crash: test that invalid UTF-8 bytes in the end of a
+  // chunk don't produce a crash.
+  const char* reference = "\xeb\x91\x80\x80\x80";
+  char chunk1[] =
+      "function foo() {\n"
+      "  // This function will contain an UTF-8 character which is not in\n"
+      "  // ASCII.\n"
+      "  var foobXXXXX";  // Too many bytes which look like incomplete chars!
+  char chunk2[] =
+      "r = 13;\n"
+      "  return foob\xeb\x91\x80\x80\x80r;\n"
+      "}\n";
+  for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i];
+
+  const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
+  RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
+}


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to