Revision: 17931
Author:   [email protected]
Date:     Wed Nov 20 14:53:10 2013 UTC
Log:      Lexer-shell: skip utf16 magic bytes when reading files.

[email protected]
BUG=

Review URL: https://codereview.chromium.org/78233003
http://code.google.com/p/v8/source/detail?r=17931

Modified:
 /branches/experimental/parser/src/lexer/even-more-experimental-scanner.cc

=======================================
--- /branches/experimental/parser/src/lexer/even-more-experimental-scanner.cc Wed Nov 20 13:56:20 2013 UTC
+++ /branches/experimental/parser/src/lexer/even-more-experimental-scanner.cc Wed Nov 20 14:53:10 2013 UTC
@@ -69,18 +69,31 @@
   int file_size = ftell(file);
   rewind(file);

-  *size = file_size * repeat;
-
-  byte* chars = new byte[*size];
+  byte* file_contents = new byte[file_size];
   for (int i = 0; i < file_size;) {
-    int read = static_cast<int>(fread(&chars[i], 1, file_size - i, file));
+    int read =
+        static_cast<int>(fread(&file_contents[i], 1, file_size - i, file));
     i += read;
   }
   fclose(file);

-  for (int i = file_size; i < *size; i++) {
-    chars[i] = chars[i - file_size];
+  // If the file contains the UTF16 little endian magic bytes, skip them.
+ // FIXME: what if we see big endian magic bytes? Do we do the right thing for
+  // big endian anyway?
+  byte* start = file_contents;
+  if (*start == 0xff && *(start + 1) == 0xfe) {
+    start += 2;
+    file_size -= 2;
+  }
+
+  *size = file_size * repeat;
+  byte* chars = new byte[*size];
+
+  for (int i = 0; i < *size; i++) {
+    chars[i] = start[i % file_size];
   }
+
+  delete file_contents;

   return chars;
 }

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to