Revision: 24473
Author:   [email protected]
Date:     Wed Oct  8 14:55:03 2014 UTC
Log:      Update unicode to 7.0.0.

And do not use code points with PATTERN_* property for identifier start.
Maintain that \u180E is a white space character.

BUG=v8:2892
LOG=Y
[email protected], [email protected]

Review URL: https://codereview.chromium.org/638643002
https://code.google.com/p/v8/source/detail?r=24473

Added:
 /branches/bleeding_edge/src/unicode-decoder.cc
 /branches/bleeding_edge/src/unicode-decoder.h
 /branches/bleeding_edge/test/unittests/unicode
 /branches/bleeding_edge/test/unittests/unicode/unicode-predicates-unittest.cc
Modified:
 /branches/bleeding_edge/BUILD.gn
 /branches/bleeding_edge/src/char-predicates.h
 /branches/bleeding_edge/src/jsregexp.cc
 /branches/bleeding_edge/src/objects.h
 /branches/bleeding_edge/src/scanner.h
 /branches/bleeding_edge/src/unicode-inl.h
 /branches/bleeding_edge/src/unicode.cc
 /branches/bleeding_edge/src/unicode.h
 /branches/bleeding_edge/test/cctest/test-strings.cc
 /branches/bleeding_edge/test/mjsunit/var.js
 /branches/bleeding_edge/test/unittests/unittests.gyp
 /branches/bleeding_edge/tools/gyp/v8.gyp

=======================================
--- /dev/null
+++ /branches/bleeding_edge/src/unicode-decoder.cc Wed Oct 8 14:55:03 2014 UTC
@@ -0,0 +1,78 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+namespace unibrow {
+
+void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream, unsigned stream_length) {
+  // Assume the whole stream fits in the buffer; no unbuffered tail is needed.
+  last_byte_of_buffer_unused_ = false;
+  unbuffered_start_ = NULL;
+  bool writing_to_buffer = true;
+ // Read the whole stream, filling the buffer for as long as it has space.
+  unsigned utf16_length = 0;
+  while (stream_length != 0) {
+    unsigned cursor = 0;
+    uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
+    DCHECK(cursor > 0 && cursor <= stream_length);
+    stream += cursor;
+    stream_length -= cursor;
+    bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
+    utf16_length += is_two_characters ? 2 : 1;
+    // Once buffering has stopped, only utf16_length still needs updating.
+    if (!writing_to_buffer) continue;
+    // Write out the characters to the buffer.
+ // utf16_length already counts this character, hence <= rather than <.
+    if (utf16_length <= buffer_length) {
+      if (is_two_characters) {
+        *buffer++ = Utf16::LeadSurrogate(character);
+        *buffer++ = Utf16::TrailSurrogate(character);
+      } else {
+        *buffer++ = character;
+      }
+      if (utf16_length == buffer_length) {
+        // Buffer is now exactly full; the rest stays unbuffered.
+        writing_to_buffer = false;
+        unbuffered_start_ = stream;
+      }
+      continue;
+    }
+    // A surrogate pair would overflow the buffer by one element.
+    // Leave the last element unused and restart from this character's bytes.
+    DCHECK(is_two_characters);
+    writing_to_buffer = false;
+    last_byte_of_buffer_unused_ = true;
+    unbuffered_start_ = stream - cursor;
+  }
+  utf16_length_ = utf16_length;
+}
+
+
+void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+                                     unsigned data_length) {
+  while (data_length != 0) {
+    unsigned cursor = 0;
+ uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
+    // stream is deliberately not bounds checked: kMaxEncodedSize is passed as
+    // its length because the checking was already done in Reset.
+    stream += cursor;
+    if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+      *data++ = Utf16::LeadSurrogate(character);
+      *data++ = Utf16::TrailSurrogate(character);
+      DCHECK(data_length > 1);
+      data_length -= 2;
+    } else {
+      *data++ = character;
+      data_length -= 1;
+    }
+  }
+}
+
+}  // namespace unibrow
=======================================
--- /dev/null
+++ /branches/bleeding_edge/src/unicode-decoder.h Wed Oct 8 14:55:03 2014 UTC
@@ -0,0 +1,121 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_UNICODE_DECODER_H_
+#define V8_UNICODE_DECODER_H_
+
+#include <sys/types.h>
+#include "src/globals.h"
+
+namespace unibrow {
+
+class Utf8DecoderBase {
+ public:
+  // The decode buffer is supplied by the subclass.
+  inline Utf8DecoderBase();
+  inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+                         const uint8_t* stream, unsigned stream_length);
+  inline unsigned Utf16Length() const { return utf16_length_; }
+
+ protected:
+  // Reads the whole stream and sets utf16_length_ to its UTF-16 length.
+  // Up to the first buffer_length utf16 chars are cached in the buffer.
+ void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
+             unsigned stream_length);
+  static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+                             unsigned length);
+  const uint8_t* unbuffered_start_;  // Unbuffered rest of stream, or NULL.
+  unsigned utf16_length_;  // Length of the whole stream in UTF-16 units.
+  bool last_byte_of_buffer_unused_;  // Last buffer element was left empty.
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
+};
+
+template <unsigned kBufferSize>
+class Utf8Decoder : public Utf8DecoderBase {
+ public:
+  inline Utf8Decoder() {}
+  inline Utf8Decoder(const char* stream, unsigned length);
+  inline void Reset(const char* stream, unsigned length);
+  inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
+
+ private:
+  uint16_t buffer_[kBufferSize];  // Passed to Utf8DecoderBase as its buffer.
+};
+
+
+Utf8DecoderBase::Utf8DecoderBase()
+    : unbuffered_start_(NULL),
+      utf16_length_(0),
+      last_byte_of_buffer_unused_(false) {}
+
+
+Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+                                 const uint8_t* stream,
+                                 unsigned stream_length) {
+  Reset(buffer, buffer_length, stream, stream_length);
+}
+
+
+template <unsigned kBufferSize>
+Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
+    : Utf8DecoderBase(buffer_, kBufferSize,
+                      reinterpret_cast<const uint8_t*>(stream), length) {}
+
+
+template <unsigned kBufferSize>
+void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
+  Utf8DecoderBase::Reset(buffer_, kBufferSize,
+                         reinterpret_cast<const uint8_t*>(stream), length);
+}
+
+
+template <unsigned kBufferSize>
+unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
+                                              unsigned length) const {
+  DCHECK(length > 0);
+  if (length > utf16_length_) length = utf16_length_;
+  // Copy the buffered prefix (one element short if the last one is unused).
+  unsigned buffer_length =
+      last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
+ unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
+  v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
+  if (length <= buffer_length) return length;
+  DCHECK(unbuffered_start_ != NULL);
+  // Decode the remainder from the unbuffered part of the stream.
+  WriteUtf16Slow(unbuffered_start_, data + buffer_length,
+                 length - buffer_length);
+  return length;
+}
+
+class Latin1 {
+ public:
+  static const unsigned kMaxChar = 0xff;
+  // Returns 0 if the character does not convert to a single latin-1 character
+  // or if the character does not convert back to latin-1 via the inverse
+  // operation (upper to lower, etc).
+  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
+};
+
+
+uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
+  DCHECK(c > Latin1::kMaxChar);
+  switch (c) {
+    // These are equivalent characters in unicode.
+    case 0x39c:
+    case 0x3bc:
+      return 0xb5;
+    // This is the uppercase of a Latin-1 character,
+    // and it lies outside of Latin-1.
+    case 0x178:
+      return 0xff;
+  }
+  return 0;
+}
+
+
+}  // namespace unibrow
+
+#endif  // V8_UNICODE_DECODER_H_
=======================================
--- /dev/null
+++ /branches/bleeding_edge/test/unittests/unicode/unicode-predicates-unittest.cc Wed Oct 8 14:55:03 2014 UTC
@@ -0,0 +1,90 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/char-predicates.h"
+#include "src/unicode.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace v8 {
+namespace internal {
+
+TEST(UnicodePredicatesTest, WhiteSpace) {
+  // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider
+  // it to be one though, since JS recognizes all white spaces in Unicode 5.1.
+  EXPECT_TRUE(WhiteSpace::Is(0x0009));
+  EXPECT_TRUE(WhiteSpace::Is(0x000B));
+  EXPECT_TRUE(WhiteSpace::Is(0x000C));
+  EXPECT_TRUE(WhiteSpace::Is(' '));
+  EXPECT_TRUE(WhiteSpace::Is(0x00A0));
+  EXPECT_TRUE(WhiteSpace::Is(0x180E));
+  EXPECT_TRUE(WhiteSpace::Is(0xFEFF));
+}
+
+
+TEST(UnicodePredicatesTest, WhiteSpaceOrLineTerminator) {
+  // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider
+  // it to be one though, since JS recognizes all white spaces in Unicode 5.1.
+  // White spaces
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' '));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x180E));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF));
+  // Line terminators
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028));
+  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029));
+}
+
+
+TEST(UnicodePredicatesTest, IdentifierStart) {
+  EXPECT_TRUE(IdentifierStart::Is('$'));
+  EXPECT_TRUE(IdentifierStart::Is('_'));
+  EXPECT_TRUE(IdentifierStart::Is('\\'));
+
+  // http://www.unicode.org/reports/tr31/
+  // Other_ID_Start
+  EXPECT_TRUE(IdentifierStart::Is(0x2118));
+  EXPECT_TRUE(IdentifierStart::Is(0x212E));
+  EXPECT_TRUE(IdentifierStart::Is(0x309B));
+  EXPECT_TRUE(IdentifierStart::Is(0x309C));
+
+  // Issue 2892 (checked on unibrow::ID_Start directly, not IdentifierStart):
+  // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
+  EXPECT_FALSE(unibrow::ID_Start::Is(0x2E2F));
+}
+
+
+TEST(UnicodePredicatesTest, IdentifierPart) {
+  EXPECT_TRUE(IdentifierPart::Is('$'));
+  EXPECT_TRUE(IdentifierPart::Is('_'));
+  EXPECT_TRUE(IdentifierPart::Is('\\'));
+  EXPECT_TRUE(IdentifierPart::Is(0x200C));
+  EXPECT_TRUE(IdentifierPart::Is(0x200D));
+
+  // http://www.unicode.org/reports/tr31/
+  // Other_ID_Start
+  EXPECT_TRUE(IdentifierPart::Is(0x2118));
+  EXPECT_TRUE(IdentifierPart::Is(0x212E));
+  EXPECT_TRUE(IdentifierPart::Is(0x309B));
+  EXPECT_TRUE(IdentifierPart::Is(0x309C));
+
+  // Other_ID_Continue
+  EXPECT_TRUE(IdentifierPart::Is(0x00B7));
+  EXPECT_TRUE(IdentifierPart::Is(0x0387));
+  EXPECT_TRUE(IdentifierPart::Is(0x1369));
+  EXPECT_TRUE(IdentifierPart::Is(0x1370));
+  EXPECT_TRUE(IdentifierPart::Is(0x1371));
+  EXPECT_TRUE(IdentifierPart::Is(0x19DA));
+
+  // Issue 2892:
+  // \u2E2F has the Pattern_Syntax property, so it is not an identifier part.
+  EXPECT_FALSE(IdentifierPart::Is(0x2E2F));
+}
+
+}  // namespace internal
+}  // namespace v8
=======================================
--- /branches/bleeding_edge/BUILD.gn    Wed Oct  8 11:19:51 2014 UTC
+++ /branches/bleeding_edge/BUILD.gn    Wed Oct  8 14:55:03 2014 UTC
@@ -895,6 +895,8 @@
     "src/unicode-inl.h",
     "src/unicode.cc",
     "src/unicode.h",
+    "src/unicode-decoder.cc",
+    "src/unicode-decoder.h",
     "src/unique.h",
     "src/uri.h",
     "src/utils-inl.h",
=======================================
--- /branches/bleeding_edge/src/char-predicates.h Tue Jun 3 08:12:43 2014 UTC
+++ /branches/bleeding_edge/src/char-predicates.h Wed Oct 8 14:55:03 2014 UTC
@@ -22,42 +22,40 @@
 inline bool IsRegExpWord(uc32 c);
 inline bool IsRegExpNewline(uc32 c);

+// ES6 draft section 11.6
+// This includes '_', '$' and '\', and ID_Start according to
+// http://www.unicode.org/reports/tr31/, which consists of categories
+// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
+// 'Pattern_Syntax' or 'Pattern_White_Space'.
 struct IdentifierStart {
-  static inline bool Is(uc32 c) {
-    switch (c) {
-      case '$': case '_': case '\\': return true;
-      default: return unibrow::Letter::Is(c);
-    }
-  }
+  static inline bool Is(uc32 c) { return unibrow::ID_Start::Is(c); }
 };


+// ES6 draft section 11.6
+// This includes \u200c and \u200d, and ID_Continue according to
+// http://www.unicode.org/reports/tr31/, which consists of ID_Start,
+// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
+// 'Pattern_Syntax' or 'Pattern_White_Space'.
 struct IdentifierPart {
   static inline bool Is(uc32 c) {
-    return IdentifierStart::Is(c)
-        || unibrow::Number::Is(c)
-        || c == 0x200C  // U+200C is Zero-Width Non-Joiner.
-        || c == 0x200D  // U+200D is Zero-Width Joiner.
-        || unibrow::CombiningMark::Is(c)
-        || unibrow::ConnectorPunctuation::Is(c);
+    return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
   }
 };


-// WhiteSpace according to ECMA-262 5.1, 7.2.
+// ES6 draft section 11.2
+// This includes all code points of Unicode category 'Zs'.
+// \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1,
+// so it is also included.
+// Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff.
 struct WhiteSpace {
-  static inline bool Is(uc32 c) {
-    return c == 0x0009 ||  // <TAB>
-           c == 0x000B ||  // <VT>
-           c == 0x000C ||  // <FF>
-           c == 0xFEFF ||  // <BOM>
-           // \u0020 and \u00A0 are included in unibrow::WhiteSpace.
-           unibrow::WhiteSpace::Is(c);
-  }
+  static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); }
 };


-// WhiteSpace and LineTerminator according to ECMA-262 5.1, 7.2 and 7.3.
+// WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
+// The line terminators are \u000a, \u000d, \u2028, and \u2029.
 struct WhiteSpaceOrLineTerminator {
   static inline bool Is(uc32 c) {
     return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc     Tue Sep 30 10:29:32 2014 UTC
+++ /branches/bleeding_edge/src/jsregexp.cc     Wed Oct  8 14:55:03 2014 UTC
@@ -20,6 +20,7 @@
 #include "src/regexp-stack.h"
 #include "src/runtime/runtime.h"
 #include "src/string-search.h"
+#include "src/unicode-decoder.h"

 #ifndef V8_INTERPRETED_REGEXP
 #if V8_TARGET_ARCH_IA32
=======================================
--- /branches/bleeding_edge/src/objects.h       Wed Oct  8 14:42:31 2014 UTC
+++ /branches/bleeding_edge/src/objects.h       Wed Oct  8 14:55:03 2014 UTC
@@ -20,6 +20,7 @@
 #include "src/property-details.h"
 #include "src/smart-pointers.h"
 #include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
 #include "src/zone.h"

 #if V8_TARGET_ARCH_ARM
=======================================
--- /branches/bleeding_edge/src/scanner.h       Tue Sep 16 22:15:39 2014 UTC
+++ /branches/bleeding_edge/src/scanner.h       Wed Oct  8 14:55:03 2014 UTC
@@ -15,6 +15,7 @@
 #include "src/list.h"
 #include "src/token.h"
 #include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
 #include "src/utils.h"

 namespace v8 {
=======================================
--- /branches/bleeding_edge/src/unicode-inl.h   Mon Aug  4 11:34:54 2014 UTC
+++ /branches/bleeding_edge/src/unicode-inl.h   Wed Oct  8 14:55:03 2014 UTC
@@ -55,22 +55,6 @@
     return length;
   }
 }
-
-
-uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
-  DCHECK(c > Latin1::kMaxChar);
-  switch (c) {
-    // This are equivalent characters in unicode.
-    case 0x39c:
-    case 0x3bc:
-      return 0xb5;
-    // This is an uppercase of a Latin-1 character
-    // outside of Latin-1.
-    case 0x178:
-      return 0xff;
-  }
-  return 0;
-}


 unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
@@ -152,53 +136,6 @@
     return 4;
   }
 }
-
-Utf8DecoderBase::Utf8DecoderBase()
-  : unbuffered_start_(NULL),
-    utf16_length_(0),
-    last_byte_of_buffer_unused_(false) {}
-
-Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer,
-                                 unsigned buffer_length,
-                                 const uint8_t* stream,
-                                 unsigned stream_length) {
-  Reset(buffer, buffer_length, stream, stream_length);
-}
-
-template<unsigned kBufferSize>
-Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
-  : Utf8DecoderBase(buffer_,
-                    kBufferSize,
-                    reinterpret_cast<const uint8_t*>(stream),
-                    length) {
-}
-
-template<unsigned kBufferSize>
-void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
-  Utf8DecoderBase::Reset(buffer_,
-                         kBufferSize,
-                         reinterpret_cast<const uint8_t*>(stream),
-                         length);
-}
-
-template <unsigned kBufferSize>
-unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
-                                              unsigned length) const {
-  DCHECK(length > 0);
-  if (length > utf16_length_) length = utf16_length_;
-  // memcpy everything in buffer.
-  unsigned buffer_length =
-      last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
- unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
-  v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
-  if (length <= buffer_length) return length;
-  DCHECK(unbuffered_start_ != NULL);
-  // Copy the rest the slow way.
-  WriteUtf16Slow(unbuffered_start_,
-                 data + buffer_length,
-                 length - buffer_length);
-  return length;
-}

 }  // namespace unibrow

=======================================
--- /branches/bleeding_edge/src/unicode.cc      Mon Aug  4 11:34:54 2014 UTC
+++ /branches/bleeding_edge/src/unicode.cc      Wed Oct  8 14:55:03 2014 UTC
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
-// This file was generated at 2014-02-07 15:31:16.733174
+// This file was generated at 2014-10-08 15:25:47.940335

 #include "src/unicode-inl.h"
 #include <stdio.h>
@@ -23,6 +23,7 @@
 typedef unsigned short uint16_t;  // NOLINT
 typedef int int32_t;  // NOLINT

+
 // All access to the character table should go through this function.
 template <int D>
 static inline uchar TableGet(const int32_t* table, int index) {
@@ -187,6 +188,7 @@
     return 0;
   }
 }
+

 uchar Utf8::CalculateValue(const byte* str,
                            unsigned length,
@@ -256,138 +258,126 @@
   *cursor += 1;
   return kBadChar;
 }
-
-
-void Utf8DecoderBase::Reset(uint16_t* buffer,
-                            unsigned buffer_length,
-                            const uint8_t* stream,
-                            unsigned stream_length) {
-  // Assume everything will fit in the buffer and stream won't be needed.
-  last_byte_of_buffer_unused_ = false;
-  unbuffered_start_ = NULL;
-  bool writing_to_buffer = true;
- // Loop until stream is read, writing to buffer as long as buffer has space.
-  unsigned utf16_length = 0;
-  while (stream_length != 0) {
-    unsigned cursor = 0;
-    uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
-    DCHECK(cursor > 0 && cursor <= stream_length);
-    stream += cursor;
-    stream_length -= cursor;
-    bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
-    utf16_length += is_two_characters ? 2 : 1;
-    // Don't need to write to the buffer, but still need utf16_length.
-    if (!writing_to_buffer) continue;
-    // Write out the characters to the buffer.
- // Must check for equality with buffer_length as we've already updated it.
-    if (utf16_length <= buffer_length) {
-      if (is_two_characters) {
-        *buffer++ = Utf16::LeadSurrogate(character);
-        *buffer++ = Utf16::TrailSurrogate(character);
-      } else {
-        *buffer++ = character;
-      }
-      if (utf16_length == buffer_length) {
-        // Just wrote last character of buffer
-        writing_to_buffer = false;
-        unbuffered_start_ = stream;
-      }
-      continue;
-    }
-    // Have gone over buffer.
-    // Last char of buffer is unused, set cursor back.
-    DCHECK(is_two_characters);
-    writing_to_buffer = false;
-    last_byte_of_buffer_unused_ = true;
-    unbuffered_start_ = stream - cursor;
-  }
-  utf16_length_ = utf16_length;
-}
-
-
-void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream,
-                                     uint16_t* data,
-                                     unsigned data_length) {
-  while (data_length != 0) {
-    unsigned cursor = 0;
- uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
-    // There's a total lack of bounds checking for stream
-    // as it was already done in Reset.
-    stream += cursor;
-    if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
-      *data++ = Utf16::LeadSurrogate(character);
-      *data++ = Utf16::TrailSurrogate(character);
-      DCHECK(data_length > 1);
-      data_length -= 2;
-    } else {
-      *data++ = character;
-      data_length -= 1;
-    }
-  }
-}


 // Uppercase:            point.category == 'Lu'

-static const uint16_t kUppercaseTable0Size = 450;
-static const int32_t kUppercaseTable0[450] = {
-  1073741889, 90, 1073742016, 214, 1073742040, 222, 256, 258,  // NOLINT
-  260, 262, 264, 266, 268, 270, 272, 274,  // NOLINT
-  276, 278, 280, 282, 284, 286, 288, 290,  // NOLINT
-  292, 294, 296, 298, 300, 302, 304, 306,  // NOLINT
-  308, 310, 313, 315, 317, 319, 321, 323,  // NOLINT
-  325, 327, 330, 332, 334, 336, 338, 340,  // NOLINT
-  342, 344, 346, 348, 350, 352, 354, 356,  // NOLINT
-  358, 360, 362, 364, 366, 368, 370, 372,  // NOLINT
-  374, 1073742200, 377, 379, 381, 1073742209, 386, 388,  // NOLINT
- 1073742214, 391, 1073742217, 395, 1073742222, 401, 1073742227, 404, // NOLINT
-  1073742230, 408, 1073742236, 413, 1073742239, 416, 418, 420,  // NOLINT
-  1073742246, 423, 425, 428, 1073742254, 431, 1073742257, 435,  // NOLINT
-  437, 1073742263, 440, 444, 452, 455, 458, 461,  // NOLINT
-  463, 465, 467, 469, 471, 473, 475, 478,  // NOLINT
-  480, 482, 484, 486, 488, 490, 492, 494,  // NOLINT
-  497, 500, 1073742326, 504, 506, 508, 510, 512,  // NOLINT
-  514, 516, 518, 520, 522, 524, 526, 528,  // NOLINT
-  530, 532, 534, 536, 538, 540, 542, 544,  // NOLINT
-  546, 548, 550, 552, 554, 556, 558, 560,  // NOLINT
-  562, 1073742394, 571, 1073742397, 574, 577, 1073742403, 582,  // NOLINT
-  584, 586, 588, 590, 880, 882, 886, 902,  // NOLINT
- 1073742728, 906, 908, 1073742734, 911, 1073742737, 929, 1073742755, // NOLINT
-  939, 975, 1073742802, 980, 984, 986, 988, 990,  // NOLINT
-  992, 994, 996, 998, 1000, 1002, 1004, 1006,  // NOLINT
-  1012, 1015, 1073742841, 1018, 1073742845, 1071, 1120, 1122,  // NOLINT
-  1124, 1126, 1128, 1130, 1132, 1134, 1136, 1138,  // NOLINT
-  1140, 1142, 1144, 1146, 1148, 1150, 1152, 1162,  // NOLINT
-  1164, 1166, 1168, 1170, 1172, 1174, 1176, 1178,  // NOLINT
-  1180, 1182, 1184, 1186, 1188, 1190, 1192, 1194,  // NOLINT
-  1196, 1198, 1200, 1202, 1204, 1206, 1208, 1210,  // NOLINT
-  1212, 1214, 1073743040, 1217, 1219, 1221, 1223, 1225,  // NOLINT
-  1227, 1229, 1232, 1234, 1236, 1238, 1240, 1242,  // NOLINT
-  1244, 1246, 1248, 1250, 1252, 1254, 1256, 1258,  // NOLINT
-  1260, 1262, 1264, 1266, 1268, 1270, 1272, 1274,  // NOLINT
-  1276, 1278, 1280, 1282, 1284, 1286, 1288, 1290,  // NOLINT
-  1292, 1294, 1296, 1298, 1300, 1302, 1304, 1306,  // NOLINT
-  1308, 1310, 1312, 1314, 1316, 1318, 1073743153, 1366,  // NOLINT
-  1073746080, 4293, 4295, 4301, 7680, 7682, 7684, 7686,  // NOLINT
-  7688, 7690, 7692, 7694, 7696, 7698, 7700, 7702,  // NOLINT
-  7704, 7706, 7708, 7710, 7712, 7714, 7716, 7718,  // NOLINT
-  7720, 7722, 7724, 7726, 7728, 7730, 7732, 7734,  // NOLINT
-  7736, 7738, 7740, 7742, 7744, 7746, 7748, 7750,  // NOLINT
-  7752, 7754, 7756, 7758, 7760, 7762, 7764, 7766,  // NOLINT
-  7768, 7770, 7772, 7774, 7776, 7778, 7780, 7782,  // NOLINT
-  7784, 7786, 7788, 7790, 7792, 7794, 7796, 7798,  // NOLINT
-  7800, 7802, 7804, 7806, 7808, 7810, 7812, 7814,  // NOLINT
-  7816, 7818, 7820, 7822, 7824, 7826, 7828, 7838,  // NOLINT
-  7840, 7842, 7844, 7846, 7848, 7850, 7852, 7854,  // NOLINT
-  7856, 7858, 7860, 7862, 7864, 7866, 7868, 7870,  // NOLINT
-  7872, 7874, 7876, 7878, 7880, 7882, 7884, 7886,  // NOLINT
-  7888, 7890, 7892, 7894, 7896, 7898, 7900, 7902,  // NOLINT
-  7904, 7906, 7908, 7910, 7912, 7914, 7916, 7918,  // NOLINT
-  7920, 7922, 7924, 7926, 7928, 7930, 7932, 7934,  // NOLINT
- 1073749768, 7951, 1073749784, 7965, 1073749800, 7983, 1073749816, 7999, // NOLINT
-  1073749832, 8013, 8025, 8027, 8029, 8031, 1073749864, 8047,  // NOLINT
- 1073749944, 8123, 1073749960, 8139, 1073749976, 8155, 1073749992, 8172, // NOLINT
-  1073750008, 8187 };  // NOLINT
+static const uint16_t kUppercaseTable0Size = 455;
+static const int32_t kUppercaseTable0[455] = {
+    1073741889, 90,         1073742016, 214,
+    1073742040, 222,        256,        258,  // NOLINT
+    260,        262,        264,        266,
+    268,        270,        272,        274,  // NOLINT
+    276,        278,        280,        282,
+    284,        286,        288,        290,  // NOLINT
+    292,        294,        296,        298,
+    300,        302,        304,        306,  // NOLINT
+    308,        310,        313,        315,
+    317,        319,        321,        323,  // NOLINT
+    325,        327,        330,        332,
+    334,        336,        338,        340,  // NOLINT
+    342,        344,        346,        348,
+    350,        352,        354,        356,  // NOLINT
+    358,        360,        362,        364,
+    366,        368,        370,        372,  // NOLINT
+    374,        1073742200, 377,        379,
+    381,        1073742209, 386,        388,  // NOLINT
+    1073742214, 391,        1073742217, 395,
+    1073742222, 401,        1073742227, 404,  // NOLINT
+    1073742230, 408,        1073742236, 413,
+    1073742239, 416,        418,        420,  // NOLINT
+    1073742246, 423,        425,        428,
+    1073742254, 431,        1073742257, 435,  // NOLINT
+    437,        1073742263, 440,        444,
+    452,        455,        458,        461,  // NOLINT
+    463,        465,        467,        469,
+    471,        473,        475,        478,  // NOLINT
+    480,        482,        484,        486,
+    488,        490,        492,        494,  // NOLINT
+    497,        500,        1073742326, 504,
+    506,        508,        510,        512,  // NOLINT
+    514,        516,        518,        520,
+    522,        524,        526,        528,  // NOLINT
+    530,        532,        534,        536,
+    538,        540,        542,        544,  // NOLINT
+    546,        548,        550,        552,
+    554,        556,        558,        560,  // NOLINT
+    562,        1073742394, 571,        1073742397,
+    574,        577,        1073742403, 582,  // NOLINT
+    584,        586,        588,        590,
+    880,        882,        886,        895,  // NOLINT
+    902,        1073742728, 906,        908,
+    1073742734, 911,        1073742737, 929,  // NOLINT
+    1073742755, 939,        975,        1073742802,
+    980,        984,        986,        988,  // NOLINT
+    990,        992,        994,        996,
+    998,        1000,       1002,       1004,  // NOLINT
+    1006,       1012,       1015,       1073742841,
+    1018,       1073742845, 1071,       1120,  // NOLINT
+    1122,       1124,       1126,       1128,
+    1130,       1132,       1134,       1136,  // NOLINT
+    1138,       1140,       1142,       1144,
+    1146,       1148,       1150,       1152,  // NOLINT
+    1162,       1164,       1166,       1168,
+    1170,       1172,       1174,       1176,  // NOLINT
+    1178,       1180,       1182,       1184,
+    1186,       1188,       1190,       1192,  // NOLINT
+    1194,       1196,       1198,       1200,
+    1202,       1204,       1206,       1208,  // NOLINT
+    1210,       1212,       1214,       1073743040,
+    1217,       1219,       1221,       1223,  // NOLINT
+    1225,       1227,       1229,       1232,
+    1234,       1236,       1238,       1240,  // NOLINT
+    1242,       1244,       1246,       1248,
+    1250,       1252,       1254,       1256,  // NOLINT
+    1258,       1260,       1262,       1264,
+    1266,       1268,       1270,       1272,  // NOLINT
+    1274,       1276,       1278,       1280,
+    1282,       1284,       1286,       1288,  // NOLINT
+    1290,       1292,       1294,       1296,
+    1298,       1300,       1302,       1304,  // NOLINT
+    1306,       1308,       1310,       1312,
+    1314,       1316,       1318,       1320,  // NOLINT
+    1322,       1324,       1326,       1073743153,
+    1366,       1073746080, 4293,       4295,  // NOLINT
+    4301,       7680,       7682,       7684,
+    7686,       7688,       7690,       7692,  // NOLINT
+    7694,       7696,       7698,       7700,
+    7702,       7704,       7706,       7708,  // NOLINT
+    7710,       7712,       7714,       7716,
+    7718,       7720,       7722,       7724,  // NOLINT
+    7726,       7728,       7730,       7732,
+    7734,       7736,       7738,       7740,  // NOLINT
+    7742,       7744,       7746,       7748,
+    7750,       7752,       7754,       7756,  // NOLINT
+    7758,       7760,       7762,       7764,
+    7766,       7768,       7770,       7772,  // NOLINT
+    7774,       7776,       7778,       7780,
+    7782,       7784,       7786,       7788,  // NOLINT
+    7790,       7792,       7794,       7796,
+    7798,       7800,       7802,       7804,  // NOLINT
+    7806,       7808,       7810,       7812,
+    7814,       7816,       7818,       7820,  // NOLINT
+    7822,       7824,       7826,       7828,
+    7838,       7840,       7842,       7844,  // NOLINT
+    7846,       7848,       7850,       7852,
+    7854,       7856,       7858,       7860,  // NOLINT
+    7862,       7864,       7866,       7868,
+    7870,       7872,       7874,       7876,  // NOLINT
+    7878,       7880,       7882,       7884,
+    7886,       7888,       7890,       7892,  // NOLINT
+    7894,       7896,       7898,       7900,
+    7902,       7904,       7906,       7908,  // NOLINT
+    7910,       7912,       7914,       7916,
+    7918,       7920,       7922,       7924,  // NOLINT
+    7926,       7928,       7930,       7932,
+    7934,       1073749768, 7951,       1073749784,  // NOLINT
+    7965,       1073749800, 7983,       1073749816,
+    7999,       1073749832, 8013,       8025,  // NOLINT
+    8027,       8029,       8031,       1073749864,
+    8047,       1073749944, 8123,       1073749960,  // NOLINT
+    8139,       1073749976, 8155,       1073749992,
+    8172,       1073750008, 8187};  // NOLINT
 static const uint16_t kUppercaseTable1Size = 86;
 static const int32_t kUppercaseTable1[86] = {
   258, 263, 1073742091, 269, 1073742096, 274, 277, 1073742105,  // NOLINT
@@ -401,20 +391,21 @@
   3262, 3264, 3266, 3268, 3270, 3272, 3274, 3276,  // NOLINT
   3278, 3280, 3282, 3284, 3286, 3288, 3290, 3292,  // NOLINT
   3294, 3296, 3298, 3307, 3309, 3314 };  // NOLINT
-static const uint16_t kUppercaseTable5Size = 91;
-static const int32_t kUppercaseTable5[91] = {
-  1600, 1602, 1604, 1606, 1608, 1610, 1612, 1614,  // NOLINT
-  1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630,  // NOLINT
-  1632, 1634, 1636, 1638, 1640, 1642, 1644, 1664,  // NOLINT
-  1666, 1668, 1670, 1672, 1674, 1676, 1678, 1680,  // NOLINT
-  1682, 1684, 1686, 1826, 1828, 1830, 1832, 1834,  // NOLINT
-  1836, 1838, 1842, 1844, 1846, 1848, 1850, 1852,  // NOLINT
-  1854, 1856, 1858, 1860, 1862, 1864, 1866, 1868,  // NOLINT
-  1870, 1872, 1874, 1876, 1878, 1880, 1882, 1884,  // NOLINT
-  1886, 1888, 1890, 1892, 1894, 1896, 1898, 1900,  // NOLINT
-  1902, 1913, 1915, 1073743741, 1918, 1920, 1922, 1924,  // NOLINT
-  1926, 1931, 1933, 1936, 1938, 1952, 1954, 1956,  // NOLINT
-  1958, 1960, 1962 };  // NOLINT
+static const uint16_t kUppercaseTable5Size = 101;
+static const int32_t kUppercaseTable5[101] = {
+    1600, 1602, 1604, 1606, 1608, 1610, 1612, 1614,  // NOLINT
+    1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630,  // NOLINT
+    1632, 1634, 1636, 1638, 1640, 1642, 1644, 1664,  // NOLINT
+    1666, 1668, 1670, 1672, 1674, 1676, 1678, 1680,  // NOLINT
+    1682, 1684, 1686, 1688, 1690, 1826, 1828, 1830,  // NOLINT
+    1832, 1834, 1836, 1838, 1842, 1844, 1846, 1848,  // NOLINT
+    1850, 1852, 1854, 1856, 1858, 1860, 1862, 1864,  // NOLINT
+    1866, 1868, 1870, 1872, 1874, 1876, 1878, 1880,  // NOLINT
+    1882, 1884, 1886, 1888, 1890, 1892, 1894, 1896,  // NOLINT
+    1898, 1900, 1902, 1913, 1915, 1073743741, 1918, 1920,  // NOLINT
+    1922, 1924, 1926, 1931, 1933, 1936, 1938, 1942,  // NOLINT
+    1944, 1946, 1948, 1950, 1952, 1954, 1956, 1958,  // NOLINT
+    1960, 1073743786, 1965, 1073743792, 1969};  // NOLINT
 static const uint16_t kUppercaseTable7Size = 2;
 static const int32_t kUppercaseTable7[2] = {
   1073749793, 7994 };  // NOLINT
@@ -440,66 +431,125 @@

 // Lowercase:            point.category == 'Ll'

-static const uint16_t kLowercaseTable0Size = 463;
-static const int32_t kLowercaseTable0[463] = {
-  1073741921, 122, 181, 1073742047, 246, 1073742072, 255, 257,  // NOLINT
-  259, 261, 263, 265, 267, 269, 271, 273,  // NOLINT
-  275, 277, 279, 281, 283, 285, 287, 289,  // NOLINT
-  291, 293, 295, 297, 299, 301, 303, 305,  // NOLINT
-  307, 309, 1073742135, 312, 314, 316, 318, 320,  // NOLINT
-  322, 324, 326, 1073742152, 329, 331, 333, 335,  // NOLINT
-  337, 339, 341, 343, 345, 347, 349, 351,  // NOLINT
-  353, 355, 357, 359, 361, 363, 365, 367,  // NOLINT
-  369, 371, 373, 375, 378, 380, 1073742206, 384,  // NOLINT
-  387, 389, 392, 1073742220, 397, 402, 405, 1073742233,  // NOLINT
-  411, 414, 417, 419, 421, 424, 1073742250, 427,  // NOLINT
-  429, 432, 436, 438, 1073742265, 442, 1073742269, 447,  // NOLINT
-  454, 457, 460, 462, 464, 466, 468, 470,  // NOLINT
-  472, 474, 1073742300, 477, 479, 481, 483, 485,  // NOLINT
-  487, 489, 491, 493, 1073742319, 496, 499, 501,  // NOLINT
-  505, 507, 509, 511, 513, 515, 517, 519,  // NOLINT
-  521, 523, 525, 527, 529, 531, 533, 535,  // NOLINT
-  537, 539, 541, 543, 545, 547, 549, 551,  // NOLINT
-  553, 555, 557, 559, 561, 1073742387, 569, 572,  // NOLINT
-  1073742399, 576, 578, 583, 585, 587, 589, 1073742415,  // NOLINT
-  659, 1073742485, 687, 881, 883, 887, 1073742715, 893,  // NOLINT
-  912, 1073742764, 974, 1073742800, 977, 1073742805, 983, 985,  // NOLINT
-  987, 989, 991, 993, 995, 997, 999, 1001,  // NOLINT
-  1003, 1005, 1073742831, 1011, 1013, 1016, 1073742843, 1020,  // NOLINT
-  1073742896, 1119, 1121, 1123, 1125, 1127, 1129, 1131,  // NOLINT
-  1133, 1135, 1137, 1139, 1141, 1143, 1145, 1147,  // NOLINT
-  1149, 1151, 1153, 1163, 1165, 1167, 1169, 1171,  // NOLINT
-  1173, 1175, 1177, 1179, 1181, 1183, 1185, 1187,  // NOLINT
-  1189, 1191, 1193, 1195, 1197, 1199, 1201, 1203,  // NOLINT
-  1205, 1207, 1209, 1211, 1213, 1215, 1218, 1220,  // NOLINT
-  1222, 1224, 1226, 1228, 1073743054, 1231, 1233, 1235,  // NOLINT
-  1237, 1239, 1241, 1243, 1245, 1247, 1249, 1251,  // NOLINT
-  1253, 1255, 1257, 1259, 1261, 1263, 1265, 1267,  // NOLINT
-  1269, 1271, 1273, 1275, 1277, 1279, 1281, 1283,  // NOLINT
-  1285, 1287, 1289, 1291, 1293, 1295, 1297, 1299,  // NOLINT
-  1301, 1303, 1305, 1307, 1309, 1311, 1313, 1315,  // NOLINT
- 1317, 1319, 1073743201, 1415, 1073749248, 7467, 1073749355, 7543, // NOLINT
-  1073749369, 7578, 7681, 7683, 7685, 7687, 7689, 7691,  // NOLINT
-  7693, 7695, 7697, 7699, 7701, 7703, 7705, 7707,  // NOLINT
-  7709, 7711, 7713, 7715, 7717, 7719, 7721, 7723,  // NOLINT
-  7725, 7727, 7729, 7731, 7733, 7735, 7737, 7739,  // NOLINT
-  7741, 7743, 7745, 7747, 7749, 7751, 7753, 7755,  // NOLINT
-  7757, 7759, 7761, 7763, 7765, 7767, 7769, 7771,  // NOLINT
-  7773, 7775, 7777, 7779, 7781, 7783, 7785, 7787,  // NOLINT
-  7789, 7791, 7793, 7795, 7797, 7799, 7801, 7803,  // NOLINT
-  7805, 7807, 7809, 7811, 7813, 7815, 7817, 7819,  // NOLINT
-  7821, 7823, 7825, 7827, 1073749653, 7837, 7839, 7841,  // NOLINT
-  7843, 7845, 7847, 7849, 7851, 7853, 7855, 7857,  // NOLINT
-  7859, 7861, 7863, 7865, 7867, 7869, 7871, 7873,  // NOLINT
-  7875, 7877, 7879, 7881, 7883, 7885, 7887, 7889,  // NOLINT
-  7891, 7893, 7895, 7897, 7899, 7901, 7903, 7905,  // NOLINT
-  7907, 7909, 7911, 7913, 7915, 7917, 7919, 7921,  // NOLINT
-  7923, 7925, 7927, 7929, 7931, 7933, 1073749759, 7943,  // NOLINT
-  1073749776, 7957, 1073749792, 7975, 1073749808, 7991, 1073749824, 8005,  // NOLINT
-  1073749840, 8023, 1073749856, 8039, 1073749872, 8061, 1073749888, 8071,  // NOLINT
-  1073749904, 8087, 1073749920, 8103, 1073749936, 8116, 1073749942, 8119,  // NOLINT
-  8126, 1073749954, 8132, 1073749958, 8135, 1073749968, 8147, 1073749974,  // NOLINT
-  8151, 1073749984, 8167, 1073750002, 8180, 1073750006, 8183 };  // NOLINT
+static const uint16_t kLowercaseTable0Size = 467;
+static const int32_t kLowercaseTable0[467] = {
+    1073741921, 122,        181,        1073742047,
+    246,        1073742072, 255,        257,  // NOLINT
+    259,        261,        263,        265,
+    267,        269,        271,        273,  // NOLINT
+    275,        277,        279,        281,
+    283,        285,        287,        289,  // NOLINT
+    291,        293,        295,        297,
+    299,        301,        303,        305,  // NOLINT
+    307,        309,        1073742135, 312,
+    314,        316,        318,        320,  // NOLINT
+    322,        324,        326,        1073742152,
+    329,        331,        333,        335,  // NOLINT
+    337,        339,        341,        343,
+    345,        347,        349,        351,  // NOLINT
+    353,        355,        357,        359,
+    361,        363,        365,        367,  // NOLINT
+    369,        371,        373,        375,
+    378,        380,        1073742206, 384,  // NOLINT
+    387,        389,        392,        1073742220,
+    397,        402,        405,        1073742233,  // NOLINT
+    411,        414,        417,        419,
+    421,        424,        1073742250, 427,  // NOLINT
+    429,        432,        436,        438,
+    1073742265, 442,        1073742269, 447,  // NOLINT
+    454,        457,        460,        462,
+    464,        466,        468,        470,  // NOLINT
+    472,        474,        1073742300, 477,
+    479,        481,        483,        485,  // NOLINT
+    487,        489,        491,        493,
+    1073742319, 496,        499,        501,  // NOLINT
+    505,        507,        509,        511,
+    513,        515,        517,        519,  // NOLINT
+    521,        523,        525,        527,
+    529,        531,        533,        535,  // NOLINT
+    537,        539,        541,        543,
+    545,        547,        549,        551,  // NOLINT
+    553,        555,        557,        559,
+    561,        1073742387, 569,        572,  // NOLINT
+    1073742399, 576,        578,        583,
+    585,        587,        589,        1073742415,  // NOLINT
+    659,        1073742485, 687,        881,
+    883,        887,        1073742715, 893,  // NOLINT
+    912,        1073742764, 974,        1073742800,
+    977,        1073742805, 983,        985,  // NOLINT
+    987,        989,        991,        993,
+    995,        997,        999,        1001,  // NOLINT
+    1003,       1005,       1073742831, 1011,
+    1013,       1016,       1073742843, 1020,  // NOLINT
+    1073742896, 1119,       1121,       1123,
+    1125,       1127,       1129,       1131,  // NOLINT
+    1133,       1135,       1137,       1139,
+    1141,       1143,       1145,       1147,  // NOLINT
+    1149,       1151,       1153,       1163,
+    1165,       1167,       1169,       1171,  // NOLINT
+    1173,       1175,       1177,       1179,
+    1181,       1183,       1185,       1187,  // NOLINT
+    1189,       1191,       1193,       1195,
+    1197,       1199,       1201,       1203,  // NOLINT
+    1205,       1207,       1209,       1211,
+    1213,       1215,       1218,       1220,  // NOLINT
+    1222,       1224,       1226,       1228,
+    1073743054, 1231,       1233,       1235,  // NOLINT
+    1237,       1239,       1241,       1243,
+    1245,       1247,       1249,       1251,  // NOLINT
+    1253,       1255,       1257,       1259,
+    1261,       1263,       1265,       1267,  // NOLINT
+    1269,       1271,       1273,       1275,
+    1277,       1279,       1281,       1283,  // NOLINT
+    1285,       1287,       1289,       1291,
+    1293,       1295,       1297,       1299,  // NOLINT
+    1301,       1303,       1305,       1307,
+    1309,       1311,       1313,       1315,  // NOLINT
+    1317,       1319,       1321,       1323,
+    1325,       1327,       1073743201, 1415,  // NOLINT
+    1073749248, 7467,       1073749355, 7543,
+    1073749369, 7578,       7681,       7683,  // NOLINT
+    7685,       7687,       7689,       7691,
+    7693,       7695,       7697,       7699,  // NOLINT
+    7701,       7703,       7705,       7707,
+    7709,       7711,       7713,       7715,  // NOLINT
+    7717,       7719,       7721,       7723,
+    7725,       7727,       7729,       7731,  // NOLINT
+    7733,       7735,       7737,       7739,
+    7741,       7743,       7745,       7747,  // NOLINT
+    7749,       7751,       7753,       7755,
+    7757,       7759,       7761,       7763,  // NOLINT
+    7765,       7767,       7769,       7771,
+    7773,       7775,       7777,       7779,  // NOLINT
+    7781,       7783,       7785,       7787,
+    7789,       7791,       7793,       7795,  // NOLINT
+    7797,       7799,       7801,       7803,
+    7805,       7807,       7809,       7811,  // NOLINT
+    7813,       7815,       7817,       7819,
+    7821,       7823,       7825,       7827,  // NOLINT
+    1073749653, 7837,       7839,       7841,
+    7843,       7845,       7847,       7849,  // NOLINT
+    7851,       7853,       7855,       7857,
+    7859,       7861,       7863,       7865,  // NOLINT
+    7867,       7869,       7871,       7873,
+    7875,       7877,       7879,       7881,  // NOLINT
+    7883,       7885,       7887,       7889,
+    7891,       7893,       7895,       7897,  // NOLINT
+    7899,       7901,       7903,       7905,
+    7907,       7909,       7911,       7913,  // NOLINT
+    7915,       7917,       7919,       7921,
+    7923,       7925,       7927,       7929,  // NOLINT
+    7931,       7933,       1073749759, 7943,
+    1073749776, 7957,       1073749792, 7975,  // NOLINT
+    1073749808, 7991,       1073749824, 8005,
+    1073749840, 8023,       1073749856, 8039,  // NOLINT
+    1073749872, 8061,       1073749888, 8071,
+    1073749904, 8087,       1073749920, 8103,  // NOLINT
+    1073749936, 8116,       1073749942, 8119,
+    8126,       1073749954, 8132,       1073749958,  // NOLINT
+    8135,       1073749968, 8147,       1073749974,
+    8151,       1073749984, 8167,       1073750002,  // NOLINT
+    8180,       1073750006, 8183};                   // NOLINT
 static const uint16_t kLowercaseTable1Size = 84;
 static const int32_t kLowercaseTable1[84] = {
   266, 1073742094, 271, 275, 303, 308, 313, 1073742140,  // NOLINT
@@ -513,20 +563,35 @@
   3277, 3279, 3281, 3283, 3285, 3287, 3289, 3291,  // NOLINT
   3293, 3295, 3297, 1073745123, 3300, 3308, 3310, 3315,  // NOLINT
   1073745152, 3365, 3367, 3373 };  // NOLINT
-static const uint16_t kLowercaseTable5Size = 93;
-static const int32_t kLowercaseTable5[93] = {
-  1601, 1603, 1605, 1607, 1609, 1611, 1613, 1615,  // NOLINT
-  1617, 1619, 1621, 1623, 1625, 1627, 1629, 1631,  // NOLINT
-  1633, 1635, 1637, 1639, 1641, 1643, 1645, 1665,  // NOLINT
-  1667, 1669, 1671, 1673, 1675, 1677, 1679, 1681,  // NOLINT
-  1683, 1685, 1687, 1827, 1829, 1831, 1833, 1835,  // NOLINT
-  1837, 1073743663, 1841, 1843, 1845, 1847, 1849, 1851,  // NOLINT
-  1853, 1855, 1857, 1859, 1861, 1863, 1865, 1867,  // NOLINT
-  1869, 1871, 1873, 1875, 1877, 1879, 1881, 1883,  // NOLINT
-  1885, 1887, 1889, 1891, 1893, 1895, 1897, 1899,  // NOLINT
-  1901, 1903, 1073743729, 1912, 1914, 1916, 1919, 1921,  // NOLINT
-  1923, 1925, 1927, 1932, 1934, 1937, 1939, 1953,  // NOLINT
-  1955, 1957, 1959, 1961, 2042 };  // NOLINT
+static const uint16_t kLowercaseTable5Size = 105;
+static const int32_t kLowercaseTable5[105] = {
+    1601,       1603,       1605, 1607,
+    1609,       1611,       1613, 1615,  // NOLINT
+    1617,       1619,       1621, 1623,
+    1625,       1627,       1629, 1631,  // NOLINT
+    1633,       1635,       1637, 1639,
+    1641,       1643,       1645, 1665,  // NOLINT
+    1667,       1669,       1671, 1673,
+    1675,       1677,       1679, 1681,  // NOLINT
+    1683,       1685,       1687, 1689,
+    1691,       1827,       1829, 1831,  // NOLINT
+    1833,       1835,       1837, 1073743663,
+    1841,       1843,       1845, 1847,  // NOLINT
+    1849,       1851,       1853, 1855,
+    1857,       1859,       1861, 1863,  // NOLINT
+    1865,       1867,       1869, 1871,
+    1873,       1875,       1877, 1879,  // NOLINT
+    1881,       1883,       1885, 1887,
+    1889,       1891,       1893, 1895,  // NOLINT
+    1897,       1899,       1901, 1903,
+    1073743729, 1912,       1914, 1916,  // NOLINT
+    1919,       1921,       1923, 1925,
+    1927,       1932,       1934, 1937,  // NOLINT
+    1073743763, 1941,       1943, 1945,
+    1947,       1949,       1951, 1953,  // NOLINT
+    1955,       1957,       1959, 1961,
+    2042,       1073744688, 2906, 1073744740,  // NOLINT
+    2917};                                     // NOLINT
 static const uint16_t kLowercaseTable7Size = 6;
 static const int32_t kLowercaseTable7[6] = {
   1073748736, 6918, 1073748755, 6935, 1073749825, 8026 };  // NOLINT
@@ -550,65 +615,118 @@
 }


-// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl' ]
+// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']

-static const uint16_t kLetterTable0Size = 435;
-static const int32_t kLetterTable0[435] = {
-  1073741889, 90, 1073741921, 122, 170, 181, 186, 1073742016,  // NOLINT
- 214, 1073742040, 246, 1073742072, 705, 1073742534, 721, 1073742560, // NOLINT
-  740, 748, 750, 1073742704, 884, 1073742710, 887, 1073742714,  // NOLINT
-  893, 902, 1073742728, 906, 908, 1073742734, 929, 1073742755,  // NOLINT
-  1013, 1073742839, 1153, 1073742986, 1319, 1073743153, 1366, 1369,  // NOLINT
-  1073743201, 1415, 1073743312, 1514, 1073743344, 1522, 1073743392, 1610,  // NOLINT
-  1073743470, 1647, 1073743473, 1747, 1749, 1073743589, 1766, 1073743598,  // NOLINT
-  1775, 1073743610, 1788, 1791, 1808, 1073743634, 1839, 1073743693,  // NOLINT
-  1957, 1969, 1073743818, 2026, 1073743860, 2037, 2042, 1073743872,  // NOLINT
-  2069, 2074, 2084, 2088, 1073743936, 2136, 2208, 1073744034,  // NOLINT
-  2220, 1073744132, 2361, 2365, 2384, 1073744216, 2401, 1073744241,  // NOLINT
-  2423, 1073744249, 2431, 1073744261, 2444, 1073744271, 2448, 1073744275,  // NOLINT
-  2472, 1073744298, 2480, 2482, 1073744310, 2489, 2493, 2510,  // NOLINT
-  1073744348, 2525, 1073744351, 2529, 1073744368, 2545, 1073744389, 2570,  // NOLINT
-  1073744399, 2576, 1073744403, 2600, 1073744426, 2608, 1073744434, 2611,  // NOLINT
-  1073744437, 2614, 1073744440, 2617, 1073744473, 2652, 2654, 1073744498,  // NOLINT
-  2676, 1073744517, 2701, 1073744527, 2705, 1073744531, 2728, 1073744554,  // NOLINT
-  2736, 1073744562, 2739, 1073744565, 2745, 2749, 2768, 1073744608,  // NOLINT
-  2785, 1073744645, 2828, 1073744655, 2832, 1073744659, 2856, 1073744682,  // NOLINT
-  2864, 1073744690, 2867, 1073744693, 2873, 2877, 1073744732, 2909,  // NOLINT
-  1073744735, 2913, 2929, 2947, 1073744773, 2954, 1073744782, 2960,  // NOLINT
-  1073744786, 2965, 1073744793, 2970, 2972, 1073744798, 2975, 1073744803,  // NOLINT
-  2980, 1073744808, 2986, 1073744814, 3001, 3024, 1073744901, 3084,  // NOLINT
-  1073744910, 3088, 1073744914, 3112, 1073744938, 3123, 1073744949, 3129,  // NOLINT
-  3133, 1073744984, 3161, 1073744992, 3169, 1073745029, 3212, 1073745038,  // NOLINT
-  3216, 1073745042, 3240, 1073745066, 3251, 1073745077, 3257, 3261,  // NOLINT
-  3294, 1073745120, 3297, 1073745137, 3314, 1073745157, 3340, 1073745166,  // NOLINT
-  3344, 1073745170, 3386, 3389, 3406, 1073745248, 3425, 1073745274,  // NOLINT
-  3455, 1073745285, 3478, 1073745306, 3505, 1073745331, 3515, 3517,  // NOLINT
-  1073745344, 3526, 1073745409, 3632, 1073745458, 3635, 1073745472, 3654,  // NOLINT
-  1073745537, 3714, 3716, 1073745543, 3720, 3722, 3725, 1073745556,  // NOLINT
-  3735, 1073745561, 3743, 1073745569, 3747, 3749, 3751, 1073745578,  // NOLINT
-  3755, 1073745581, 3760, 1073745586, 3763, 3773, 1073745600, 3780,  // NOLINT
-  3782, 1073745628, 3807, 3840, 1073745728, 3911, 1073745737, 3948,  // NOLINT
-  1073745800, 3980, 1073745920, 4138, 4159, 1073746000, 4181, 1073746010,  // NOLINT
-  4189, 4193, 1073746021, 4198, 1073746030, 4208, 1073746037, 4225,  // NOLINT
-  4238, 1073746080, 4293, 4295, 4301, 1073746128, 4346, 1073746172,  // NOLINT
-  4680, 1073746506, 4685, 1073746512, 4694, 4696, 1073746522, 4701,  // NOLINT
-  1073746528, 4744, 1073746570, 4749, 1073746576, 4784, 1073746610, 4789,  // NOLINT
-  1073746616, 4798, 4800, 1073746626, 4805, 1073746632, 4822, 1073746648,  // NOLINT
-  4880, 1073746706, 4885, 1073746712, 4954, 1073746816, 5007, 1073746848,  // NOLINT
-  5108, 1073746945, 5740, 1073747567, 5759, 1073747585, 5786, 1073747616,  // NOLINT
-  5866, 1073747694, 5872, 1073747712, 5900, 1073747726, 5905, 1073747744,  // NOLINT
-  5937, 1073747776, 5969, 1073747808, 5996, 1073747822, 6000, 1073747840,  // NOLINT
-  6067, 6103, 6108, 1073748000, 6263, 1073748096, 6312, 6314,  // NOLINT
-  1073748144, 6389, 1073748224, 6428, 1073748304, 6509, 1073748336, 6516,  // NOLINT
-  1073748352, 6571, 1073748417, 6599, 1073748480, 6678, 1073748512, 6740,  // NOLINT
-  6823, 1073748741, 6963, 1073748805, 6987, 1073748867, 7072, 1073748910,  // NOLINT
-  7087, 1073748922, 7141, 1073748992, 7203, 1073749069, 7247, 1073749082,  // NOLINT
-  7293, 1073749225, 7404, 1073749230, 7409, 1073749237, 7414, 1073749248,  // NOLINT
-  7615, 1073749504, 7957, 1073749784, 7965, 1073749792, 8005, 1073749832,  // NOLINT
-  8013, 1073749840, 8023, 8025, 8027, 8029, 1073749855, 8061,  // NOLINT
-  1073749888, 8116, 1073749942, 8124, 8126, 1073749954, 8132, 1073749958,  // NOLINT
-  8140, 1073749968, 8147, 1073749974, 8155, 1073749984, 8172, 1073750002,  // NOLINT
-  8180, 1073750006, 8188 };  // NOLINT
+static const uint16_t kLetterTable0Size = 431;
+static const int32_t kLetterTable0[431] = {
+    1073741889, 90,         1073741921, 122,
+    170,        181,        186,        1073742016,  // NOLINT
+    214,        1073742040, 246,        1073742072,
+    705,        1073742534, 721,        1073742560,  // NOLINT
+    740,        748,        750,        1073742704,
+    884,        1073742710, 887,        1073742714,  // NOLINT
+    893,        895,        902,        1073742728,
+    906,        908,        1073742734, 929,  // NOLINT
+    1073742755, 1013,       1073742839, 1153,
+    1073742986, 1327,       1073743153, 1366,  // NOLINT
+    1369,       1073743201, 1415,       1073743312,
+    1514,       1073743344, 1522,       1073743392,  // NOLINT
+    1610,       1073743470, 1647,       1073743473,
+    1747,       1749,       1073743589, 1766,  // NOLINT
+    1073743598, 1775,       1073743610, 1788,
+    1791,       1808,       1073743634, 1839,  // NOLINT
+    1073743693, 1957,       1969,       1073743818,
+    2026,       1073743860, 2037,       2042,  // NOLINT
+    1073743872, 2069,       2074,       2084,
+    2088,       1073743936, 2136,       1073744032,  // NOLINT
+    2226,       1073744132, 2361,       2365,
+    2384,       1073744216, 2401,       1073744241,  // NOLINT
+    2432,       1073744261, 2444,       1073744271,
+    2448,       1073744275, 2472,       1073744298,  // NOLINT
+    2480,       2482,       1073744310, 2489,
+    2493,       2510,       1073744348, 2525,  // NOLINT
+    1073744351, 2529,       1073744368, 2545,
+    1073744389, 2570,       1073744399, 2576,  // NOLINT
+    1073744403, 2600,       1073744426, 2608,
+    1073744434, 2611,       1073744437, 2614,  // NOLINT
+    1073744440, 2617,       1073744473, 2652,
+    2654,       1073744498, 2676,       1073744517,  // NOLINT
+    2701,       1073744527, 2705,       1073744531,
+    2728,       1073744554, 2736,       1073744562,  // NOLINT
+    2739,       1073744565, 2745,       2749,
+    2768,       1073744608, 2785,       1073744645,  // NOLINT
+    2828,       1073744655, 2832,       1073744659,
+    2856,       1073744682, 2864,       1073744690,  // NOLINT
+    2867,       1073744693, 2873,       2877,
+    1073744732, 2909,       1073744735, 2913,  // NOLINT
+    2929,       2947,       1073744773, 2954,
+    1073744782, 2960,       1073744786, 2965,  // NOLINT
+    1073744793, 2970,       2972,       1073744798,
+    2975,       1073744803, 2980,       1073744808,  // NOLINT
+    2986,       1073744814, 3001,       3024,
+    1073744901, 3084,       1073744910, 3088,  // NOLINT
+    1073744914, 3112,       1073744938, 3129,
+    3133,       1073744984, 3161,       1073744992,  // NOLINT
+    3169,       1073745029, 3212,       1073745038,
+    3216,       1073745042, 3240,       1073745066,  // NOLINT
+    3251,       1073745077, 3257,       3261,
+    3294,       1073745120, 3297,       1073745137,  // NOLINT
+    3314,       1073745157, 3340,       1073745166,
+    3344,       1073745170, 3386,       3389,  // NOLINT
+    3406,       1073745248, 3425,       1073745274,
+    3455,       1073745285, 3478,       1073745306,  // NOLINT
+    3505,       1073745331, 3515,       3517,
+    1073745344, 3526,       1073745409, 3632,  // NOLINT
+    1073745458, 3635,       1073745472, 3654,
+    1073745537, 3714,       3716,       1073745543,  // NOLINT
+    3720,       3722,       3725,       1073745556,
+    3735,       1073745561, 3743,       1073745569,  // NOLINT
+    3747,       3749,       3751,       1073745578,
+    3755,       1073745581, 3760,       1073745586,  // NOLINT
+    3763,       3773,       1073745600, 3780,
+    3782,       1073745628, 3807,       3840,  // NOLINT
+    1073745728, 3911,       1073745737, 3948,
+    1073745800, 3980,       1073745920, 4138,  // NOLINT
+    4159,       1073746000, 4181,       1073746010,
+    4189,       4193,       1073746021, 4198,  // NOLINT
+    1073746030, 4208,       1073746037, 4225,
+    4238,       1073746080, 4293,       4295,  // NOLINT
+    4301,       1073746128, 4346,       1073746172,
+    4680,       1073746506, 4685,       1073746512,  // NOLINT
+    4694,       4696,       1073746522, 4701,
+    1073746528, 4744,       1073746570, 4749,  // NOLINT
+    1073746576, 4784,       1073746610, 4789,
+    1073746616, 4798,       4800,       1073746626,  // NOLINT
+    4805,       1073746632, 4822,       1073746648,
+    4880,       1073746706, 4885,       1073746712,  // NOLINT
+    4954,       1073746816, 5007,       1073746848,
+    5108,       1073746945, 5740,       1073747567,  // NOLINT
+    5759,       1073747585, 5786,       1073747616,
+    5866,       1073747694, 5880,       1073747712,  // NOLINT
+    5900,       1073747726, 5905,       1073747744,
+    5937,       1073747776, 5969,       1073747808,  // NOLINT
+    5996,       1073747822, 6000,       1073747840,
+    6067,       6103,       6108,       1073748000,  // NOLINT
+    6263,       1073748096, 6312,       6314,
+    1073748144, 6389,       1073748224, 6430,  // NOLINT
+    1073748304, 6509,       1073748336, 6516,
+    1073748352, 6571,       1073748417, 6599,  // NOLINT
+    1073748480, 6678,       1073748512, 6740,
+    6823,       1073748741, 6963,       1073748805,  // NOLINT
+    6987,       1073748867, 7072,       1073748910,
+    7087,       1073748922, 7141,       1073748992,  // NOLINT
+    7203,       1073749069, 7247,       1073749082,
+    7293,       1073749225, 7404,       1073749230,  // NOLINT
+    7409,       1073749237, 7414,       1073749248,
+    7615,       1073749504, 7957,       1073749784,  // NOLINT
+    7965,       1073749792, 8005,       1073749832,
+    8013,       1073749840, 8023,       8025,  // NOLINT
+    8027,       8029,       1073749855, 8061,
+    1073749888, 8116,       1073749942, 8124,  // NOLINT
+    8126,       1073749954, 8132,       1073749958,
+    8140,       1073749968, 8147,       1073749974,  // NOLINT
+    8155,       1073749984, 8172,       1073750002,
+    8180,       1073750006, 8188};  // NOLINT
 static const uint16_t kLetterTable1Size = 87;
 static const int32_t kLetterTable1[87] = {
   113, 127, 1073741968, 156, 258, 263, 1073742090, 275,  // NOLINT
@@ -631,19 +749,33 @@
 static const uint16_t kLetterTable4Size = 2;
 static const int32_t kLetterTable4[2] = {
   1073741824, 8140 };  // NOLINT
-static const uint16_t kLetterTable5Size = 88;
-static const int32_t kLetterTable5[88] = {
-  1073741824, 1164, 1073743056, 1277, 1073743104, 1548, 1073743376, 1567,  // NOLINT
-  1073743402, 1579, 1073743424, 1646, 1073743487, 1687, 1073743520, 1775,  // NOLINT
-  1073743639, 1823, 1073743650, 1928, 1073743755, 1934, 1073743760, 1939,  // NOLINT
-  1073743776, 1962, 1073743864, 2049, 1073743875, 2053, 1073743879, 2058,  // NOLINT
-  1073743884, 2082, 1073743936, 2163, 1073744002, 2227, 1073744114, 2295,  // NOLINT
-  2299, 1073744138, 2341, 1073744176, 2374, 1073744224, 2428, 1073744260,  // NOLINT
-  2482, 2511, 1073744384, 2600, 1073744448, 2626, 1073744452, 2635,  // NOLINT
-  1073744480, 2678, 2682, 1073744512, 2735, 2737, 1073744565, 2742,  // NOLINT
-  1073744569, 2749, 2752, 2754, 1073744603, 2781, 1073744608, 2794,  // NOLINT
-  1073744626, 2804, 1073744641, 2822, 1073744649, 2830, 1073744657, 2838,  // NOLINT
-  1073744672, 2854, 1073744680, 2862, 1073744832, 3042, 1073744896, 8191 };  // NOLINT
+static const uint16_t kLetterTable5Size = 100;
+static const int32_t kLetterTable5[100] = {
+    1073741824, 1164,       1073743056, 1277,
+    1073743104, 1548,       1073743376, 1567,  // NOLINT
+    1073743402, 1579,       1073743424, 1646,
+    1073743487, 1693,       1073743520, 1775,  // NOLINT
+    1073743639, 1823,       1073743650, 1928,
+    1073743755, 1934,       1073743760, 1965,  // NOLINT
+    1073743792, 1969,       1073743863, 2049,
+    1073743875, 2053,       1073743879, 2058,  // NOLINT
+    1073743884, 2082,       1073743936, 2163,
+    1073744002, 2227,       1073744114, 2295,  // NOLINT
+    2299,       1073744138, 2341,       1073744176,
+    2374,       1073744224, 2428,       1073744260,  // NOLINT
+    2482,       2511,       1073744352, 2532,
+    1073744358, 2543,       1073744378, 2558,  // NOLINT
+    1073744384, 2600,       1073744448, 2626,
+    1073744452, 2635,       1073744480, 2678,  // NOLINT
+    2682,       1073744510, 2735,       2737,
+    1073744565, 2742,       1073744569, 2749,  // NOLINT
+    2752,       2754,       1073744603, 2781,
+    1073744608, 2794,       1073744626, 2804,  // NOLINT
+    1073744641, 2822,       1073744649, 2830,
+    1073744657, 2838,       1073744672, 2854,  // NOLINT
+    1073744680, 2862,       1073744688, 2906,
+    1073744732, 2911,       1073744740, 2917,   // NOLINT
+    1073744832, 3042,       1073744896, 8191};  // NOLINT
 static const uint16_t kLetterTable6Size = 6;
 static const int32_t kLetterTable6[6] = {
   1073741824, 6051, 1073747888, 6086, 1073747915, 6139 };  // NOLINT
@@ -687,49 +819,363 @@
 }


-// Number:               point.category == 'Nd'
+// ID_Start:             ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo',
+// 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in
+// point.properties) and ('Pattern_White_Space' not in point.properties)) or
+// ('JS_ID_Start' in point.properties)

-static const uint16_t kNumberTable0Size = 56;
-static const int32_t kNumberTable0[56] = {
- 1073741872, 57, 1073743456, 1641, 1073743600, 1785, 1073743808, 1993, // NOLINT
- 1073744230, 2415, 1073744358, 2543, 1073744486, 2671, 1073744614, 2799, // NOLINT
- 1073744742, 2927, 1073744870, 3055, 1073744998, 3183, 1073745126, 3311, // NOLINT
- 1073745254, 3439, 1073745488, 3673, 1073745616, 3801, 1073745696, 3881, // NOLINT
- 1073745984, 4169, 1073746064, 4249, 1073747936, 6121, 1073747984, 6169, // NOLINT
- 1073748294, 6479, 1073748432, 6617, 1073748608, 6793, 1073748624, 6809, // NOLINT
- 1073748816, 7001, 1073748912, 7097, 1073749056, 7241, 1073749072, 7257 }; // NOLINT
-static const uint16_t kNumberTable5Size = 12;
-static const int32_t kNumberTable5[12] = {
- 1073743392, 1577, 1073744080, 2265, 1073744128, 2313, 1073744336, 2521, // NOLINT
-  1073744464, 2649, 1073744880, 3065 };  // NOLINT
-static const uint16_t kNumberTable7Size = 2;
-static const int32_t kNumberTable7[2] = {
-  1073749776, 7961 };  // NOLINT
-bool Number::Is(uchar c) {
+static const uint16_t kID_StartTable0Size = 434;
+static const int32_t kID_StartTable0[434] = {
+    36,         1073741889, 90,         92,
+    95,         1073741921, 122,        170,  // NOLINT
+    181,        186,        1073742016, 214,
+    1073742040, 246,        1073742072, 705,  // NOLINT
+    1073742534, 721,        1073742560, 740,
+    748,        750,        1073742704, 884,  // NOLINT
+    1073742710, 887,        1073742714, 893,
+    895,        902,        1073742728, 906,  // NOLINT
+    908,        1073742734, 929,        1073742755,
+    1013,       1073742839, 1153,       1073742986,  // NOLINT
+    1327,       1073743153, 1366,       1369,
+    1073743201, 1415,       1073743312, 1514,  // NOLINT
+    1073743344, 1522,       1073743392, 1610,
+    1073743470, 1647,       1073743473, 1747,  // NOLINT
+    1749,       1073743589, 1766,       1073743598,
+    1775,       1073743610, 1788,       1791,  // NOLINT
+    1808,       1073743634, 1839,       1073743693,
+    1957,       1969,       1073743818, 2026,  // NOLINT
+    1073743860, 2037,       2042,       1073743872,
+    2069,       2074,       2084,       2088,  // NOLINT
+    1073743936, 2136,       1073744032, 2226,
+    1073744132, 2361,       2365,       2384,  // NOLINT
+    1073744216, 2401,       1073744241, 2432,
+    1073744261, 2444,       1073744271, 2448,  // NOLINT
+    1073744275, 2472,       1073744298, 2480,
+    2482,       1073744310, 2489,       2493,  // NOLINT
+    2510,       1073744348, 2525,       1073744351,
+    2529,       1073744368, 2545,       1073744389,  // NOLINT
+    2570,       1073744399, 2576,       1073744403,
+    2600,       1073744426, 2608,       1073744434,  // NOLINT
+    2611,       1073744437, 2614,       1073744440,
+    2617,       1073744473, 2652,       2654,  // NOLINT
+    1073744498, 2676,       1073744517, 2701,
+    1073744527, 2705,       1073744531, 2728,  // NOLINT
+    1073744554, 2736,       1073744562, 2739,
+    1073744565, 2745,       2749,       2768,  // NOLINT
+    1073744608, 2785,       1073744645, 2828,
+    1073744655, 2832,       1073744659, 2856,  // NOLINT
+    1073744682, 2864,       1073744690, 2867,
+    1073744693, 2873,       2877,       1073744732,  // NOLINT
+    2909,       1073744735, 2913,       2929,
+    2947,       1073744773, 2954,       1073744782,  // NOLINT
+    2960,       1073744786, 2965,       1073744793,
+    2970,       2972,       1073744798, 2975,  // NOLINT
+    1073744803, 2980,       1073744808, 2986,
+    1073744814, 3001,       3024,       1073744901,  // NOLINT
+    3084,       1073744910, 3088,       1073744914,
+    3112,       1073744938, 3129,       3133,  // NOLINT
+    1073744984, 3161,       1073744992, 3169,
+    1073745029, 3212,       1073745038, 3216,  // NOLINT
+    1073745042, 3240,       1073745066, 3251,
+    1073745077, 3257,       3261,       3294,  // NOLINT
+    1073745120, 3297,       1073745137, 3314,
+    1073745157, 3340,       1073745166, 3344,  // NOLINT
+    1073745170, 3386,       3389,       3406,
+    1073745248, 3425,       1073745274, 3455,  // NOLINT
+    1073745285, 3478,       1073745306, 3505,
+    1073745331, 3515,       3517,       1073745344,  // NOLINT
+    3526,       1073745409, 3632,       1073745458,
+    3635,       1073745472, 3654,       1073745537,  // NOLINT
+    3714,       3716,       1073745543, 3720,
+    3722,       3725,       1073745556, 3735,  // NOLINT
+    1073745561, 3743,       1073745569, 3747,
+    3749,       3751,       1073745578, 3755,  // NOLINT
+    1073745581, 3760,       1073745586, 3763,
+    3773,       1073745600, 3780,       3782,  // NOLINT
+    1073745628, 3807,       3840,       1073745728,
+    3911,       1073745737, 3948,       1073745800,  // NOLINT
+    3980,       1073745920, 4138,       4159,
+    1073746000, 4181,       1073746010, 4189,  // NOLINT
+    4193,       1073746021, 4198,       1073746030,
+    4208,       1073746037, 4225,       4238,  // NOLINT
+    1073746080, 4293,       4295,       4301,
+    1073746128, 4346,       1073746172, 4680,  // NOLINT
+    1073746506, 4685,       1073746512, 4694,
+    4696,       1073746522, 4701,       1073746528,  // NOLINT
+    4744,       1073746570, 4749,       1073746576,
+    4784,       1073746610, 4789,       1073746616,  // NOLINT
+    4798,       4800,       1073746626, 4805,
+    1073746632, 4822,       1073746648, 4880,  // NOLINT
+    1073746706, 4885,       1073746712, 4954,
+    1073746816, 5007,       1073746848, 5108,  // NOLINT
+    1073746945, 5740,       1073747567, 5759,
+    1073747585, 5786,       1073747616, 5866,  // NOLINT
+    1073747694, 5880,       1073747712, 5900,
+    1073747726, 5905,       1073747744, 5937,  // NOLINT
+    1073747776, 5969,       1073747808, 5996,
+    1073747822, 6000,       1073747840, 6067,  // NOLINT
+    6103,       6108,       1073748000, 6263,
+    1073748096, 6312,       6314,       1073748144,  // NOLINT
+    6389,       1073748224, 6430,       1073748304,
+    6509,       1073748336, 6516,       1073748352,  // NOLINT
+    6571,       1073748417, 6599,       1073748480,
+    6678,       1073748512, 6740,       6823,  // NOLINT
+    1073748741, 6963,       1073748805, 6987,
+    1073748867, 7072,       1073748910, 7087,  // NOLINT
+    1073748922, 7141,       1073748992, 7203,
+    1073749069, 7247,       1073749082, 7293,  // NOLINT
+    1073749225, 7404,       1073749230, 7409,
+    1073749237, 7414,       1073749248, 7615,  // NOLINT
+    1073749504, 7957,       1073749784, 7965,
+    1073749792, 8005,       1073749832, 8013,  // NOLINT
+    1073749840, 8023,       8025,       8027,
+    8029,       1073749855, 8061,       1073749888,  // NOLINT
+    8116,       1073749942, 8124,       8126,
+    1073749954, 8132,       1073749958, 8140,  // NOLINT
+    1073749968, 8147,       1073749974, 8155,
+    1073749984, 8172,       1073750002, 8180,  // NOLINT
+    1073750006, 8188};                         // NOLINT
+static const uint16_t kID_StartTable1Size = 84;
+static const int32_t kID_StartTable1[84] = {
+    113,        127,        1073741968, 156,
+    258,        263,        1073742090, 275,  // NOLINT
+    277,        1073742104, 285,        292,
+    294,        296,        1073742122, 313,  // NOLINT
+    1073742140, 319,        1073742149, 329,
+    334,        1073742176, 392,        1073744896,  // NOLINT
+    3118,       1073744944, 3166,       1073744992,
+    3300,       1073745131, 3310,       1073745138,  // NOLINT
+    3315,       1073745152, 3365,       3367,
+    3373,       1073745200, 3431,       3439,  // NOLINT
+    1073745280, 3478,       1073745312, 3494,
+    1073745320, 3502,       1073745328, 3510,  // NOLINT
+    1073745336, 3518,       1073745344, 3526,
+    1073745352, 3534,       1073745360, 3542,  // NOLINT
+    1073745368, 3550,       1073745925, 4103,
+    1073745953, 4137,       1073745969, 4149,  // NOLINT
+    1073745976, 4156,       1073745985, 4246,
+    1073746075, 4255,       1073746081, 4346,  // NOLINT
+    1073746172, 4351,       1073746181, 4397,
+    1073746225, 4494,       1073746336, 4538,   // NOLINT
+    1073746416, 4607,       1073746944, 8191};  // NOLINT
+static const uint16_t kID_StartTable2Size = 4;
+static const int32_t kID_StartTable2[4] = {1073741824, 3509, 1073745408,
+                                           8191};  // NOLINT
+static const uint16_t kID_StartTable3Size = 2;
+static const int32_t kID_StartTable3[2] = {1073741824, 8191};  // NOLINT
+static const uint16_t kID_StartTable4Size = 2;
+static const int32_t kID_StartTable4[2] = {1073741824, 8140};  // NOLINT
+static const uint16_t kID_StartTable5Size = 100;
+static const int32_t kID_StartTable5[100] = {
+    1073741824, 1164,       1073743056, 1277,
+    1073743104, 1548,       1073743376, 1567,  // NOLINT
+    1073743402, 1579,       1073743424, 1646,
+    1073743487, 1693,       1073743520, 1775,  // NOLINT
+    1073743639, 1823,       1073743650, 1928,
+    1073743755, 1934,       1073743760, 1965,  // NOLINT
+    1073743792, 1969,       1073743863, 2049,
+    1073743875, 2053,       1073743879, 2058,  // NOLINT
+    1073743884, 2082,       1073743936, 2163,
+    1073744002, 2227,       1073744114, 2295,  // NOLINT
+    2299,       1073744138, 2341,       1073744176,
+    2374,       1073744224, 2428,       1073744260,  // NOLINT
+    2482,       2511,       1073744352, 2532,
+    1073744358, 2543,       1073744378, 2558,  // NOLINT
+    1073744384, 2600,       1073744448, 2626,
+    1073744452, 2635,       1073744480, 2678,  // NOLINT
+    2682,       1073744510, 2735,       2737,
+    1073744565, 2742,       1073744569, 2749,  // NOLINT
+    2752,       2754,       1073744603, 2781,
+    1073744608, 2794,       1073744626, 2804,  // NOLINT
+    1073744641, 2822,       1073744649, 2830,
+    1073744657, 2838,       1073744672, 2854,  // NOLINT
+    1073744680, 2862,       1073744688, 2906,
+    1073744732, 2911,       1073744740, 2917,   // NOLINT
+    1073744832, 3042,       1073744896, 8191};  // NOLINT
+static const uint16_t kID_StartTable6Size = 6;
+static const int32_t kID_StartTable6[6] = {1073741824, 6051, 1073747888, 6086,
+                                           1073747915, 6139};  // NOLINT
+static const uint16_t kID_StartTable7Size = 48;
+static const int32_t kID_StartTable7[48] = {
+    1073748224, 6765,       1073748592, 6873,
+    1073748736, 6918,       1073748755, 6935,  // NOLINT
+    6941,       1073748767, 6952,       1073748778,
+    6966,       1073748792, 6972,       6974,  // NOLINT
+    1073748800, 6977,       1073748803, 6980,
+    1073748806, 7089,       1073748947, 7485,  // NOLINT
+    1073749328, 7567,       1073749394, 7623,
+    1073749488, 7675,       1073749616, 7796,  // NOLINT
+    1073749622, 7932,       1073749793, 7994,
+    1073749825, 8026,       1073749862, 8126,  // NOLINT
+    1073749954, 8135,       1073749962, 8143,
+    1073749970, 8151,       1073749978, 8156};  // NOLINT
+bool ID_Start::Is(uchar c) {
   int chunk_index = c >> 13;
   switch (chunk_index) {
-    case 0: return LookupPredicate(kNumberTable0,
-                                       kNumberTable0Size,
-                                       c);
-    case 5: return LookupPredicate(kNumberTable5,
-                                       kNumberTable5Size,
-                                       c);
-    case 7: return LookupPredicate(kNumberTable7,
-                                       kNumberTable7Size,
***The diff for this file has been truncated for email.***
=======================================
--- /branches/bleeding_edge/src/unicode.h       Tue Jun  3 08:12:43 2014 UTC
+++ /branches/bleeding_edge/src/unicode.h       Wed Oct  8 14:55:03 2014 UTC
@@ -44,6 +44,7 @@
   CacheEntry entries_[kSize];
 };

+
 // A cache used in case conversion.  It caches the value for characters
 // that either have no mapping or map to a single character independent
 // of context.  Characters that map to more than one character or that
@@ -70,6 +71,7 @@
   CacheEntry entries_[kSize];
 };

+
 class UnicodeData {
  private:
   friend class Test;
@@ -77,6 +79,7 @@
   static const uchar kMaxCodePoint;
 };

+
 class Utf16 {
  public:
   static inline bool IsSurrogatePair(int lead, int trail) {
@@ -113,14 +116,6 @@
   }
 };

-class Latin1 {
- public:
-  static const unsigned kMaxChar = 0xff;
-  // Returns 0 if character does not convert to single latin-1 character
-  // or if the character doesn't not convert back to latin-1 via inverse
-  // operation (upper to lower, etc).
-  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
-};

 class Utf8 {
  public:
@@ -155,45 +150,6 @@
                               unsigned* cursor);
 };

-
-class Utf8DecoderBase {
- public:
-  // Initialization done in subclass.
-  inline Utf8DecoderBase();
-  inline Utf8DecoderBase(uint16_t* buffer,
-                         unsigned buffer_length,
-                         const uint8_t* stream,
-                         unsigned stream_length);
-  inline unsigned Utf16Length() const { return utf16_length_; }
- protected:
-  // This reads all characters and sets the utf16_length_.
-  // The first buffer_length utf16 chars are cached in the buffer.
-  void Reset(uint16_t* buffer,
-             unsigned buffer_length,
-             const uint8_t* stream,
-             unsigned stream_length);
-  static void WriteUtf16Slow(const uint8_t* stream,
-                             uint16_t* data,
-                             unsigned length);
-  const uint8_t* unbuffered_start_;
-  unsigned utf16_length_;
-  bool last_byte_of_buffer_unused_;
- private:
-  DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
-};
-
-template <unsigned kBufferSize>
-class Utf8Decoder : public Utf8DecoderBase {
- public:
-  inline Utf8Decoder() {}
-  inline Utf8Decoder(const char* stream, unsigned length);
-  inline void Reset(const char* stream, unsigned length);
-  inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
- private:
-  uint16_t buffer_[kBufferSize];
-};
-
-
 struct Uppercase {
   static bool Is(uchar c);
 };
@@ -203,7 +159,10 @@
 struct Letter {
   static bool Is(uchar c);
 };
-struct Number {
+struct ID_Start {
+  static bool Is(uchar c);
+};
+struct ID_Continue {
   static bool Is(uchar c);
 };
 struct WhiteSpace {
@@ -212,12 +171,6 @@
 struct LineTerminator {
   static bool Is(uchar c);
 };
-struct CombiningMark {
-  static bool Is(uchar c);
-};
-struct ConnectorPunctuation {
-  static bool Is(uchar c);
-};
 struct ToLowercase {
   static const int kMaxWidth = 3;
   static const bool kIsToLower = true;
=======================================
--- /branches/bleeding_edge/test/cctest/test-strings.cc Mon Oct 6 15:50:40 2014 UTC
+++ /branches/bleeding_edge/test/cctest/test-strings.cc Wed Oct 8 14:55:03 2014 UTC
@@ -37,6 +37,7 @@
 #include "src/api.h"
 #include "src/factory.h"
 #include "src/objects.h"
+#include "src/unicode-decoder.h"
 #include "test/cctest/cctest.h"

 // Adapted from http://en.wikipedia.org/wiki/Multiply-with-carry
=======================================
--- /branches/bleeding_edge/test/mjsunit/var.js Tue Dec  7 11:01:02 2010 UTC
+++ /branches/bleeding_edge/test/mjsunit/var.js Wed Oct  8 14:55:03 2014 UTC
@@ -35,3 +35,22 @@
 assertTrue(!z && typeof z == 'undefined');
 if (false) { var z; }
 assertTrue(!z && typeof z == 'undefined');
+
+assertThrows("var \u2E2F;", SyntaxError);
+assertThrows("var \\u2E2F;", SyntaxError);
+
+assertDoesNotThrow("var \u2118;");
+assertDoesNotThrow("var \\u2118;");
+assertDoesNotThrow("var \u212E;");
+assertDoesNotThrow("var \\u212E;");
+assertDoesNotThrow("var \u309B;");
+assertDoesNotThrow("var \\u309B;");
+assertDoesNotThrow("var \u309C;");
+assertDoesNotThrow("var \\u309C;");
+
+assertDoesNotThrow("var $\u00B7;");
+assertDoesNotThrow("var $\u0387;");
+assertDoesNotThrow("var $\u1369;");
+assertDoesNotThrow("var $\u1370;");
+assertDoesNotThrow("var $\u1371;");
+assertDoesNotThrow("var $\u19DA;");
=======================================
--- /branches/bleeding_edge/test/unittests/unittests.gyp Tue Oct 7 07:36:21 2014 UTC
+++ /branches/bleeding_edge/test/unittests/unittests.gyp Wed Oct 8 14:55:03 2014 UTC
@@ -58,6 +58,7 @@
         'run-all-unittests.cc',
         'test-utils.h',
         'test-utils.cc',
+        'unicode/unicode-predicates-unittest.cc',
       ],
       'conditions': [
         ['v8_target_arch=="arm"', {
=======================================
--- /branches/bleeding_edge/tools/gyp/v8.gyp    Wed Oct  8 11:19:51 2014 UTC
+++ /branches/bleeding_edge/tools/gyp/v8.gyp    Wed Oct  8 14:55:03 2014 UTC
@@ -808,6 +808,8 @@
         '../../src/unicode-inl.h',
         '../../src/unicode.cc',
         '../../src/unicode.h',
+        '../../src/unicode-decoder.cc',
+        '../../src/unicode-decoder.h',
         '../../src/unique.h',
         '../../src/uri.h',
         '../../src/utils-inl.h',
@@ -1694,6 +1696,12 @@
         '../../src/mksnapshot.cc',
       ],
       'conditions': [
+        ['v8_enable_i18n_support==1', {
+          'dependencies': [
+            '<(icu_gyp_path):icui18n',
+            '<(icu_gyp_path):icuuc',
+          ]
+        }],
         ['want_separate_host_toolset==1', {
           'toolsets': ['host'],
         }, {

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to