Revision: 24473
Author: [email protected]
Date: Wed Oct 8 14:55:03 2014 UTC
Log: Update unicode to 7.0.0.
And do not use code points with PATTERN_* property for identifier start.
Maintain that \u180E is a white space character.
BUG=v8:2892
LOG=Y
[email protected], [email protected]
Review URL: https://codereview.chromium.org/638643002
https://code.google.com/p/v8/source/detail?r=24473
Added:
/branches/bleeding_edge/src/unicode-decoder.cc
/branches/bleeding_edge/src/unicode-decoder.h
/branches/bleeding_edge/test/unittests/unicode
/branches/bleeding_edge/test/unittests/unicode/unicode-predicates-unittest.cc
Modified:
/branches/bleeding_edge/BUILD.gn
/branches/bleeding_edge/src/char-predicates.h
/branches/bleeding_edge/src/jsregexp.cc
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/scanner.h
/branches/bleeding_edge/src/unicode-inl.h
/branches/bleeding_edge/src/unicode.cc
/branches/bleeding_edge/src/unicode.h
/branches/bleeding_edge/test/cctest/test-strings.cc
/branches/bleeding_edge/test/mjsunit/var.js
/branches/bleeding_edge/test/unittests/unittests.gyp
/branches/bleeding_edge/tools/gyp/v8.gyp
=======================================
--- /dev/null
+++ /branches/bleeding_edge/src/unicode-decoder.cc Wed Oct 8 14:55:03 2014 UTC
@@ -0,0 +1,78 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+namespace unibrow {
+
+void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream, unsigned stream_length) {
+ // Assume everything will fit in the buffer and stream won't be needed.
+ last_byte_of_buffer_unused_ = false;
+ unbuffered_start_ = NULL;
+ bool writing_to_buffer = true;
+ // Loop until stream is read, writing to buffer as long as buffer has space.
+ unsigned utf16_length = 0;
+ while (stream_length != 0) {
+ unsigned cursor = 0;  // Set by ValueOf; stream advances by this much.
+ uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
+ DCHECK(cursor > 0 && cursor <= stream_length);
+ stream += cursor;
+ stream_length -= cursor;
+ bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
+ utf16_length += is_two_characters ? 2 : 1;
+ // Don't need to write to the buffer, but still need utf16_length.
+ if (!writing_to_buffer) continue;
+ // Write out the characters to the buffer.
+ // utf16_length already counts this character, so it fits iff <= buffer_length.
+ if (utf16_length <= buffer_length) {
+ if (is_two_characters) {
+ *buffer++ = Utf16::LeadSurrogate(character);
+ *buffer++ = Utf16::TrailSurrogate(character);
+ } else {
+ *buffer++ = character;
+ }
+ if (utf16_length == buffer_length) {
+ // Just wrote last character of buffer
+ writing_to_buffer = false;
+ unbuffered_start_ = stream;  // The rest starts at the next character.
+ }
+ continue;
+ }
+ // Have gone over buffer.
+ // Only one slot is left for a surrogate pair: leave it unused, back up stream.
+ DCHECK(is_two_characters);
+ writing_to_buffer = false;
+ last_byte_of_buffer_unused_ = true;
+ unbuffered_start_ = stream - cursor;  // Re-decode this character later.
+ }
+ utf16_length_ = utf16_length;  // Total for the whole stream, not just the buffer.
+}
+
+
+void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+ unsigned data_length) {
+ while (data_length != 0) {  // data_length counts UTF-16 units still to write.
+ unsigned cursor = 0;
+ uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
+ // There's a total lack of bounds checking for stream
+ // as it was already done in Reset.
+ stream += cursor;
+ if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ *data++ = Utf16::LeadSurrogate(character);
+ *data++ = Utf16::TrailSurrogate(character);
+ DCHECK(data_length > 1);  // NOTE(review): checked only after both units are stored.
+ data_length -= 2;
+ } else {
+ *data++ = character;
+ data_length -= 1;
+ }
+ }
+}
+
+} // namespace unibrow
=======================================
--- /dev/null
+++ /branches/bleeding_edge/src/unicode-decoder.h Wed Oct 8 14:55:03 2014 UTC
@@ -0,0 +1,121 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_UNICODE_DECODER_H_
+#define V8_UNICODE_DECODER_H_
+
+#include <sys/types.h>
+#include "src/globals.h"
+
+namespace unibrow {
+
+class Utf8DecoderBase {
+ public:
+ // Initialization done in subclass.
+ inline Utf8DecoderBase();
+ inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream, unsigned stream_length);
+ inline unsigned Utf16Length() const { return utf16_length_; }
+
+ protected:
+ // This reads all characters and sets the utf16_length_.
+ // The first buffer_length utf16 chars are cached in the buffer.
+ void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
+ unsigned stream_length);
+ static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+ unsigned length);
+ const uint8_t* unbuffered_start_;  // First UTF-8 byte not cached; NULL if the buffer never filled.
+ unsigned utf16_length_;  // UTF-16 length of the whole stream, as counted by Reset.
+ bool last_byte_of_buffer_unused_;  // True if a surrogate pair did not fit in the last slot.
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
+};
+
+template <unsigned kBufferSize>
+class Utf8Decoder : public Utf8DecoderBase {
+ public:
+ inline Utf8Decoder() {}  // Empty decoder; call Reset() to decode a stream.
+ inline Utf8Decoder(const char* stream, unsigned length);  // Decodes immediately.
+ inline void Reset(const char* stream, unsigned length);  // Re-decodes into buffer_.
+ inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;  // Returns units written.
+
+ private:
+ uint16_t buffer_[kBufferSize];  // Cache for the first decoded UTF-16 units.
+};
+
+
+Utf8DecoderBase::Utf8DecoderBase()  // Empty state: nothing has been decoded.
+ : unbuffered_start_(NULL),  // No unbuffered part of a stream.
+ utf16_length_(0),  // Stays zero until Reset() assigns it.
+ last_byte_of_buffer_unused_(false) {}
+
+
+Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream,
+ unsigned stream_length) {
+ Reset(buffer, buffer_length, stream, stream_length);  // Assigns all three members.
+}
+
+
+template <unsigned kBufferSize>
+Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
+ : Utf8DecoderBase(buffer_, kBufferSize,  // Decode into the embedded buffer_.
+ reinterpret_cast<const uint8_t*>(stream), length) {}
+
+
+template <unsigned kBufferSize>
+void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
+ Utf8DecoderBase::Reset(buffer_, kBufferSize,  // Reuse the embedded buffer_ as the cache.
+ reinterpret_cast<const uint8_t*>(stream), length);
+}
+
+
+template <unsigned kBufferSize>
+unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
+ unsigned length) const {
+ DCHECK(length > 0);
+ if (length > utf16_length_) length = utf16_length_;  // Never write more than was decoded.
+ // memcpy everything in buffer.
+ unsigned buffer_length =
+ last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
+ unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
+ v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
+ if (length <= buffer_length) return length;  // The cache covered the request.
+ DCHECK(unbuffered_start_ != NULL);
+ // Copy the rest the slow way.
+ WriteUtf16Slow(unbuffered_start_, data + buffer_length,
+ length - buffer_length);
+ return length;
+}
+
+class Latin1 {
+ public:
+ static const unsigned kMaxChar = 0xff;  // Largest Latin-1 code point.
+ // Returns 0 if character does not convert to single latin-1 character
+ // or if the character does not convert back to latin-1 via inverse
+ // operation (upper to lower, etc).
+ static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
+};
+
+
+uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
+ DCHECK(c > Latin1::kMaxChar);  // Callers must pass a non-Latin-1 character.
+ switch (c) {
+ // These are equivalent characters in unicode.
+ case 0x39c:  // U+039C GREEK CAPITAL LETTER MU
+ case 0x3bc:  // U+03BC GREEK SMALL LETTER MU
+ return 0xb5;  // U+00B5 MICRO SIGN
+ // This is an uppercase of a Latin-1 character
+ // outside of Latin-1.
+ case 0x178:  // U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
+ return 0xff;  // U+00FF LATIN SMALL LETTER Y WITH DIAERESIS
+ }
+ return 0;  // No Latin-1 equivalent.
+}
+
+
+} // namespace unibrow
+
+#endif // V8_UNICODE_DECODER_H_
=======================================
--- /dev/null
+++ /branches/bleeding_edge/test/unittests/unicode/unicode-predicates-unittest.cc Wed Oct 8 14:55:03 2014 UTC
@@ -0,0 +1,90 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/char-predicates.h"
+#include "src/unicode.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace v8 {
+namespace internal {
+
+TEST(UnicodePredicatesTest, WhiteSpace) {
+ // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider
+ // it to be one though, since JS recognizes all white spaces in Unicode 5.1.
+ EXPECT_TRUE(WhiteSpace::Is(0x0009));  // <TAB>
+ EXPECT_TRUE(WhiteSpace::Is(0x000B));  // <VT>
+ EXPECT_TRUE(WhiteSpace::Is(0x000C));  // <FF>
+ EXPECT_TRUE(WhiteSpace::Is(' '));
+ EXPECT_TRUE(WhiteSpace::Is(0x00A0));  // <NBSP>
+ EXPECT_TRUE(WhiteSpace::Is(0x180E));  // Deliberately kept, see above.
+ EXPECT_TRUE(WhiteSpace::Is(0xFEFF));  // <BOM>
+}
+
+
+TEST(UnicodePredicatesTest, WhiteSpaceOrLineTerminator) {
+ // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider
+ // it to be one though, since JS recognizes all white spaces in Unicode 5.1.
+ // White spaces
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009));  // <TAB>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B));  // <VT>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C));  // <FF>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' '));
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0));  // <NBSP>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x180E));  // Deliberately kept, see above.
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF));  // <BOM>
+ // Line terminators
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A));  // <LF>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D));  // <CR>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028));  // <LS>
+ EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029));  // <PS>
+}
+
+
+TEST(UnicodePredicatesTest, IdentifierStart) {
+ EXPECT_TRUE(IdentifierStart::Is('$'));  // '$', '_' and '\' are accepted besides ID_Start.
+ EXPECT_TRUE(IdentifierStart::Is('_'));
+ EXPECT_TRUE(IdentifierStart::Is('\\'));
+
+ // http://www.unicode.org/reports/tr31/
+ // Other_ID_Start
+ EXPECT_TRUE(IdentifierStart::Is(0x2118));
+ EXPECT_TRUE(IdentifierStart::Is(0x212E));
+ EXPECT_TRUE(IdentifierStart::Is(0x309B));
+ EXPECT_TRUE(IdentifierStart::Is(0x309C));
+
+ // Issue 2892:
+ // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
+ EXPECT_FALSE(unibrow::ID_Start::Is(0x2E2F));  // Queries the unibrow predicate directly.
+}
+
+
+TEST(UnicodePredicatesTest, IdentifierPart) {
+ EXPECT_TRUE(IdentifierPart::Is('$'));
+ EXPECT_TRUE(IdentifierPart::Is('_'));
+ EXPECT_TRUE(IdentifierPart::Is('\\'));
+ EXPECT_TRUE(IdentifierPart::Is(0x200C));  // U+200C is Zero-Width Non-Joiner.
+ EXPECT_TRUE(IdentifierPart::Is(0x200D));  // U+200D is Zero-Width Joiner.
+
+ // http://www.unicode.org/reports/tr31/
+ // Other_ID_Start (every identifier start is also an identifier part)
+ EXPECT_TRUE(IdentifierPart::Is(0x2118));
+ EXPECT_TRUE(IdentifierPart::Is(0x212E));
+ EXPECT_TRUE(IdentifierPart::Is(0x309B));
+ EXPECT_TRUE(IdentifierPart::Is(0x309C));
+
+ // Other_ID_Continue
+ EXPECT_TRUE(IdentifierPart::Is(0x00B7));
+ EXPECT_TRUE(IdentifierPart::Is(0x0387));
+ EXPECT_TRUE(IdentifierPart::Is(0x1369));
+ EXPECT_TRUE(IdentifierPart::Is(0x1370));
+ EXPECT_TRUE(IdentifierPart::Is(0x1371));
+ EXPECT_TRUE(IdentifierPart::Is(0x19DA));
+
+ // Issue 2892:
+ // \u2E2F has the Pattern_Syntax property, so it is not an identifier part.
+ EXPECT_FALSE(IdentifierPart::Is(0x2E2F));
+}
+
+} // namespace internal
+} // namespace v8
=======================================
--- /branches/bleeding_edge/BUILD.gn Wed Oct 8 11:19:51 2014 UTC
+++ /branches/bleeding_edge/BUILD.gn Wed Oct 8 14:55:03 2014 UTC
@@ -895,6 +895,8 @@
"src/unicode-inl.h",
"src/unicode.cc",
"src/unicode.h",
+ "src/unicode-decoder.cc",
+ "src/unicode-decoder.h",
"src/unique.h",
"src/uri.h",
"src/utils-inl.h",
=======================================
--- /branches/bleeding_edge/src/char-predicates.h Tue Jun 3 08:12:43 2014 UTC
+++ /branches/bleeding_edge/src/char-predicates.h Wed Oct 8 14:55:03 2014 UTC
@@ -22,42 +22,40 @@
inline bool IsRegExpWord(uc32 c);
inline bool IsRegExpNewline(uc32 c);
+// ES6 draft section 11.6
+// This includes '_', '$' and '\', and ID_Start according to
+// http://www.unicode.org/reports/tr31/, which consists of categories
+// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
+// 'Pattern_Syntax' or 'Pattern_White_Space'.
struct IdentifierStart {
- static inline bool Is(uc32 c) {
- switch (c) {
- case '$': case '_': case '\\': return true;
- default: return unibrow::Letter::Is(c);
- }
- }
+ static inline bool Is(uc32 c) { return unibrow::ID_Start::Is(c); }
};
+// ES6 draft section 11.6
+// This includes \u200c and \u200d, and ID_Continue according to
+// http://www.unicode.org/reports/tr31/, which consists of ID_Start,
+// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
+// 'Pattern_Syntax' or 'Pattern_White_Space'.
struct IdentifierPart {
static inline bool Is(uc32 c) {
- return IdentifierStart::Is(c)
- || unibrow::Number::Is(c)
- || c == 0x200C // U+200C is Zero-Width Non-Joiner.
- || c == 0x200D // U+200D is Zero-Width Joiner.
- || unibrow::CombiningMark::Is(c)
- || unibrow::ConnectorPunctuation::Is(c);
+ return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
}
};
-// WhiteSpace according to ECMA-262 5.1, 7.2.
+// ES6 draft section 11.2
+// This includes all code points of Unicode category 'Zs'.
+// \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1,
+// so it is also included.
+// Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff.
struct WhiteSpace {
- static inline bool Is(uc32 c) {
- return c == 0x0009 || // <TAB>
- c == 0x000B || // <VT>
- c == 0x000C || // <FF>
- c == 0xFEFF || // <BOM>
- // \u0020 and \u00A0 are included in unibrow::WhiteSpace.
- unibrow::WhiteSpace::Is(c);
- }
+ static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); }
};
-// WhiteSpace and LineTerminator according to ECMA-262 5.1, 7.2 and 7.3.
+// WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
+// The line terminators are \u000a, \u000d, \u2028, and \u2029.
struct WhiteSpaceOrLineTerminator {
static inline bool Is(uc32 c) {
return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc Tue Sep 30 10:29:32 2014 UTC
+++ /branches/bleeding_edge/src/jsregexp.cc Wed Oct 8 14:55:03 2014 UTC
@@ -20,6 +20,7 @@
#include "src/regexp-stack.h"
#include "src/runtime/runtime.h"
#include "src/string-search.h"
+#include "src/unicode-decoder.h"
#ifndef V8_INTERPRETED_REGEXP
#if V8_TARGET_ARCH_IA32
=======================================
--- /branches/bleeding_edge/src/objects.h Wed Oct 8 14:42:31 2014 UTC
+++ /branches/bleeding_edge/src/objects.h Wed Oct 8 14:55:03 2014 UTC
@@ -20,6 +20,7 @@
#include "src/property-details.h"
#include "src/smart-pointers.h"
#include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
#include "src/zone.h"
#if V8_TARGET_ARCH_ARM
=======================================
--- /branches/bleeding_edge/src/scanner.h Tue Sep 16 22:15:39 2014 UTC
+++ /branches/bleeding_edge/src/scanner.h Wed Oct 8 14:55:03 2014 UTC
@@ -15,6 +15,7 @@
#include "src/list.h"
#include "src/token.h"
#include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
#include "src/utils.h"
namespace v8 {
=======================================
--- /branches/bleeding_edge/src/unicode-inl.h Mon Aug 4 11:34:54 2014 UTC
+++ /branches/bleeding_edge/src/unicode-inl.h Wed Oct 8 14:55:03 2014 UTC
@@ -55,22 +55,6 @@
return length;
}
}
-
-
-uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
- DCHECK(c > Latin1::kMaxChar);
- switch (c) {
- // This are equivalent characters in unicode.
- case 0x39c:
- case 0x3bc:
- return 0xb5;
- // This is an uppercase of a Latin-1 character
- // outside of Latin-1.
- case 0x178:
- return 0xff;
- }
- return 0;
-}
unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
@@ -152,53 +136,6 @@
return 4;
}
}
-
-Utf8DecoderBase::Utf8DecoderBase()
- : unbuffered_start_(NULL),
- utf16_length_(0),
- last_byte_of_buffer_unused_(false) {}
-
-Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer,
- unsigned buffer_length,
- const uint8_t* stream,
- unsigned stream_length) {
- Reset(buffer, buffer_length, stream, stream_length);
-}
-
-template<unsigned kBufferSize>
-Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
- : Utf8DecoderBase(buffer_,
- kBufferSize,
- reinterpret_cast<const uint8_t*>(stream),
- length) {
-}
-
-template<unsigned kBufferSize>
-void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
- Utf8DecoderBase::Reset(buffer_,
- kBufferSize,
- reinterpret_cast<const uint8_t*>(stream),
- length);
-}
-
-template <unsigned kBufferSize>
-unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
- unsigned length) const {
- DCHECK(length > 0);
- if (length > utf16_length_) length = utf16_length_;
- // memcpy everything in buffer.
- unsigned buffer_length =
- last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
- unsigned memcpy_length = length <= buffer_length ? length :
buffer_length;
- v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
- if (length <= buffer_length) return length;
- DCHECK(unbuffered_start_ != NULL);
- // Copy the rest the slow way.
- WriteUtf16Slow(unbuffered_start_,
- data + buffer_length,
- length - buffer_length);
- return length;
-}
} // namespace unibrow
=======================================
--- /branches/bleeding_edge/src/unicode.cc Mon Aug 4 11:34:54 2014 UTC
+++ /branches/bleeding_edge/src/unicode.cc Wed Oct 8 14:55:03 2014 UTC
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
-// This file was generated at 2014-02-07 15:31:16.733174
+// This file was generated at 2014-10-08 15:25:47.940335
#include "src/unicode-inl.h"
#include <stdio.h>
@@ -23,6 +23,7 @@
typedef unsigned short uint16_t; // NOLINT
typedef int int32_t; // NOLINT
+
// All access to the character table should go through this function.
template <int D>
static inline uchar TableGet(const int32_t* table, int index) {
@@ -187,6 +188,7 @@
return 0;
}
}
+
uchar Utf8::CalculateValue(const byte* str,
unsigned length,
@@ -256,138 +258,126 @@
*cursor += 1;
return kBadChar;
}
-
-
-void Utf8DecoderBase::Reset(uint16_t* buffer,
- unsigned buffer_length,
- const uint8_t* stream,
- unsigned stream_length) {
- // Assume everything will fit in the buffer and stream won't be needed.
- last_byte_of_buffer_unused_ = false;
- unbuffered_start_ = NULL;
- bool writing_to_buffer = true;
- // Loop until stream is read, writing to buffer as long as buffer has
space.
- unsigned utf16_length = 0;
- while (stream_length != 0) {
- unsigned cursor = 0;
- uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
- DCHECK(cursor > 0 && cursor <= stream_length);
- stream += cursor;
- stream_length -= cursor;
- bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
- utf16_length += is_two_characters ? 2 : 1;
- // Don't need to write to the buffer, but still need utf16_length.
- if (!writing_to_buffer) continue;
- // Write out the characters to the buffer.
- // Must check for equality with buffer_length as we've already updated
it.
- if (utf16_length <= buffer_length) {
- if (is_two_characters) {
- *buffer++ = Utf16::LeadSurrogate(character);
- *buffer++ = Utf16::TrailSurrogate(character);
- } else {
- *buffer++ = character;
- }
- if (utf16_length == buffer_length) {
- // Just wrote last character of buffer
- writing_to_buffer = false;
- unbuffered_start_ = stream;
- }
- continue;
- }
- // Have gone over buffer.
- // Last char of buffer is unused, set cursor back.
- DCHECK(is_two_characters);
- writing_to_buffer = false;
- last_byte_of_buffer_unused_ = true;
- unbuffered_start_ = stream - cursor;
- }
- utf16_length_ = utf16_length;
-}
-
-
-void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream,
- uint16_t* data,
- unsigned data_length) {
- while (data_length != 0) {
- unsigned cursor = 0;
- uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize,
&cursor);
- // There's a total lack of bounds checking for stream
- // as it was already done in Reset.
- stream += cursor;
- if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
- *data++ = Utf16::LeadSurrogate(character);
- *data++ = Utf16::TrailSurrogate(character);
- DCHECK(data_length > 1);
- data_length -= 2;
- } else {
- *data++ = character;
- data_length -= 1;
- }
- }
-}
// Uppercase: point.category == 'Lu'
-static const uint16_t kUppercaseTable0Size = 450;
-static const int32_t kUppercaseTable0[450] = {
- 1073741889, 90, 1073742016, 214, 1073742040, 222, 256, 258, // NOLINT
- 260, 262, 264, 266, 268, 270, 272, 274, // NOLINT
- 276, 278, 280, 282, 284, 286, 288, 290, // NOLINT
- 292, 294, 296, 298, 300, 302, 304, 306, // NOLINT
- 308, 310, 313, 315, 317, 319, 321, 323, // NOLINT
- 325, 327, 330, 332, 334, 336, 338, 340, // NOLINT
- 342, 344, 346, 348, 350, 352, 354, 356, // NOLINT
- 358, 360, 362, 364, 366, 368, 370, 372, // NOLINT
- 374, 1073742200, 377, 379, 381, 1073742209, 386, 388, // NOLINT
- 1073742214, 391, 1073742217, 395, 1073742222, 401, 1073742227, 404, //
NOLINT
- 1073742230, 408, 1073742236, 413, 1073742239, 416, 418, 420, // NOLINT
- 1073742246, 423, 425, 428, 1073742254, 431, 1073742257, 435, // NOLINT
- 437, 1073742263, 440, 444, 452, 455, 458, 461, // NOLINT
- 463, 465, 467, 469, 471, 473, 475, 478, // NOLINT
- 480, 482, 484, 486, 488, 490, 492, 494, // NOLINT
- 497, 500, 1073742326, 504, 506, 508, 510, 512, // NOLINT
- 514, 516, 518, 520, 522, 524, 526, 528, // NOLINT
- 530, 532, 534, 536, 538, 540, 542, 544, // NOLINT
- 546, 548, 550, 552, 554, 556, 558, 560, // NOLINT
- 562, 1073742394, 571, 1073742397, 574, 577, 1073742403, 582, // NOLINT
- 584, 586, 588, 590, 880, 882, 886, 902, // NOLINT
- 1073742728, 906, 908, 1073742734, 911, 1073742737, 929, 1073742755, //
NOLINT
- 939, 975, 1073742802, 980, 984, 986, 988, 990, // NOLINT
- 992, 994, 996, 998, 1000, 1002, 1004, 1006, // NOLINT
- 1012, 1015, 1073742841, 1018, 1073742845, 1071, 1120, 1122, // NOLINT
- 1124, 1126, 1128, 1130, 1132, 1134, 1136, 1138, // NOLINT
- 1140, 1142, 1144, 1146, 1148, 1150, 1152, 1162, // NOLINT
- 1164, 1166, 1168, 1170, 1172, 1174, 1176, 1178, // NOLINT
- 1180, 1182, 1184, 1186, 1188, 1190, 1192, 1194, // NOLINT
- 1196, 1198, 1200, 1202, 1204, 1206, 1208, 1210, // NOLINT
- 1212, 1214, 1073743040, 1217, 1219, 1221, 1223, 1225, // NOLINT
- 1227, 1229, 1232, 1234, 1236, 1238, 1240, 1242, // NOLINT
- 1244, 1246, 1248, 1250, 1252, 1254, 1256, 1258, // NOLINT
- 1260, 1262, 1264, 1266, 1268, 1270, 1272, 1274, // NOLINT
- 1276, 1278, 1280, 1282, 1284, 1286, 1288, 1290, // NOLINT
- 1292, 1294, 1296, 1298, 1300, 1302, 1304, 1306, // NOLINT
- 1308, 1310, 1312, 1314, 1316, 1318, 1073743153, 1366, // NOLINT
- 1073746080, 4293, 4295, 4301, 7680, 7682, 7684, 7686, // NOLINT
- 7688, 7690, 7692, 7694, 7696, 7698, 7700, 7702, // NOLINT
- 7704, 7706, 7708, 7710, 7712, 7714, 7716, 7718, // NOLINT
- 7720, 7722, 7724, 7726, 7728, 7730, 7732, 7734, // NOLINT
- 7736, 7738, 7740, 7742, 7744, 7746, 7748, 7750, // NOLINT
- 7752, 7754, 7756, 7758, 7760, 7762, 7764, 7766, // NOLINT
- 7768, 7770, 7772, 7774, 7776, 7778, 7780, 7782, // NOLINT
- 7784, 7786, 7788, 7790, 7792, 7794, 7796, 7798, // NOLINT
- 7800, 7802, 7804, 7806, 7808, 7810, 7812, 7814, // NOLINT
- 7816, 7818, 7820, 7822, 7824, 7826, 7828, 7838, // NOLINT
- 7840, 7842, 7844, 7846, 7848, 7850, 7852, 7854, // NOLINT
- 7856, 7858, 7860, 7862, 7864, 7866, 7868, 7870, // NOLINT
- 7872, 7874, 7876, 7878, 7880, 7882, 7884, 7886, // NOLINT
- 7888, 7890, 7892, 7894, 7896, 7898, 7900, 7902, // NOLINT
- 7904, 7906, 7908, 7910, 7912, 7914, 7916, 7918, // NOLINT
- 7920, 7922, 7924, 7926, 7928, 7930, 7932, 7934, // NOLINT
- 1073749768, 7951, 1073749784, 7965, 1073749800, 7983, 1073749816, 7999,
// NOLINT
- 1073749832, 8013, 8025, 8027, 8029, 8031, 1073749864, 8047, // NOLINT
- 1073749944, 8123, 1073749960, 8139, 1073749976, 8155, 1073749992, 8172,
// NOLINT
- 1073750008, 8187 }; // NOLINT
+static const uint16_t kUppercaseTable0Size = 455;
+static const int32_t kUppercaseTable0[455] = {
+ 1073741889, 90, 1073742016, 214,
+ 1073742040, 222, 256, 258, // NOLINT
+ 260, 262, 264, 266,
+ 268, 270, 272, 274, // NOLINT
+ 276, 278, 280, 282,
+ 284, 286, 288, 290, // NOLINT
+ 292, 294, 296, 298,
+ 300, 302, 304, 306, // NOLINT
+ 308, 310, 313, 315,
+ 317, 319, 321, 323, // NOLINT
+ 325, 327, 330, 332,
+ 334, 336, 338, 340, // NOLINT
+ 342, 344, 346, 348,
+ 350, 352, 354, 356, // NOLINT
+ 358, 360, 362, 364,
+ 366, 368, 370, 372, // NOLINT
+ 374, 1073742200, 377, 379,
+ 381, 1073742209, 386, 388, // NOLINT
+ 1073742214, 391, 1073742217, 395,
+ 1073742222, 401, 1073742227, 404, // NOLINT
+ 1073742230, 408, 1073742236, 413,
+ 1073742239, 416, 418, 420, // NOLINT
+ 1073742246, 423, 425, 428,
+ 1073742254, 431, 1073742257, 435, // NOLINT
+ 437, 1073742263, 440, 444,
+ 452, 455, 458, 461, // NOLINT
+ 463, 465, 467, 469,
+ 471, 473, 475, 478, // NOLINT
+ 480, 482, 484, 486,
+ 488, 490, 492, 494, // NOLINT
+ 497, 500, 1073742326, 504,
+ 506, 508, 510, 512, // NOLINT
+ 514, 516, 518, 520,
+ 522, 524, 526, 528, // NOLINT
+ 530, 532, 534, 536,
+ 538, 540, 542, 544, // NOLINT
+ 546, 548, 550, 552,
+ 554, 556, 558, 560, // NOLINT
+ 562, 1073742394, 571, 1073742397,
+ 574, 577, 1073742403, 582, // NOLINT
+ 584, 586, 588, 590,
+ 880, 882, 886, 895, // NOLINT
+ 902, 1073742728, 906, 908,
+ 1073742734, 911, 1073742737, 929, // NOLINT
+ 1073742755, 939, 975, 1073742802,
+ 980, 984, 986, 988, // NOLINT
+ 990, 992, 994, 996,
+ 998, 1000, 1002, 1004, // NOLINT
+ 1006, 1012, 1015, 1073742841,
+ 1018, 1073742845, 1071, 1120, // NOLINT
+ 1122, 1124, 1126, 1128,
+ 1130, 1132, 1134, 1136, // NOLINT
+ 1138, 1140, 1142, 1144,
+ 1146, 1148, 1150, 1152, // NOLINT
+ 1162, 1164, 1166, 1168,
+ 1170, 1172, 1174, 1176, // NOLINT
+ 1178, 1180, 1182, 1184,
+ 1186, 1188, 1190, 1192, // NOLINT
+ 1194, 1196, 1198, 1200,
+ 1202, 1204, 1206, 1208, // NOLINT
+ 1210, 1212, 1214, 1073743040,
+ 1217, 1219, 1221, 1223, // NOLINT
+ 1225, 1227, 1229, 1232,
+ 1234, 1236, 1238, 1240, // NOLINT
+ 1242, 1244, 1246, 1248,
+ 1250, 1252, 1254, 1256, // NOLINT
+ 1258, 1260, 1262, 1264,
+ 1266, 1268, 1270, 1272, // NOLINT
+ 1274, 1276, 1278, 1280,
+ 1282, 1284, 1286, 1288, // NOLINT
+ 1290, 1292, 1294, 1296,
+ 1298, 1300, 1302, 1304, // NOLINT
+ 1306, 1308, 1310, 1312,
+ 1314, 1316, 1318, 1320, // NOLINT
+ 1322, 1324, 1326, 1073743153,
+ 1366, 1073746080, 4293, 4295, // NOLINT
+ 4301, 7680, 7682, 7684,
+ 7686, 7688, 7690, 7692, // NOLINT
+ 7694, 7696, 7698, 7700,
+ 7702, 7704, 7706, 7708, // NOLINT
+ 7710, 7712, 7714, 7716,
+ 7718, 7720, 7722, 7724, // NOLINT
+ 7726, 7728, 7730, 7732,
+ 7734, 7736, 7738, 7740, // NOLINT
+ 7742, 7744, 7746, 7748,
+ 7750, 7752, 7754, 7756, // NOLINT
+ 7758, 7760, 7762, 7764,
+ 7766, 7768, 7770, 7772, // NOLINT
+ 7774, 7776, 7778, 7780,
+ 7782, 7784, 7786, 7788, // NOLINT
+ 7790, 7792, 7794, 7796,
+ 7798, 7800, 7802, 7804, // NOLINT
+ 7806, 7808, 7810, 7812,
+ 7814, 7816, 7818, 7820, // NOLINT
+ 7822, 7824, 7826, 7828,
+ 7838, 7840, 7842, 7844, // NOLINT
+ 7846, 7848, 7850, 7852,
+ 7854, 7856, 7858, 7860, // NOLINT
+ 7862, 7864, 7866, 7868,
+ 7870, 7872, 7874, 7876, // NOLINT
+ 7878, 7880, 7882, 7884,
+ 7886, 7888, 7890, 7892, // NOLINT
+ 7894, 7896, 7898, 7900,
+ 7902, 7904, 7906, 7908, // NOLINT
+ 7910, 7912, 7914, 7916,
+ 7918, 7920, 7922, 7924, // NOLINT
+ 7926, 7928, 7930, 7932,
+ 7934, 1073749768, 7951, 1073749784, // NOLINT
+ 7965, 1073749800, 7983, 1073749816,
+ 7999, 1073749832, 8013, 8025, // NOLINT
+ 8027, 8029, 8031, 1073749864,
+ 8047, 1073749944, 8123, 1073749960, // NOLINT
+ 8139, 1073749976, 8155, 1073749992,
+ 8172, 1073750008, 8187}; // NOLINT
static const uint16_t kUppercaseTable1Size = 86;
static const int32_t kUppercaseTable1[86] = {
258, 263, 1073742091, 269, 1073742096, 274, 277, 1073742105, // NOLINT
@@ -401,20 +391,21 @@
3262, 3264, 3266, 3268, 3270, 3272, 3274, 3276, // NOLINT
3278, 3280, 3282, 3284, 3286, 3288, 3290, 3292, // NOLINT
3294, 3296, 3298, 3307, 3309, 3314 }; // NOLINT
-static const uint16_t kUppercaseTable5Size = 91;
-static const int32_t kUppercaseTable5[91] = {
- 1600, 1602, 1604, 1606, 1608, 1610, 1612, 1614, // NOLINT
- 1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630, // NOLINT
- 1632, 1634, 1636, 1638, 1640, 1642, 1644, 1664, // NOLINT
- 1666, 1668, 1670, 1672, 1674, 1676, 1678, 1680, // NOLINT
- 1682, 1684, 1686, 1826, 1828, 1830, 1832, 1834, // NOLINT
- 1836, 1838, 1842, 1844, 1846, 1848, 1850, 1852, // NOLINT
- 1854, 1856, 1858, 1860, 1862, 1864, 1866, 1868, // NOLINT
- 1870, 1872, 1874, 1876, 1878, 1880, 1882, 1884, // NOLINT
- 1886, 1888, 1890, 1892, 1894, 1896, 1898, 1900, // NOLINT
- 1902, 1913, 1915, 1073743741, 1918, 1920, 1922, 1924, // NOLINT
- 1926, 1931, 1933, 1936, 1938, 1952, 1954, 1956, // NOLINT
- 1958, 1960, 1962 }; // NOLINT
+static const uint16_t kUppercaseTable5Size = 101;
+static const int32_t kUppercaseTable5[101] = {
+ 1600, 1602, 1604, 1606, 1608, 1610, 1612, 1614, //
NOLINT
+ 1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630, //
NOLINT
+ 1632, 1634, 1636, 1638, 1640, 1642, 1644, 1664, //
NOLINT
+ 1666, 1668, 1670, 1672, 1674, 1676, 1678, 1680, //
NOLINT
+ 1682, 1684, 1686, 1688, 1690, 1826, 1828, 1830, //
NOLINT
+ 1832, 1834, 1836, 1838, 1842, 1844, 1846, 1848, //
NOLINT
+ 1850, 1852, 1854, 1856, 1858, 1860, 1862, 1864, //
NOLINT
+ 1866, 1868, 1870, 1872, 1874, 1876, 1878, 1880, //
NOLINT
+ 1882, 1884, 1886, 1888, 1890, 1892, 1894, 1896, //
NOLINT
+ 1898, 1900, 1902, 1913, 1915, 1073743741, 1918, 1920, //
NOLINT
+ 1922, 1924, 1926, 1931, 1933, 1936, 1938, 1942, //
NOLINT
+ 1944, 1946, 1948, 1950, 1952, 1954, 1956, 1958, //
NOLINT
+ 1960, 1073743786, 1965, 1073743792, 1969}; //
NOLINT
static const uint16_t kUppercaseTable7Size = 2;
static const int32_t kUppercaseTable7[2] = {
1073749793, 7994 }; // NOLINT
@@ -440,66 +431,125 @@
// Lowercase: point.category == 'Ll'
-static const uint16_t kLowercaseTable0Size = 463;
-static const int32_t kLowercaseTable0[463] = {
- 1073741921, 122, 181, 1073742047, 246, 1073742072, 255, 257, // NOLINT
- 259, 261, 263, 265, 267, 269, 271, 273, // NOLINT
- 275, 277, 279, 281, 283, 285, 287, 289, // NOLINT
- 291, 293, 295, 297, 299, 301, 303, 305, // NOLINT
- 307, 309, 1073742135, 312, 314, 316, 318, 320, // NOLINT
- 322, 324, 326, 1073742152, 329, 331, 333, 335, // NOLINT
- 337, 339, 341, 343, 345, 347, 349, 351, // NOLINT
- 353, 355, 357, 359, 361, 363, 365, 367, // NOLINT
- 369, 371, 373, 375, 378, 380, 1073742206, 384, // NOLINT
- 387, 389, 392, 1073742220, 397, 402, 405, 1073742233, // NOLINT
- 411, 414, 417, 419, 421, 424, 1073742250, 427, // NOLINT
- 429, 432, 436, 438, 1073742265, 442, 1073742269, 447, // NOLINT
- 454, 457, 460, 462, 464, 466, 468, 470, // NOLINT
- 472, 474, 1073742300, 477, 479, 481, 483, 485, // NOLINT
- 487, 489, 491, 493, 1073742319, 496, 499, 501, // NOLINT
- 505, 507, 509, 511, 513, 515, 517, 519, // NOLINT
- 521, 523, 525, 527, 529, 531, 533, 535, // NOLINT
- 537, 539, 541, 543, 545, 547, 549, 551, // NOLINT
- 553, 555, 557, 559, 561, 1073742387, 569, 572, // NOLINT
- 1073742399, 576, 578, 583, 585, 587, 589, 1073742415, // NOLINT
- 659, 1073742485, 687, 881, 883, 887, 1073742715, 893, // NOLINT
- 912, 1073742764, 974, 1073742800, 977, 1073742805, 983, 985, // NOLINT
- 987, 989, 991, 993, 995, 997, 999, 1001, // NOLINT
- 1003, 1005, 1073742831, 1011, 1013, 1016, 1073742843, 1020, // NOLINT
- 1073742896, 1119, 1121, 1123, 1125, 1127, 1129, 1131, // NOLINT
- 1133, 1135, 1137, 1139, 1141, 1143, 1145, 1147, // NOLINT
- 1149, 1151, 1153, 1163, 1165, 1167, 1169, 1171, // NOLINT
- 1173, 1175, 1177, 1179, 1181, 1183, 1185, 1187, // NOLINT
- 1189, 1191, 1193, 1195, 1197, 1199, 1201, 1203, // NOLINT
- 1205, 1207, 1209, 1211, 1213, 1215, 1218, 1220, // NOLINT
- 1222, 1224, 1226, 1228, 1073743054, 1231, 1233, 1235, // NOLINT
- 1237, 1239, 1241, 1243, 1245, 1247, 1249, 1251, // NOLINT
- 1253, 1255, 1257, 1259, 1261, 1263, 1265, 1267, // NOLINT
- 1269, 1271, 1273, 1275, 1277, 1279, 1281, 1283, // NOLINT
- 1285, 1287, 1289, 1291, 1293, 1295, 1297, 1299, // NOLINT
- 1301, 1303, 1305, 1307, 1309, 1311, 1313, 1315, // NOLINT
- 1317, 1319, 1073743201, 1415, 1073749248, 7467, 1073749355, 7543, //
NOLINT
- 1073749369, 7578, 7681, 7683, 7685, 7687, 7689, 7691, // NOLINT
- 7693, 7695, 7697, 7699, 7701, 7703, 7705, 7707, // NOLINT
- 7709, 7711, 7713, 7715, 7717, 7719, 7721, 7723, // NOLINT
- 7725, 7727, 7729, 7731, 7733, 7735, 7737, 7739, // NOLINT
- 7741, 7743, 7745, 7747, 7749, 7751, 7753, 7755, // NOLINT
- 7757, 7759, 7761, 7763, 7765, 7767, 7769, 7771, // NOLINT
- 7773, 7775, 7777, 7779, 7781, 7783, 7785, 7787, // NOLINT
- 7789, 7791, 7793, 7795, 7797, 7799, 7801, 7803, // NOLINT
- 7805, 7807, 7809, 7811, 7813, 7815, 7817, 7819, // NOLINT
- 7821, 7823, 7825, 7827, 1073749653, 7837, 7839, 7841, // NOLINT
- 7843, 7845, 7847, 7849, 7851, 7853, 7855, 7857, // NOLINT
- 7859, 7861, 7863, 7865, 7867, 7869, 7871, 7873, // NOLINT
- 7875, 7877, 7879, 7881, 7883, 7885, 7887, 7889, // NOLINT
- 7891, 7893, 7895, 7897, 7899, 7901, 7903, 7905, // NOLINT
- 7907, 7909, 7911, 7913, 7915, 7917, 7919, 7921, // NOLINT
- 7923, 7925, 7927, 7929, 7931, 7933, 1073749759, 7943, // NOLINT
- 1073749776, 7957, 1073749792, 7975, 1073749808, 7991, 1073749824, 8005,
// NOLINT
- 1073749840, 8023, 1073749856, 8039, 1073749872, 8061, 1073749888, 8071,
// NOLINT
- 1073749904, 8087, 1073749920, 8103, 1073749936, 8116, 1073749942, 8119,
// NOLINT
- 8126, 1073749954, 8132, 1073749958, 8135, 1073749968, 8147, 1073749974,
// NOLINT
- 8151, 1073749984, 8167, 1073750002, 8180, 1073750006, 8183 }; // NOLINT
+static const uint16_t kLowercaseTable0Size = 467;
+static const int32_t kLowercaseTable0[467] = {
+ 1073741921, 122, 181, 1073742047,
+ 246, 1073742072, 255, 257, // NOLINT
+ 259, 261, 263, 265,
+ 267, 269, 271, 273, // NOLINT
+ 275, 277, 279, 281,
+ 283, 285, 287, 289, // NOLINT
+ 291, 293, 295, 297,
+ 299, 301, 303, 305, // NOLINT
+ 307, 309, 1073742135, 312,
+ 314, 316, 318, 320, // NOLINT
+ 322, 324, 326, 1073742152,
+ 329, 331, 333, 335, // NOLINT
+ 337, 339, 341, 343,
+ 345, 347, 349, 351, // NOLINT
+ 353, 355, 357, 359,
+ 361, 363, 365, 367, // NOLINT
+ 369, 371, 373, 375,
+ 378, 380, 1073742206, 384, // NOLINT
+ 387, 389, 392, 1073742220,
+ 397, 402, 405, 1073742233, // NOLINT
+ 411, 414, 417, 419,
+ 421, 424, 1073742250, 427, // NOLINT
+ 429, 432, 436, 438,
+ 1073742265, 442, 1073742269, 447, // NOLINT
+ 454, 457, 460, 462,
+ 464, 466, 468, 470, // NOLINT
+ 472, 474, 1073742300, 477,
+ 479, 481, 483, 485, // NOLINT
+ 487, 489, 491, 493,
+ 1073742319, 496, 499, 501, // NOLINT
+ 505, 507, 509, 511,
+ 513, 515, 517, 519, // NOLINT
+ 521, 523, 525, 527,
+ 529, 531, 533, 535, // NOLINT
+ 537, 539, 541, 543,
+ 545, 547, 549, 551, // NOLINT
+ 553, 555, 557, 559,
+ 561, 1073742387, 569, 572, // NOLINT
+ 1073742399, 576, 578, 583,
+ 585, 587, 589, 1073742415, // NOLINT
+ 659, 1073742485, 687, 881,
+ 883, 887, 1073742715, 893, // NOLINT
+ 912, 1073742764, 974, 1073742800,
+ 977, 1073742805, 983, 985, // NOLINT
+ 987, 989, 991, 993,
+ 995, 997, 999, 1001, // NOLINT
+ 1003, 1005, 1073742831, 1011,
+ 1013, 1016, 1073742843, 1020, // NOLINT
+ 1073742896, 1119, 1121, 1123,
+ 1125, 1127, 1129, 1131, // NOLINT
+ 1133, 1135, 1137, 1139,
+ 1141, 1143, 1145, 1147, // NOLINT
+ 1149, 1151, 1153, 1163,
+ 1165, 1167, 1169, 1171, // NOLINT
+ 1173, 1175, 1177, 1179,
+ 1181, 1183, 1185, 1187, // NOLINT
+ 1189, 1191, 1193, 1195,
+ 1197, 1199, 1201, 1203, // NOLINT
+ 1205, 1207, 1209, 1211,
+ 1213, 1215, 1218, 1220, // NOLINT
+ 1222, 1224, 1226, 1228,
+ 1073743054, 1231, 1233, 1235, // NOLINT
+ 1237, 1239, 1241, 1243,
+ 1245, 1247, 1249, 1251, // NOLINT
+ 1253, 1255, 1257, 1259,
+ 1261, 1263, 1265, 1267, // NOLINT
+ 1269, 1271, 1273, 1275,
+ 1277, 1279, 1281, 1283, // NOLINT
+ 1285, 1287, 1289, 1291,
+ 1293, 1295, 1297, 1299, // NOLINT
+ 1301, 1303, 1305, 1307,
+ 1309, 1311, 1313, 1315, // NOLINT
+ 1317, 1319, 1321, 1323,
+ 1325, 1327, 1073743201, 1415, // NOLINT
+ 1073749248, 7467, 1073749355, 7543,
+ 1073749369, 7578, 7681, 7683, // NOLINT
+ 7685, 7687, 7689, 7691,
+ 7693, 7695, 7697, 7699, // NOLINT
+ 7701, 7703, 7705, 7707,
+ 7709, 7711, 7713, 7715, // NOLINT
+ 7717, 7719, 7721, 7723,
+ 7725, 7727, 7729, 7731, // NOLINT
+ 7733, 7735, 7737, 7739,
+ 7741, 7743, 7745, 7747, // NOLINT
+ 7749, 7751, 7753, 7755,
+ 7757, 7759, 7761, 7763, // NOLINT
+ 7765, 7767, 7769, 7771,
+ 7773, 7775, 7777, 7779, // NOLINT
+ 7781, 7783, 7785, 7787,
+ 7789, 7791, 7793, 7795, // NOLINT
+ 7797, 7799, 7801, 7803,
+ 7805, 7807, 7809, 7811, // NOLINT
+ 7813, 7815, 7817, 7819,
+ 7821, 7823, 7825, 7827, // NOLINT
+ 1073749653, 7837, 7839, 7841,
+ 7843, 7845, 7847, 7849, // NOLINT
+ 7851, 7853, 7855, 7857,
+ 7859, 7861, 7863, 7865, // NOLINT
+ 7867, 7869, 7871, 7873,
+ 7875, 7877, 7879, 7881, // NOLINT
+ 7883, 7885, 7887, 7889,
+ 7891, 7893, 7895, 7897, // NOLINT
+ 7899, 7901, 7903, 7905,
+ 7907, 7909, 7911, 7913, // NOLINT
+ 7915, 7917, 7919, 7921,
+ 7923, 7925, 7927, 7929, // NOLINT
+ 7931, 7933, 1073749759, 7943,
+ 1073749776, 7957, 1073749792, 7975, // NOLINT
+ 1073749808, 7991, 1073749824, 8005,
+ 1073749840, 8023, 1073749856, 8039, // NOLINT
+ 1073749872, 8061, 1073749888, 8071,
+ 1073749904, 8087, 1073749920, 8103, // NOLINT
+ 1073749936, 8116, 1073749942, 8119,
+ 8126, 1073749954, 8132, 1073749958, // NOLINT
+ 8135, 1073749968, 8147, 1073749974,
+ 8151, 1073749984, 8167, 1073750002, // NOLINT
+ 8180, 1073750006, 8183}; // NOLINT
static const uint16_t kLowercaseTable1Size = 84;
static const int32_t kLowercaseTable1[84] = {
266, 1073742094, 271, 275, 303, 308, 313, 1073742140, // NOLINT
@@ -513,20 +563,35 @@
3277, 3279, 3281, 3283, 3285, 3287, 3289, 3291, // NOLINT
3293, 3295, 3297, 1073745123, 3300, 3308, 3310, 3315, // NOLINT
1073745152, 3365, 3367, 3373 }; // NOLINT
-static const uint16_t kLowercaseTable5Size = 93;
-static const int32_t kLowercaseTable5[93] = {
- 1601, 1603, 1605, 1607, 1609, 1611, 1613, 1615, // NOLINT
- 1617, 1619, 1621, 1623, 1625, 1627, 1629, 1631, // NOLINT
- 1633, 1635, 1637, 1639, 1641, 1643, 1645, 1665, // NOLINT
- 1667, 1669, 1671, 1673, 1675, 1677, 1679, 1681, // NOLINT
- 1683, 1685, 1687, 1827, 1829, 1831, 1833, 1835, // NOLINT
- 1837, 1073743663, 1841, 1843, 1845, 1847, 1849, 1851, // NOLINT
- 1853, 1855, 1857, 1859, 1861, 1863, 1865, 1867, // NOLINT
- 1869, 1871, 1873, 1875, 1877, 1879, 1881, 1883, // NOLINT
- 1885, 1887, 1889, 1891, 1893, 1895, 1897, 1899, // NOLINT
- 1901, 1903, 1073743729, 1912, 1914, 1916, 1919, 1921, // NOLINT
- 1923, 1925, 1927, 1932, 1934, 1937, 1939, 1953, // NOLINT
- 1955, 1957, 1959, 1961, 2042 }; // NOLINT
+static const uint16_t kLowercaseTable5Size = 105;
+static const int32_t kLowercaseTable5[105] = {
+ 1601, 1603, 1605, 1607,
+ 1609, 1611, 1613, 1615, // NOLINT
+ 1617, 1619, 1621, 1623,
+ 1625, 1627, 1629, 1631, // NOLINT
+ 1633, 1635, 1637, 1639,
+ 1641, 1643, 1645, 1665, // NOLINT
+ 1667, 1669, 1671, 1673,
+ 1675, 1677, 1679, 1681, // NOLINT
+ 1683, 1685, 1687, 1689,
+ 1691, 1827, 1829, 1831, // NOLINT
+ 1833, 1835, 1837, 1073743663,
+ 1841, 1843, 1845, 1847, // NOLINT
+ 1849, 1851, 1853, 1855,
+ 1857, 1859, 1861, 1863, // NOLINT
+ 1865, 1867, 1869, 1871,
+ 1873, 1875, 1877, 1879, // NOLINT
+ 1881, 1883, 1885, 1887,
+ 1889, 1891, 1893, 1895, // NOLINT
+ 1897, 1899, 1901, 1903,
+ 1073743729, 1912, 1914, 1916, // NOLINT
+ 1919, 1921, 1923, 1925,
+ 1927, 1932, 1934, 1937, // NOLINT
+ 1073743763, 1941, 1943, 1945,
+ 1947, 1949, 1951, 1953, // NOLINT
+ 1955, 1957, 1959, 1961,
+ 2042, 1073744688, 2906, 1073744740, // NOLINT
+ 2917}; // NOLINT
static const uint16_t kLowercaseTable7Size = 6;
static const int32_t kLowercaseTable7[6] = {
1073748736, 6918, 1073748755, 6935, 1073749825, 8026 }; // NOLINT
@@ -550,65 +615,118 @@
}
-// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl' ]
+// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']
-static const uint16_t kLetterTable0Size = 435;
-static const int32_t kLetterTable0[435] = {
- 1073741889, 90, 1073741921, 122, 170, 181, 186, 1073742016, // NOLINT
- 214, 1073742040, 246, 1073742072, 705, 1073742534, 721, 1073742560, //
NOLINT
- 740, 748, 750, 1073742704, 884, 1073742710, 887, 1073742714, // NOLINT
- 893, 902, 1073742728, 906, 908, 1073742734, 929, 1073742755, // NOLINT
- 1013, 1073742839, 1153, 1073742986, 1319, 1073743153, 1366, 1369, //
NOLINT
- 1073743201, 1415, 1073743312, 1514, 1073743344, 1522, 1073743392, 1610,
// NOLINT
- 1073743470, 1647, 1073743473, 1747, 1749, 1073743589, 1766, 1073743598,
// NOLINT
- 1775, 1073743610, 1788, 1791, 1808, 1073743634, 1839, 1073743693, //
NOLINT
- 1957, 1969, 1073743818, 2026, 1073743860, 2037, 2042, 1073743872, //
NOLINT
- 2069, 2074, 2084, 2088, 1073743936, 2136, 2208, 1073744034, // NOLINT
- 2220, 1073744132, 2361, 2365, 2384, 1073744216, 2401, 1073744241, //
NOLINT
- 2423, 1073744249, 2431, 1073744261, 2444, 1073744271, 2448, 1073744275,
// NOLINT
- 2472, 1073744298, 2480, 2482, 1073744310, 2489, 2493, 2510, // NOLINT
- 1073744348, 2525, 1073744351, 2529, 1073744368, 2545, 1073744389, 2570,
// NOLINT
- 1073744399, 2576, 1073744403, 2600, 1073744426, 2608, 1073744434, 2611,
// NOLINT
- 1073744437, 2614, 1073744440, 2617, 1073744473, 2652, 2654, 1073744498,
// NOLINT
- 2676, 1073744517, 2701, 1073744527, 2705, 1073744531, 2728, 1073744554,
// NOLINT
- 2736, 1073744562, 2739, 1073744565, 2745, 2749, 2768, 1073744608, //
NOLINT
- 2785, 1073744645, 2828, 1073744655, 2832, 1073744659, 2856, 1073744682,
// NOLINT
- 2864, 1073744690, 2867, 1073744693, 2873, 2877, 1073744732, 2909, //
NOLINT
- 1073744735, 2913, 2929, 2947, 1073744773, 2954, 1073744782, 2960, //
NOLINT
- 1073744786, 2965, 1073744793, 2970, 2972, 1073744798, 2975, 1073744803,
// NOLINT
- 2980, 1073744808, 2986, 1073744814, 3001, 3024, 1073744901, 3084, //
NOLINT
- 1073744910, 3088, 1073744914, 3112, 1073744938, 3123, 1073744949, 3129,
// NOLINT
- 3133, 1073744984, 3161, 1073744992, 3169, 1073745029, 3212, 1073745038,
// NOLINT
- 3216, 1073745042, 3240, 1073745066, 3251, 1073745077, 3257, 3261, //
NOLINT
- 3294, 1073745120, 3297, 1073745137, 3314, 1073745157, 3340, 1073745166,
// NOLINT
- 3344, 1073745170, 3386, 3389, 3406, 1073745248, 3425, 1073745274, //
NOLINT
- 3455, 1073745285, 3478, 1073745306, 3505, 1073745331, 3515, 3517, //
NOLINT
- 1073745344, 3526, 1073745409, 3632, 1073745458, 3635, 1073745472, 3654,
// NOLINT
- 1073745537, 3714, 3716, 1073745543, 3720, 3722, 3725, 1073745556, //
NOLINT
- 3735, 1073745561, 3743, 1073745569, 3747, 3749, 3751, 1073745578, //
NOLINT
- 3755, 1073745581, 3760, 1073745586, 3763, 3773, 1073745600, 3780, //
NOLINT
- 3782, 1073745628, 3807, 3840, 1073745728, 3911, 1073745737, 3948, //
NOLINT
- 1073745800, 3980, 1073745920, 4138, 4159, 1073746000, 4181, 1073746010,
// NOLINT
- 4189, 4193, 1073746021, 4198, 1073746030, 4208, 1073746037, 4225, //
NOLINT
- 4238, 1073746080, 4293, 4295, 4301, 1073746128, 4346, 1073746172, //
NOLINT
- 4680, 1073746506, 4685, 1073746512, 4694, 4696, 1073746522, 4701, //
NOLINT
- 1073746528, 4744, 1073746570, 4749, 1073746576, 4784, 1073746610, 4789,
// NOLINT
- 1073746616, 4798, 4800, 1073746626, 4805, 1073746632, 4822, 1073746648,
// NOLINT
- 4880, 1073746706, 4885, 1073746712, 4954, 1073746816, 5007, 1073746848,
// NOLINT
- 5108, 1073746945, 5740, 1073747567, 5759, 1073747585, 5786, 1073747616,
// NOLINT
- 5866, 1073747694, 5872, 1073747712, 5900, 1073747726, 5905, 1073747744,
// NOLINT
- 5937, 1073747776, 5969, 1073747808, 5996, 1073747822, 6000, 1073747840,
// NOLINT
- 6067, 6103, 6108, 1073748000, 6263, 1073748096, 6312, 6314, // NOLINT
- 1073748144, 6389, 1073748224, 6428, 1073748304, 6509, 1073748336, 6516,
// NOLINT
- 1073748352, 6571, 1073748417, 6599, 1073748480, 6678, 1073748512, 6740,
// NOLINT
- 6823, 1073748741, 6963, 1073748805, 6987, 1073748867, 7072, 1073748910,
// NOLINT
- 7087, 1073748922, 7141, 1073748992, 7203, 1073749069, 7247, 1073749082,
// NOLINT
- 7293, 1073749225, 7404, 1073749230, 7409, 1073749237, 7414, 1073749248,
// NOLINT
- 7615, 1073749504, 7957, 1073749784, 7965, 1073749792, 8005, 1073749832,
// NOLINT
- 8013, 1073749840, 8023, 8025, 8027, 8029, 1073749855, 8061, // NOLINT
- 1073749888, 8116, 1073749942, 8124, 8126, 1073749954, 8132, 1073749958,
// NOLINT
- 8140, 1073749968, 8147, 1073749974, 8155, 1073749984, 8172, 1073750002,
// NOLINT
- 8180, 1073750006, 8188 }; // NOLINT
+static const uint16_t kLetterTable0Size = 431;
+static const int32_t kLetterTable0[431] = {
+ 1073741889, 90, 1073741921, 122,
+ 170, 181, 186, 1073742016, // NOLINT
+ 214, 1073742040, 246, 1073742072,
+ 705, 1073742534, 721, 1073742560, // NOLINT
+ 740, 748, 750, 1073742704,
+ 884, 1073742710, 887, 1073742714, // NOLINT
+ 893, 895, 902, 1073742728,
+ 906, 908, 1073742734, 929, // NOLINT
+ 1073742755, 1013, 1073742839, 1153,
+ 1073742986, 1327, 1073743153, 1366, // NOLINT
+ 1369, 1073743201, 1415, 1073743312,
+ 1514, 1073743344, 1522, 1073743392, // NOLINT
+ 1610, 1073743470, 1647, 1073743473,
+ 1747, 1749, 1073743589, 1766, // NOLINT
+ 1073743598, 1775, 1073743610, 1788,
+ 1791, 1808, 1073743634, 1839, // NOLINT
+ 1073743693, 1957, 1969, 1073743818,
+ 2026, 1073743860, 2037, 2042, // NOLINT
+ 1073743872, 2069, 2074, 2084,
+ 2088, 1073743936, 2136, 1073744032, // NOLINT
+ 2226, 1073744132, 2361, 2365,
+ 2384, 1073744216, 2401, 1073744241, // NOLINT
+ 2432, 1073744261, 2444, 1073744271,
+ 2448, 1073744275, 2472, 1073744298, // NOLINT
+ 2480, 2482, 1073744310, 2489,
+ 2493, 2510, 1073744348, 2525, // NOLINT
+ 1073744351, 2529, 1073744368, 2545,
+ 1073744389, 2570, 1073744399, 2576, // NOLINT
+ 1073744403, 2600, 1073744426, 2608,
+ 1073744434, 2611, 1073744437, 2614, // NOLINT
+ 1073744440, 2617, 1073744473, 2652,
+ 2654, 1073744498, 2676, 1073744517, // NOLINT
+ 2701, 1073744527, 2705, 1073744531,
+ 2728, 1073744554, 2736, 1073744562, // NOLINT
+ 2739, 1073744565, 2745, 2749,
+ 2768, 1073744608, 2785, 1073744645, // NOLINT
+ 2828, 1073744655, 2832, 1073744659,
+ 2856, 1073744682, 2864, 1073744690, // NOLINT
+ 2867, 1073744693, 2873, 2877,
+ 1073744732, 2909, 1073744735, 2913, // NOLINT
+ 2929, 2947, 1073744773, 2954,
+ 1073744782, 2960, 1073744786, 2965, // NOLINT
+ 1073744793, 2970, 2972, 1073744798,
+ 2975, 1073744803, 2980, 1073744808, // NOLINT
+ 2986, 1073744814, 3001, 3024,
+ 1073744901, 3084, 1073744910, 3088, // NOLINT
+ 1073744914, 3112, 1073744938, 3129,
+ 3133, 1073744984, 3161, 1073744992, // NOLINT
+ 3169, 1073745029, 3212, 1073745038,
+ 3216, 1073745042, 3240, 1073745066, // NOLINT
+ 3251, 1073745077, 3257, 3261,
+ 3294, 1073745120, 3297, 1073745137, // NOLINT
+ 3314, 1073745157, 3340, 1073745166,
+ 3344, 1073745170, 3386, 3389, // NOLINT
+ 3406, 1073745248, 3425, 1073745274,
+ 3455, 1073745285, 3478, 1073745306, // NOLINT
+ 3505, 1073745331, 3515, 3517,
+ 1073745344, 3526, 1073745409, 3632, // NOLINT
+ 1073745458, 3635, 1073745472, 3654,
+ 1073745537, 3714, 3716, 1073745543, // NOLINT
+ 3720, 3722, 3725, 1073745556,
+ 3735, 1073745561, 3743, 1073745569, // NOLINT
+ 3747, 3749, 3751, 1073745578,
+ 3755, 1073745581, 3760, 1073745586, // NOLINT
+ 3763, 3773, 1073745600, 3780,
+ 3782, 1073745628, 3807, 3840, // NOLINT
+ 1073745728, 3911, 1073745737, 3948,
+ 1073745800, 3980, 1073745920, 4138, // NOLINT
+ 4159, 1073746000, 4181, 1073746010,
+ 4189, 4193, 1073746021, 4198, // NOLINT
+ 1073746030, 4208, 1073746037, 4225,
+ 4238, 1073746080, 4293, 4295, // NOLINT
+ 4301, 1073746128, 4346, 1073746172,
+ 4680, 1073746506, 4685, 1073746512, // NOLINT
+ 4694, 4696, 1073746522, 4701,
+ 1073746528, 4744, 1073746570, 4749, // NOLINT
+ 1073746576, 4784, 1073746610, 4789,
+ 1073746616, 4798, 4800, 1073746626, // NOLINT
+ 4805, 1073746632, 4822, 1073746648,
+ 4880, 1073746706, 4885, 1073746712, // NOLINT
+ 4954, 1073746816, 5007, 1073746848,
+ 5108, 1073746945, 5740, 1073747567, // NOLINT
+ 5759, 1073747585, 5786, 1073747616,
+ 5866, 1073747694, 5880, 1073747712, // NOLINT
+ 5900, 1073747726, 5905, 1073747744,
+ 5937, 1073747776, 5969, 1073747808, // NOLINT
+ 5996, 1073747822, 6000, 1073747840,
+ 6067, 6103, 6108, 1073748000, // NOLINT
+ 6263, 1073748096, 6312, 6314,
+ 1073748144, 6389, 1073748224, 6430, // NOLINT
+ 1073748304, 6509, 1073748336, 6516,
+ 1073748352, 6571, 1073748417, 6599, // NOLINT
+ 1073748480, 6678, 1073748512, 6740,
+ 6823, 1073748741, 6963, 1073748805, // NOLINT
+ 6987, 1073748867, 7072, 1073748910,
+ 7087, 1073748922, 7141, 1073748992, // NOLINT
+ 7203, 1073749069, 7247, 1073749082,
+ 7293, 1073749225, 7404, 1073749230, // NOLINT
+ 7409, 1073749237, 7414, 1073749248,
+ 7615, 1073749504, 7957, 1073749784, // NOLINT
+ 7965, 1073749792, 8005, 1073749832,
+ 8013, 1073749840, 8023, 8025, // NOLINT
+ 8027, 8029, 1073749855, 8061,
+ 1073749888, 8116, 1073749942, 8124, // NOLINT
+ 8126, 1073749954, 8132, 1073749958,
+ 8140, 1073749968, 8147, 1073749974, // NOLINT
+ 8155, 1073749984, 8172, 1073750002,
+ 8180, 1073750006, 8188}; // NOLINT
static const uint16_t kLetterTable1Size = 87;
static const int32_t kLetterTable1[87] = {
113, 127, 1073741968, 156, 258, 263, 1073742090, 275, // NOLINT
@@ -631,19 +749,33 @@
static const uint16_t kLetterTable4Size = 2;
static const int32_t kLetterTable4[2] = {
1073741824, 8140 }; // NOLINT
-static const uint16_t kLetterTable5Size = 88;
-static const int32_t kLetterTable5[88] = {
- 1073741824, 1164, 1073743056, 1277, 1073743104, 1548, 1073743376, 1567,
// NOLINT
- 1073743402, 1579, 1073743424, 1646, 1073743487, 1687, 1073743520, 1775,
// NOLINT
- 1073743639, 1823, 1073743650, 1928, 1073743755, 1934, 1073743760, 1939,
// NOLINT
- 1073743776, 1962, 1073743864, 2049, 1073743875, 2053, 1073743879, 2058,
// NOLINT
- 1073743884, 2082, 1073743936, 2163, 1073744002, 2227, 1073744114, 2295,
// NOLINT
- 2299, 1073744138, 2341, 1073744176, 2374, 1073744224, 2428, 1073744260,
// NOLINT
- 2482, 2511, 1073744384, 2600, 1073744448, 2626, 1073744452, 2635, //
NOLINT
- 1073744480, 2678, 2682, 1073744512, 2735, 2737, 1073744565, 2742, //
NOLINT
- 1073744569, 2749, 2752, 2754, 1073744603, 2781, 1073744608, 2794, //
NOLINT
- 1073744626, 2804, 1073744641, 2822, 1073744649, 2830, 1073744657, 2838,
// NOLINT
- 1073744672, 2854, 1073744680, 2862, 1073744832, 3042, 1073744896, 8191
}; // NOLINT
+static const uint16_t kLetterTable5Size = 100;
+static const int32_t kLetterTable5[100] = {
+ 1073741824, 1164, 1073743056, 1277,
+ 1073743104, 1548, 1073743376, 1567, // NOLINT
+ 1073743402, 1579, 1073743424, 1646,
+ 1073743487, 1693, 1073743520, 1775, // NOLINT
+ 1073743639, 1823, 1073743650, 1928,
+ 1073743755, 1934, 1073743760, 1965, // NOLINT
+ 1073743792, 1969, 1073743863, 2049,
+ 1073743875, 2053, 1073743879, 2058, // NOLINT
+ 1073743884, 2082, 1073743936, 2163,
+ 1073744002, 2227, 1073744114, 2295, // NOLINT
+ 2299, 1073744138, 2341, 1073744176,
+ 2374, 1073744224, 2428, 1073744260, // NOLINT
+ 2482, 2511, 1073744352, 2532,
+ 1073744358, 2543, 1073744378, 2558, // NOLINT
+ 1073744384, 2600, 1073744448, 2626,
+ 1073744452, 2635, 1073744480, 2678, // NOLINT
+ 2682, 1073744510, 2735, 2737,
+ 1073744565, 2742, 1073744569, 2749, // NOLINT
+ 2752, 2754, 1073744603, 2781,
+ 1073744608, 2794, 1073744626, 2804, // NOLINT
+ 1073744641, 2822, 1073744649, 2830,
+ 1073744657, 2838, 1073744672, 2854, // NOLINT
+ 1073744680, 2862, 1073744688, 2906,
+ 1073744732, 2911, 1073744740, 2917, // NOLINT
+ 1073744832, 3042, 1073744896, 8191}; // NOLINT
static const uint16_t kLetterTable6Size = 6;
static const int32_t kLetterTable6[6] = {
1073741824, 6051, 1073747888, 6086, 1073747915, 6139 }; // NOLINT
@@ -687,49 +819,363 @@
}
-// Number: point.category == 'Nd'
+// ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo',
+// 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in
+// point.properties) and ('Pattern_White_Space' not in point.properties)) or
+// ('JS_ID_Start' in point.properties)
-static const uint16_t kNumberTable0Size = 56;
-static const int32_t kNumberTable0[56] = {
- 1073741872, 57, 1073743456, 1641, 1073743600, 1785, 1073743808, 1993,
// NOLINT
- 1073744230, 2415, 1073744358, 2543, 1073744486, 2671, 1073744614, 2799,
// NOLINT
- 1073744742, 2927, 1073744870, 3055, 1073744998, 3183, 1073745126, 3311,
// NOLINT
- 1073745254, 3439, 1073745488, 3673, 1073745616, 3801, 1073745696, 3881,
// NOLINT
- 1073745984, 4169, 1073746064, 4249, 1073747936, 6121, 1073747984, 6169,
// NOLINT
- 1073748294, 6479, 1073748432, 6617, 1073748608, 6793, 1073748624, 6809,
// NOLINT
- 1073748816, 7001, 1073748912, 7097, 1073749056, 7241, 1073749072, 7257
}; // NOLINT
-static const uint16_t kNumberTable5Size = 12;
-static const int32_t kNumberTable5[12] = {
- 1073743392, 1577, 1073744080, 2265, 1073744128, 2313, 1073744336, 2521,
// NOLINT
- 1073744464, 2649, 1073744880, 3065 }; // NOLINT
-static const uint16_t kNumberTable7Size = 2;
-static const int32_t kNumberTable7[2] = {
- 1073749776, 7961 }; // NOLINT
-bool Number::Is(uchar c) {
+static const uint16_t kID_StartTable0Size = 434;
+static const int32_t kID_StartTable0[434] = {
+ 36, 1073741889, 90, 92,
+ 95, 1073741921, 122, 170, // NOLINT
+ 181, 186, 1073742016, 214,
+ 1073742040, 246, 1073742072, 705, // NOLINT
+ 1073742534, 721, 1073742560, 740,
+ 748, 750, 1073742704, 884, // NOLINT
+ 1073742710, 887, 1073742714, 893,
+ 895, 902, 1073742728, 906, // NOLINT
+ 908, 1073742734, 929, 1073742755,
+ 1013, 1073742839, 1153, 1073742986, // NOLINT
+ 1327, 1073743153, 1366, 1369,
+ 1073743201, 1415, 1073743312, 1514, // NOLINT
+ 1073743344, 1522, 1073743392, 1610,
+ 1073743470, 1647, 1073743473, 1747, // NOLINT
+ 1749, 1073743589, 1766, 1073743598,
+ 1775, 1073743610, 1788, 1791, // NOLINT
+ 1808, 1073743634, 1839, 1073743693,
+ 1957, 1969, 1073743818, 2026, // NOLINT
+ 1073743860, 2037, 2042, 1073743872,
+ 2069, 2074, 2084, 2088, // NOLINT
+ 1073743936, 2136, 1073744032, 2226,
+ 1073744132, 2361, 2365, 2384, // NOLINT
+ 1073744216, 2401, 1073744241, 2432,
+ 1073744261, 2444, 1073744271, 2448, // NOLINT
+ 1073744275, 2472, 1073744298, 2480,
+ 2482, 1073744310, 2489, 2493, // NOLINT
+ 2510, 1073744348, 2525, 1073744351,
+ 2529, 1073744368, 2545, 1073744389, // NOLINT
+ 2570, 1073744399, 2576, 1073744403,
+ 2600, 1073744426, 2608, 1073744434, // NOLINT
+ 2611, 1073744437, 2614, 1073744440,
+ 2617, 1073744473, 2652, 2654, // NOLINT
+ 1073744498, 2676, 1073744517, 2701,
+ 1073744527, 2705, 1073744531, 2728, // NOLINT
+ 1073744554, 2736, 1073744562, 2739,
+ 1073744565, 2745, 2749, 2768, // NOLINT
+ 1073744608, 2785, 1073744645, 2828,
+ 1073744655, 2832, 1073744659, 2856, // NOLINT
+ 1073744682, 2864, 1073744690, 2867,
+ 1073744693, 2873, 2877, 1073744732, // NOLINT
+ 2909, 1073744735, 2913, 2929,
+ 2947, 1073744773, 2954, 1073744782, // NOLINT
+ 2960, 1073744786, 2965, 1073744793,
+ 2970, 2972, 1073744798, 2975, // NOLINT
+ 1073744803, 2980, 1073744808, 2986,
+ 1073744814, 3001, 3024, 1073744901, // NOLINT
+ 3084, 1073744910, 3088, 1073744914,
+ 3112, 1073744938, 3129, 3133, // NOLINT
+ 1073744984, 3161, 1073744992, 3169,
+ 1073745029, 3212, 1073745038, 3216, // NOLINT
+ 1073745042, 3240, 1073745066, 3251,
+ 1073745077, 3257, 3261, 3294, // NOLINT
+ 1073745120, 3297, 1073745137, 3314,
+ 1073745157, 3340, 1073745166, 3344, // NOLINT
+ 1073745170, 3386, 3389, 3406,
+ 1073745248, 3425, 1073745274, 3455, // NOLINT
+ 1073745285, 3478, 1073745306, 3505,
+ 1073745331, 3515, 3517, 1073745344, // NOLINT
+ 3526, 1073745409, 3632, 1073745458,
+ 3635, 1073745472, 3654, 1073745537, // NOLINT
+ 3714, 3716, 1073745543, 3720,
+ 3722, 3725, 1073745556, 3735, // NOLINT
+ 1073745561, 3743, 1073745569, 3747,
+ 3749, 3751, 1073745578, 3755, // NOLINT
+ 1073745581, 3760, 1073745586, 3763,
+ 3773, 1073745600, 3780, 3782, // NOLINT
+ 1073745628, 3807, 3840, 1073745728,
+ 3911, 1073745737, 3948, 1073745800, // NOLINT
+ 3980, 1073745920, 4138, 4159,
+ 1073746000, 4181, 1073746010, 4189, // NOLINT
+ 4193, 1073746021, 4198, 1073746030,
+ 4208, 1073746037, 4225, 4238, // NOLINT
+ 1073746080, 4293, 4295, 4301,
+ 1073746128, 4346, 1073746172, 4680, // NOLINT
+ 1073746506, 4685, 1073746512, 4694,
+ 4696, 1073746522, 4701, 1073746528, // NOLINT
+ 4744, 1073746570, 4749, 1073746576,
+ 4784, 1073746610, 4789, 1073746616, // NOLINT
+ 4798, 4800, 1073746626, 4805,
+ 1073746632, 4822, 1073746648, 4880, // NOLINT
+ 1073746706, 4885, 1073746712, 4954,
+ 1073746816, 5007, 1073746848, 5108, // NOLINT
+ 1073746945, 5740, 1073747567, 5759,
+ 1073747585, 5786, 1073747616, 5866, // NOLINT
+ 1073747694, 5880, 1073747712, 5900,
+ 1073747726, 5905, 1073747744, 5937, // NOLINT
+ 1073747776, 5969, 1073747808, 5996,
+ 1073747822, 6000, 1073747840, 6067, // NOLINT
+ 6103, 6108, 1073748000, 6263,
+ 1073748096, 6312, 6314, 1073748144, // NOLINT
+ 6389, 1073748224, 6430, 1073748304,
+ 6509, 1073748336, 6516, 1073748352, // NOLINT
+ 6571, 1073748417, 6599, 1073748480,
+ 6678, 1073748512, 6740, 6823, // NOLINT
+ 1073748741, 6963, 1073748805, 6987,
+ 1073748867, 7072, 1073748910, 7087, // NOLINT
+ 1073748922, 7141, 1073748992, 7203,
+ 1073749069, 7247, 1073749082, 7293, // NOLINT
+ 1073749225, 7404, 1073749230, 7409,
+ 1073749237, 7414, 1073749248, 7615, // NOLINT
+ 1073749504, 7957, 1073749784, 7965,
+ 1073749792, 8005, 1073749832, 8013, // NOLINT
+ 1073749840, 8023, 8025, 8027,
+ 8029, 1073749855, 8061, 1073749888, // NOLINT
+ 8116, 1073749942, 8124, 8126,
+ 1073749954, 8132, 1073749958, 8140, // NOLINT
+ 1073749968, 8147, 1073749974, 8155,
+ 1073749984, 8172, 1073750002, 8180, // NOLINT
+ 1073750006, 8188}; // NOLINT
+static const uint16_t kID_StartTable1Size = 84;
+static const int32_t kID_StartTable1[84] = {
+ 113, 127, 1073741968, 156,
+ 258, 263, 1073742090, 275, // NOLINT
+ 277, 1073742104, 285, 292,
+ 294, 296, 1073742122, 313, // NOLINT
+ 1073742140, 319, 1073742149, 329,
+ 334, 1073742176, 392, 1073744896, // NOLINT
+ 3118, 1073744944, 3166, 1073744992,
+ 3300, 1073745131, 3310, 1073745138, // NOLINT
+ 3315, 1073745152, 3365, 3367,
+ 3373, 1073745200, 3431, 3439, // NOLINT
+ 1073745280, 3478, 1073745312, 3494,
+ 1073745320, 3502, 1073745328, 3510, // NOLINT
+ 1073745336, 3518, 1073745344, 3526,
+ 1073745352, 3534, 1073745360, 3542, // NOLINT
+ 1073745368, 3550, 1073745925, 4103,
+ 1073745953, 4137, 1073745969, 4149, // NOLINT
+ 1073745976, 4156, 1073745985, 4246,
+ 1073746075, 4255, 1073746081, 4346, // NOLINT
+ 1073746172, 4351, 1073746181, 4397,
+ 1073746225, 4494, 1073746336, 4538, // NOLINT
+ 1073746416, 4607, 1073746944, 8191}; // NOLINT
+static const uint16_t kID_StartTable2Size = 4;
+static const int32_t kID_StartTable2[4] = {1073741824, 3509, 1073745408,
+ 8191}; // NOLINT
+static const uint16_t kID_StartTable3Size = 2;
+static const int32_t kID_StartTable3[2] = {1073741824, 8191}; // NOLINT
+static const uint16_t kID_StartTable4Size = 2;
+static const int32_t kID_StartTable4[2] = {1073741824, 8140}; // NOLINT
+static const uint16_t kID_StartTable5Size = 100;
+static const int32_t kID_StartTable5[100] = {
+ 1073741824, 1164, 1073743056, 1277,
+ 1073743104, 1548, 1073743376, 1567, // NOLINT
+ 1073743402, 1579, 1073743424, 1646,
+ 1073743487, 1693, 1073743520, 1775, // NOLINT
+ 1073743639, 1823, 1073743650, 1928,
+ 1073743755, 1934, 1073743760, 1965, // NOLINT
+ 1073743792, 1969, 1073743863, 2049,
+ 1073743875, 2053, 1073743879, 2058, // NOLINT
+ 1073743884, 2082, 1073743936, 2163,
+ 1073744002, 2227, 1073744114, 2295, // NOLINT
+ 2299, 1073744138, 2341, 1073744176,
+ 2374, 1073744224, 2428, 1073744260, // NOLINT
+ 2482, 2511, 1073744352, 2532,
+ 1073744358, 2543, 1073744378, 2558, // NOLINT
+ 1073744384, 2600, 1073744448, 2626,
+ 1073744452, 2635, 1073744480, 2678, // NOLINT
+ 2682, 1073744510, 2735, 2737,
+ 1073744565, 2742, 1073744569, 2749, // NOLINT
+ 2752, 2754, 1073744603, 2781,
+ 1073744608, 2794, 1073744626, 2804, // NOLINT
+ 1073744641, 2822, 1073744649, 2830,
+ 1073744657, 2838, 1073744672, 2854, // NOLINT
+ 1073744680, 2862, 1073744688, 2906,
+ 1073744732, 2911, 1073744740, 2917, // NOLINT
+ 1073744832, 3042, 1073744896, 8191}; // NOLINT
+static const uint16_t kID_StartTable6Size = 6;
+static const int32_t kID_StartTable6[6] = {1073741824, 6051, 1073747888, 6086,
+ 1073747915, 6139}; // NOLINT
+static const uint16_t kID_StartTable7Size = 48;
+static const int32_t kID_StartTable7[48] = {
+ 1073748224, 6765, 1073748592, 6873,
+ 1073748736, 6918, 1073748755, 6935, // NOLINT
+ 6941, 1073748767, 6952, 1073748778,
+ 6966, 1073748792, 6972, 6974, // NOLINT
+ 1073748800, 6977, 1073748803, 6980,
+ 1073748806, 7089, 1073748947, 7485, // NOLINT
+ 1073749328, 7567, 1073749394, 7623,
+ 1073749488, 7675, 1073749616, 7796, // NOLINT
+ 1073749622, 7932, 1073749793, 7994,
+ 1073749825, 8026, 1073749862, 8126, // NOLINT
+ 1073749954, 8135, 1073749962, 8143,
+ 1073749970, 8151, 1073749978, 8156}; // NOLINT
+bool ID_Start::Is(uchar c) {
int chunk_index = c >> 13;
switch (chunk_index) {
- case 0: return LookupPredicate(kNumberTable0,
- kNumberTable0Size,
- c);
- case 5: return LookupPredicate(kNumberTable5,
- kNumberTable5Size,
- c);
- case 7: return LookupPredicate(kNumberTable7,
- kNumberTable7Size,
***The diff for this file has been truncated for email.***
=======================================
--- /branches/bleeding_edge/src/unicode.h Tue Jun 3 08:12:43 2014 UTC
+++ /branches/bleeding_edge/src/unicode.h Wed Oct 8 14:55:03 2014 UTC
@@ -44,6 +44,7 @@
CacheEntry entries_[kSize];
};
+
// A cache used in case conversion. It caches the value for characters
// that either have no mapping or map to a single character independent
// of context. Characters that map to more than one character or that
@@ -70,6 +71,7 @@
CacheEntry entries_[kSize];
};
+
class UnicodeData {
private:
friend class Test;
@@ -77,6 +79,7 @@
static const uchar kMaxCodePoint;
};
+
class Utf16 {
public:
static inline bool IsSurrogatePair(int lead, int trail) {
@@ -113,14 +116,6 @@
}
};
-class Latin1 {
- public:
- static const unsigned kMaxChar = 0xff;
- // Returns 0 if character does not convert to single latin-1 character
- // or if the character doesn't not convert back to latin-1 via inverse
- // operation (upper to lower, etc).
- static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
-};
class Utf8 {
public:
@@ -155,45 +150,6 @@
unsigned* cursor);
};
-
-class Utf8DecoderBase {
- public:
- // Initialization done in subclass.
- inline Utf8DecoderBase();
- inline Utf8DecoderBase(uint16_t* buffer,
- unsigned buffer_length,
- const uint8_t* stream,
- unsigned stream_length);
- inline unsigned Utf16Length() const { return utf16_length_; }
- protected:
- // This reads all characters and sets the utf16_length_.
- // The first buffer_length utf16 chars are cached in the buffer.
- void Reset(uint16_t* buffer,
- unsigned buffer_length,
- const uint8_t* stream,
- unsigned stream_length);
- static void WriteUtf16Slow(const uint8_t* stream,
- uint16_t* data,
- unsigned length);
- const uint8_t* unbuffered_start_;
- unsigned utf16_length_;
- bool last_byte_of_buffer_unused_;
- private:
- DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
-};
-
-template <unsigned kBufferSize>
-class Utf8Decoder : public Utf8DecoderBase {
- public:
- inline Utf8Decoder() {}
- inline Utf8Decoder(const char* stream, unsigned length);
- inline void Reset(const char* stream, unsigned length);
- inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
- private:
- uint16_t buffer_[kBufferSize];
-};
-
-
struct Uppercase {
static bool Is(uchar c);
};
@@ -203,7 +159,10 @@
struct Letter {
static bool Is(uchar c);
};
-struct Number {
+struct ID_Start {
+ static bool Is(uchar c);
+};
+struct ID_Continue {
static bool Is(uchar c);
};
struct WhiteSpace {
@@ -212,12 +171,6 @@
struct LineTerminator {
static bool Is(uchar c);
};
-struct CombiningMark {
- static bool Is(uchar c);
-};
-struct ConnectorPunctuation {
- static bool Is(uchar c);
-};
struct ToLowercase {
static const int kMaxWidth = 3;
static const bool kIsToLower = true;
=======================================
--- /branches/bleeding_edge/test/cctest/test-strings.cc Mon Oct 6 15:50:40 2014 UTC
+++ /branches/bleeding_edge/test/cctest/test-strings.cc Wed Oct 8 14:55:03 2014 UTC
@@ -37,6 +37,7 @@
#include "src/api.h"
#include "src/factory.h"
#include "src/objects.h"
+#include "src/unicode-decoder.h"
#include "test/cctest/cctest.h"
// Adapted from http://en.wikipedia.org/wiki/Multiply-with-carry
=======================================
--- /branches/bleeding_edge/test/mjsunit/var.js Tue Dec 7 11:01:02 2010 UTC
+++ /branches/bleeding_edge/test/mjsunit/var.js Wed Oct 8 14:55:03 2014 UTC
@@ -35,3 +35,22 @@
assertTrue(!z && typeof z == 'undefined');
if (false) { var z; }
assertTrue(!z && typeof z == 'undefined');
+
+assertThrows("var \u2E2F;", SyntaxError);
+assertThrows("var \\u2E2F;", SyntaxError);
+
+assertDoesNotThrow("var \u2118;");
+assertDoesNotThrow("var \\u2118;");
+assertDoesNotThrow("var \u212E;");
+assertDoesNotThrow("var \\u212E;");
+assertDoesNotThrow("var \u309B;");
+assertDoesNotThrow("var \\u309B;");
+assertDoesNotThrow("var \u309C;");
+assertDoesNotThrow("var \\u309C;");
+
+assertDoesNotThrow("var $\u00B7;");
+assertDoesNotThrow("var $\u0387;");
+assertDoesNotThrow("var $\u1369;");
+assertDoesNotThrow("var $\u1370;");
+assertDoesNotThrow("var $\u1371;");
+assertDoesNotThrow("var $\u19DA;");
=======================================
--- /branches/bleeding_edge/test/unittests/unittests.gyp Tue Oct 7 07:36:21 2014 UTC
+++ /branches/bleeding_edge/test/unittests/unittests.gyp Wed Oct 8 14:55:03 2014 UTC
@@ -58,6 +58,7 @@
'run-all-unittests.cc',
'test-utils.h',
'test-utils.cc',
+ 'unicode/unicode-predicates-unittest.cc',
],
'conditions': [
['v8_target_arch=="arm"', {
=======================================
--- /branches/bleeding_edge/tools/gyp/v8.gyp Wed Oct 8 11:19:51 2014 UTC
+++ /branches/bleeding_edge/tools/gyp/v8.gyp Wed Oct 8 14:55:03 2014 UTC
@@ -808,6 +808,8 @@
'../../src/unicode-inl.h',
'../../src/unicode.cc',
'../../src/unicode.h',
+ '../../src/unicode-decoder.cc',
+ '../../src/unicode-decoder.h',
'../../src/unique.h',
'../../src/uri.h',
'../../src/utils-inl.h',
@@ -1694,6 +1696,12 @@
'../../src/mksnapshot.cc',
],
'conditions': [
+ ['v8_enable_i18n_support==1', {
+ 'dependencies': [
+ '<(icu_gyp_path):icui18n',
+ '<(icu_gyp_path):icuuc',
+ ]
+ }],
['want_separate_host_toolset==1', {
'toolsets': ['host'],
}, {
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.