Diff
Modified: trunk/LayoutTests/ChangeLog (266702 => 266703)
--- trunk/LayoutTests/ChangeLog 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/ChangeLog 2020-09-07 17:11:42 UTC (rev 266703)
@@ -1,3 +1,14 @@
+2020-09-06 Darin Adler <[email protected]>
+
+ Make TextCodecCJK and TextCodecSingleByte thread-safe and refactor a bit to share code
+ https://bugs.webkit.org/show_bug.cgi?id=216229
+
+ Reviewed by Sam Weinig.
+
+ * TestExpectations: Removed expectation that skips the three eof-utf-8
+ tests. They were failing because the tests were imported into the source
+ tree improperly and are passing now that is fixed.
+
2020-09-07 Youenn Fablet <[email protected]>
Fix Internals::supportsVCPEncoder on BigSur
Modified: trunk/LayoutTests/TestExpectations (266702 => 266703)
--- trunk/LayoutTests/TestExpectations 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/TestExpectations 2020-09-07 17:11:42 UTC (rev 266703)
@@ -486,9 +486,6 @@
# Newly imported WPT ref tests failures.
imported/w3c/web-platform-tests/html/editing/editing-0/spelling-and-grammar-checking/spelling-markers-008.html [ ImageOnlyFailure ]
imported/w3c/web-platform-tests/html/editing/editing-0/spelling-and-grammar-checking/spelling-markers-010.html [ ImageOnlyFailure ]
-imported/w3c/web-platform-tests/encoding/eof-utf-8-one.html [ ImageOnlyFailure ]
-imported/w3c/web-platform-tests/encoding/eof-utf-8-three.html [ ImageOnlyFailure ]
-imported/w3c/web-platform-tests/encoding/eof-utf-8-two.html [ ImageOnlyFailure ]
imported/w3c/web-platform-tests/html/rendering/bindings/the-select-element-0/option-label.html [ ImageOnlyFailure ]
imported/w3c/web-platform-tests/html/rendering/non-replaced-elements/lists/li-type-unsupported-lower-alpha.html [ ImageOnlyFailure ]
imported/w3c/web-platform-tests/html/rendering/non-replaced-elements/lists/li-type-unsupported-lower-roman.html [ ImageOnlyFailure ]
@@ -925,7 +922,7 @@
imported/w3c/web-platform-tests/fetch/content-encoding/bad-gzip-body.any.html [ Pass Failure ]
imported/w3c/web-platform-tests/fetch/http-cache/basic-auth-cache-test.html [ ImageOnlyFailure ]
-# Fetch fetatures that are not implemented.
+# Fetch features that are not implemented.
imported/w3c/web-platform-tests/fetch/corb [ Skip ]
imported/w3c/web-platform-tests/fetch/metadata/ [ Skip ]
imported/w3c/web-platform-tests/fetch/sec-metadata [ Skip ]
Modified: trunk/LayoutTests/imported/w3c/ChangeLog (266702 => 266703)
--- trunk/LayoutTests/imported/w3c/ChangeLog 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/imported/w3c/ChangeLog 2020-09-07 17:11:42 UTC (rev 266703)
@@ -1,3 +1,18 @@
+2020-09-06 Darin Adler <[email protected]>
+
+ Make TextCodecCJK and TextCodecSingleByte thread-safe and refactor a bit to share code
+ https://bugs.webkit.org/show_bug.cgi?id=216229
+
+ Reviewed by Sam Weinig.
+
+ * web-platform-tests/encoding/eof-utf-8-one.html:
+ * web-platform-tests/encoding/eof-utf-8-three.html:
+ * web-platform-tests/encoding/eof-utf-8-two.html:
+ Updated these files with fresh copies. Somehow, the trailing invalid
+ UTF-8 sequences from these files must have gotten lost when we
+ re-synced web-platform-tests/encoding from upstream. If we do that
+ again, then these tests will start failing again.
+
2020-09-07 Youenn Fablet <[email protected]>
Safari takes too long to fetch images from memory cache
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-one.html (266702 => 266703)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-one.html 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-one.html 2020-09-07 17:11:42 UTC (rev 266703)
@@ -2,4 +2,4 @@
<meta charset=utf-8>
<title>UTF-8 file ending with a one-byte truncated sequence</title>
<link rel=match href=""
-One-byte truncated sequence:
\ No newline at end of file
+One-byte truncated sequence:\xF0
\ No newline at end of file
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-three.html (266702 => 266703)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-three.html 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-three.html 2020-09-07 17:11:42 UTC (rev 266703)
@@ -2,4 +2,4 @@
<meta charset=utf-8>
<title>UTF-8 file ending with a three-byte truncated sequence</title>
<link rel=match href=""
-Three-byte truncated sequence:
\ No newline at end of file
+Three-byte truncated sequence:\xF0\x9F\x92
\ No newline at end of file
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-two.html (266702 => 266703)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-two.html 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/eof-utf-8-two.html 2020-09-07 17:11:42 UTC (rev 266703)
@@ -2,4 +2,4 @@
<meta charset=utf-8>
<title>UTF-8 file ending with a two-byte truncated sequence</title>
<link rel=match href=""
-Two-byte truncated sequence:
\ No newline at end of file
+Two-byte truncated sequence:\xF0\x9F
\ No newline at end of file
Modified: trunk/LayoutTests/platform/mac-wk2/TestExpectations (266702 => 266703)
--- trunk/LayoutTests/platform/mac-wk2/TestExpectations 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/LayoutTests/platform/mac-wk2/TestExpectations 2020-09-07 17:11:42 UTC (rev 266703)
@@ -1109,7 +1109,6 @@
webkit.org/b/215468 [ Mojave Release ] media/modern-media-controls/scrubber-support/scrubber-support-drag.html [ Pass Failure ]
# <rdar://problem/37330377> ASan tests exiting early due to timeouts
-[ Release ] imported/w3c/web-platform-tests/encoding [ Skip ]
[ Release ] imported/w3c/web-platform-tests/WebCryptoAPI/derive_bits_keys [ Skip ]
# <rdar://problem/60063002> REGRESSION: (r257758) http/tests/adClickAttribution/conversion-disabled-in-ephemeral-session.html is crashing
Modified: trunk/Source/WebCore/ChangeLog (266702 => 266703)
--- trunk/Source/WebCore/ChangeLog 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Source/WebCore/ChangeLog 2020-09-07 17:11:42 UTC (rev 266703)
@@ -1,3 +1,71 @@
+2020-09-06 Darin Adler <[email protected]>
+
+ Make TextCodecCJK and TextCodecSingleByte thread-safe and refactor a bit to share code
+ https://bugs.webkit.org/show_bug.cgi?id=216229
+
+ Reviewed by Sam Weinig.
+
+ The text encoding machinery is usable on multiple threads, but our new TextCodec
+ classes have some global data structures that need to be guarded to keep that intact.
+
+ * platform/text/EncodingTables.cpp:
+ (WebCore::checkEncodingTableInvariants): One-time check for invariants that clients
+ of these tables depend on.
+
+ * platform/text/EncodingTables.h: Added checkEncodingTableInvariants. Also Added
+ function templates for encoding tables: Added findFirstInSortedPairs,
+ findLastInSortedPairs and findInSortedPairs for searching a sorted array of pairs
+ used as a map. Added sortByFirst to aid in the creation of such a sorted array.
+ And added isSortedByFirst and sortedFirstsAreUnique so we can assert those invariants.
+ One of the good features of the findInSortedPairs functions is that they handle integer
+ values that don't fit in an integral key type, returning WTF::nullopt in that case.
+ That lets us pass code point values when looking in tables that use code units as their
+ key, without separately checking if they are in range, which otherwise requires
+ converting to UChar and checking for equality or calling U_IS_BMP.
+
+ * platform/text/TextCodecCJK.cpp:
+ (WebCore::TextCodecCJK::TextCodecCJK): Call checkEncodingTableInvariants.
+ (WebCore::TextCodecCJK::encode const): Ditto.
+ (WebCore::jis0208DecodeIndex): Use std::call_once for thread safety.
+ Use sortByFirst and sortedFirstsAreUnique.
+ (WebCore::codePointJIS0208): Use findFirstInSortedPairs.
+ (WebCore::codePointJIS0212): Ditto.
+ (WebCore::TextCodecCJK::eucJPDecode): Cast to char instad of LChar when adding
+ an ASCII character to a StringBuilder.
+ (WebCore::eucJPEncode): Use the name codePoint instead of c to match other
+ surrounding code. Use findLastInSortedPairs.
+ (WebCore::iso2022JPEncode): Removed some unneeded casts to uint8_t when appending
+ bytes to a Vector. Added a static_assert to check the size of iso2022JPKatakana.
+ Use findLastInSortedPairs.
+ (WebCore::shiftJISEncode): Use findInSortedPairs.
+ (WebCore::eucKREncodingIndex): Use std::call_once for thread safety.
+ Use sortByFirst and sortedFirstsAreUnique.
+ (WebCore::eucKREncode): Removed some unneeded casts to uint8_t when appending
+ bytes to a Vector. Use findFirstInSortedPairs.
+ (WebCore::TextCodecCJK::eucKRDecode): Use findFirstInSortedPairs.
+ (WebCore::big5Encode): Use findInSortedPairs. Also renamed c to codePoint.
+ (WebCore::big5DecodeIndex): Use std::call_once for thread safety.
+ Use sortByFirst and sortedFirstsAreUnique.
+ (WebCore::TextCodecCJK::big5Decode): Use findFirstInSortedPairs. Cast to char
+ instad of LChar when adding an ASCII character to a StringBuilder.
+
+ * platform/text/TextCodecSingleByte.cpp:
+ (WebCore::tableForEncoding): Return an IteratorRange instead of a pair of
+ pointer and size. This works with std::begin/end. Also make table with actual
+ encoded bytes, by adding 0x80 here, rather than doing that when using the table.
+ (WebCore::encode): Use std::call_once for thread safety.
+ Use sortByFirst and sortedFirstsAreUnique. The code before was not sorting at
+ all, which means it probably didn't work in any cases where the code units
+ happen to not be in ascending order. We should add some test cases.
+ (WebCore::decode): Use findFirstInSortedPairs. Also use StringView::codePoints
+ because these are likely to be 8-bit strings and we don't need to temporarily
+ upconvert them to 16-bit just to encode them. Should probably later measure if
+ the use of StringView::upconvertedCharacters plus CodePointIterator<UChar>
+ instead of StringView::codePoints is better for performance in the CJK encoding
+ functions. This approach means more branching inside the loop, but the other
+ version involves memory allocation and a second loop when the characters are
+ all 8-bit.
+
2020-09-07 Youenn Fablet <[email protected]>
Fix Internals::supportsVCPEncoder on BigSur
Modified: trunk/Source/WebCore/platform/text/EncodingTables.cpp (266702 => 266703)
--- trunk/Source/WebCore/platform/text/EncodingTables.cpp 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Source/WebCore/platform/text/EncodingTables.cpp 2020-09-07 17:11:42 UTC (rev 266703)
@@ -26,6 +26,8 @@
#include "config.h"
#include "EncodingTables.h"
+#include <mutex>
+
namespace WebCore {
// FIXME: Compress these tables and decompress them if they are actually used. This will significantly decrease the size of WebCore's binary.
@@ -6236,4 +6238,24 @@
{ 23742, 0x7199 }, { 23743, 0x71B9 }, { 23744, 0x71BA }, { 23745, 0x72A7 }, { 23746, 0x79A7 }, { 23747, 0x7A00 }, { 23748, 0x7FB2 }, { 23749, 0x8A70 },
};
+#if ASSERT_ENABLED
+
+void checkEncodingTableInvariants()
+{
+ static std::once_flag once;
+ std::call_once(once, [] {
+ ASSERT(isSortedByFirst(jis0208));
+
+ ASSERT(isSortedByFirst(jis0212));
+ ASSERT(sortedFirstsAreUnique(jis0212));
+
+ ASSERT(isSortedByFirst(big5EncodingMap));
+
+ ASSERT(isSortedByFirst(eucKRDecodingIndex));
+ ASSERT(sortedFirstsAreUnique(eucKRDecodingIndex));
+ });
}
+
+#endif
+
+}
Modified: trunk/Source/WebCore/platform/text/EncodingTables.h (266702 => 266703)
--- trunk/Source/WebCore/platform/text/EncodingTables.h 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Source/WebCore/platform/text/EncodingTables.h 2020-09-07 17:11:42 UTC (rev 266703)
@@ -25,7 +25,11 @@
#pragma once
+#include <algorithm>
+#include <iterator>
#include <unicode/umachine.h>
+#include <utility>
+#include <wtf/Optional.h>
namespace WebCore {
@@ -35,4 +39,89 @@
extern const std::pair<uint16_t, UChar32> big5DecodingExtras[3904];
extern const std::pair<uint16_t, UChar> eucKRDecodingIndex[17048];
+void checkEncodingTableInvariants();
+
+// Functions for using sorted arrays of pairs as a map.
+// FIXME: Consider moving these functions to StdLibExtras.h for uses other than encoding tables.
+template<typename CollectionType> void sortByFirst(CollectionType&);
+template<typename CollectionType> bool isSortedByFirst(const CollectionType&);
+template<typename CollectionType> bool sortedFirstsAreUnique(const CollectionType&);
+template<typename CollectionType, typename KeyType> static auto findFirstInSortedPairs(const CollectionType& collection, const KeyType&) -> Optional<decltype(std::begin(collection)->second)>;
+template<typename CollectionType, typename KeyType> static auto findLastInSortedPairs(const CollectionType& collection, const KeyType&) -> Optional<decltype(std::begin(collection)->second)>;
+template<typename CollectionType, typename KeyType> static auto findInSortedPairs(const CollectionType& collection, const KeyType&) -> std::pair<decltype(std::begin(collection)), decltype(std::begin(collection))>;
+
+#if !ASSERT_ENABLED
+inline void checkEncodingTableInvariants() { }
+#endif
+
+struct CompareFirst {
+ template<typename TypeA, typename TypeB> bool operator()(const TypeA& a, const TypeB& b)
+ {
+ return a.first < b.first;
+ }
+};
+
+struct EqualFirst {
+ template<typename TypeA, typename TypeB> bool operator()(const TypeA& a, const TypeB& b)
+ {
+ return a.first == b.first;
+ }
+};
+
+template<typename T> struct FirstAdapter {
+ const T& first;
+};
+template<typename T> FirstAdapter<T> makeFirstAdapter(const T& value)
+{
+ return { value };
}
+
+template<typename CollectionType> void sortByFirst(CollectionType& collection)
+{
+ std::sort(std::begin(collection), std::end(collection), CompareFirst { });
+}
+
+template<typename CollectionType> bool isSortedByFirst(const CollectionType& collection)
+{
+ return std::is_sorted(std::begin(collection), std::end(collection), CompareFirst { });
+}
+
+template<typename CollectionType> bool sortedFirstsAreUnique(const CollectionType& collection)
+{
+ return std::adjacent_find(std::begin(collection), std::end(collection), EqualFirst { }) == std::end(collection);
+}
+
+template<typename CollectionType, typename KeyType> static auto findFirstInSortedPairs(const CollectionType& collection, const KeyType& key) -> Optional<decltype(std::begin(collection)->second)>
+{
+ if constexpr (std::is_integral_v<KeyType>) {
+ if (key != decltype(std::begin(collection)->first)(key))
+ return WTF::nullopt;
+ }
+ auto iterator = std::lower_bound(std::begin(collection), std::end(collection), makeFirstAdapter(key), CompareFirst { });
+ if (iterator == std::end(collection) || key < iterator->first)
+ return WTF::nullopt;
+ return iterator->second;
+}
+
+template<typename CollectionType, typename KeyType> static auto findLastInSortedPairs(const CollectionType& collection, const KeyType& key) -> Optional<decltype(std::begin(collection)->second)>
+{
+ if constexpr (std::is_integral_v<KeyType>) {
+ if (key != decltype(std::begin(collection)->first)(key))
+ return WTF::nullopt;
+ }
+ auto iterator = std::upper_bound(std::begin(collection), std::end(collection), makeFirstAdapter(key), CompareFirst { });
+ if (iterator == std::begin(collection) || (--iterator)->first < key)
+ return WTF::nullopt;
+ return iterator->second;
+}
+
+template<typename CollectionType, typename KeyType> static auto findInSortedPairs(const CollectionType& collection, const KeyType& key) -> std::pair<decltype(std::begin(collection)), decltype(std::begin(collection))>
+{
+ if constexpr (std::is_integral_v<KeyType>) {
+ if (key != decltype(std::begin(collection)->first)(key))
+ return { std::end(collection), std::end(collection) };
+ }
+ return std::equal_range(std::begin(collection), std::end(collection), makeFirstAdapter(key), CompareFirst { });
+}
+
+}
Modified: trunk/Source/WebCore/platform/text/TextCodecCJK.cpp (266702 => 266703)
--- trunk/Source/WebCore/platform/text/TextCodecCJK.cpp 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Source/WebCore/platform/text/TextCodecCJK.cpp 2020-09-07 17:11:42 UTC (rev 266703)
@@ -27,6 +27,7 @@
#include "TextCodecCJK.h"
#include "EncodingTables.h"
+#include <mutex>
#include <wtf/text/CodePointIterator.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/unicode/CharacterNames.h>
@@ -44,6 +45,7 @@
TextCodecCJK::TextCodecCJK(Encoding encoding)
: m_encoding(encoding)
{
+ checkEncodingTableInvariants();
}
void TextCodecCJK::registerEncodingNames(EncodingNameRegistrar registrar)
@@ -124,16 +126,17 @@
using JIS0208DecodeIndex = std::array<std::pair<uint16_t, UChar>, std::size(jis0208)>;
static const JIS0208DecodeIndex& jis0208DecodeIndex()
{
- static auto& table = *[] {
- auto* table = new JIS0208DecodeIndex;
+ // Allocate this at runtime because building it at compile time would make the binary much larger and this is often not used.
+ static JIS0208DecodeIndex* table;
+ static std::once_flag once;
+ std::call_once(once, [&] {
+ table = new JIS0208DecodeIndex;
for (size_t i = 0; i < std::size(jis0208); i++)
(*table)[i] = { jis0208[i].second, jis0208[i].first };
- std::sort(table->begin(), table->end(), [] (auto& a, auto& b) {
- return a.first < b.first;
- });
- return table;
- }();
- return table;
+ sortByFirst(*table);
+ ASSERT(sortedFirstsAreUnique(*table));
+ });
+ return *table;
}
String TextCodecCJK::decodeCommon(const uint8_t* bytes, size_t length, bool flush, bool stopOnError, bool& sawError, const Function<SawError(uint8_t, StringBuilder&)>& byteParser)
@@ -179,27 +182,12 @@
static Optional<UChar> codePointJIS0208(uint16_t pointer)
{
- auto& index = jis0208DecodeIndex();
- auto range = std::equal_range(index.begin(), index.end(), std::pair<uint16_t, UChar>(pointer, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first != range.second) {
- ASSERT(range.first + 1 == range.second);
- return range.first->second;
- }
- return WTF::nullopt;
+ return findFirstInSortedPairs(jis0208DecodeIndex(), pointer);
}
static Optional<UChar> codePointJIS0212(uint16_t pointer)
{
- auto range = std::equal_range(jis0212, jis0212 + std::size(jis0212), std::pair<uint16_t, UChar>(pointer, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first != range.second) {
- ASSERT(range.first + 1 == range.second);
- return range.first->second;
- }
- return WTF::nullopt;
+ return findFirstInSortedPairs(jis0212, pointer);
}
// https://encoding.spec.whatwg.org/#euc-jp-decoder
@@ -228,7 +216,7 @@
return SawError::Yes;
}
if (isASCII(byte)) {
- result.append(static_cast<LChar>(byte));
+ result.append(static_cast<char>(byte));
return SawError::No;
}
if (byte == 0x8E || byte == 0x8F || (byte >= 0xA1 && byte <= 0xFE)) {
@@ -247,42 +235,34 @@
auto characters = string.upconvertedCharacters();
for (WTF::CodePointIterator<UChar> iterator(characters.get(), characters.get() + string.length()); !iterator.atEnd(); ++iterator) {
- auto c = *iterator;
- if (isASCII(c)) {
- result.append(c);
+ auto codePoint = *iterator;
+ if (isASCII(codePoint)) {
+ result.append(codePoint);
continue;
}
- if (c == 0x00A5) {
+ if (codePoint == 0x00A5) {
result.append(0x5C);
continue;
}
- if (c == 0x203E) {
+ if (codePoint == 0x203E) {
result.append(0x7E);
continue;
}
- if (c >= 0xFF61 && c <= 0xFF9F) {
+ if (codePoint >= 0xFF61 && codePoint <= 0xFF9F) {
result.append(0x8E);
- result.append(c - 0xFF61 + 0xA1);
+ result.append(codePoint - 0xFF61 + 0xA1);
continue;
}
- if (c == 0x2212)
- c = 0xFF0D;
+ if (codePoint == 0x2212)
+ codePoint = 0xFF0D;
- if (static_cast<UChar>(c) != c) {
- unencodableHandler(c, result);
+ auto pointer = findLastInSortedPairs(jis0208, codePoint);
+ if (!pointer) {
+ unencodableHandler(codePoint, result);
continue;
}
-
- auto pointerRange = std::equal_range(std::begin(jis0208), std::end(jis0208), std::pair<UChar, uint16_t>(static_cast<UChar>(c), 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (pointerRange.first == pointerRange.second) {
- unencodableHandler(c, result);
- continue;
- }
- uint16_t pointer = (pointerRange.second - 1)->second;
- result.append(pointer / 94 + 0xA1);
- result.append(pointer % 94 + 0xA1);
+ result.append(*pointer / 94 + 0xA1);
+ result.append(*pointer % 94 + 0xA1);
}
return result;
}
@@ -504,12 +484,12 @@
Function<void(UChar32)> parseCodePoint;
parseCodePoint = [&] (UChar32 codePoint) {
if (state == State::ASCII && isASCII(codePoint)) {
- result.append(static_cast<uint8_t>(codePoint));
+ result.append(codePoint);
return;
}
if (state == State::Roman) {
if (isASCII(codePoint) && codePoint != 0x005C && codePoint !=0x007E) {
- result.append(static_cast<uint8_t>(codePoint));
+ result.append(codePoint);
return;
}
if (codePoint == 0x00A5) {
@@ -545,22 +525,15 @@
0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC, 0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE, 0x30DF,
0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9, 0x30EA, 0x30EB, 0x30EC, 0x30ED, 0x30EF, 0x30F3, 0x309B, 0x309C
};
+ static_assert(std::size(iso2022JPKatakana) == 0xFF9F - 0xFF61 + 1);
codePoint = iso2022JPKatakana[codePoint - 0xFF61];
}
-
- auto codeUnit = static_cast<UChar>(codePoint);
- if (codeUnit != codePoint) {
+
+ auto pointer = findLastInSortedPairs(jis0208, codePoint);
+ if (!pointer) {
statefulUnencodableHandler(codePoint, result);
return;
}
-
- auto range = std::equal_range(std::begin(jis0208), std::end(jis0208), std::pair<UChar, uint16_t>(codeUnit, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first == range.second) {
- statefulUnencodableHandler(codePoint, result);
- return;
- }
if (state != State::Jis0208) {
state = State::Jis0208;
result.append(0x1B);
@@ -569,9 +542,8 @@
parseCodePoint(codePoint);
return;
}
- uint16_t pointer = (range.second - 1)->second;
- result.append(pointer / 94 + 0x21);
- result.append(pointer % 94 + 0x21);
+ result.append(*pointer / 94 + 0x21);
+ result.append(*pointer % 94 + 0x21);
};
auto characters = string.upconvertedCharacters();
@@ -650,20 +622,12 @@
if (codePoint == 0x2212)
codePoint = 0xFF0D;
- auto codeUnit = static_cast<UChar>(codePoint);
- if (codeUnit != codePoint) {
- unencodableHandler(codePoint, result);
- continue;
- }
-
- auto range = std::equal_range(std::begin(jis0208), std::end(jis0208), std::pair<UChar, uint16_t>(codeUnit, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
+ auto range = findInSortedPairs(jis0208, codePoint);
if (range.first == range.second) {
unencodableHandler(codePoint, result);
continue;
}
-
+
ASSERT(range.first + 3 >= range.second);
for (auto* pair = range.second - 1; pair >= range.first; pair--) {
uint16_t pointer = pair->second;
@@ -684,16 +648,17 @@
using EUCKREncodingIndex = std::array<std::pair<UChar, uint16_t>, std::size(eucKRDecodingIndex)>;
static const EUCKREncodingIndex& eucKREncodingIndex()
{
- static auto& table = *[] {
- auto table = new EUCKREncodingIndex;
+ // Allocate this at runtime because building it at compile time would make the binary much larger and this is often not used.
+ static EUCKREncodingIndex* table;
+ static std::once_flag once;
+ std::call_once(once, [&] {
+ table = new EUCKREncodingIndex;
for (size_t i = 0; i < std::size(eucKRDecodingIndex); i++)
(*table)[i] = { eucKRDecodingIndex[i].second, eucKRDecodingIndex[i].first };
- std::sort(table->begin(), table->end(), [] (auto& a, auto& b) {
- return a.first < b.first;
- });
- return table;
- }();
- return table;
+ sortByFirst(*table);
+ ASSERT(sortedFirstsAreUnique(*table));
+ });
+ return *table;
}
// https://encoding.spec.whatwg.org/#euc-kr-encoder
@@ -706,26 +671,17 @@
for (WTF::CodePointIterator<UChar> iterator(characters.get(), characters.get() + string.length()); !iterator.atEnd(); ++iterator) {
auto codePoint = *iterator;
if (isASCII(codePoint)) {
- result.append(static_cast<uint8_t>(codePoint));
+ result.append(codePoint);
continue;
}
- auto codeUnit = static_cast<UChar>(codePoint);
- if (codeUnit != codePoint) {
+ auto pointer = findFirstInSortedPairs(eucKREncodingIndex(), codePoint);
+ if (!pointer) {
unencodableHandler(codePoint, result);
continue;
}
- auto& index = eucKREncodingIndex();
- auto range = std::equal_range(index.begin(), index.end(), std::pair<UChar, uint16_t>(codePoint, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first == range.second) {
- unencodableHandler(codePoint, result);
- continue;
- }
- uint16_t pointer = range.first->second;
- result.append(pointer / 190 + 0x81);
- result.append(pointer % 190 + 0x41);
+ result.append(*pointer / 190 + 0x81);
+ result.append(*pointer % 190 + 0x41);
}
return result;
}
@@ -736,12 +692,8 @@
return decodeCommon(bytes, length, flush, stopOnError, sawError, [this] (uint8_t byte, StringBuilder& result) {
if (uint8_t lead = std::exchange(m_lead, 0x00)) {
if (byte >= 0x41 && byte <= 0xFE) {
- uint16_t pointer = (lead - 0x81) * 190 + byte - 0x41;
- auto range = std::equal_range(std::begin(eucKRDecodingIndex), std::end(eucKRDecodingIndex), std::pair<uint16_t, UChar>(pointer, 0), [] (const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first != range.second) {
- result.append(range.first->second);
+ if (auto codePoint = findFirstInSortedPairs(eucKRDecodingIndex, (lead - 0x81) * 190 + byte - 0x41)) {
+ result.append(*codePoint);
return SawError::No;
}
}
@@ -769,23 +721,20 @@
auto characters = string.upconvertedCharacters();
for (WTF::CodePointIterator<UChar> iterator(characters.get(), characters.get() + string.length()); !iterator.atEnd(); ++iterator) {
- auto c = *iterator;
- if (isASCII(c)) {
- result.append(c);
+ auto codePoint = *iterator;
+ if (isASCII(codePoint)) {
+ result.append(codePoint);
continue;
}
- auto pointerRange = std::equal_range(std::begin(big5EncodingMap), std::end(big5EncodingMap), std::pair<UChar32, uint16_t>(c, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
-
+ auto pointerRange = findInSortedPairs(big5EncodingMap, codePoint);
if (pointerRange.first == pointerRange.second) {
- unencodableHandler(c, result);
+ unencodableHandler(codePoint, result);
continue;
}
uint16_t pointer = 0;
- if (c == 0x2550 || c == 0x255E || c == 0x2561 || c == 0x256A || c == 0x5341 || c == 0x5345)
+ if (codePoint == 0x2550 || codePoint == 0x255E || codePoint == 0x2561 || codePoint == 0x256A || codePoint == 0x5341 || codePoint == 0x5345)
pointer = (pointerRange.second - 1)->second;
else
pointer = pointerRange.first->second;
@@ -854,18 +803,19 @@
using Big5DecodeIndex = std::array<std::pair<uint16_t, UChar32>, std::size(big5DecodingExtras) + std::size(big5EncodingMap)>;
static const Big5DecodeIndex& big5DecodeIndex()
{
- static auto& table = *[] {
- auto table = new Big5DecodeIndex;
+ // Allocate this at runtime because building it at compile time would make the binary much larger and this is often not used.
+ static Big5DecodeIndex* table;
+ static std::once_flag once;
+ std::call_once(once, [&] {
+ table = new Big5DecodeIndex;
for (size_t i = 0; i < std::size(big5DecodingExtras); i++)
(*table)[i] = big5DecodingExtras[i];
for (size_t i = 0; i < std::size(big5EncodingMap); i++)
(*table)[i + std::size(big5DecodingExtras)] = { big5EncodingMap[i].second, big5EncodingMap[i].first };
- std::sort(table->begin(), table->end(), [] (auto& a, auto& b) {
- return a.first < b.first;
- });
- return table;
- }();
- return table;
+ sortByFirst(*table);
+ ASSERT(sortedFirstsAreUnique(*table));
+ });
+ return *table;
}
String TextCodecCJK::big5Decode(const uint8_t* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
@@ -888,14 +838,9 @@
result.appendCharacter(0x00EA);
result.appendCharacter(0x030C);
} else {
- auto& index = big5DecodeIndex();
- auto range = std::equal_range(index.begin(), index.end(), std::pair<uint16_t, UChar32>(pointer, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first != range.second) {
- ASSERT(range.first + 1 == range.second);
- result.appendCharacter(range.first->second);
- } else
+ if (auto codePoint = findFirstInSortedPairs(big5DecodeIndex(), pointer))
+ result.appendCharacter(*codePoint);
+ else
return SawError::Yes;
}
return SawError::No;
@@ -905,7 +850,7 @@
return SawError::Yes;
}
if (isASCII(byte)) {
- result.append(static_cast<LChar>(byte));
+ result.append(static_cast<char>(byte));
return SawError::No;
}
if (byte >= 0x81 && byte <= 0xFE) {
Modified: trunk/Source/WebCore/platform/text/TextCodecSingleByte.cpp (266702 => 266703)
--- trunk/Source/WebCore/platform/text/TextCodecSingleByte.cpp 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Source/WebCore/platform/text/TextCodecSingleByte.cpp 2020-09-07 17:11:42 UTC (rev 266703)
@@ -26,6 +26,9 @@
#include "config.h"
#include "TextCodecSingleByte.h"
+#include "EncodingTables.h"
+#include <mutex>
+#include <wtf/IteratorRange.h>
#include <wtf/text/CodePointIterator.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/unicode/CharacterNames.h>
@@ -46,7 +49,8 @@
};
using SingleByteDecodeTable = std::array<UChar, 128>;
-using SingleByteEncodeTable = std::pair<const std::pair<UChar, uint8_t>*, size_t>;
+using SingleByteEncodeTableEntry = std::pair<UChar, uint8_t>;
+using SingleByteEncodeTable = WTF::IteratorRange<const SingleByteEncodeTableEntry*>;
// From https://encoding.spec.whatwg.org/index-iso-8859-3.txt with 0xFFFD filling the gaps
static constexpr SingleByteDecodeTable iso88593 {
@@ -169,18 +173,25 @@
template<const SingleByteDecodeTable& decodeTable> SingleByteEncodeTable tableForEncoding()
{
- // FIXME: With the C++20 version of std::count, we should be able to change this from const to constexpr and get it computed at compile time.
- static const size_t size = std::size(decodeTable) - std::count(std::begin(decodeTable), std::end(decodeTable), replacementCharacter);
- static const auto table = [&] {
- auto table = new std::pair<UChar, uint8_t>[size];
+ // Allocate this at runtime because building it at compile time would make the binary much larger and this is often not used.
+ // FIXME: With the C++20 version of std::count, we should be able to change this from const to constexpr and compute the size at compile time.
+ static const auto size = std::size(decodeTable) - std::count(std::begin(decodeTable), std::end(decodeTable), replacementCharacter);
+ static const SingleByteEncodeTableEntry* entries;
+ std::once_flag once;
+ std::call_once(once, [&] {
+ auto* mutableEntries = new SingleByteEncodeTableEntry[size];
size_t j = 0;
for (uint8_t i = 0; i < std::size(decodeTable); i++) {
if (decodeTable[i] != replacementCharacter)
- table[j++] = { decodeTable[i], i };
+ mutableEntries[j++] = { decodeTable[i], i + 0x80 };
}
- return table;
- }();
- return { table, size };
+ ASSERT(j == size);
+ auto collection = WTF::makeIteratorRange(&mutableEntries[0], &mutableEntries[size]);
+ sortByFirst(collection);
+ ASSERT(sortedFirstsAreUnique(collection));
+ entries = mutableEntries;
+ });
+ return WTF::makeIteratorRange(&entries[0], &entries[size]);
}
static SingleByteEncodeTable tableForEncoding(TextCodecSingleByte::Encoding encoding)
@@ -240,31 +251,20 @@
// https://encoding.spec.whatwg.org/#single-byte-encoder
static Vector<uint8_t> encode(const SingleByteEncodeTable& table, StringView string, Function<void(UChar32, Vector<uint8_t>&)>&& unencodableHandler)
{
+ // FIXME: Consider adding an ASCII fast path like the one in TextCodecLatin1::decode.
Vector<uint8_t> result;
result.reserveInitialCapacity(string.length());
-
- auto characters = string.upconvertedCharacters();
- for (WTF::CodePointIterator<UChar> iterator(characters.get(), characters.get() + string.length()); !iterator.atEnd(); ++iterator) {
- auto codePoint = *iterator;
+ for (auto codePoint : string.codePoints()) {
if (isASCII(codePoint)) {
result.append(codePoint);
continue;
}
-
- auto codeUnit = static_cast<UChar>(codePoint);
- if (codeUnit != codePoint) {
+ auto byte = findFirstInSortedPairs(table, codePoint);
+ if (!byte) {
unencodableHandler(codePoint, result);
continue;
}
-
- auto range = std::equal_range(table.first, table.first + table.second, std::pair<UChar, uint8_t>(codeUnit, 0), [](const auto& a, const auto& b) {
- return a.first < b.first;
- });
- if (range.first == range.second) {
- unencodableHandler(codePoint, result);
- continue;
- }
- result.append(range.first->second + 0x80);
+ result.append(*byte);
}
return result;
}
@@ -284,7 +284,6 @@
sawError = true;
result.append(codePoint);
};
-
if (stopOnError) {
for (size_t i = 0; i < length; i++) {
parseByte(bytes[i]);
@@ -295,7 +294,6 @@
for (size_t i = 0; i < length; i++)
parseByte(bytes[i]);
}
-
return result.toString();
}
Modified: trunk/Tools/ChangeLog (266702 => 266703)
--- trunk/Tools/ChangeLog 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Tools/ChangeLog 2020-09-07 17:11:42 UTC (rev 266703)
@@ -1,3 +1,14 @@
+2020-09-06 Darin Adler <[email protected]>
+
+ Make TextCodecCJK and TextCodecSingleByte thread-safe and refactor a bit to share code
+ https://bugs.webkit.org/show_bug.cgi?id=216229
+
+ Reviewed by Sam Weinig.
+
+ * TestWebKitAPI/Tests/WTF/StringView.cpp:
+ (TestWebKitAPI::TEST): Removed an unused variable. At one point, with
+ code that I wrote and now have rolled out, this was causing a build failure.
+
2020-09-06 Myles C. Maxfield <[email protected]>
Make GlyphBufferAdvance and GlyphBufferOrigin more robust
Modified: trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp (266702 => 266703)
--- trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp 2020-09-07 17:02:40 UTC (rev 266702)
+++ trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp 2020-09-07 17:11:42 UTC (rev 266703)
@@ -896,7 +896,6 @@
TEST(WTF, StringView8Bit)
{
- StringView nullView;
EXPECT_TRUE(StringView().is8Bit());
EXPECT_TRUE(StringView::empty().is8Bit());