Diff
Modified: trunk/LayoutTests/imported/w3c/ChangeLog (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/ChangeLog 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/ChangeLog 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,3 +1,20 @@
+2020-09-05 Alex Christensen <[email protected]>
+
+ TextDecoder should properly handle streams
+ https://bugs.webkit.org/show_bug.cgi?id=216202
+
+ Reviewed by Darin Adler.
+
+ * web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt:
+ * web-platform-tests/encoding/streams/decode-non-utf8.any.js:
+ * web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt:
+ * web-platform-tests/encoding/streams/decode-split-character.any-expected.txt:
+ * web-platform-tests/encoding/streams/decode-split-character.any.worker-expected.txt:
+ * web-platform-tests/encoding/streams/realms.window-expected.txt:
+ * web-platform-tests/encoding/textdecoder-fatal-streaming-expected.txt: Removed.
+ * web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt:
+ * web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt:
+
2020-09-05 Darin Adler <[email protected]>
CSS revert should serialize as "revert", not "Revert"
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -8,5 +8,8 @@
PASS TextDecoderStream should be able to decode Shift_JIS
PASS TextDecoderStream should be able to decode invalid sequences in Shift_JIS
PASS TextDecoderStream should be able to reject invalid sequences in Shift_JIS
+PASS TextDecoderStream should be able to decode ISO-2022-JP
+PASS TextDecoderStream should be able to decode invalid sequences in ISO-2022-JP
+PASS TextDecoderStream should be able to reject invalid sequences in ISO-2022-JP
PASS TextDecoderStream should be able to decode ISO-8859-14
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.js (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.js 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.js 2020-09-05 19:48:15 UTC (rev 266668)
@@ -26,6 +26,12 @@
invalid: [255]
},
{
+ name: 'ISO-2022-JP',
+ value: [65, 66, 67, 0x1B, 65, 66, 67],
+ expected: "ABC\u{fffd}ABC",
+ invalid: [0x0E]
+ },
+ {
name: 'ISO-8859-14',
value: [100, 240, 114],
expected: "d\u{0175}r",
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -8,5 +8,8 @@
PASS TextDecoderStream should be able to decode Shift_JIS
PASS TextDecoderStream should be able to decode invalid sequences in Shift_JIS
PASS TextDecoderStream should be able to reject invalid sequences in Shift_JIS
+PASS TextDecoderStream should be able to decode ISO-2022-JP
+PASS TextDecoderStream should be able to decode invalid sequences in ISO-2022-JP
+PASS TextDecoderStream should be able to reject invalid sequences in ISO-2022-JP
PASS TextDecoderStream should be able to decode ISO-8859-14
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-split-character.any-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-split-character.any-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-split-character.any-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,13 +1,13 @@
PASS a code point split between chunks should not be emitted until all bytes are available; split point = 2
-FAIL a code point split between chunks should not be emitted until all bytes are available; split point = 3 assert_array_equals: the split code point should be in the second chunk of the output lengths differ, expected array ["I ", "💙 streams"] length 2, got ["I 💙 streams"] length 1
-FAIL a code point split between chunks should not be emitted until all bytes are available; split point = 4 assert_array_equals: the split code point should be in the second chunk of the output lengths differ, expected array ["I ", "💙 streams"] length 2, got ["I 💙 streams"] length 1
-FAIL a code point split between chunks should not be emitted until all bytes are available; split point = 5 assert_array_equals: the split code point should be in the second chunk of the output lengths differ, expected array ["I ", "💙 streams"] length 2, got ["I 💙 streams"] length 1
+PASS a code point split between chunks should not be emitted until all bytes are available; split point = 3
+PASS a code point split between chunks should not be emitted until all bytes are available; split point = 4
+PASS a code point split between chunks should not be emitted until all bytes are available; split point = 5
PASS a code point should be emitted as soon as all bytes are available
PASS an empty chunk inside a code point split between chunks should not change the output; split point = 1
PASS an empty chunk inside a code point split between chunks should not change the output; split point = 2
-FAIL an empty chunk inside a code point split between chunks should not change the output; split point = 3 assert_equals: two chunks should be output expected 2 but got 1
-FAIL an empty chunk inside a code point split between chunks should not change the output; split point = 4 assert_equals: two chunks should be output expected 2 but got 1
-FAIL an empty chunk inside a code point split between chunks should not change the output; split point = 5 assert_equals: two chunks should be output expected 2 but got 1
+PASS an empty chunk inside a code point split between chunks should not change the output; split point = 3
+PASS an empty chunk inside a code point split between chunks should not change the output; split point = 4
+PASS an empty chunk inside a code point split between chunks should not change the output; split point = 5
PASS an empty chunk inside a code point split between chunks should not change the output; split point = 6
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-split-character.any.worker-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-split-character.any.worker-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-split-character.any.worker-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,13 +1,13 @@
PASS a code point split between chunks should not be emitted until all bytes are available; split point = 2
-FAIL a code point split between chunks should not be emitted until all bytes are available; split point = 3 assert_array_equals: the split code point should be in the second chunk of the output lengths differ, expected array ["I ", "💙 streams"] length 2, got ["I 💙 streams"] length 1
-FAIL a code point split between chunks should not be emitted until all bytes are available; split point = 4 assert_array_equals: the split code point should be in the second chunk of the output lengths differ, expected array ["I ", "💙 streams"] length 2, got ["I 💙 streams"] length 1
-FAIL a code point split between chunks should not be emitted until all bytes are available; split point = 5 assert_array_equals: the split code point should be in the second chunk of the output lengths differ, expected array ["I ", "💙 streams"] length 2, got ["I 💙 streams"] length 1
+PASS a code point split between chunks should not be emitted until all bytes are available; split point = 3
+PASS a code point split between chunks should not be emitted until all bytes are available; split point = 4
+PASS a code point split between chunks should not be emitted until all bytes are available; split point = 5
PASS a code point should be emitted as soon as all bytes are available
PASS an empty chunk inside a code point split between chunks should not change the output; split point = 1
PASS an empty chunk inside a code point split between chunks should not change the output; split point = 2
-FAIL an empty chunk inside a code point split between chunks should not change the output; split point = 3 assert_equals: two chunks should be output expected 2 but got 1
-FAIL an empty chunk inside a code point split between chunks should not change the output; split point = 4 assert_equals: two chunks should be output expected 2 but got 1
-FAIL an empty chunk inside a code point split between chunks should not change the output; split point = 5 assert_equals: two chunks should be output expected 2 but got 1
+PASS an empty chunk inside a code point split between chunks should not change the output; split point = 3
+PASS an empty chunk inside a code point split between chunks should not change the output; split point = 4
+PASS an empty chunk inside a code point split between chunks should not change the output; split point = 5
PASS an empty chunk inside a code point split between chunks should not change the output; split point = 6
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/realms.window-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/realms.window-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/realms.window-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -10,6 +10,6 @@
PASS the result object when read is called after write should come from the same realm as the constructor of TextDecoderStream
PASS the result object when write is called with a pending read should come from the same realm as the constructor of TextDecoderStream
PASS TypeError for chunk with the wrong type should come from constructor realm of TextDecoderStream
-FAIL TypeError for invalid chunk should come from constructor realm of TextDecoderStream assert_unreached: Should have rejected: write TypeError should come from constructor realm Reached unreachable code
+PASS TypeError for invalid chunk should come from constructor realm of TextDecoderStream
PASS TypeError for incomplete input should come from constructor realm of TextDecoderStream
Deleted: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,4 +0,0 @@
-
-FAIL Fatal flag, non-streaming cases assert_equals: Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset expected "\ufffd" but got ""
-FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got ""
-
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,4 +1,4 @@
PASS Fatal flag, non-streaming cases
-FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got ""
+PASS Fatal flag, streaming cases
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt (266667 => 266668)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,4 +1,4 @@
PASS Fatal flag, non-streaming cases
-FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got ""
+PASS Fatal flag, streaming cases
Modified: trunk/Source/WebCore/ChangeLog (266667 => 266668)
--- trunk/Source/WebCore/ChangeLog 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/Source/WebCore/ChangeLog 2020-09-05 19:48:15 UTC (rev 266668)
@@ -1,3 +1,23 @@
+2020-09-05 Alex Christensen <[email protected]>
+
+ TextDecoder should properly handle streams
+ https://bugs.webkit.org/show_bug.cgi?id=216202
+
+ Reviewed by Darin Adler.
+
+ A TextCodec keeps state when it decodes part of valid input, such as the first byte of a multibyte sequence.
+ TextEncoding::decode makes a new TextCodec and throws away that state.
+ In order to properly handle streaming, we need to keep the TextCodec and call TextCodec::decode directly.
+
+ Covered by newly passing web platform tests. I also added a test that failed in my first implementation attempt
+ but passes now in WebKit as well as Chromium. Firefox hasn't implemented TextDecoderStream yet, but this test will
+ hopefully help them not make the same mistake I did.
+
+ * dom/TextDecoder.cpp:
+ (WebCore::TextDecoder::decode):
+ (WebCore::codeUnitByteSize): Deleted.
+ * dom/TextDecoder.h:
+
2020-09-05 Myles C. Maxfield <[email protected]>
[Cocoa] CTFontIsSystemUIFont() is faster than CTFontDescriptorIsSystemUIFont()/CTFontCopyFontDescriptor()
Modified: trunk/Source/WebCore/dom/TextDecoder.cpp (266667 => 266668)
--- trunk/Source/WebCore/dom/TextDecoder.cpp 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/Source/WebCore/dom/TextDecoder.cpp 2020-09-05 19:48:15 UTC (rev 266668)
@@ -26,10 +26,14 @@
#include "TextDecoder.h"
#include "HTMLParserIdioms.h"
+#include "TextCodec.h"
+#include "TextEncodingRegistry.h"
#include <wtf/Optional.h>
namespace WebCore {
+TextDecoder::~TextDecoder() = default;
+
ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options)
{
String strippedLabel = stripLeadingAndTrailingHTMLSpaces(label);
@@ -117,11 +121,6 @@
return WaitForMoreBOMBytes::No;
}
-static size_t codeUnitByteSize(const TextEncoding& encoding)
-{
- return encoding.isByteBasedEncoding() ? 1 : 2;
-}
-
ExceptionOr<String> TextDecoder::decode(Optional<BufferSource::VariantType> input, DecodeOptions options)
{
Optional<BufferSource> inputBuffer;
@@ -151,28 +150,15 @@
return String();
}
- const bool stopOnError = true;
- bool sawError = false;
- if (length % codeUnitByteSize(m_textEncoding))
- sawError = true;
- const char* charData = reinterpret_cast<const char*>(data);
- String result;
- if (!sawError)
- result = m_textEncoding.decode(charData, length, stopOnError, sawError);
+ auto oldBuffer = std::exchange(m_buffer, { });
- if (sawError) {
- if (options.stream) {
- result = String();
- if (!m_buffer.size())
- m_buffer.append(data, length);
- } else {
- if (m_options.fatal)
- return Exception { TypeError };
- result = m_textEncoding.decode(charData, length);
- }
- } else
- m_buffer.clear();
+ if (!m_codec)
+ m_codec = newTextCodec(m_textEncoding);
+ bool sawError = false;
+ String result = m_codec->decode(reinterpret_cast<const char*>(data), length, !options.stream, false, sawError);
+ if (sawError && m_options.fatal)
+ return Exception { TypeError };
return result;
}
Modified: trunk/Source/WebCore/dom/TextDecoder.h (266667 => 266668)
--- trunk/Source/WebCore/dom/TextDecoder.h 2020-09-05 18:21:20 UTC (rev 266667)
+++ trunk/Source/WebCore/dom/TextDecoder.h 2020-09-05 19:48:15 UTC (rev 266668)
@@ -32,6 +32,8 @@
namespace WebCore {
+class TextCodec;
+
class TextDecoder : public RefCounted<TextDecoder> {
public:
struct Options {
@@ -43,6 +45,7 @@
};
static ExceptionOr<Ref<TextDecoder>> create(const String& label, Options);
+ ~TextDecoder();
String encoding() const;
bool fatal() const { return m_options.fatal; }
@@ -57,7 +60,8 @@
size_t bytesNeededForFullBOMIgnoreCheck() const;
bool isBeginningOfIncompleteBOM(const uint8_t*, size_t) const;
- TextEncoding m_textEncoding;
+ const TextEncoding m_textEncoding;
+ std::unique_ptr<TextCodec> m_codec;
Options m_options;
Vector<uint8_t> m_buffer;
bool m_bomIgnoredIfNecessary { false };