Title: [205188] trunk
Revision
205188
Author
[email protected]
Date
2016-08-30 11:09:07 -0700 (Tue, 30 Aug 2016)

Log Message

[Fetch API] Body mix-in text() should decode data as UTF-8
https://bugs.webkit.org/show_bug.cgi?id=161372

Patch by Youenn Fablet <[email protected]> on 2016-08-30
Reviewed by Sam Weinig.

LayoutTests/imported/w3c:

* web-platform-tests/fetch/api/basic/text-utf8-expected.txt: Added.
* web-platform-tests/fetch/api/basic/text-utf8.html: Added.
* web-platform-tests/fetch/api/resources/status.py: Added.
(main):

Source/WebCore:

Test: imported/w3c/web-platform-tests/fetch/api/basic/text-utf8.html

Using TextResourceDecoder to decode data as UTF-8.
Making sure to prepend BOM if there is none, as specified in https://encoding.spec.whatwg.org/#utf-8-decode.

* Modules/fetch/FetchBodyConsumer.cpp:
(WebCore::shouldPrependBOM):
(WebCore::textFromUTF8):
(WebCore::FetchBodyConsumer::resolveWithData):
(WebCore::FetchBodyConsumer::takeAsText):

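Modified Paths

trunk/LayoutTests/imported/w3c/ChangeLog
trunk/Source/WebCore/ChangeLog
trunk/Source/WebCore/Modules/fetch/FetchBodyConsumer.cpp

Added Paths

trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8-expected.txt
trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8.html
trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/resources/status.py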

Diff

Modified: trunk/LayoutTests/imported/w3c/ChangeLog (205187 => 205188)


--- trunk/LayoutTests/imported/w3c/ChangeLog	2016-08-30 18:05:00 UTC (rev 205187)
+++ trunk/LayoutTests/imported/w3c/ChangeLog	2016-08-30 18:09:07 UTC (rev 205188)
@@ -1,3 +1,15 @@
+2016-08-30  Youenn Fablet  <[email protected]>
+
+        [Fetch API] Body mix-in text() should decode data as UTF-8
+        https://bugs.webkit.org/show_bug.cgi?id=161372
+
+        Reviewed by Sam Weinig.
+
+        * web-platform-tests/fetch/api/basic/text-utf8-expected.txt: Added.
+        * web-platform-tests/fetch/api/basic/text-utf8.html: Added.
+        * web-platform-tests/fetch/api/resources/status.py: Added.
+        (main):
+
 2016-08-29  Chris Dumez  <[email protected]>
 
         Unskip several web-platform-tests that are now passing

Added: trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8-expected.txt (0 => 205188)


--- trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8-expected.txt	                        (rev 0)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8-expected.txt	2016-08-30 18:09:07 UTC (rev 205188)
@@ -0,0 +1,22 @@
+
+PASS UTF-8 with BOM with Request.text() 
+PASS UTF-8 with BOM with Response.text() 
+PASS UTF-8 with BOM with fetched data (UTF-8 charset) 
+PASS UTF-8 with BOM with fetched data (UTF-16 charset) 
+PASS UTF-8 without BOM with Request.text() 
+PASS UTF-8 without BOM with Response.text() 
+PASS UTF-8 without BOM with fetched data (UTF-8 charset) 
+PASS UTF-8 without BOM with fetched data (UTF-16 charset) 
+PASS UTF-16BE with BOM decoded as UTF-8 with Request.text() 
+PASS UTF-16BE with BOM decoded as UTF-8 with Response.text() 
+PASS UTF-16BE with BOM decoded as UTF-8 with fetched data (UTF-8 charset) 
+PASS UTF-16BE with BOM decoded as UTF-8 with fetched data (UTF-16 charset) 
+PASS UTF-16LE with BOM decoded as UTF-8 with Request.text() 
+PASS UTF-16LE with BOM decoded as UTF-8 with Response.text() 
+PASS UTF-16LE with BOM decoded as UTF-8 with fetched data (UTF-8 charset) 
+PASS UTF-16LE with BOM decoded as UTF-8 with fetched data (UTF-16 charset) 
+PASS UTF-16 without BOM decoded as UTF-8 with Request.text() 
+PASS UTF-16 without BOM decoded as UTF-8 with Response.text() 
+PASS UTF-16 without BOM decoded as UTF-8 with fetched data (UTF-8 charset) 
+PASS UTF-16 without BOM decoded as UTF-8 with fetched data (UTF-16 charset) 
+

Added: trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8.html (0 => 205188)


--- trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8.html	                        (rev 0)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/basic/text-utf8.html	2016-08-30 18:09:07 UTC (rev 205188)
@@ -0,0 +1,69 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>Fetch: Request and Response text() should decode as UTF-8</title>
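+<script src="/resources/testharness.js"></script>
+<script src="/resources/testharnessreport.js"></script>
+<link rel="help" href="https://fetch.spec.whatwg.org/#body-mixin" />
+
+<script src="../resources/utils.js"></script>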
+<script>
+
+function testTextDecoding(body, expectedText, urlParameter, title)
+{
+    var arrayBuffer = stringToArray(body);
+
+    promise_test(function(test) {
+        var request = new Request("", {method: "POST", body: arrayBuffer});
+        return request.text().then(function(value) {
+            assert_equals(value, expectedText, "Request.text() should decode data as UTF-8");
+        });
+    }, title + " with Request.text()");
+
+    promise_test(function(test) {
+        var response = new Response(arrayBuffer);
+        return response.text().then(function(value) {
+            assert_equals(value, expectedText, "Response.text() should decode data as UTF-8");
+        });
+    }, title + " with Response.text()");
+
+    promise_test(function(test) {
+        return fetch("../resources/status.py?code=200&type=text%2Fplain%3Bcharset%3DUTF-8&content=" + urlParameter).then(function(response) {
+            return response.text().then(function(value) {
+                assert_equals(value, expectedText, "Fetched Response.text() should decode data as UTF-8");
+            });
+        });
+    }, title + " with fetched data (UTF-8 charset)");
+
+    promise_test(function(test) {
+        return fetch("../resources/status.py?code=200&type=text%2Fplain%3Bcharset%3DUTF-16&content=" + urlParameter).then(function(response) {
+            return response.text().then(function(value) {
+                assert_equals(value, expectedText, "Fetched Response.text() should decode data as UTF-8");
+            });
+        });
+    }, title + " with fetched data (UTF-16 charset)");
+}
+
+var utf8WithBOM = "\xef\xbb\xbf\xe4\xb8\x89\xe6\x9d\x91\xe3\x81\x8b\xe3\x81\xaa\xe5\xad\x90";
+var utf8WithBOMAsURLParameter = "%EF%BB%BF%E4%B8%89%E6%9D%91%E3%81%8B%E3%81%AA%E5%AD%90";
+var utf8WithoutBOM = "\xe4\xb8\x89\xe6\x9d\x91\xe3\x81\x8b\xe3\x81\xaa\xe5\xad\x90";
+var utf8WithoutBOMAsURLParameter = "%E4%B8%89%E6%9D%91%E3%81%8B%E3%81%AA%E5%AD%90";
+var utf8Decoded = "三村かな子";
+testTextDecoding(utf8WithBOM, utf8Decoded, utf8WithBOMAsURLParameter, "UTF-8 with BOM");
+testTextDecoding(utf8WithoutBOM, utf8Decoded, utf8WithoutBOMAsURLParameter, "UTF-8 without BOM");
+
+var utf16BEWithBOM = "\xfe\xff\x4e\x09\x67\x51\x30\x4b\x30\x6a\x5b\x50";
+var utf16BEWithBOMAsURLParameter = "%fe%ff%4e%09%67%51%30%4b%30%6a%5b%50";
+var utf16BEWithBOMDecodedAsUTF8 = "��N\tgQ0K0j[P";
+testTextDecoding(utf16BEWithBOM, utf16BEWithBOMDecodedAsUTF8, utf16BEWithBOMAsURLParameter, "UTF-16BE with BOM decoded as UTF-8");
+
+var utf16LEWithBOM = "\xff\xfe\x09\x4e\x51\x67\x4b\x30\x6a\x30\x50\x5b";
+var utf16LEWithBOMAsURLParameter = "%ff%fe%09%4e%51%67%4b%30%6a%30%50%5b";
+var utf16LEWithBOMDecodedAsUTF8 = "��\tNQgK0j0P[";
+testTextDecoding(utf16LEWithBOM, utf16LEWithBOMDecodedAsUTF8, utf16LEWithBOMAsURLParameter, "UTF-16LE with BOM decoded as UTF-8");
+
+var utf16WithoutBOM = "\xe6\x00\xf8\x00\xe5\x00\x0a\x00\xc6\x30\xb9\x30\xc8\x30\x0a\x00";
+var utf16WithoutBOMAsURLParameter = "%E6%00%F8%00%E5%00%0A%00%C6%30%B9%30%C8%30%0A%00";
+var utf16WithoutBOMDecoded = "\ufffd\u0000\ufffd\u0000\ufffd\u0000\u000a\u0000\ufffd\u0030\ufffd\u0030\ufffd\u0030\u000a\u0000";
+testTextDecoding(utf16WithoutBOM, utf16WithoutBOMDecoded, utf16WithoutBOMAsURLParameter, "UTF-16 without BOM decoded as UTF-8");
+
+</script>

Added: trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/resources/status.py (0 => 205188)


--- trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/resources/status.py	                        (rev 0)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/fetch/api/resources/status.py	2016-08-30 18:09:07 UTC (rev 205188)
@@ -0,0 +1,9 @@
+def main(request, response):
+    code = int(request.GET.first("code", 200))
+    text = request.GET.first("text", "OMG")
+    content = request.GET.first("content", "")
+    type = request.GET.first("type", "")
+    status = (code, text)
+    headers = [("Content-Type", type),
+               ("X-Request-Method", request.method)]
+    return status, headers, content

Modified: trunk/Source/WebCore/ChangeLog (205187 => 205188)


--- trunk/Source/WebCore/ChangeLog	2016-08-30 18:05:00 UTC (rev 205187)
+++ trunk/Source/WebCore/ChangeLog	2016-08-30 18:09:07 UTC (rev 205188)
@@ -1,3 +1,21 @@
+2016-08-30  Youenn Fablet  <[email protected]>
+
+        [Fetch API] Body mix-in text() should decode data as UTF-8
+        https://bugs.webkit.org/show_bug.cgi?id=161372
+
+        Reviewed by Sam Weinig.
+
+        Test: imported/w3c/web-platform-tests/fetch/api/basic/text-utf8.html
+
+        Using TextResourceDecoder to decode data as UTF-8.
+        Making sure to prepend BOM if there is none, as specified in https://encoding.spec.whatwg.org/#utf-8-decode.
+
+        * Modules/fetch/FetchBodyConsumer.cpp:
+        (WebCore::shouldPrependBOM):
+        (WebCore::textFromUTF8):
+        (WebCore::FetchBodyConsumer::resolveWithData):
+        (WebCore::FetchBodyConsumer::takeAsText):
+
 2016-08-30  Zalan Bujtas  <[email protected]>
 
         ASSERTION FAILED: opportunitiesInRun <= expansionOpportunityCount in WebCore::computeExpansionForJustifiedText

Modified: trunk/Source/WebCore/Modules/fetch/FetchBodyConsumer.cpp (205187 => 205188)


--- trunk/Source/WebCore/Modules/fetch/FetchBodyConsumer.cpp	2016-08-30 18:05:00 UTC (rev 205187)
+++ trunk/Source/WebCore/Modules/fetch/FetchBodyConsumer.cpp	2016-08-30 18:09:07 UTC (rev 205188)
@@ -33,6 +33,7 @@
 
 #include "JSBlob.h"
 #include "JSDOMPromise.h"
+#include "TextResourceDecoder.h"
 
 namespace WebCore {
 
@@ -43,6 +44,21 @@
     return Blob::create(WTFMove(value), contentType);
 }
 
+static inline bool shouldPrependBOM(const unsigned char* data, unsigned length)
+{
+    if (length < 3)
+        return true;
+    return data[0] != 0xef || data[1] != 0xbb || data[2] != 0xbf;
+}
+
+static String textFromUTF8(const unsigned char* data, unsigned length)
+{
+    auto decoder = TextResourceDecoder::create("text/plain", "UTF-8");
+    if (shouldPrependBOM(data, length))
+        decoder->decode("\xef\xbb\xbf", 3);
+    return decoder->decodeAndFlush(reinterpret_cast<const char*>(data), length);
+}
+
 void FetchBodyConsumer::resolveWithData(DeferredWrapper& promise, const unsigned char* data, unsigned length)
 {
     switch (m_type) {
@@ -53,10 +69,10 @@
         promise.resolveWithNewlyCreated(blobFromData(data, length, m_contentType));
         return;
     case Type::JSON:
-        fulfillPromiseWithJSON(promise, String(data, length));
+        fulfillPromiseWithJSON(promise, textFromUTF8(data, length));
         return;
     case Type::Text:
-        promise.resolve(String(data, length));
+        promise.resolve(textFromUTF8(data, length));
         return;
     case Type::None:
         ASSERT_NOT_REACHED();
@@ -126,10 +142,11 @@
 
 String FetchBodyConsumer::takeAsText()
 {
+    // FIXME: We could probably text decode on the fly as soon as m_type is set to JSON or Text.
     if (!m_buffer)
         return String();
 
-    auto text = String(m_buffer->data(), m_buffer->size());
+    auto text = textFromUTF8(reinterpret_cast<const unsigned char*>(m_buffer->data()), m_buffer->size());
     m_buffer = nullptr;
     return text;
 }
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to