Diff
Modified: trunk/LayoutTests/ChangeLog (261394 => 261395)
--- trunk/LayoutTests/ChangeLog 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/LayoutTests/ChangeLog 2020-05-08 17:35:46 UTC (rev 261395)
@@ -1,3 +1,16 @@
+2020-05-08 Wenson Hsieh <[email protected]>
+
+ Preserve character set information when writing to the pasteboard when copying rich text
+ https://bugs.webkit.org/show_bug.cgi?id=211524
+
+ Reviewed by Darin Adler.
+
+ Rebaseline several layout tests.
+
+ * platform/ios/editing/pasteboard/onpaste-text-html-expected.txt:
+ * platform/mac/editing/pasteboard/onpaste-text-html-expected.txt: Added.
+ * platform/mac/fast/events/ondrop-text-html-expected.txt: Added.
+
2020-05-08 Eric Carlson <[email protected]>
REGRESSION(r261341): imported/w3c/web-platform-tests/html/semantics/embedded-content/the-video-element/intrinsic_sizes.htm is failing
Modified: trunk/LayoutTests/platform/ios/editing/pasteboard/onpaste-text-html-expected.txt (261394 => 261395)
--- trunk/LayoutTests/platform/ios/editing/pasteboard/onpaste-text-html-expected.txt 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/LayoutTests/platform/ios/editing/pasteboard/onpaste-text-html-expected.txt 2020-05-08 17:35:46 UTC (rev 261395)
@@ -1,5 +1,5 @@
CONSOLE MESSAGE: text/plain: This test verifies that we can get text/html from the clipboard during an onpaste event.
-CONSOLE MESSAGE: text/html: <span style="...">This test verifies that we can get text/html from the clipboard during an onpaste event.<span class="Apple-converted-space"> </span></span>
+CONSOLE MESSAGE: text/html: <meta charset="UTF-8"><span style="...">This test verifies that we can get text/html from the clipboard during an onpaste event.<span class="Apple-converted-space"> </span></span>
This test verifies that we can get text/html from the clipboard during an onpaste event. This test requires DRT.
Paste content in this div. This test verifies that we can get text/html from the clipboard during an onpaste event.
PASS
Added: trunk/LayoutTests/platform/mac/editing/pasteboard/onpaste-text-html-expected.txt (0 => 261395)
--- trunk/LayoutTests/platform/mac/editing/pasteboard/onpaste-text-html-expected.txt (rev 0)
+++ trunk/LayoutTests/platform/mac/editing/pasteboard/onpaste-text-html-expected.txt 2020-05-08 17:35:46 UTC (rev 261395)
@@ -0,0 +1,6 @@
+CONSOLE MESSAGE: text/plain: This test verifies that we can get text/html from the clipboard during an onpaste event.
+CONSOLE MESSAGE: text/html: <meta charset="UTF-8"><span style="...">This test verifies that we can get text/html from the clipboard during an onpaste event.<span class="Apple-converted-space"> </span></span>
+This test verifies that we can get text/html from the clipboard during an onpaste event. This test requires DRT.
+Paste content in this div.This test verifies that we can get text/html from the clipboard during an onpaste event.
+PASS
+
Added: trunk/LayoutTests/platform/mac/fast/events/ondrop-text-html-expected.txt (0 => 261395)
--- trunk/LayoutTests/platform/mac/fast/events/ondrop-text-html-expected.txt (rev 0)
+++ trunk/LayoutTests/platform/mac/fast/events/ondrop-text-html-expected.txt 2020-05-08 17:35:46 UTC (rev 261395)
@@ -0,0 +1,4 @@
+CONSOLE MESSAGE: text/plain: This test verifies that we can get text/html from the drag object during an ondrop event.
+CONSOLE MESSAGE: text/html: <meta charset="UTF-8"><span style="caret-color: rgb(0, 0, 0); color: rgb(0, 0, 0); font-size: medium; font-style: normal; font-variant-caps: normal; font-weight: normal; letter-spacing: normal; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: auto; word-spacing: 0px; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; text-decoration: none; display: inline !important; float: none;">This test verifies that we can get text/html from the drag object during an ondrop event.<span class="Apple-converted-space"> </span></span>
+This test verifies that we can get text/html from the drag object during an ondrop event. This test requires DRT.
+PASS
Modified: trunk/Source/WebCore/ChangeLog (261394 => 261395)
--- trunk/Source/WebCore/ChangeLog 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/ChangeLog 2020-05-08 17:35:46 UTC (rev 261395)
@@ -1,3 +1,42 @@
+2020-05-08 Wenson Hsieh <[email protected]>
+
+ Preserve character set information when writing to the pasteboard when copying rich text
+ https://bugs.webkit.org/show_bug.cgi?id=211524
+
+ Reviewed by Darin Adler.
+
+ This patch is a followup to r261247, which introduced a workaround for Cocoa platforms to automatically add a
+ `meta charset` when writing HTML data to the platform pasteboard. When copying a rich text selection, we use a
+ different codepath when sanitizing DOM content - that is, `serializePreservingVisualAppearance`.
+
+ The previous change also introduced an enum, `AddMetaCharsetIfNeeded`, to limit applying this workaround to only
+ when we're writing HTML to the clipboard. However, it should be harmless to include this when reading sanitized
+ HTML as well, so we can also simplify this code by removing the `AddMetaCharsetIfNeeded` enum (and all of its
+ uses).
+
+ Test: CopyHTML.SanitizationPreservesCharacterSetInSelectedText
+
+ * Modules/async-clipboard/ClipboardItemBindingsDataSource.cpp:
+ (WebCore::ClipboardItemBindingsDataSource::ClipboardItemTypeLoader::sanitizeDataIfNeeded):
+ * dom/DataTransfer.cpp:
+ (WebCore::DataTransfer::setDataFromItemList):
+ * editing/MarkupAccumulator.h:
+ (WebCore::MarkupAccumulator::isAllASCII const):
+ * editing/cocoa/WebContentReaderCocoa.mm:
+ (WebCore::sanitizeMarkupWithArchive):
+ (WebCore::WebContentReader::readHTML):
+ (WebCore::WebContentMarkupReader::readHTML):
+ * editing/markup.cpp:
+ (WebCore::sanitizeMarkup):
+ (WebCore::serializePreservingVisualAppearanceInternal):
+
+ Move the `meta charset` workaround from sanitizedMarkupForFragmentInDocument to
+ serializePreservingVisualAppearanceInternal, such that HTML data written to the pasteboard when copying rich
+ text can be used to load a web view, without losing the fact that the copied content was UTF-8 encoded.
+
+ (WebCore::sanitizedMarkupForFragmentInDocument):
+ * editing/markup.h:
+
2020-05-08 Andres Gonzalez <[email protected]>
Fix for crashes in LayoutTests in isolated tree mode.
Modified: trunk/Source/WebCore/Modules/async-clipboard/ClipboardItemBindingsDataSource.cpp (261394 => 261395)
--- trunk/Source/WebCore/Modules/async-clipboard/ClipboardItemBindingsDataSource.cpp 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/Modules/async-clipboard/ClipboardItemBindingsDataSource.cpp 2020-05-08 17:35:46 UTC (rev 261395)
@@ -269,7 +269,7 @@
if (markupToSanitize.isEmpty())
return;
- m_data = { sanitizeMarkup(markupToSanitize, AddMetaCharsetIfNeeded::Yes) };
+ m_data = { sanitizeMarkup(markupToSanitize) };
}
if (m_type == "image/png"_s) {
Modified: trunk/Source/WebCore/dom/DataTransfer.cpp (261394 => 261395)
--- trunk/Source/WebCore/dom/DataTransfer.cpp 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/dom/DataTransfer.cpp 2020-05-08 17:35:46 UTC (rev 261395)
@@ -253,7 +253,7 @@
String sanitizedData;
if (type == "text/html")
- sanitizedData = sanitizeMarkup(data, AddMetaCharsetIfNeeded::Yes);
+ sanitizedData = sanitizeMarkup(data);
else if (type == "text/uri-list") {
auto url = "" }, data);
if (url.isValid())
Modified: trunk/Source/WebCore/editing/MarkupAccumulator.h (261394 => 261395)
--- trunk/Source/WebCore/editing/MarkupAccumulator.h 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/editing/MarkupAccumulator.h 2020-05-08 17:35:46 UTC (rev 261395)
@@ -69,6 +69,7 @@
protected:
static size_t totalLength(const Vector<String>&);
size_t length() const { return m_markup.length(); }
+ bool isAllASCII() const { return m_markup.toStringPreserveCapacity().isAllASCII(); }
void concatenateMarkup(StringBuilder&);
Modified: trunk/Source/WebCore/editing/cocoa/WebContentReaderCocoa.mm (261394 => 261395)
--- trunk/Source/WebCore/editing/cocoa/WebContentReaderCocoa.mm 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/editing/cocoa/WebContentReaderCocoa.mm 2020-05-08 17:35:46 UTC (rev 261395)
@@ -449,7 +449,7 @@
if (shouldReplaceRichContentWithAttachments()) {
replaceRichContentWithAttachments(frame, fragment, markupAndArchive.archive->subresources());
- return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, AddMetaCharsetIfNeeded::No, msoListQuirks, markupAndArchive.markup);
+ return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
}
HashMap<AtomString, AtomString> blobURLMap;
@@ -492,7 +492,7 @@
replaceSubresourceURLs(fragment.get(), WTFMove(blobURLMap));
- return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, AddMetaCharsetIfNeeded::No, msoListQuirks, markupAndArchive.markup);
+ return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
}
bool WebContentReader::readWebArchive(SharedBuffer& buffer)
@@ -580,7 +580,7 @@
String markup;
if (RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled() && shouldSanitize()) {
- markup = sanitizeMarkup(stringOmittingMicrosoftPrefix, AddMetaCharsetIfNeeded::No, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
+ markup = sanitizeMarkup(stringOmittingMicrosoftPrefix, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
removeSubresourceURLAttributes(fragment, [] (const URL& url) {
return shouldReplaceSubresourceURL(url);
});
@@ -599,7 +599,7 @@
String rawHTML = stripMicrosoftPrefix(string);
if (shouldSanitize()) {
- markup = sanitizeMarkup(rawHTML, AddMetaCharsetIfNeeded::No, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
+ markup = sanitizeMarkup(rawHTML, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
removeSubresourceURLAttributes(fragment, [] (const URL& url) {
return shouldReplaceSubresourceURL(url);
});
Modified: trunk/Source/WebCore/editing/markup.cpp (261394 => 261395)
--- trunk/Source/WebCore/editing/markup.cpp 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/editing/markup.cpp 2020-05-08 17:35:46 UTC (rev 261395)
@@ -201,7 +201,7 @@
return page;
}
-String sanitizeMarkup(const String& rawHTML, AddMetaCharsetIfNeeded addMetaCharsetIfNeeded, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)
+String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)
{
auto page = createPageForSanitizingWebContent();
Document* stagingDocument = page->mainFrame().document();
@@ -212,7 +212,7 @@
if (fragmentSanitizer)
(*fragmentSanitizer)(fragment);
- return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, addMetaCharsetIfNeeded, msoListQuirks, rawHTML);
+ return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML);
}
enum class MSOListMode { Preserve, DoNotPreserve };
@@ -240,6 +240,14 @@
return node.parentOrShadowHostNode();
}
+ void prependMetaCharsetUTF8TagIfNonASCIICharactersArePresent()
+ {
+ if (isAllASCII())
+ return;
+
+ m_reversedPrecedingMarkup.append("<meta charset=\"UTF-8\">"_s);
+ }
+
private:
void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false);
const String& styleNodeCloseTag(bool isBlock = false);
@@ -917,6 +925,11 @@
if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous()))
accumulator.appendString(interchangeNewlineString);
+#if PLATFORM(COCOA)
+ // On Cocoa platforms, this markup is eventually persisted to the pasteboard and read back as UTF-8 data,
+ // so this meta tag is needed for clients that read this data in the future from the pasteboard and load it.
+ accumulator.prependMetaCharsetUTF8TagIfNonASCIICharactersArePresent();
+#endif
return accumulator.takeResults();
}
@@ -945,7 +958,7 @@
&& htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"");
}
-String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, AddMetaCharsetIfNeeded addMetaCharsetIfNeeded, MSOListQuirks msoListQuirks, const String& originalMarkup)
+String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup)
{
MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup)
? MSOListMode::Preserve : MSOListMode::DoNotPreserve;
@@ -966,17 +979,6 @@
"xmlns=\"http://www.w3.org/TR/REC-html40\">");
}
-#if PLATFORM(COCOA)
- if (addMetaCharsetIfNeeded == AddMetaCharsetIfNeeded::Yes && !result.isAllASCII()) {
- // On Cocoa platforms, this markup is eventually persisted to the pasteboard and read back as UTF-8 data,
- // so this meta tag is needed for clients that read this data in the future from the pasteboard and load it.
- // This logic is used by both DataTransfer and Clipboard APIs to sanitize "text/html" from the page.
- builder.appendLiteral("<meta charset=\"UTF-8\">");
- }
-#else
- UNUSED_PARAM(addMetaCharsetIfNeeded);
-#endif
-
builder.append(result);
if (msoListMode == MSOListMode::Preserve)
Modified: trunk/Source/WebCore/editing/markup.h (261394 => 261395)
--- trunk/Source/WebCore/editing/markup.h 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Source/WebCore/editing/markup.h 2020-05-08 17:35:46 UTC (rev 261395)
@@ -54,9 +54,8 @@
std::unique_ptr<Page> createPageForSanitizingWebContent();
enum class MSOListQuirks : bool { CheckIfNeeded, Disabled };
-enum class AddMetaCharsetIfNeeded : bool { No, Yes };
-String sanitizeMarkup(const String&, AddMetaCharsetIfNeeded = AddMetaCharsetIfNeeded::No, MSOListQuirks = MSOListQuirks::Disabled, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer = WTF::nullopt);
-String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&&, Document&, AddMetaCharsetIfNeeded, MSOListQuirks, const String& originalMarkup);
+String sanitizeMarkup(const String&, MSOListQuirks = MSOListQuirks::Disabled, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer = WTF::nullopt);
+String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&&, Document&, MSOListQuirks, const String& originalMarkup);
WEBCORE_EXPORT Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text);
WEBCORE_EXPORT Ref<DocumentFragment> createFragmentFromMarkup(Document&, const String& markup, const String& baseURL, ParserContentPolicy = AllowScriptingContent);
Modified: trunk/Tools/ChangeLog (261394 => 261395)
--- trunk/Tools/ChangeLog 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Tools/ChangeLog 2020-05-08 17:35:46 UTC (rev 261395)
@@ -1,3 +1,15 @@
+2020-05-08 Wenson Hsieh <[email protected]>
+
+ Preserve character set information when writing to the pasteboard when copying rich text
+ https://bugs.webkit.org/show_bug.cgi?id=211524
+
+ Reviewed by Darin Adler.
+
+ Add a new API test to verify that the HTML data written to the pasteboard when copying a rich text selection
+ can be converted into an `NSAttributedString` that contains correctly encoded non-Latin characters.
+
+ * TestWebKitAPI/Tests/WebKitCocoa/CopyHTML.mm:
+
2020-05-08 Chris Dumez <[email protected]>
REGRESSION(r259209) Webview's pending URL is null after restoring session state
Modified: trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/CopyHTML.mm (261394 => 261395)
--- trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/CopyHTML.mm 2020-05-08 17:33:58 UTC (rev 261394)
+++ trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/CopyHTML.mm 2020-05-08 17:35:46 UTC (rev 261395)
@@ -102,6 +102,27 @@
EXPECT_FALSE(htmlInNativePasteboard.contains("dangerousCode"));
}
+TEST(CopyHTML, SanitizationPreservesCharacterSetInSelectedText)
+{
+ auto webView = createWebViewWithCustomPasteboardDataEnabled();
+ [webView synchronouslyLoadHTMLString:@"<!DOCTYPE html>"
+ "<body>"
+ "<meta charset='utf-8'>"
+ "<span id='copy'>我叫謝文昇</span>"
+ "<script>getSelection().selectAllChildren(copy);</script>"
+ "</body>"];
+ [webView copy:nil];
+ [webView waitForNextPresentationUpdate];
+
+ NSString *copiedMarkup = readHTMLStringFromPasteboard();
+ EXPECT_TRUE([copiedMarkup containsString:@"<span "]);
+ EXPECT_TRUE([copiedMarkup containsString:@"我叫謝文昇"]);
+ EXPECT_TRUE([copiedMarkup containsString:@"</span>"]);
+
+ auto attributedString = adoptNS([[NSAttributedString alloc] initWithData:readHTMLDataFromPasteboard() options:@{ NSDocumentTypeDocumentOption: NSHTMLTextDocumentType } documentAttributes:nil error:nil]);
+ EXPECT_WK_STREQ("我叫謝文昇", [attributedString string]);
+}
+
TEST(CopyHTML, SanitizationPreservesCharacterSet)
{
Vector<std::pair<RetainPtr<NSString>, RetainPtr<NSData>>, 3> markupStringsAndData;
@@ -137,7 +158,7 @@
EXPECT_WK_STREQ("我叫謝文昇", [attributedString string]);
__block BOOL foundColorAttribute = NO;
- [attributedString enumerateAttribute:NSForegroundColorAttributeName inRange:NSMakeRange(0, 5) options:0 usingBlock:^(CocoaColor *color, NSRange range, BOOL *) {
+ [attributedString enumerateAttribute:NSForegroundColorAttributeName inRange:NSMakeRange(0, 5) options:0 usingBlock:^(CocoaColor *color, NSRange range, BOOL*) {
CGFloat redComponent = 0;
CGFloat greenComponent = 0;
CGFloat blueComponent = 0;