Modified: trunk/Source/WebCore/ChangeLog (262644 => 262645)
--- trunk/Source/WebCore/ChangeLog 2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Source/WebCore/ChangeLog 2020-06-05 19:59:00 UTC (rev 262645)
@@ -1,3 +1,24 @@
+2020-06-05 Wenson Hsieh <wenson_hs...@apple.com>
+
+ Text manipulation should exclude characters in the Unicode private use area
+ https://bugs.webkit.org/show_bug.cgi?id=212800
+ <rdar://problem/63736417>
+
+ Reviewed by Sihui Liu.
+
+ Consider characters that fall inside the Unicode private use area (PUA), in addition to line breaks, as excluded
+ when extracting tokens during text manipulation. In doing this, we also rename a few member variables in
+ `ManipulationUnit` to refer to "token delimiters" rather than line breaks.
+
+ Test: TextManipulation.StartTextManipulationExtractsPrivateUseCharactersAsExcludedTokens
+
+ * editing/TextManipulationController.cpp:
+ (WebCore::isInPrivateUseArea):
+ (WebCore::isTokenDelimiter):
+ (WebCore::TextManipulationController::parse):
+ (WebCore::TextManipulationController::observeParagraphs):
+ * editing/TextManipulationController.h:
+
2020-06-05 Dean Jackson <d...@apple.com>
REGRESSION (r262366): [ Mac wk1 ] webgl/webgl-backing-store-size-update.html is failing
Modified: trunk/Source/WebCore/editing/TextManipulationController.cpp (262644 => 262645)
--- trunk/Source/WebCore/editing/TextManipulationController.cpp 2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Source/WebCore/editing/TextManipulationController.cpp 2020-06-05 19:59:00 UTC (rev 262645)
@@ -294,6 +294,16 @@
return false;
}
+static bool isInPrivateUseArea(UChar character)
+{
+ return character >= 0xE000 && character <= 0xF8FF; // U+E000..U+F8FF: the private use area of the Basic Multilingual Plane.
+}
+
+static bool isTokenDelimiter(UChar character)
+{
+ return isHTMLLineBreak(character) || isInPrivateUseArea(character); // Either kind of character ends the current token in parse().
+}
+
TextManipulationController::ManipulationUnit TextManipulationController::parse(StringView text, Node* textNode)
{
Vector<ManipulationToken> tokens;
@@ -302,18 +312,15 @@
size_t startPositionOfCurrentToken = 0;
bool isNodeExcluded = exclusionRuleMatcher.isExcluded(textNode);
bool containsOnlyHTMLSpace = true;
- bool containsLineBreak = false;
- bool firstTokenContainsLineBreak = false;
- bool lastTokenContainsLineBreak = false;
+ bool containsTokenDelimiter = false;
+ bool firstTokenContainsDelimiter = false;
+ bool lastTokenContainsDelimiter = false;
size_t index = 0;
for (; index < text.length(); ++index) {
auto character = text[index];
- if (isNotHTMLSpace(character)) {
- containsOnlyHTMLSpace = false;
- positionOfLastNonHTMLSpace = index;
- } else if (isHTMLLineBreak(character)) {
- containsLineBreak = true;
+ if (isTokenDelimiter(character)) {
+ containsTokenDelimiter = true;
if (positionOfLastNonHTMLSpace != WTF::notFound && startPositionOfCurrentToken <= positionOfLastNonHTMLSpace) {
auto tokenString = text.substring(startPositionOfCurrentToken, positionOfLastNonHTMLSpace + 1 - startPositionOfCurrentToken).toString();
tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
@@ -320,19 +327,20 @@
startPositionOfCurrentToken = positionOfLastNonHTMLSpace + 1;
}
- while (index < text.length() && !isNotHTMLSpace(text[index]))
+ while (index < text.length() && (isHTMLSpace(text[index]) || isInPrivateUseArea(text[index])))
++index;
--index;
- auto lineBreakTokenString = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
+ auto stringForToken = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
if (tokens.isEmpty())
- firstTokenContainsLineBreak = true;
- tokens.append(ManipulationToken { m_tokenIdentifier.generate(), lineBreakTokenString, tokenInfo(textNode), true });
+ firstTokenContainsDelimiter = true;
+ tokens.append(ManipulationToken { m_tokenIdentifier.generate(), stringForToken, tokenInfo(textNode), true });
startPositionOfCurrentToken = index + 1;
- lastTokenContainsLineBreak = true;
-
- continue;
+ lastTokenContainsDelimiter = true;
+ } else if (isNotHTMLSpace(character)) {
+ containsOnlyHTMLSpace = false;
+ positionOfLastNonHTMLSpace = index;
}
}
@@ -339,10 +347,10 @@
if (startPositionOfCurrentToken < text.length()) {
auto tokenString = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
- lastTokenContainsLineBreak = false;
+ lastTokenContainsDelimiter = false;
}
- return { WTFMove(tokens), *textNode, containsOnlyHTMLSpace || isNodeExcluded, containsLineBreak, firstTokenContainsLineBreak, lastTokenContainsLineBreak };
+ return { WTFMove(tokens), *textNode, containsOnlyHTMLSpace || isNodeExcluded, containsTokenDelimiter, firstTokenContainsDelimiter, lastTokenContainsDelimiter };
}
void TextManipulationController::addItemIfPossible(Vector<ManipulationUnit>&& units)
@@ -432,7 +440,7 @@
continue;
auto unitsInCurrentNode = parse(content.text, contentNode);
- if (unitsInCurrentNode.firstTokenContainsLineBreak)
+ if (unitsInCurrentNode.firstTokenContainsDelimiter)
addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
if (unitsInCurrentParagraph.isEmpty() && unitsInCurrentNode.areAllTokensExcluded)
@@ -440,7 +448,7 @@
unitsInCurrentParagraph.append(WTFMove(unitsInCurrentNode));
- if (unitsInCurrentNode.lastTokenContainsLineBreak)
+ if (unitsInCurrentNode.lastTokenContainsDelimiter)
addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
}
Modified: trunk/Source/WebCore/editing/TextManipulationController.h (262644 => 262645)
--- trunk/Source/WebCore/editing/TextManipulationController.h 2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Source/WebCore/editing/TextManipulationController.h 2020-06-05 19:59:00 UTC (rev 262645)
@@ -155,8 +155,8 @@
Ref<Node> node;
bool areAllTokensExcluded { true };
bool containsLineBreak { false };
- bool firstTokenContainsLineBreak { false };
- bool lastTokenContainsLineBreak { false };
+ bool firstTokenContainsDelimiter { false };
+ bool lastTokenContainsDelimiter { false };
};
ManipulationUnit parse(StringView, Node*);
Modified: trunk/Tools/ChangeLog (262644 => 262645)
--- trunk/Tools/ChangeLog 2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Tools/ChangeLog 2020-06-05 19:59:00 UTC (rev 262645)
@@ -1,3 +1,16 @@
+2020-06-05 Wenson Hsieh <wenson_hs...@apple.com>
+
+ Text manipulation should exclude characters in the Unicode private use area
+ https://bugs.webkit.org/show_bug.cgi?id=212800
+ <rdar://problem/63736417>
+
+ Reviewed by Sihui Liu.
+
+ Add a new API test to verify the new behavior when extracting PUA characters.
+
+ * TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
+ (TestWebKitAPI::TEST):
+
2020-06-05 Dean Jackson <d...@apple.com>
REGRESSION (r262366): [ Mac wk1 ] webgl/webgl-backing-store-size-update.html is failing
Modified: trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm (262644 => 262645)
--- trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm 2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm 2020-06-05 19:59:00 UTC (rev 262645)
@@ -843,6 +843,37 @@
EXPECT_STREQ("three four", items[1].tokens[0].content.UTF8String);
}
+TEST(TextManipulation, StartTextManipulationExtractsPrivateUseCharactersAsExcludedTokens)
+{
+ auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
+ auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 400, 400)]);
+ [webView _setTextManipulationDelegate:delegate.get()];
+ [webView synchronouslyLoadHTMLString:@"<body>foo\uF8FFbar\uF8FFbaz</body>"]; // U+F8FF is a private use character; written as an escape so it survives transport.
+ auto configuration = adoptNS([[_WKTextManipulationConfiguration alloc] init]);
+
+ done = false;
+ [webView _startTextManipulationsWithConfiguration:configuration.get() completion:^{
+ done = true;
+ }];
+ TestWebKitAPI::Util::run(&done);
+
+ auto *items = [delegate items];
+ EXPECT_EQ(items.count, 1UL); // Private use characters split tokens but must not split the paragraph into several items.
+
+ auto* item = items.firstObject;
+ EXPECT_EQ(item.tokens.count, 5UL); // foo, U+F8FF, bar, U+F8FF, baz.
+ EXPECT_WK_STREQ("foo", item.tokens[0].content);
+ EXPECT_FALSE(item.tokens[0].isExcluded);
+ EXPECT_WK_STREQ("\uF8FF", item.tokens[1].content); // Each private use character becomes its own excluded token.
+ EXPECT_TRUE(item.tokens[1].isExcluded);
+ EXPECT_WK_STREQ("bar", item.tokens[2].content);
+ EXPECT_FALSE(item.tokens[2].isExcluded);
+ EXPECT_WK_STREQ("\uF8FF", item.tokens[3].content);
+ EXPECT_TRUE(item.tokens[3].isExcluded);
+ EXPECT_WK_STREQ("baz", item.tokens[4].content);
+ EXPECT_FALSE(item.tokens[4].isExcluded);
+}
+
TEST(TextManipulation, StartTextManipulationExtractsValuesByNode)
{
auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);