Title: [262645] trunk
Revision
262645
Author
wenson_hs...@apple.com
Date
2020-06-05 12:59:00 -0700 (Fri, 05 Jun 2020)

Log Message

Text manipulation should exclude characters in the Unicode private use area
https://bugs.webkit.org/show_bug.cgi?id=212800
<rdar://problem/63736417>

Reviewed by Sihui Liu.

Source/WebCore:

Consider characters that fall within the Unicode private use area (PUA), in addition to line breaks, as excluded
when extracting tokens during text manipulation. In doing this, we also rename a few member variables in
`ManipulationUnit` to refer to "token delimiters" rather than line breaks.

Test: TextManipulation.StartTextManipulationExtractsPrivateUseCharactersAsExcludedTokens

* editing/TextManipulationController.cpp:
(WebCore::isInPrivateUseArea):
(WebCore::isTokenDelimiter):
(WebCore::TextManipulationController::parse):
(WebCore::TextManipulationController::observeParagraphs):
* editing/TextManipulationController.h:

Tools:

Add a new API test to verify the new behavior when extracting PUA characters.

* TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
(TestWebKitAPI::TEST):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (262644 => 262645)


--- trunk/Source/WebCore/ChangeLog	2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Source/WebCore/ChangeLog	2020-06-05 19:59:00 UTC (rev 262645)
@@ -1,3 +1,24 @@
+2020-06-05  Wenson Hsieh  <wenson_hs...@apple.com>
+
+        Text manipulation should exclude characters outside of the unicode private use area
+        https://bugs.webkit.org/show_bug.cgi?id=212800
+        <rdar://problem/63736417>
+
+        Reviewed by Sihui Liu.
+
+        Consider characters that fall outside of unicode PUA (in addition to line breaks) as excluded when extracting
+        tokens during text manipulation. In doing this, we also rename a few member variables in `ManipulationUnit` to
+        refer to "token delimiters" rather than line breaks.
+
+        Test: TextManipulation.StartTextManipulationExtractsPrivateUseCharactersAsExcludedTokens
+
+        * editing/TextManipulationController.cpp:
+        (WebCore::isInPrivateUseArea):
+        (WebCore::isTokenDelimiter):
+        (WebCore::TextManipulationController::parse):
+        (WebCore::TextManipulationController::observeParagraphs):
+        * editing/TextManipulationController.h:
+
 2020-06-05  Dean Jackson  <d...@apple.com>
 
         REGRESSION (r262366): [ Mac wk1 ] webgl/webgl-backing-store-size-update.html is failing

Modified: trunk/Source/WebCore/editing/TextManipulationController.cpp (262644 => 262645)


--- trunk/Source/WebCore/editing/TextManipulationController.cpp	2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Source/WebCore/editing/TextManipulationController.cpp	2020-06-05 19:59:00 UTC (rev 262645)
@@ -294,6 +294,16 @@
     return false;
 }
 
+static bool isInPrivateUseArea(UChar character)
+{
+    return 0xE000 <= character && character <= 0xF8FF;
+}
+
+static bool isTokenDelimiter(UChar character)
+{
+    return isHTMLLineBreak(character) || isInPrivateUseArea(character);
+}
+
 TextManipulationController::ManipulationUnit TextManipulationController::parse(StringView text, Node* textNode)
 {
     Vector<ManipulationToken> tokens;
@@ -302,18 +312,15 @@
     size_t startPositionOfCurrentToken = 0;
     bool isNodeExcluded = exclusionRuleMatcher.isExcluded(textNode);
     bool containsOnlyHTMLSpace = true;
-    bool containsLineBreak = false;
-    bool firstTokenContainsLineBreak = false;
-    bool lastTokenContainsLineBreak = false;
+    bool containsTokenDelimiter = false;
+    bool firstTokenContainsDelimiter = false;
+    bool lastTokenContainsDelimiter = false;
 
     size_t index = 0;
     for (; index < text.length(); ++index) {
         auto character = text[index];
-        if (isNotHTMLSpace(character)) {
-            containsOnlyHTMLSpace = false;
-            positionOfLastNonHTMLSpace = index;
-        } else if (isHTMLLineBreak(character)) {
-            containsLineBreak = true;
+        if (isTokenDelimiter(character)) {
+            containsTokenDelimiter = true;
             if (positionOfLastNonHTMLSpace != WTF::notFound && startPositionOfCurrentToken <= positionOfLastNonHTMLSpace) {
                 auto tokenString = text.substring(startPositionOfCurrentToken, positionOfLastNonHTMLSpace + 1 - startPositionOfCurrentToken).toString();
                 tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
@@ -320,19 +327,20 @@
                 startPositionOfCurrentToken = positionOfLastNonHTMLSpace + 1;
             }
 
-            while (index < text.length() && !isNotHTMLSpace(text[index]))
+            while (index < text.length() && (isHTMLSpace(text[index]) || isInPrivateUseArea(text[index])))
                 ++index;
 
             --index;
 
-            auto lineBreakTokenString = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
+            auto stringForToken = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
             if (tokens.isEmpty())
-                firstTokenContainsLineBreak = true;
-            tokens.append(ManipulationToken { m_tokenIdentifier.generate(), lineBreakTokenString, tokenInfo(textNode), true });
+                firstTokenContainsDelimiter = true;
+            tokens.append(ManipulationToken { m_tokenIdentifier.generate(), stringForToken, tokenInfo(textNode), true });
             startPositionOfCurrentToken = index + 1;
-            lastTokenContainsLineBreak = true;
-
-            continue;
+            lastTokenContainsDelimiter = true;
+        } else if (isNotHTMLSpace(character)) {
+            containsOnlyHTMLSpace = false;
+            positionOfLastNonHTMLSpace = index;
         }
     }
 
@@ -339,10 +347,10 @@
     if (startPositionOfCurrentToken < text.length()) {
         auto tokenString = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
         tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
-        lastTokenContainsLineBreak = false;
+        lastTokenContainsDelimiter = false;
     }
 
-    return { WTFMove(tokens), *textNode, containsOnlyHTMLSpace || isNodeExcluded, containsLineBreak, firstTokenContainsLineBreak, lastTokenContainsLineBreak };
+    return { WTFMove(tokens), *textNode, containsOnlyHTMLSpace || isNodeExcluded, containsTokenDelimiter, firstTokenContainsDelimiter, lastTokenContainsDelimiter };
 }
 
 void TextManipulationController::addItemIfPossible(Vector<ManipulationUnit>&& units)
@@ -432,7 +440,7 @@
             continue;
 
         auto unitsInCurrentNode = parse(content.text, contentNode);
-        if (unitsInCurrentNode.firstTokenContainsLineBreak)
+        if (unitsInCurrentNode.firstTokenContainsDelimiter)
             addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
 
         if (unitsInCurrentParagraph.isEmpty() && unitsInCurrentNode.areAllTokensExcluded)
@@ -440,7 +448,7 @@
 
         unitsInCurrentParagraph.append(WTFMove(unitsInCurrentNode));
 
-        if (unitsInCurrentNode.lastTokenContainsLineBreak)
+        if (unitsInCurrentNode.lastTokenContainsDelimiter)
             addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
     }
 

Modified: trunk/Source/WebCore/editing/TextManipulationController.h (262644 => 262645)


--- trunk/Source/WebCore/editing/TextManipulationController.h	2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Source/WebCore/editing/TextManipulationController.h	2020-06-05 19:59:00 UTC (rev 262645)
@@ -155,8 +155,8 @@
         Ref<Node> node;
         bool areAllTokensExcluded { true };
         bool containsLineBreak { false };
-        bool firstTokenContainsLineBreak { false };
-        bool lastTokenContainsLineBreak { false };
+        bool firstTokenContainsDelimiter { false };
+        bool lastTokenContainsDelimiter { false };
     };
     ManipulationUnit parse(StringView, Node*);
 

Modified: trunk/Tools/ChangeLog (262644 => 262645)


--- trunk/Tools/ChangeLog	2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Tools/ChangeLog	2020-06-05 19:59:00 UTC (rev 262645)
@@ -1,3 +1,16 @@
+2020-06-05  Wenson Hsieh  <wenson_hs...@apple.com>
+
+        Text manipulation should exclude characters outside of the unicode private use area
+        https://bugs.webkit.org/show_bug.cgi?id=212800
+        <rdar://problem/63736417>
+
+        Reviewed by Sihui Liu.
+
+        Add a new API test to verify the new behavior when extracting PUA characters.
+
+        * TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
+        (TestWebKitAPI::TEST):
+
 2020-06-05  Dean Jackson  <d...@apple.com>
 
         REGRESSION (r262366): [ Mac wk1 ] webgl/webgl-backing-store-size-update.html is failing

Modified: trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm (262644 => 262645)


--- trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm	2020-06-05 19:54:00 UTC (rev 262644)
+++ trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm	2020-06-05 19:59:00 UTC (rev 262645)
@@ -843,6 +843,37 @@
     EXPECT_STREQ("three four", items[1].tokens[0].content.UTF8String);
 }
 
+TEST(TextManipulation, StartTextManipulationExtractsPrivateUseCharactersAsExcludedTokens)
+{
+    auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
+    auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 400, 400)]);
+    [webView _setTextManipulationDelegate:delegate.get()];
+    [webView synchronouslyLoadHTMLString:@"<body>foobarbaz</body>"];
+    auto configuration = adoptNS([[_WKTextManipulationConfiguration alloc] init]);
+
+    done = false;
+    [webView _startTextManipulationsWithConfiguration:configuration.get() completion:^{
+        done = true;
+    }];
+    TestWebKitAPI::Util::run(&done);
+
+    auto *items = [delegate items];
+    EXPECT_EQ(items.count, 1UL);
+
+    auto* item = items.firstObject;
+    EXPECT_EQ(item.tokens.count, 5UL);
+    EXPECT_WK_STREQ("foo", item.tokens[0].content);
+    EXPECT_FALSE(item.tokens[0].isExcluded);
+    EXPECT_WK_STREQ("", item.tokens[1].content);
+    EXPECT_TRUE(item.tokens[1].isExcluded);
+    EXPECT_WK_STREQ("bar", item.tokens[2].content);
+    EXPECT_FALSE(item.tokens[2].isExcluded);
+    EXPECT_WK_STREQ("", item.tokens[3].content);
+    EXPECT_TRUE(item.tokens[3].isExcluded);
+    EXPECT_WK_STREQ("baz", item.tokens[4].content);
+    EXPECT_FALSE(item.tokens[4].isExcluded);
+}
+
 TEST(TextManipulation, StartTextManipulationExtractsValuesByNode)
 {
     auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to