Title: [262778] trunk
Revision
262778
Author
[email protected]
Date
2020-06-08 23:54:20 -0700 (Mon, 08 Jun 2020)

Log Message

TextManipulation should only convert text from Node's text content to tokens
https://bugs.webkit.org/show_bug.cgi?id=212928

Reviewed by Wenson Hsieh.

Source/WebCore:

TextIterator may emit text like line breaks between nodes. This kind of text is generated based on the range of
TextIterator and style of node. We need this text for splitting tokens or splitting paragraphs, but we should
not convert it to normal tokens. This is because tokens should be created from content of node and text
manipulation fails if content does not match. The change of this kind of text does not indicate change in
content and we may still be able to finish text manipulation.

Test: TextManipulation.CompleteTextManipulationReplaceTwoSimpleParagraphs

* editing/TextManipulationController.cpp:
(WebCore::isInPrivateUseArea):
(WebCore::isTokenDelimiter):
(WebCore::ParagraphContentIterator::currentContent):
(WebCore::ParagraphContentIterator::appendToText):
(WebCore::ParagraphContentIterator::advanceIteratorNodeAndUpdateText):
(WebCore::TextManipulationController::createUnit):
(WebCore::TextManipulationController::parse):
(WebCore::TextManipulationController::observeParagraphs):
(WebCore::TextManipulationController::replace):
* editing/TextManipulationController.h:

Tools:

* TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
(TestWebKitAPI::TEST):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (262777 => 262778)


--- trunk/Source/WebCore/ChangeLog	2020-06-09 06:50:24 UTC (rev 262777)
+++ trunk/Source/WebCore/ChangeLog	2020-06-09 06:54:20 UTC (rev 262778)
@@ -1,3 +1,30 @@
+2020-06-08  Sihui Liu  <[email protected]>
+
+        TextManipulation should only convert text from Node's text content to tokens
+        https://bugs.webkit.org/show_bug.cgi?id=212928
+
+        Reviewed by Wenson Hsieh.
+
+        TextIterator may emit text like line breaks between nodes. This kind of text is generated based on the range of 
+        TextIterator and style of node. We need this text for splitting tokens or splitting paragraphs, but we should 
+        not convert it to normal tokens. This is because tokens should be created from content of node and text 
+        manipulation fails if content does not match. The change of this kind of text does not indicate change in 
+        content and we may still be able to finish text manipulation.
+
+        Test: TextManipulation.CompleteTextManipulationReplaceTwoSimpleParagraphs
+
+        * editing/TextManipulationController.cpp:
+        (WebCore::isInPrivateUseArea):
+        (WebCore::isTokenDelimiter):
+        (WebCore::ParagraphContentIterator::currentContent):
+        (WebCore::ParagraphContentIterator::appendToText):
+        (WebCore::ParagraphContentIterator::advanceIteratorNodeAndUpdateText):
+        (WebCore::TextManipulationController::createUnit):
+        (WebCore::TextManipulationController::parse):
+        (WebCore::TextManipulationController::observeParagraphs):
+        (WebCore::TextManipulationController::replace):
+        * editing/TextManipulationController.h:
+
 2020-06-08  Rob Buis  <[email protected]>
 
         XMLHTTPRequest.send should not send Content-Type headers when Blob has no type

Modified: trunk/Source/WebCore/editing/TextManipulationController.cpp (262777 => 262778)


--- trunk/Source/WebCore/editing/TextManipulationController.cpp	2020-06-09 06:50:24 UTC (rev 262777)
+++ trunk/Source/WebCore/editing/TextManipulationController.cpp	2020-06-09 06:54:20 UTC (rev 262778)
@@ -143,6 +143,16 @@
     flushPendingItemsForCallback();
 }
 
+static bool isInPrivateUseArea(UChar character)
+{
+    return 0xE000 <= character && character <= 0xF8FF;
+}
+
+static bool isTokenDelimiter(UChar character)
+{
+    return isHTMLLineBreak(character) || isInPrivateUseArea(character);
+}
+
 class ParagraphContentIterator {
 public:
     ParagraphContentIterator(const Position& start, const Position& end)
@@ -166,7 +176,7 @@
 
     struct CurrentContent {
         RefPtr<Node> node;
-        StringView text;
+        Vector<String> text;
         bool isTextContent { false };
         bool isReplacedContent { false };
     };
@@ -173,7 +183,7 @@
 
     CurrentContent currentContent()
     {
-        CurrentContent content = { m_node.copyRef(), m_text ? m_text.value() : StringView { }, !!m_text };
+        CurrentContent content = { m_node.copyRef(), m_text ? m_text.value() : Vector<String> { }, !!m_text };
         if (content.node) {
             if (auto* renderer = content.node->renderer()) {
                 if (renderer->isRenderReplaced()) {
@@ -200,17 +210,39 @@
             m_node = m_pastEndNode;
     }
 
+    void appendToText(Vector<String>& text, StringBuilder& stringBuilder)
+    {
+        if (!stringBuilder.isEmpty()) {
+            text.append(stringBuilder.toString());
+            stringBuilder.clear();
+        }
+    }
+
     void advanceIteratorNodeAndUpdateText()
     {
         ASSERT(shouldAdvanceIteratorPastCurrentNode());
 
         StringBuilder stringBuilder;
+        Vector<String> text;
         while (shouldAdvanceIteratorPastCurrentNode()) {
-            stringBuilder.append(m_iterator.text());
+            if (!m_iterator.node()) {
+                auto iteratorText = m_iterator.text();
+                bool containsDelimiter = false;
+                for (unsigned index = 0; index < iteratorText.length() && !containsDelimiter; ++index)
+                    containsDelimiter = isTokenDelimiter(iteratorText[index]);
+
+                if (containsDelimiter) {
+                    appendToText(text, stringBuilder);
+                    text.append({ });
+                }
+            } else
+                stringBuilder.append(m_iterator.text());
+
             m_iterator.advance();
             m_iteratorNode = m_iterator.atEnd() ? nullptr : createLiveRange(m_iterator.range())->firstNode();
         }
-        m_text = { stringBuilder.toString() };
+        appendToText(text, stringBuilder);
+        m_text = text;
     }
 
     TextIterator m_iterator;
@@ -217,7 +249,7 @@
     RefPtr<Node> m_iteratorNode;
     RefPtr<Node> m_node;
     RefPtr<Node> m_pastEndNode;
-    Optional<String> m_text;
+    Optional<Vector<String>> m_text;
 };
 
 static bool shouldExtractValueForTextManipulation(const HTMLInputElement& input)
@@ -294,36 +326,34 @@
     return false;
 }
 
-static bool isInPrivateUseArea(UChar character)
+TextManipulationController::ManipulationUnit TextManipulationController::createUnit(const Vector<String>& text, Node& textNode)
 {
-    return 0xE000 <= character && character <= 0xF8FF;
+    ManipulationUnit unit = { textNode, { } };
+    for (auto& textEntry : text) {
+        if (!textEntry.isNull())
+            parse(unit, textEntry, textNode);
+        else {
+            if (unit.tokens.isEmpty())
+                unit.firstTokenContainsDelimiter = true;
+            unit.lastTokenContainsDelimiter = true;
+        }
+    }
+    return unit;
 }
 
-static bool isTokenDelimiter(UChar character)
+void TextManipulationController::parse(ManipulationUnit& unit, const String& text, Node& textNode)
 {
-    return isHTMLLineBreak(character) || isInPrivateUseArea(character);
-}
-
-TextManipulationController::ManipulationUnit TextManipulationController::parse(StringView text, Node* textNode)
-{
-    Vector<ManipulationToken> tokens;
     ExclusionRuleMatcher exclusionRuleMatcher(m_exclusionRules);
+    bool isNodeExcluded = exclusionRuleMatcher.isExcluded(&textNode);
     size_t positionOfLastNonHTMLSpace = WTF::notFound;
     size_t startPositionOfCurrentToken = 0;
-    bool isNodeExcluded = exclusionRuleMatcher.isExcluded(textNode);
-    bool containsOnlyHTMLSpace = true;
-    bool containsTokenDelimiter = false;
-    bool firstTokenContainsDelimiter = false;
-    bool lastTokenContainsDelimiter = false;
-
     size_t index = 0;
     for (; index < text.length(); ++index) {
         auto character = text[index];
         if (isTokenDelimiter(character)) {
-            containsTokenDelimiter = true;
             if (positionOfLastNonHTMLSpace != WTF::notFound && startPositionOfCurrentToken <= positionOfLastNonHTMLSpace) {
-                auto tokenString = text.substring(startPositionOfCurrentToken, positionOfLastNonHTMLSpace + 1 - startPositionOfCurrentToken).toString();
-                tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
+                auto stringForToken = text.substring(startPositionOfCurrentToken, positionOfLastNonHTMLSpace + 1 - startPositionOfCurrentToken);
+                unit.tokens.append(ManipulationToken { m_tokenIdentifier.generate(), stringForToken, tokenInfo(&textNode), isNodeExcluded });
                 startPositionOfCurrentToken = positionOfLastNonHTMLSpace + 1;
             }
 
@@ -332,25 +362,24 @@
 
             --index;
 
-            auto stringForToken = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
-            if (tokens.isEmpty())
-                firstTokenContainsDelimiter = true;
-            tokens.append(ManipulationToken { m_tokenIdentifier.generate(), stringForToken, tokenInfo(textNode), true });
+            auto stringForToken = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken);
+            if (unit.tokens.isEmpty() && !unit.firstTokenContainsDelimiter)
+                unit.firstTokenContainsDelimiter = true;
+            unit.tokens.append(ManipulationToken { m_tokenIdentifier.generate(), stringForToken, tokenInfo(&textNode), true });
             startPositionOfCurrentToken = index + 1;
-            lastTokenContainsDelimiter = true;
+            unit.lastTokenContainsDelimiter = true;
         } else if (isNotHTMLSpace(character)) {
-            containsOnlyHTMLSpace = false;
+            if (!isNodeExcluded)
+                unit.areAllTokensExcluded = false;
             positionOfLastNonHTMLSpace = index;
         }
     }
 
     if (startPositionOfCurrentToken < text.length()) {
-        auto tokenString = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
-        tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
-        lastTokenContainsDelimiter = false;
+        auto stringForToken = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken);
+        unit.tokens.append(ManipulationToken { m_tokenIdentifier.generate(), stringForToken, tokenInfo(&textNode), isNodeExcluded });
+        unit.lastTokenContainsDelimiter = false;
     }
-
-    return { WTFMove(tokens), *textNode, containsOnlyHTMLSpace || isNodeExcluded, containsTokenDelimiter, firstTokenContainsDelimiter, lastTokenContainsDelimiter };
 }
 
 void TextManipulationController::addItemIfPossible(Vector<ManipulationUnit>&& units)
@@ -432,7 +461,7 @@
 
         if (content.isReplacedContent) {
             if (!unitsInCurrentParagraph.isEmpty())
-                unitsInCurrentParagraph.append(ManipulationUnit { { ManipulationToken { m_tokenIdentifier.generate(), "[]", tokenInfo(content.node.get()), true } }, *contentNode });
+                unitsInCurrentParagraph.append(ManipulationUnit { *contentNode, { ManipulationToken { m_tokenIdentifier.generate(), "[]", tokenInfo(content.node.get()), true } } });
             continue;
         }
 
@@ -439,16 +468,17 @@
         if (!content.isTextContent)
             continue;
 
-        auto unitsInCurrentNode = parse(content.text, contentNode);
-        if (unitsInCurrentNode.firstTokenContainsDelimiter)
+        auto currentUnit = createUnit(content.text, *contentNode);
+        if (currentUnit.firstTokenContainsDelimiter)
             addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
 
-        if (unitsInCurrentParagraph.isEmpty() && unitsInCurrentNode.areAllTokensExcluded)
-                continue;
+        if (unitsInCurrentParagraph.isEmpty() && currentUnit.areAllTokensExcluded)
+            continue;
 
-        unitsInCurrentParagraph.append(WTFMove(unitsInCurrentNode));
+        bool currentUnitEndsWithDelimiter = currentUnit.lastTokenContainsDelimiter;
+        unitsInCurrentParagraph.append(WTFMove(currentUnit));
 
-        if (unitsInCurrentNode.lastTokenContainsDelimiter)
+        if (currentUnitEndsWithDelimiter)
             addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
     }
 
@@ -680,7 +710,7 @@
 
             tokensInCurrentNode.append(item.tokens[currentTokenIndex]);
         } else
-            tokensInCurrentNode = parse(content.text, content.node.get()).tokens;
+            tokensInCurrentNode = createUnit(content.text, *content.node).tokens;
 
         bool isNodeIncluded = WTF::anyOf(tokensInCurrentNode, [] (auto& token) {
             return !token.isExcluded;

Modified: trunk/Source/WebCore/editing/TextManipulationController.h (262777 => 262778)


--- trunk/Source/WebCore/editing/TextManipulationController.h	2020-06-09 06:50:24 UTC (rev 262777)
+++ trunk/Source/WebCore/editing/TextManipulationController.h	2020-06-09 06:54:20 UTC (rev 262778)
@@ -151,14 +151,14 @@
     };
 
     struct ManipulationUnit {
+        Ref<Node> node;
         Vector<ManipulationToken> tokens;
-        Ref<Node> node;
         bool areAllTokensExcluded { true };
-        bool containsLineBreak { false };
         bool firstTokenContainsDelimiter { false };
         bool lastTokenContainsDelimiter { false };
     };
-    ManipulationUnit parse(StringView, Node*);
+    ManipulationUnit createUnit(const Vector<String>&, Node&);
+    void parse(ManipulationUnit&, const String&, Node&);
 
     void addItem(ManipulationItemData&&);
     void addItemIfPossible(Vector<ManipulationUnit>&&);

Modified: trunk/Tools/ChangeLog (262777 => 262778)


--- trunk/Tools/ChangeLog	2020-06-09 06:50:24 UTC (rev 262777)
+++ trunk/Tools/ChangeLog	2020-06-09 06:54:20 UTC (rev 262778)
@@ -1,3 +1,13 @@
+2020-06-08  Sihui Liu  <[email protected]>
+
+        TextManipulation should only convert text from Node's text content to tokens
+        https://bugs.webkit.org/show_bug.cgi?id=212928
+
+        Reviewed by Wenson Hsieh.
+
+        * TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
+        (TestWebKitAPI::TEST):
+
 2020-06-08  Diego Pino Garcia  <[email protected]>
 
         [webkitpy] Check 'bug-search' returns a non null result before parsing

Modified: trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm (262777 => 262778)


--- trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm	2020-06-09 06:50:24 UTC (rev 262777)
+++ trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm	2020-06-09 06:54:20 UTC (rev 262778)
@@ -1019,6 +1019,40 @@
     EXPECT_WK_STREQ("<p>hello, <i>WebKit</i></p>", [webView stringByEvaluatingJavaScript:@"document.body.innerHTML"]);
 }
 
+TEST(TextManipulation, CompleteTextManipulationReplaceTwoSimpleParagraphs)
+{
+    auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
+    auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 400, 400)]);
+    [webView _setTextManipulationDelegate:delegate.get()];
+    [webView synchronouslyLoadHTMLString:@"<p>hello</p>world"];
+    auto configuration = adoptNS([[_WKTextManipulationConfiguration alloc] init]);
+
+    done = false;
+    [webView _startTextManipulationsWithConfiguration:configuration.get() completion:^{
+        done = true;
+    }];
+    TestWebKitAPI::Util::run(&done);
+
+    auto *items = [delegate items];
+    EXPECT_EQ(items.count, 2UL);
+    EXPECT_EQ(items[0].tokens.count, 1UL);
+    EXPECT_STREQ("hello", items[0].tokens[0].content.UTF8String);
+    EXPECT_EQ(items[1].tokens.count, 1UL);
+    EXPECT_STREQ("world", items[1].tokens[0].content.UTF8String);
+
+    done = false;
+    [webView _completeTextManipulationForItems:@[
+        createItem(items[0].identifier, {{ items[0].tokens[0].identifier, @"Hello" }}).get(),
+        createItem(items[1].identifier, {{ items[1].tokens[0].identifier, @"World" }}).get()
+    ] completion:^(NSArray<NSError *> *errors) {
+        EXPECT_EQ(errors, nil);
+        done = true;
+    }];
+    TestWebKitAPI::Util::run(&done);
+
+    EXPECT_WK_STREQ("<p>Hello</p>World", [webView stringByEvaluatingJavaScript:@"document.body.innerHTML"]);
+}
+
 TEST(TextManipulation, CompleteTextManipulationReplaceMultipleSimpleParagraphs)
 {
     auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to