Title: [264729] trunk
Revision
264729
Author
wenson_hs...@apple.com
Date
2020-07-22 15:10:20 -0700 (Wed, 22 Jul 2020)

Log Message

Article headlines are split across multiple lines after translating tagesschau.de
https://bugs.webkit.org/show_bug.cgi?id=214652
<rdar://problem/63995571>

Reviewed by Tim Horton.

Source/WebCore:

Treat block-level heading elements (`h1` through `h6`) as item boundaries. On this website, we currently vend
the main heading and sub-heading as separate tokens in the same item, which sometimes causes the machine
translation framework to try and rearrange and split tokens, since it treats the combination of the main heading
and sub-heading text as a single sentence to be translated.

Test: TextManipulation.StartTextManipulationExtractsHeadingElementsAsSeparateItems

* editing/TextManipulationController.cpp:
(WebCore::isEnclosingItemBoundaryElement):

Tools:

Adds a new API test (a reduced version of article headlines from the front page of tagesschau.de).

* TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (264728 => 264729)


--- trunk/Source/WebCore/ChangeLog	2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Source/WebCore/ChangeLog	2020-07-22 22:10:20 UTC (rev 264729)
@@ -1,3 +1,21 @@
+2020-07-22  Wenson Hsieh  <wenson_hs...@apple.com>
+
+        Article headlines are split across multiple lines after translating tagesschau.de
+        https://bugs.webkit.org/show_bug.cgi?id=214652
+        <rdar://problem/63995571>
+
+        Reviewed by Tim Horton.
+
+        Treat block-level heading elements (`h1` through `h6`) as item boundaries. On this website, we currently vend
+        the main heading and sub-heading as separate tokens in the same item, which sometimes causes the machine
+        translation framework to try and rearrange and split tokens, since it treats the combination of the main heading
+        and sub-heading text as a single sentence to be translated.
+
+        Test: TextManipulation.StartTextManipulationExtractsHeadingElementsAsSeparateItems
+
+        * editing/TextManipulationController.cpp:
+        (WebCore::isEnclosingItemBoundaryElement):
+
 2020-07-22  Alex Christensen  <achristen...@webkit.org>
 
         Safari does not present CertificateInfo for service-worker served documents

Modified: trunk/Source/WebCore/editing/TextManipulationController.cpp (264728 => 264729)


--- trunk/Source/WebCore/editing/TextManipulationController.cpp	2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Source/WebCore/editing/TextManipulationController.cpp	2020-07-22 22:10:20 UTC (rev 264729)
@@ -316,6 +316,10 @@
     if (element.hasTagName(HTMLNames::spanTag) && displayType == DisplayType::InlineBlock)
         return true;
 
+    if (displayType == DisplayType::Block && (element.hasTagName(HTMLNames::h1Tag) || element.hasTagName(HTMLNames::h2Tag) || element.hasTagName(HTMLNames::h3Tag)
+        || element.hasTagName(HTMLNames::h4Tag) || element.hasTagName(HTMLNames::h5Tag) || element.hasTagName(HTMLNames::h6Tag)))
+        return true;
+
     return false;
 }
 

Modified: trunk/Tools/ChangeLog (264728 => 264729)


--- trunk/Tools/ChangeLog	2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Tools/ChangeLog	2020-07-22 22:10:20 UTC (rev 264729)
@@ -1,3 +1,15 @@
+2020-07-22  Wenson Hsieh  <wenson_hs...@apple.com>
+
+        Article headlines are split across multiple lines after translating tagesschau.de
+        https://bugs.webkit.org/show_bug.cgi?id=214652
+        <rdar://problem/63995571>
+
+        Reviewed by Tim Horton.
+
+        Adds a new API test (a reduced version of article headlines from the front page of tagesschau.de).
+
+        * TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
+
 2020-07-22  Alex Christensen  <achristen...@webkit.org>
 
         Safari does not present CertificateInfo for service-worker served documents

Modified: trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm (264728 => 264729)


--- trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm	2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm	2020-07-22 22:10:20 UTC (rev 264729)
@@ -1092,6 +1092,35 @@
     TestWebKitAPI::Util::run(&done);
 }
 
+TEST(TextManipulation, StartTextManipulationExtractsHeadingElementsAsSeparateItems) // Expects a floated <p> followed by a floated <h4> to be reported as two separate single-token items.
+{
+    auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
+    auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 400, 400)]);
+    [webView _setTextManipulationDelegate:delegate.get()]; // The extracted items are read back from this delegate below.
+
+    [webView synchronouslyLoadHTMLString:@"<!DOCTYPE html>"
+        "<html>"
+        "  <body>"
+        "    <div style='float: left; width: 300px; height: 150px;'></div>"
+        "    <p style='float: left; width: 600px;'>Hello world</p>"
+        "    <h4 style='float: left; width: 600px;'>This is a heading</h4>"
+        "  </body>"
+        "</html>"];
+
+    done = false; // 'done' is not declared in this test; it is reset here before waiting on it.
+    [webView _startTextManipulationsWithConfiguration:nil completion:^{
+        done = true;
+    }];
+    TestWebKitAPI::Util::run(&done); // Presumably returns once the completion block has set 'done' -- confirm in Util::run.
+
+    auto items = [delegate items];
+    EXPECT_EQ(items.count, 2UL); // One item for the paragraph text, one for the heading text.
+    EXPECT_EQ(items[0].tokens.count, 1UL);
+    EXPECT_WK_STREQ("Hello world", items[0].tokens[0].content);
+    EXPECT_EQ(items[1].tokens.count, 1UL);
+    EXPECT_WK_STREQ("This is a heading", items[1].tokens[0].content);
+}
+
 struct Token {
     NSString *identifier;
     NSString *content;
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to