Jdlrobson has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/401776 )

Change subject: Hygiene: Begin refactoring of MobileFormatter mega class
......................................................................

Hygiene: Begin refactoring of MobileFormatter mega class

After a discussion with Piotr we agreed that the MobileFormatter
code is becoming unmanageable and hard to test.

Going forward we will aim to reduce all transforms to simple classes
that take a DomElement and transform it into another DomElement.
This will improve testability and sanity.

As a first pass we will only update the lead paragraph transform
given that we are currently working on improving that feature in
T170006

Bug: T170006
Change-Id: I1fb2dc9e7ad1fe0431819ce3ca6295b3df8410e0
---
M extension.json
M includes/MobileFormatter.php
A includes/transforms/IMobileTransform.php
A includes/transforms/MoveLeadParagraphTransform.php
A tests/phpunit/transforms/MoveLeadParagraphTransformTest.php
5 files changed, 176 insertions(+), 92 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MobileFrontend 
refs/changes/76/401776/1

diff --git a/extension.json b/extension.json
index d650d4d..351d06e 100644
--- a/extension.json
+++ b/extension.json
@@ -45,6 +45,9 @@
        "ExtensionMessagesFiles": {
                "MobileFrontendAlias": "MobileFrontend.alias.php"
        },
+       "AutoloadNamespaces": {
+               "MobileFrontend\\Transforms\\": "includes/transforms/"
+       },
        "AutoloadClasses": {
                "ExtMobileFrontend": "includes/MobileFrontend.body.php",
                "MobileFrontendHooks": "includes/MobileFrontend.hooks.php",
diff --git a/includes/MobileFormatter.php b/includes/MobileFormatter.php
index cfabb07..7a39d4d 100644
--- a/includes/MobileFormatter.php
+++ b/includes/MobileFormatter.php
@@ -2,6 +2,7 @@
 
 use HtmlFormatter\HtmlFormatter;
 use MobileFrontend\ContentProviders\IContentProvider;
+use MobileFrontend\Transforms\MoveLeadParagraphTransform;
 
 /**
  * Converts HTML into a mobile-friendly version
@@ -218,94 +219,6 @@
                }
        }
 
-       /**
-        * Move the first paragraph in the lead section above the infobox
-        *
-        * In order for a paragraph to be moved the following conditions must 
be met:
-        *   - the lead section contains at least one infobox;
-        *   - the paragraph doesn't already appear before the first infobox
-        *     if any in the DOM;
-        *   - the paragraph contains text content, e.g. no <p></p>;
-        *   - the paragraph doesn't contain coordinates, i.e. span#coordinates.
-        *   - article belongs to the MAIN namespace
-        *
-        * Additionally if paragraph immediate sibling is a list (ol or ul 
element), the list
-        * is also moved along with paragraph above infobox.
-        *
-        * Note that the first paragraph is not moved before hatnotes, or mbox 
or other
-        * elements that are not infoboxes.
-        *
-        * @param DOMElement $leadSectionBody
-        * @param DOMDocument $doc Document to which the section belongs
-        */
-       private function moveFirstParagraphBeforeInfobox( $leadSectionBody, 
$doc ) {
-               // Move lead parapgraph only on pages in MAIN namespace (see 
@T163805)
-               if ( $this->title->getNamespace() !== NS_MAIN ) {
-                       return;
-               }
-               $xPath = new DOMXPath( $doc );
-               // Find infoboxes and paragraphs that have text content, i.e. 
paragraphs
-               // that are not empty nor are wrapper paragraphs that contain 
span#coordinates.
-               $infoboxAndParagraphs = $xPath->query(
-                       './table[contains(@class,"infobox")] | 
./p[string-length(text()) > 0]',
-                       $leadSectionBody
-               );
-               // We need both an infobox and a paragraph and the first 
element of our query result
-               // ought to be an infobox.
-               if ( $infoboxAndParagraphs->length >= 2 &&
-                       $infoboxAndParagraphs->item( 0 )->nodeName == 'table'
-               ) {
-                       $firstP = null;
-                       for ( $i = 1; $i < $infoboxAndParagraphs->length; $i++ 
) {
-                               if ( $infoboxAndParagraphs->item( $i 
)->nodeName == 'p' ) {
-                                       $firstP = $infoboxAndParagraphs->item( 
$i );
-                                       break;
-                               }
-                       }
-                       if ( $firstP ) {
-                               $listElementAfterParagraph = null;
-                               $where = $infoboxAndParagraphs->item( 0 );
-
-                               $elementAfterParagraphQuery = $xPath->query( 
'following-sibling::*[1]', $firstP );
-                               if ( $elementAfterParagraphQuery->length > 0 ) {
-                                       $elem = 
$elementAfterParagraphQuery->item( 0 );
-                                       if ( $elem->tagName === 'ol' || 
$elem->tagName === 'ul' ) {
-                                               $listElementAfterParagraph = 
$elem;
-                                       }
-                               }
-
-                               $leadSectionBody->insertBefore( $firstP, $where 
);
-                               if ( $listElementAfterParagraph !== null ) {
-                                       $leadSectionBody->insertBefore( 
$listElementAfterParagraph, $where );
-                               }
-                       }
-               }
-               /**
-                * @see https://phabricator.wikimedia.org/T149884
-                * @todo remove after research is done
-                */
-               if ( MobileContext::singleton()->getMFConfig()->get( 
'MFLogWrappedInfoboxes' ) ) {
-                       $this->logInfoboxesWrappedInContainers( 
$leadSectionBody, $xPath );
-               }
-       }
-
-       /**
-        * Finds all infoboxes which are one or more levels deep in $xPath 
content. When at least one
-        * element is found - log the page title and revision
-        *
-        * @see https://phabricator.wikimedia.org/T149884
-        * @param $leadSectionBody
-        * @param DOMXPath $xPath
-        */
-       private function logInfoboxesWrappedInContainers( $leadSectionBody, 
DOMXPath $xPath ) {
-               $infoboxes = $xPath->query( 
'./*//table[contains(@class,"infobox")]' .
-                       '[not(ancestor::table[contains(@class,"infobox")])]', 
$leadSectionBody );
-               if ( $infoboxes->length > 0 ) {
-                       \MediaWiki\Logger\LoggerFactory::getInstance( 'mobile' 
)->info(
-                               "Found infobox wrapped with container on 
{$this->title} (rev:{$this->revId})"
-                       );
-               }
-       }
        /**
         * Replaces any references links with a link to Special:MobileCite
         *
@@ -646,6 +559,10 @@
         * @param array $transformOptions Options to pass when transforming 
content per section
         */
        protected function makeSections( DOMDocument $doc, array $headings, 
array $transformOptions ) {
+               $leadTransform = $transformOptions[ 
self::SHOW_FIRST_PARAGRAPH_BEFORE_INFOBOX ] &&
+                       $this->title->getNamespace() === NS_MAIN ?
+                               new MoveLeadParagraphTransform( $this->title, 
$this->revId ) : false;
+
                // Find the parser output wrapper div
                $xpath = new DOMXPath( $doc );
                $containers = $xpath->query( 
'body/div[@class="mw-parser-output"][1]' );
@@ -695,8 +612,8 @@
                                                $toc->appendChild( $tocHeading 
);
                                                $sectionBody->appendChild( $toc 
);
                                        }
-                                       if ( $transformOptions[ 
self::SHOW_FIRST_PARAGRAPH_BEFORE_INFOBOX ] ) {
-                                               
$this->moveFirstParagraphBeforeInfobox( $sectionBody, $doc );
+                                       if ( $leadTransform ) {
+                                               $leadTransform->transform( 
$sectionBody );
                                        }
                                }
                                $sectionNumber += 1;
@@ -710,8 +627,8 @@
                }
 
                // If the document had the lead section only:
-               if ( $sectionNumber == 0 && $transformOptions[ 
self::SHOW_FIRST_PARAGRAPH_BEFORE_INFOBOX ] ) {
-                       $this->moveFirstParagraphBeforeInfobox( $sectionBody, 
$doc );
+               if ( $sectionNumber == 0 && $leadTransform ) {
+                       $leadTransform->transform( $sectionBody );
                }
 
                if ( $sectionBody->hasChildNodes() ) {
diff --git a/includes/transforms/IMobileTransform.php 
b/includes/transforms/IMobileTransform.php
new file mode 100644
index 0000000..ee54d72
--- /dev/null
+++ b/includes/transforms/IMobileTransform.php
@@ -0,0 +1,13 @@
+<?php
+
+namespace MobileFrontend\Transforms;
+
+use DomElement;
+
+interface IMobileTransform {
+       /**
+        * Transforms the DomElement in some way
+        * @param DomElement $node to be transformed
+        */
+       public function transform( DomElement $node );
+}
diff --git a/includes/transforms/MoveLeadParagraphTransform.php 
b/includes/transforms/MoveLeadParagraphTransform.php
new file mode 100644
index 0000000..d41269d
--- /dev/null
+++ b/includes/transforms/MoveLeadParagraphTransform.php
@@ -0,0 +1,112 @@
+<?php
+
+namespace MobileFrontend\Transforms;
+
+use DOMXPath;
+use MobileContext;
+use DomElement;
+
+class MoveLeadParagraphTransform implements IMobileTransform {
+       /**
+        * @param string $title for logging purposes
+        * @param number $revId for logging purposes
+        */
+       public function __construct( $title, $revId ) {
+               $this->title = $title;
+               $this->revId = $revId;
+       }
+
+       /**
+        * Rearranges content so that text in the lead paragraph is prioritised 
to appear
+        * before the infobox
+        * @param DomElement $node to be transformed
+        */
+       public function transform( DomElement $node ) {
+               $this->moveFirstParagraphBeforeInfobox( $node, 
$node->ownerDocument );
+       }
+
+       /**
+        * Move the first paragraph in the lead section above the infobox
+        *
+        * In order for a paragraph to be moved the following conditions must 
be met:
+        *   - the lead section contains at least one infobox;
+        *   - the paragraph doesn't already appear before the first infobox
+        *     if any in the DOM;
+        *   - the paragraph contains text content, e.g. no <p></p>;
+        *   - the paragraph doesn't contain coordinates, i.e. span#coordinates.
+        *   - article belongs to the MAIN namespace
+        *
+        * Additionally if paragraph immediate sibling is a list (ol or ul 
element), the list
+        * is also moved along with paragraph above infobox.
+        *
+        * Note that the first paragraph is not moved before hatnotes, or mbox 
or other
+        * elements that are not infoboxes.
+        *
+        * @param DOMElement $leadSectionBody
+        * @param DOMDocument $doc Document to which the section belongs
+        */
+       private function moveFirstParagraphBeforeInfobox( $leadSectionBody, 
$doc ) {
+               $xPath = new DOMXPath( $doc );
+               // Find infoboxes and paragraphs that have text content, i.e. 
paragraphs
+               // that are not empty nor are wrapper paragraphs that contain 
span#coordinates.
+               $infoboxAndParagraphs = $xPath->query(
+                       './table[contains(@class,"infobox")] | 
./p[string-length(text()) > 0]',
+                       $leadSectionBody
+               );
+               // We need both an infobox and a paragraph and the first 
element of our query result
+               // ought to be an infobox.
+               if ( $infoboxAndParagraphs->length >= 2 &&
+                       $infoboxAndParagraphs->item( 0 )->nodeName == 'table'
+               ) {
+                       $firstP = null;
+                       for ( $i = 1; $i < $infoboxAndParagraphs->length; $i++ 
) {
+                               if ( $infoboxAndParagraphs->item( $i 
)->nodeName == 'p' ) {
+                                       $firstP = $infoboxAndParagraphs->item( 
$i );
+                                       break;
+                               }
+                       }
+                       if ( $firstP ) {
+                               $listElementAfterParagraph = null;
+                               $where = $infoboxAndParagraphs->item( 0 );
+
+                               $elementAfterParagraphQuery = $xPath->query( 
'following-sibling::*[1]', $firstP );
+                               if ( $elementAfterParagraphQuery->length > 0 ) {
+                                       $elem = 
$elementAfterParagraphQuery->item( 0 );
+                                       if ( $elem->tagName === 'ol' || 
$elem->tagName === 'ul' ) {
+                                               $listElementAfterParagraph = 
$elem;
+                                       }
+                               }
+
+                               $leadSectionBody->insertBefore( $firstP, $where 
);
+                               if ( $listElementAfterParagraph !== null ) {
+                                       $leadSectionBody->insertBefore( 
$listElementAfterParagraph, $where );
+                               }
+                       }
+               }
+               /**
+                * @see https://phabricator.wikimedia.org/T149884
+                * @todo remove after research is done
+                */
+               if ( MobileContext::singleton()->getMFConfig()->get( 
'MFLogWrappedInfoboxes' ) ) {
+                       $this->logInfoboxesWrappedInContainers( 
$leadSectionBody, $xPath );
+               }
+       }
+
+       /**
+        * Finds all infoboxes which are one or more levels deep in $xPath 
content. When at least one
+        * element is found - log the page title and revision
+        *
+        * @see https://phabricator.wikimedia.org/T149884
+        * @param $leadSectionBody
+        * @param DOMXPath $xPath
+        */
+       private function logInfoboxesWrappedInContainers( $leadSectionBody, 
DOMXPath $xPath ) {
+               $infoboxes = $xPath->query( 
'./*//table[contains(@class,"infobox")]' .
+                       '[not(ancestor::table[contains(@class,"infobox")])]', 
$leadSectionBody );
+               if ( $infoboxes->length > 0 ) {
+                       \MediaWiki\Logger\LoggerFactory::getInstance( 'mobile' 
)->info(
+                               "Found infobox wrapped with container on 
{$this->title} (rev:{$this->revId})"
+                       );
+               }
+       }
+}
diff --git a/tests/phpunit/transforms/MoveLeadParagraphTransformTest.php 
b/tests/phpunit/transforms/MoveLeadParagraphTransformTest.php
new file mode 100644
index 0000000..3ddf67a
--- /dev/null
+++ b/tests/phpunit/transforms/MoveLeadParagraphTransformTest.php
@@ -0,0 +1,39 @@
+<?php
+
+use MobileFrontend\Transforms\MoveLeadParagraphTransform;
+
+/**
+ * @group MobileFrontend
+ */
+class MoveLeadParagraphTransformTest extends MediaWikiTestCase {
+       public static function wrap( $html ) {
+               return "<!DOCTYPE HTML>
+<html><body>$html</body></html>
+";
+       }
+       /**
+        * @dataProvider provideTransform
+        *
+        * @param string $html
+        * @param string $expected
+        */
+       public function testTransform( $html, $expected ) {
+               $transform = new MoveLeadParagraphTransform( 'A', 1 );
+               $doc = new DOMDocument();
+               $doc->loadHTML( self::wrap( $html ) );
+               $transform->transform( $doc->getElementsByTagName( 'body' 
)->item( 0 ) );
+               $this->assertEquals( $doc->saveHTML(), self::wrap( $expected ) 
);
+       }
+
+       public function provideTransform() {
+               $infobox = '<table class="infobox"></table>';
+               $paragraph = '<p>first paragraph</p>';
+
+               return [
+                       [
+                               "$infobox$paragraph",
+                               "$paragraph$infobox",
+                       ]
+               ];
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/401776
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1fb2dc9e7ad1fe0431819ce3ca6295b3df8410e0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MobileFrontend
Gerrit-Branch: master
Gerrit-Owner: Jdlrobson <jrob...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to