EBernhardson has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/103259


Change subject: Split redlinking out of Templating class
......................................................................

Split redlinking out of Templating class

Change-Id: I547ae6b7625ae8462394e6c2efec1f30e9afb1ae
---
M Flow.php
A includes/Redlinker.php
M includes/Templating.php
M tests/TemplatingTest.php
4 files changed, 296 insertions(+), 206 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow 
refs/changes/59/103259/1

diff --git a/Flow.php b/Flow.php
index 95e02b0..699af8f 100755
--- a/Flow.php
+++ b/Flow.php
@@ -61,6 +61,7 @@
 $wgAutoloadClasses['Flow\DbFactory'] = $dir . 'includes/DbFactory.php';
 $wgAutoloadClasses['Flow\ParsoidUtils'] = $dir . 'includes/ParsoidUtils.php';
 $wgAutoloadClasses['Flow\Templating'] = $dir . 'includes/Templating.php';
+$wgAutoloadClasses['Flow\Redlinker'] = $dir . 'includes/Redlinker.php';
 $wgAutoloadClasses['Flow\UrlGenerator'] = $dir . 'includes/UrlGenerator.php';
 $wgAutoloadClasses['Flow\View'] = $dir . 'includes/View.php';
 $wgAutoloadClasses['Flow\WorkflowLoader'] = $dir . 
'includes/WorkflowLoader.php';
diff --git a/includes/Redlinker.php b/includes/Redlinker.php
new file mode 100644
index 0000000..3bf8c20
--- /dev/null
+++ b/includes/Redlinker.php
@@ -0,0 +1,229 @@
+<?php
+
+namespace Flow;
+
+use Flow\Model\PostRevision;
+use Closure;
+use DOMDocument;
+use DOMNode;
+use LinkBatch;
+use Linker;
+use Title;
+
+/**
+ * Applys red links to an html fragment from parsoid. Relative
+ * links are resolved in relation the the provided title.
+ */
+class Redlinker {
+
+       /**
+        * @var Title To resolve relative links against
+        */
+       protected $title;
+
+       /**
+        * @var LinkBatch
+        */
+       protected $batch;
+
+       /**
+        * @var array
+        */
+       protected $identifiers = array();
+
+       /**
+        * @var array Array of registered post Ids
+        */
+       protected $registered = array();
+
+       /**
+        * @var array Array of processed post Ids
+        */
+       protected $processed = array();
+
+       /**
+        * @param Title $title To resolve relative links against
+        * @param LinkBatch $batch
+        */
+       public function __construct( \Title $title, \LinkBatch $batch ) {
+               $this->title = $title;
+               $this->batch = $batch;
+       }
+
+       /**
+        * Registers callback function to find content links in Parsoid html.
+        * The goal is to batch-load and add to LinkCache as much links as 
possible.
+        *
+        * This can be registered on multiple posts (e.g. multiple topics) to
+        * batch-load as much as possible; all of the identifiers have to be
+        * saved and will be processed as soon as they first are needed.
+        */
+       public function register( PostRevision $post ) {
+               $revisionId = $post->getRevisionId()->getHex();
+               if ( !isset( $this->processed[$revisionId], 
$this->registered[$revisionId] ) ) {
+                       $this->registered[$revisionId] = true;
+                       $identifier = $post->registerRecursive( array( $this, 
'collectLinks' ), array(), 'parsoidlinks' );
+                       $this->identifiers[$identifier] = $post;
+               }
+       }
+
+       /**
+        * DON'T CALL THIS METHOD!
+        * This is for internal use only: it's a callback function to
+        * PostRevision::registerRecursive, which can be registered via
+        * Templating::registerParsoidLinks.
+        *
+        * Returns an array of linked pages in Parsoid.
+        *
+        * @param PostRevision $post
+        * @param array $result
+        * @return array Return array in the format of [result, continue]
+        */
+       public function collectLinks( PostRevision $post, $result ) {
+               // topic titles don't contain html
+               if ( $post->isTopicTitle() ) {
+                       return array( array(), true );
+               }
+
+               // make sure a post is not checked more than once
+               $revisionId = $post->getRevisionId()->getHex();
+               if ( isset( $this->processed[$revisionId] ) ) {
+                       return array( array(), false );
+               }
+               $this->processed[$revisionId] = true;
+
+               /*
+                * Workaround because DOMDocument can't guess charset.
+                * Content should be utf-8. Alternative "workarounds" would be 
to
+                * provide the charset in $response, as either:
+                * * <?xml encoding="utf-8" ?>
+                * * <meta http-equiv="Content-Type" content="text/html; 
charset=utf-8">
+                */
+               $content = mb_convert_encoding( $post->getContent( 'html' ), 
'HTML-ENTITIES', 'UTF-8' );
+
+               // find links in DOM
+               if ( $content ) {
+                       $dom = ParsoidUtils::createDOM( $content );
+                       $batch = $this->batch;
+                       self::forEachLink( $dom, function( DOMNode $linkNode, 
array $parsoid ) use( $batch ) {
+                               $title = Title::newFromText( 
$parsoid['sa']['href'] );
+                               if ( $title !== null ) {
+                                       $batch->addObj( $title );
+                               }
+                       } );
+               }
+
+               /*
+                * $result will not be used; we'll register this callback 
multiple
+                * times and will want to gather overlapping results, so they'll
+                * be stored in $this->batch
+                */
+               return array( array(), true );
+       }
+
+       /**
+        * Execute any pending batched title lookups
+        */
+       public function resolveLinkStatus() {
+               if ( !$this->identifiers ) {
+                       return;
+               }
+               foreach ( $this->identifiers as $identifier => $post ) {
+                       $post->getRecursiveResult( $identifier );
+               }
+               $this->identifiers = array();
+               if ( !$this->batch->isEmpty() ) {
+                       $this->batch->execute();
+                       $this->batch->setArray( array() );
+               }
+       }
+
+       /**
+        * Parsoid ignores red links. With good reason: redlinks should only be
+        * applied when rendering the content, not when it's created.
+        *
+        * This method will parse a given content, fetch all of its links & let 
MW's
+        * Linker class build the link HTML (which will take redlinks into 
account.)
+        * It will then substitute original link HTML for the one Linker 
generated.
+        *
+        * @param string $content
+        * @return string
+        */
+       public function apply( $content ) {
+               if ( !$content ) {
+                       return '';
+               }
+
+               $this->resolveLinkStatus();
+
+               /*
+                * Workaround because DOMDocument can't guess charset.
+                * Content should be utf-8. Alternative "workarounds" would be 
to
+                * provide the charset in $response, as either:
+                * * <?xml encoding="utf-8" ?>
+                * * <meta http-equiv="Content-Type" content="text/html; 
charset=utf-8">
+                */
+               $content = mb_convert_encoding( $content, 'HTML-ENTITIES', 
'UTF-8' );
+
+               $self = $this;
+               $dom = ParsoidUtils::createDOM( $content );
+               self::forEachLink( $dom, function( DOMNode $linkNode, array 
$parsoid ) use ( $self, $dom ) {
+                       $title = $self->createRelativeTitle( 
$parsoid['sa']['href'] );
+                       // Don't process invalid links
+                       if ( $title === null ) {
+                               return;
+                       }
+
+                       // gather existing link attributes
+                       $attributes = array();
+                       foreach ( $linkNode->attributes as $attribute ) {
+                               $attributes[$attribute->name] = 
$attribute->value;
+                       }
+                       // let MW build link HTML based on Parsoid data
+                       $html = Linker::link( $title, htmlspecialchars( 
$linkNode->nodeValue ), $attributes );
+                       // create new DOM from this MW-built link
+                       $replacementNode = ParsoidUtils::createDOM( $html 
)->getElementsByTagName( 'a' )->item( 0 );
+                       // import MW-built link node into content DOM
+                       $replacementNode = $dom->importNode( $replacementNode, 
true );
+                       // replace Parsoid link with MW-built link
+                       $linkNode->parentNode->replaceChild( $replacementNode, 
$linkNode );
+               } );
+
+               return $dom->saveHTML();
+       }
+
+       /**
+        * Subpage links from parsoid don't contain any direct context, its 
applied via
+        * a <base href="..."> tag, so here we apply a similar rule resolving 
against
+        * $wgFlowParsoidTitle falling back to $wgTitle.
+        */
+       public function createRelativeTitle( $text ) {
+               if ( $text && $text[0] === '/' ) {
+                       return Title::newFromText( $this->title->getDBkey() . 
$text, $this->title->getNamespace() );
+               } else {
+                       return Title::newFromText( $text );
+               }
+       }
+
+       /**
+        * Helper method executes a callback on every anchor that contains
+        * an ['sa']['href'] value in data-parsoid
+        *
+        * @param string $content Html
+        * @param Closure $callback Receives (DOMNode, array)
+        */
+       static public function forEachLink( DOMDocument $dom, Closure $callback 
) {
+               $xpath = new \DOMXPath( $dom );
+               $linkNodes = $xpath->query( 
'//a[@rel="mw:WikiLink"][@data-parsoid]' );
+
+               $links = array();
+               foreach ( $linkNodes as $linkNode ) {
+                       $parsoid = $linkNode->getAttribute( 'data-parsoid' );
+                       $parsoid = json_decode( $parsoid, true );
+                       if ( isset( $parsoid['sa']['href'] ) ) {
+                               $callback( $linkNode, $parsoid );
+                       }
+               }
+       }
+}
+
diff --git a/includes/Templating.php b/includes/Templating.php
index 6c91da6..a14a316 100644
--- a/includes/Templating.php
+++ b/includes/Templating.php
@@ -6,16 +6,14 @@
 use Flow\Block\TopicBlock;
 use Flow\Model\AbstractRevision;
 use Flow\Model\PostRevision;
-use Flow\Model\UUID;
-use Flow\Model\Workflow;
 use Flow\View\PostActionMenu;
 use OutputPage;
 // These dont really belong here
 use Html;
 use Linker;
+use LinkBatch;
 use MWTimestamp;
 use RequestContext;
-use Title;
 use User;
 use Flow\Exception\InvalidDataException;
 
@@ -25,31 +23,15 @@
        protected $namespaces;
        protected $globals;
 
-       /**
-        * @var array Array of PostRevision::registerRecursive return values
-        * @see Templating::registerParsoidLinks
-        */
-       public $parsoidLinksIdentifiers = array();
-
-       /**
-        * @var array Array of processed post Ids
-        * @see Templating::registerParsoidLinks
-        */
-       public $parsoidLinksProcessed = array();
-
-       /**
-        * @var array Array of Title objects
-        * @see Templating::registerParsoidLinks
-        */
-       public $parsoidLinks = array();
-
        public function __construct( UrlGenerator $urlGenerator, OutputPage 
$output, array $namespaces = array(), array $globals = array() ) {
+               global $wgTitle, $wgFlowParsoidTitle;
                $this->urlGenerator = $urlGenerator;
                $this->output = $output;
                foreach ( $namespaces as $ns => $path ) {
                        $this->addNamespace( $ns, $path );
                }
                $this->globals = $globals;
+               $this->redlinks = new Redlinker( $wgFlowParsoidTitle ?: 
$wgTitle, new LinkBatch );
        }
 
        public function getOutput() {
@@ -145,6 +127,7 @@
                if ( !$actionMenu->isAllowed( 'view' ) ) {
                        return '';
                }
+
                return $this->render( "flow:topic.html.php", array(
                        'block' => $block,
                        'topic' => $block->getWorkflow(),
@@ -383,7 +366,7 @@
                        if ( $format === 'html' ) {
                                // Parsoid doesn't render redlinks
                                try {
-                                       $content = $this->applyRedlinks( 
$content );
+                                       $content = $this->redlinks->apply( 
$content );
                                } catch ( \Exception $e ) {
                                        wfDebugLog( __CLASS__, __METHOD__ . ': 
Failed applying redlinks for rev_id = ' . $revision->getRevisionId()->getHex() 
);
                                        \MWExceptionHandler::logException( $e );
@@ -395,165 +378,7 @@
                }
        }
 
-       /**
-        * Parsoid ignores red links. With good reason: redlinks should only be
-        * applied when rendering the content, not when it's created.
-        *
-        * This method will parse a given content, fetch all of its links & let 
MW's
-        * Linker class build the link HTML (which will take redlinks into 
account.)
-        * It will then substitute original link HTML for the one Linker 
generated.
-        *
-        * @param string $content
-        * @return string
-        */
-       protected function applyRedlinks( $content ) {
-               if ( !$content ) {
-                       return '';
-               }
-
-               /*
-                * In order to efficiently replace redlinks, multiple recursive
-                * functions (may) have been registered that fetch an array of 
linked-to
-                * titles. Since we'll now need to apply redlinks, it's time to 
execute
-                * all these callbacks & batch-load all of the titles they come 
up with.
-                */
-               foreach ( $this->parsoidLinksIdentifiers as $identifier => 
$post ) {
-                       $post->getRecursiveResult( $identifier );
-                       unset( $this->parsoidLinksIdentifiers[$identifier] );
-               }
-               if ( $this->parsoidLinks ) {
-                       $batch = new \LinkBatch( $this->parsoidLinks );
-                       $batch->execute();
-                       $this->parsoidLinks = array();
-               }
-
-               /*
-                * Workaround because DOMDocument can't guess charset.
-                * Content should be utf-8. Alternative "workarounds" would be 
to
-                * provide the charset in $response, as either:
-                * * <?xml encoding="utf-8" ?>
-                * * <meta http-equiv="Content-Type" content="text/html; 
charset=utf-8">
-                */
-               $content = mb_convert_encoding( $content, 'HTML-ENTITIES', 
'UTF-8' );
-
-               $dom = ParsoidUtils::createDOM( $content );
-
-               // find links in DOM
-               $xpath = new \DOMXPath( $dom );
-               $linkNodes = $xpath->query( 
'//a[@rel="mw:WikiLink"][@data-parsoid]' );
-               foreach ( $linkNodes as $linkNode ) {
-                       $parsoid = $linkNode->getAttribute( 'data-parsoid' );
-                       $parsoid = json_decode( $parsoid, true );
-
-                       if ( isset( $parsoid['sa']['href'] ) ) {
-                               // Don't process invalid links
-                               if ( $parsoid['sa']['href'][0] === '/' ) {
-                                       $title = $this->resolveSubpage( 
$parsoid['sa']['href'] );
-                               } else {
-                                       $title = Title::newFromText( 
$parsoid['sa']['href'] );
-                               }
-                               if ( $title === null ) {
-                                       continue;
-                               }
-                               // gather existing link attributes
-                               $attributes = array();
-                               foreach ( $linkNode->attributes as $attribute ) 
{
-                                       $attributes[$attribute->name] = 
$attribute->value;
-                               }
-
-                               // let MW build link HTML based on Parsoid data
-                               $linkHTML = Linker::link( $title, 
htmlspecialchars( $linkNode->nodeValue ), $attributes );
-
-                               // create new DOM from this MW-built link
-                               $linkDom = ParsoidUtils::createDOM( $linkHTML );
-
-                               // import MW-built link node into content DOM
-                               $replacementNode = 
$linkDom->getElementsByTagName( 'a' )->item( 0 );
-                               $replacementNode = $dom->importNode( 
$replacementNode, true );
-
-                               // replace Parsoid link with MW-built link
-                               $linkNode->parentNode->replaceChild( 
$replacementNode, $linkNode );
-                       }
-               }
-
-               return $dom->saveHTML();
-       }
-
-       protected function resolveSubpage( $text ) {
-               global $wgTitle, $wgFlowParsoidTitle;
-               if ( $wgFlowParsoidTitle === null ) {
-                       return Title::newFromText( $wgTitle->getDBkey() . 
$text, $wgTitle->getNamespace() );
-               } else {
-                       return Title::newFromText( 
$wgFlowParsoidTitle->getDBkey() . $text, $wgFlowParsoidTitle->getNamespace() );
-               }
-       }
-
-       /**
-        * Registers callback function to find content links in Parsoid html.
-        * The goal is to batch-load and add to LinkCache as much links as 
possible.
-        */
-       public function registerParsoidLinks( PostRevision $post ) {
-               /*
-                * This can be registered on multiple posts (e.g. multiple 
topics) to
-                * batch-load as much as possible; all of the identifiers have 
to be
-                * saved and will be processed as soon as they first are needed.
-                */
-               $identifier = $post->registerRecursive( array( $this, 
'registerParsoidLinksCallback' ), array(), 'parsoidlinks' );
-               $this->parsoidLinksIdentifiers[$identifier] = $post;
-       }
-
-       /**
-        * DON'T CALL THIS METHOD!
-        * This is for internal use only: it's a callback function to
-        * PostRevision::registerRecursive, which can be registered via
-        * Templating::registerParsoidLinks.
-        *
-        * Returns an array of linked pages in Parsoid.
-        *
-        * @param PostRevision $post
-        * @param array $result
-        * @return array Return array in the format of [result, continue]
-        */
-       public function registerParsoidLinksCallback( PostRevision $post, 
$result ) {
-               // topic titles don't contain html
-               if ( $post->isTopicTitle() ) {
-                       return array( array(), true );
-               }
-
-               // make sure a post is not checked more than once
-               $revisionId = $post->getRevisionId()->getHex();
-               if ( isset( $this->parsoidLinksProcessed[$revisionId] ) ) {
-                       return array( array(), false );
-               }
-               $this->parsoidLinksProcessed[$revisionId] = true;
-
-               // find links in DOM
-               $content = $post->getContent( 'html' );
-               if ( $content ) {
-                       $dom = ParsoidUtils::createDOM( $content );
-                       $xpath = new \DOMXPath( $dom );
-                       $linkNodes = $xpath->query( 
'//a[@rel="mw:WikiLink"][@data-parsoid]' );
-
-                       foreach ( $linkNodes as $linkNode ) {
-                               $parsoid = $linkNode->getAttribute( 
'data-parsoid' );
-                               $parsoid = json_decode( $parsoid, true );
-
-                               if ( isset( $parsoid['sa']['href'] ) ) {
-                                       // real results will be stored in 
Templating::parsoidLinks
-                                       $link = $parsoid['sa']['href'];
-                                       $title = Title::newFromText( $link );
-                                       if ( $title !== null ) {
-                                               $this->parsoidLinks[$link] = 
$title;
-                                       }
-                               }
-                       }
-               }
-
-               /*
-                * $result will not be used; we'll register this callback 
multiple
-                * times and will want to gather overlapping results, so they'll
-                * be stored at Templating::parsoidLinks
-                */
-               return array( array(), true );
+       public function registerParsoidLinks( AbstractRevision $revision ) {
+               $this->redlinks->register( $revision );
        }
 }
diff --git a/tests/TemplatingTest.php b/tests/TemplatingTest.php
index 9efd856..4eb88cb 100644
--- a/tests/TemplatingTest.php
+++ b/tests/TemplatingTest.php
@@ -4,7 +4,7 @@
 
 use Title;
 
-class TemplatingTest extends \MediaWikiTestCase {
+class RedLinkTest extends \MediaWikiTestCase {
 
        // Default values for PostRevision::newFromRow to work
        static protected $postRow = array(
@@ -59,31 +59,66 @@
        /**
         * @dataProvider redLinkProvider
         */
-       public function testRedLinks( $message, $saHref, $expect ) {
-               // needs a page to resolve subpage links against
-               $this->setMwGlobals( 'wgTitle', Title::newMainPage() );
-
-               $parsoid = htmlentities( json_encode( array( 'sa' => array( 
'href' => $saHref ) ) ) );
-               $uid = Model\UUID::create( '0509b4bf4b2d616abe79080027a08222' );
-               $rev = Model\PostRevision::fromStorageRow( array(
-                       'rev_id' => $uid->getBinary(),
-                       'tree_rev_id' => $uid->getBinary(),
-                       'rev_content' => '<a rel="mw:WikiLink" data-parsoid="' 
. $parsoid . '">' . htmlspecialchars( $saHref ) . '</a>',
-                       'rev_flags' => 'html'
-               ) + self::$postRow );
-
-               $content = $this->mockTemplating()->getContent( $rev, 'html' );
-               $this->assertContains( $expect, $content, $message );
+       public function testApplysRedLinks( $message, $saHref, $expect ) {
+               $anchor = \Html::element( 'a', array(
+                       'rel' => 'mw:WikiLink',
+                       'data-parsoid' => json_encode( array( 'sa' => array( 
'href' => $saHref ) ) ),
+               ), $saHref );
+               $redlink = new Redlinker( Title::newMainPage(), $this->getMock( 
'LinkBatch' ) );
+               $result = $redlink->apply( $anchor );
+               $this->assertContains( $expect, $result, $message );
        }
 
-       protected function mockTemplating() {
-               $urlGenerator = $this->getMockBuilder( 'Flow\UrlGenerator' )
-                       ->disableOriginalConstructor()
-                       ->getMock();
-               $output = $this->getMockBuilder( 'OutputPage' )
-                       ->disableOriginalConstructor()
-                       ->getMock();
+       public function testRegistersAnchors() {
+               $saHref = 'Main_Page';
+               $anchor = \Html::element( 'a', array(
+                       'rel' => 'mw:WikiLink',
+                       'data-parsoid' => json_encode( array( 'sa' => array( 
'href' => $saHref ) ) ),
+               ), $saHref );
 
-               return new Templating( $urlGenerator, $output );
+               // We don't need a real id, just something reasonable.
+               $uid = Model\UUID::getComparisonUUID( null );
+               $post = Model\PostRevision::fromStorageRow( array(
+                       'rev_id' => $uid,
+                       'tree_rev_id' => $uid,
+                       'tree_rev_descendant_id' => $uid,
+                       'tree_parent_id' => $uid,
+                       'rev_content' => $anchor,
+                       'rev_flags' => 'html',
+               ) + self::$postRow );
+               $post->setChildren( array() );
+
+               $batch = $this->getMock( 'LinkBatch' );
+               $batch->expects( $this->once() )
+                       ->method( 'addObj' )
+                       ->with( new MethodReturnsConstraint(
+                               'getDBkey',
+                               $this->matches( $saHref )
+                       ) );
+
+               $redlinker = new Redlinker( Title::newMainPage(), $batch );
+               $redlinker->register( $post );
+               $redlinker->resolveLinkStatus();
+       }
+
+
+}
+
+class MethodReturnsConstraint extends \PHPUnit_Framework_Constraint {
+       public function __construct( $method, \PHPUnit_Framework_Constraint 
$constraint ) {
+               $this->method = $method;
+               $this->constraint = $constraint;
+       }
+
+       protected function matches( $other ) {
+               return $this->constraint->matches( call_user_func( array( 
$other, $this->method ) ) );
+       }
+
+       public function toString() {
+               return $this->constraint->toString();
+       }
+
+       protected function failureDescription( $other ) {
+               return $this->constraint->failureDescription( $other ) . " from 
{$this->method} method";
        }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/103259
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I547ae6b7625ae8462394e6c2efec1f30e9afb1ae
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to