EBernhardson has uploaded a new change for review.
https://gerrit.wikimedia.org/r/103259
Change subject: Split redlinking out of Templating class
......................................................................
Split redlinking out of Templating class
Change-Id: I547ae6b7625ae8462394e6c2efec1f30e9afb1ae
---
M Flow.php
A includes/Redlinker.php
M includes/Templating.php
M tests/TemplatingTest.php
4 files changed, 296 insertions(+), 206 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow
refs/changes/59/103259/1
diff --git a/Flow.php b/Flow.php
index 95e02b0..699af8f 100755
--- a/Flow.php
+++ b/Flow.php
@@ -61,6 +61,7 @@
$wgAutoloadClasses['Flow\DbFactory'] = $dir . 'includes/DbFactory.php';
$wgAutoloadClasses['Flow\ParsoidUtils'] = $dir . 'includes/ParsoidUtils.php';
$wgAutoloadClasses['Flow\Templating'] = $dir . 'includes/Templating.php';
+$wgAutoloadClasses['Flow\Redlinker'] = $dir . 'includes/Redlinker.php';
$wgAutoloadClasses['Flow\UrlGenerator'] = $dir . 'includes/UrlGenerator.php';
$wgAutoloadClasses['Flow\View'] = $dir . 'includes/View.php';
$wgAutoloadClasses['Flow\WorkflowLoader'] = $dir .
'includes/WorkflowLoader.php';
diff --git a/includes/Redlinker.php b/includes/Redlinker.php
new file mode 100644
index 0000000..3bf8c20
--- /dev/null
+++ b/includes/Redlinker.php
@@ -0,0 +1,229 @@
+<?php
+
+namespace Flow;
+
+use Flow\Model\PostRevision;
+use Closure;
+use DOMDocument;
+use DOMNode;
+use LinkBatch;
+use Linker;
+use Title;
+
+/**
+ * Applys red links to an html fragment from parsoid. Relative
+ * links are resolved in relation the the provided title.
+ */
+class Redlinker {
+
+ /**
+ * @var Title To resolve relative links against
+ */
+ protected $title;
+
+ /**
+ * @var LinkBatch
+ */
+ protected $batch;
+
+ /**
+ * @var array
+ */
+ protected $identifiers = array();
+
+ /**
+ * @var array Array of registered post Ids
+ */
+ protected $registered = array();
+
+ /**
+ * @var array Array of processed post Ids
+ */
+ protected $processed = array();
+
+ /**
+ * @param Title $title To resolve relative links against
+ * @param LinkBatch $batch
+ */
+ public function __construct( \Title $title, \LinkBatch $batch ) {
+ $this->title = $title;
+ $this->batch = $batch;
+ }
+
+ /**
+ * Registers callback function to find content links in Parsoid html.
+ * The goal is to batch-load and add to LinkCache as much links as
possible.
+ *
+ * This can be registered on multiple posts (e.g. multiple topics) to
+ * batch-load as much as possible; all of the identifiers have to be
+ * saved and will be processed as soon as they first are needed.
+ */
+ public function register( PostRevision $post ) {
+ $revisionId = $post->getRevisionId()->getHex();
+ if ( !isset( $this->processed[$revisionId],
$this->registered[$revisionId] ) ) {
+ $this->registered[$revisionId] = true;
+ $identifier = $post->registerRecursive( array( $this,
'collectLinks' ), array(), 'parsoidlinks' );
+ $this->identifiers[$identifier] = $post;
+ }
+ }
+
+ /**
+ * DON'T CALL THIS METHOD!
+ * This is for internal use only: it's a callback function to
+ * PostRevision::registerRecursive, which can be registered via
+ * Templating::registerParsoidLinks.
+ *
+ * Returns an array of linked pages in Parsoid.
+ *
+ * @param PostRevision $post
+ * @param array $result
+ * @return array Return array in the format of [result, continue]
+ */
+ public function collectLinks( PostRevision $post, $result ) {
+ // topic titles don't contain html
+ if ( $post->isTopicTitle() ) {
+ return array( array(), true );
+ }
+
+ // make sure a post is not checked more than once
+ $revisionId = $post->getRevisionId()->getHex();
+ if ( isset( $this->processed[$revisionId] ) ) {
+ return array( array(), false );
+ }
+ $this->processed[$revisionId] = true;
+
+ /*
+ * Workaround because DOMDocument can't guess charset.
+ * Content should be utf-8. Alternative "workarounds" would be
to
+ * provide the charset in $response, as either:
+ * * <?xml encoding="utf-8" ?>
+ * * <meta http-equiv="Content-Type" content="text/html;
charset=utf-8">
+ */
+ $content = mb_convert_encoding( $post->getContent( 'html' ),
'HTML-ENTITIES', 'UTF-8' );
+
+ // find links in DOM
+ if ( $content ) {
+ $dom = ParsoidUtils::createDOM( $content );
+ $batch = $this->batch;
+ self::forEachLink( $dom, function( DOMNode $linkNode,
array $parsoid ) use( $batch ) {
+ $title = Title::newFromText(
$parsoid['sa']['href'] );
+ if ( $title !== null ) {
+ $batch->addObj( $title );
+ }
+ } );
+ }
+
+ /*
+ * $result will not be used; we'll register this callback
multiple
+ * times and will want to gather overlapping results, so they'll
+ * be stored in $this->batch
+ */
+ return array( array(), true );
+ }
+
+ /**
+ * Execute any pending batched title lookups
+ */
+ public function resolveLinkStatus() {
+ if ( !$this->identifiers ) {
+ return;
+ }
+ foreach ( $this->identifiers as $identifier => $post ) {
+ $post->getRecursiveResult( $identifier );
+ }
+ $this->identifiers = array();
+ if ( !$this->batch->isEmpty() ) {
+ $this->batch->execute();
+ $this->batch->setArray( array() );
+ }
+ }
+
+ /**
+ * Parsoid ignores red links. With good reason: redlinks should only be
+ * applied when rendering the content, not when it's created.
+ *
+ * This method will parse a given content, fetch all of its links & let
MW's
+ * Linker class build the link HTML (which will take redlinks into
account.)
+ * It will then substitute original link HTML for the one Linker
generated.
+ *
+ * @param string $content
+ * @return string
+ */
+ public function apply( $content ) {
+ if ( !$content ) {
+ return '';
+ }
+
+ $this->resolveLinkStatus();
+
+ /*
+ * Workaround because DOMDocument can't guess charset.
+ * Content should be utf-8. Alternative "workarounds" would be
to
+ * provide the charset in $response, as either:
+ * * <?xml encoding="utf-8" ?>
+ * * <meta http-equiv="Content-Type" content="text/html;
charset=utf-8">
+ */
+ $content = mb_convert_encoding( $content, 'HTML-ENTITIES',
'UTF-8' );
+
+ $self = $this;
+ $dom = ParsoidUtils::createDOM( $content );
+ self::forEachLink( $dom, function( DOMNode $linkNode, array
$parsoid ) use ( $self, $dom ) {
+ $title = $self->createRelativeTitle(
$parsoid['sa']['href'] );
+ // Don't process invalid links
+ if ( $title === null ) {
+ return;
+ }
+
+ // gather existing link attributes
+ $attributes = array();
+ foreach ( $linkNode->attributes as $attribute ) {
+ $attributes[$attribute->name] =
$attribute->value;
+ }
+ // let MW build link HTML based on Parsoid data
+ $html = Linker::link( $title, htmlspecialchars(
$linkNode->nodeValue ), $attributes );
+ // create new DOM from this MW-built link
+ $replacementNode = ParsoidUtils::createDOM( $html
)->getElementsByTagName( 'a' )->item( 0 );
+ // import MW-built link node into content DOM
+ $replacementNode = $dom->importNode( $replacementNode,
true );
+ // replace Parsoid link with MW-built link
+ $linkNode->parentNode->replaceChild( $replacementNode,
$linkNode );
+ } );
+
+ return $dom->saveHTML();
+ }
+
+ /**
+ * Subpage links from parsoid don't contain any direct context, its
applied via
+ * a <base href="..."> tag, so here we apply a similar rule resolving
against
+ * $wgFlowParsoidTitle falling back to $wgTitle.
+ */
+ public function createRelativeTitle( $text ) {
+ if ( $text && $text[0] === '/' ) {
+ return Title::newFromText( $this->title->getDBkey() .
$text, $this->title->getNamespace() );
+ } else {
+ return Title::newFromText( $text );
+ }
+ }
+
+ /**
+ * Helper method executes a callback on every anchor that contains
+ * an ['sa']['href'] value in data-parsoid
+ *
+ * @param string $content Html
+ * @param Closure $callback Receives (DOMNode, array)
+ */
+ static public function forEachLink( DOMDocument $dom, Closure $callback
) {
+ $xpath = new \DOMXPath( $dom );
+ $linkNodes = $xpath->query(
'//a[@rel="mw:WikiLink"][@data-parsoid]' );
+
+ $links = array();
+ foreach ( $linkNodes as $linkNode ) {
+ $parsoid = $linkNode->getAttribute( 'data-parsoid' );
+ $parsoid = json_decode( $parsoid, true );
+ if ( isset( $parsoid['sa']['href'] ) ) {
+ $callback( $linkNode, $parsoid );
+ }
+ }
+ }
+}
+
diff --git a/includes/Templating.php b/includes/Templating.php
index 6c91da6..a14a316 100644
--- a/includes/Templating.php
+++ b/includes/Templating.php
@@ -6,16 +6,14 @@
use Flow\Block\TopicBlock;
use Flow\Model\AbstractRevision;
use Flow\Model\PostRevision;
-use Flow\Model\UUID;
-use Flow\Model\Workflow;
use Flow\View\PostActionMenu;
use OutputPage;
// These dont really belong here
use Html;
use Linker;
+use LinkBatch;
use MWTimestamp;
use RequestContext;
-use Title;
use User;
use Flow\Exception\InvalidDataException;
@@ -25,31 +23,15 @@
protected $namespaces;
protected $globals;
- /**
- * @var array Array of PostRevision::registerRecursive return values
- * @see Templating::registerParsoidLinks
- */
- public $parsoidLinksIdentifiers = array();
-
- /**
- * @var array Array of processed post Ids
- * @see Templating::registerParsoidLinks
- */
- public $parsoidLinksProcessed = array();
-
- /**
- * @var array Array of Title objects
- * @see Templating::registerParsoidLinks
- */
- public $parsoidLinks = array();
-
public function __construct( UrlGenerator $urlGenerator, OutputPage
$output, array $namespaces = array(), array $globals = array() ) {
+ global $wgTitle, $wgFlowParsoidTitle;
$this->urlGenerator = $urlGenerator;
$this->output = $output;
foreach ( $namespaces as $ns => $path ) {
$this->addNamespace( $ns, $path );
}
$this->globals = $globals;
+ $this->redlinks = new Redlinker( $wgFlowParsoidTitle ?:
$wgTitle, new LinkBatch );
}
public function getOutput() {
@@ -145,6 +127,7 @@
if ( !$actionMenu->isAllowed( 'view' ) ) {
return '';
}
+
return $this->render( "flow:topic.html.php", array(
'block' => $block,
'topic' => $block->getWorkflow(),
@@ -383,7 +366,7 @@
if ( $format === 'html' ) {
// Parsoid doesn't render redlinks
try {
- $content = $this->applyRedlinks(
$content );
+ $content = $this->redlinks->apply(
$content );
} catch ( \Exception $e ) {
wfDebugLog( __CLASS__, __METHOD__ . ':
Failed applying redlinks for rev_id = ' . $revision->getRevisionId()->getHex()
);
\MWExceptionHandler::logException( $e );
@@ -395,165 +378,7 @@
}
}
- /**
- * Parsoid ignores red links. With good reason: redlinks should only be
- * applied when rendering the content, not when it's created.
- *
- * This method will parse a given content, fetch all of its links & let
MW's
- * Linker class build the link HTML (which will take redlinks into
account.)
- * It will then substitute original link HTML for the one Linker
generated.
- *
- * @param string $content
- * @return string
- */
- protected function applyRedlinks( $content ) {
- if ( !$content ) {
- return '';
- }
-
- /*
- * In order to efficiently replace redlinks, multiple recursive
- * functions (may) have been registered that fetch an array of
linked-to
- * titles. Since we'll now need to apply redlinks, it's time to
execute
- * all these callbacks & batch-load all of the titles they come
up with.
- */
- foreach ( $this->parsoidLinksIdentifiers as $identifier =>
$post ) {
- $post->getRecursiveResult( $identifier );
- unset( $this->parsoidLinksIdentifiers[$identifier] );
- }
- if ( $this->parsoidLinks ) {
- $batch = new \LinkBatch( $this->parsoidLinks );
- $batch->execute();
- $this->parsoidLinks = array();
- }
-
- /*
- * Workaround because DOMDocument can't guess charset.
- * Content should be utf-8. Alternative "workarounds" would be
to
- * provide the charset in $response, as either:
- * * <?xml encoding="utf-8" ?>
- * * <meta http-equiv="Content-Type" content="text/html;
charset=utf-8">
- */
- $content = mb_convert_encoding( $content, 'HTML-ENTITIES',
'UTF-8' );
-
- $dom = ParsoidUtils::createDOM( $content );
-
- // find links in DOM
- $xpath = new \DOMXPath( $dom );
- $linkNodes = $xpath->query(
'//a[@rel="mw:WikiLink"][@data-parsoid]' );
- foreach ( $linkNodes as $linkNode ) {
- $parsoid = $linkNode->getAttribute( 'data-parsoid' );
- $parsoid = json_decode( $parsoid, true );
-
- if ( isset( $parsoid['sa']['href'] ) ) {
- // Don't process invalid links
- if ( $parsoid['sa']['href'][0] === '/' ) {
- $title = $this->resolveSubpage(
$parsoid['sa']['href'] );
- } else {
- $title = Title::newFromText(
$parsoid['sa']['href'] );
- }
- if ( $title === null ) {
- continue;
- }
- // gather existing link attributes
- $attributes = array();
- foreach ( $linkNode->attributes as $attribute )
{
- $attributes[$attribute->name] =
$attribute->value;
- }
-
- // let MW build link HTML based on Parsoid data
- $linkHTML = Linker::link( $title,
htmlspecialchars( $linkNode->nodeValue ), $attributes );
-
- // create new DOM from this MW-built link
- $linkDom = ParsoidUtils::createDOM( $linkHTML );
-
- // import MW-built link node into content DOM
- $replacementNode =
$linkDom->getElementsByTagName( 'a' )->item( 0 );
- $replacementNode = $dom->importNode(
$replacementNode, true );
-
- // replace Parsoid link with MW-built link
- $linkNode->parentNode->replaceChild(
$replacementNode, $linkNode );
- }
- }
-
- return $dom->saveHTML();
- }
-
- protected function resolveSubpage( $text ) {
- global $wgTitle, $wgFlowParsoidTitle;
- if ( $wgFlowParsoidTitle === null ) {
- return Title::newFromText( $wgTitle->getDBkey() .
$text, $wgTitle->getNamespace() );
- } else {
- return Title::newFromText(
$wgFlowParsoidTitle->getDBkey() . $text, $wgFlowParsoidTitle->getNamespace() );
- }
- }
-
- /**
- * Registers callback function to find content links in Parsoid html.
- * The goal is to batch-load and add to LinkCache as much links as
possible.
- */
- public function registerParsoidLinks( PostRevision $post ) {
- /*
- * This can be registered on multiple posts (e.g. multiple
topics) to
- * batch-load as much as possible; all of the identifiers have
to be
- * saved and will be processed as soon as they first are needed.
- */
- $identifier = $post->registerRecursive( array( $this,
'registerParsoidLinksCallback' ), array(), 'parsoidlinks' );
- $this->parsoidLinksIdentifiers[$identifier] = $post;
- }
-
- /**
- * DON'T CALL THIS METHOD!
- * This is for internal use only: it's a callback function to
- * PostRevision::registerRecursive, which can be registered via
- * Templating::registerParsoidLinks.
- *
- * Returns an array of linked pages in Parsoid.
- *
- * @param PostRevision $post
- * @param array $result
- * @return array Return array in the format of [result, continue]
- */
- public function registerParsoidLinksCallback( PostRevision $post,
$result ) {
- // topic titles don't contain html
- if ( $post->isTopicTitle() ) {
- return array( array(), true );
- }
-
- // make sure a post is not checked more than once
- $revisionId = $post->getRevisionId()->getHex();
- if ( isset( $this->parsoidLinksProcessed[$revisionId] ) ) {
- return array( array(), false );
- }
- $this->parsoidLinksProcessed[$revisionId] = true;
-
- // find links in DOM
- $content = $post->getContent( 'html' );
- if ( $content ) {
- $dom = ParsoidUtils::createDOM( $content );
- $xpath = new \DOMXPath( $dom );
- $linkNodes = $xpath->query(
'//a[@rel="mw:WikiLink"][@data-parsoid]' );
-
- foreach ( $linkNodes as $linkNode ) {
- $parsoid = $linkNode->getAttribute(
'data-parsoid' );
- $parsoid = json_decode( $parsoid, true );
-
- if ( isset( $parsoid['sa']['href'] ) ) {
- // real results will be stored in
Templating::parsoidLinks
- $link = $parsoid['sa']['href'];
- $title = Title::newFromText( $link );
- if ( $title !== null ) {
- $this->parsoidLinks[$link] =
$title;
- }
- }
- }
- }
-
- /*
- * $result will not be used; we'll register this callback
multiple
- * times and will want to gather overlapping results, so they'll
- * be stored at Templating::parsoidLinks
- */
- return array( array(), true );
+ public function registerParsoidLinks( AbstractRevision $revision ) {
+ $this->redlinks->register( $revision );
}
}
diff --git a/tests/TemplatingTest.php b/tests/TemplatingTest.php
index 9efd856..4eb88cb 100644
--- a/tests/TemplatingTest.php
+++ b/tests/TemplatingTest.php
@@ -4,7 +4,7 @@
use Title;
-class TemplatingTest extends \MediaWikiTestCase {
+class RedLinkTest extends \MediaWikiTestCase {
// Default values for PostRevision::newFromRow to work
static protected $postRow = array(
@@ -59,31 +59,66 @@
/**
* @dataProvider redLinkProvider
*/
- public function testRedLinks( $message, $saHref, $expect ) {
- // needs a page to resolve subpage links against
- $this->setMwGlobals( 'wgTitle', Title::newMainPage() );
-
- $parsoid = htmlentities( json_encode( array( 'sa' => array(
'href' => $saHref ) ) ) );
- $uid = Model\UUID::create( '0509b4bf4b2d616abe79080027a08222' );
- $rev = Model\PostRevision::fromStorageRow( array(
- 'rev_id' => $uid->getBinary(),
- 'tree_rev_id' => $uid->getBinary(),
- 'rev_content' => '<a rel="mw:WikiLink" data-parsoid="'
. $parsoid . '">' . htmlspecialchars( $saHref ) . '</a>',
- 'rev_flags' => 'html'
- ) + self::$postRow );
-
- $content = $this->mockTemplating()->getContent( $rev, 'html' );
- $this->assertContains( $expect, $content, $message );
+ public function testApplysRedLinks( $message, $saHref, $expect ) {
+ $anchor = \Html::element( 'a', array(
+ 'rel' => 'mw:WikiLink',
+ 'data-parsoid' => json_encode( array( 'sa' => array(
'href' => $saHref ) ) ),
+ ), $saHref );
+ $redlink = new Redlinker( Title::newMainPage(), $this->getMock(
'LinkBatch' ) );
+ $result = $redlink->apply( $anchor );
+ $this->assertContains( $expect, $result, $message );
}
- protected function mockTemplating() {
- $urlGenerator = $this->getMockBuilder( 'Flow\UrlGenerator' )
- ->disableOriginalConstructor()
- ->getMock();
- $output = $this->getMockBuilder( 'OutputPage' )
- ->disableOriginalConstructor()
- ->getMock();
+ public function testRegistersAnchors() {
+ $saHref = 'Main_Page';
+ $anchor = \Html::element( 'a', array(
+ 'rel' => 'mw:WikiLink',
+ 'data-parsoid' => json_encode( array( 'sa' => array(
'href' => $saHref ) ) ),
+ ), $saHref );
- return new Templating( $urlGenerator, $output );
+ // We don't need a real id, just something reasonable.
+ $uid = Model\UUID::getComparisonUUID( null );
+ $post = Model\PostRevision::fromStorageRow( array(
+ 'rev_id' => $uid,
+ 'tree_rev_id' => $uid,
+ 'tree_rev_descendant_id' => $uid,
+ 'tree_parent_id' => $uid,
+ 'rev_content' => $anchor,
+ 'rev_flags' => 'html',
+ ) + self::$postRow );
+ $post->setChildren( array() );
+
+ $batch = $this->getMock( 'LinkBatch' );
+ $batch->expects( $this->once() )
+ ->method( 'addObj' )
+ ->with( new MethodReturnsConstraint(
+ 'getDBkey',
+ $this->matches( $saHref )
+ ) );
+
+ $redlinker = new Redlinker( Title::newMainPage(), $batch );
+ $redlinker->register( $post );
+ $redlinker->resolveLinkStatus();
+ }
+
+
+}
+
+class MethodReturnsConstraint extends \PHPUnit_Framework_Constraint {
+ public function __construct( $method, \PHPUnit_Framework_Constraint
$constraint ) {
+ $this->method = $method;
+ $this->constraint = $constraint;
+ }
+
+ protected function matches( $other ) {
+ return $this->constraint->matches( call_user_func( array(
$other, $this->method ) ) );
+ }
+
+ public function toString() {
+ return $this->constraint->toString();
+ }
+
+ protected function failureDescription( $other ) {
+ return $this->constraint->failureDescription( $other ) . " from
{$this->method} method";
}
}
--
To view, visit https://gerrit.wikimedia.org/r/103259
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I547ae6b7625ae8462394e6c2efec1f30e9afb1ae
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits