EBernhardson has uploaded a new change for review.
https://gerrit.wikimedia.org/r/191697
Change subject: WIP: Use fixtures for parsoid in unit tests
......................................................................
WIP: Use fixtures for parsoid in unit tests
This still has a bug where it seems like new fixtures are generated on
each run of the tests rather than re-using the previously generated fixtures
but not sure why yet.
Additionally a couple unit tests related to this conversion are still failing,
needs more work.
Change-Id: I069d71ac1dad8cd8011b752cbb9f618718fab395
---
M autoload.php
M container-test.php
M container.php
A includes/Parsoid/ContentConverter.php
A includes/Parsoid/Converter/FilesystemCachingDecorator.php
A includes/Parsoid/Converter/PHPConverter.php
A includes/Parsoid/Converter/ParsoidConverter.php
M includes/Parsoid/Utils.php
M tests/phpunit/Parsoid/ReferenceExtractorTest.php
9 files changed, 289 insertions(+), 153 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow
refs/changes/97/191697/1
diff --git a/autoload.php b/autoload.php
index 72f28b1..8311850 100644
--- a/autoload.php
+++ b/autoload.php
@@ -243,7 +243,11 @@
'Flow\\NotificationFormatter' => __DIR__ .
'/includes/Notifications/Formatter.php',
'Flow\\NotificationsUserLocator' => __DIR__ .
'/includes/Notifications/UserLocator.php',
'Flow\\OccupationController' => __DIR__ .
'/includes/TalkpageManager.php',
+ 'Flow\\Parsoid\\ContentConverter' => __DIR__ .
'/includes/Parsoid/ContentConverter.php',
'Flow\\Parsoid\\ContentFixer' => __DIR__ .
'/includes/Parsoid/ContentFixer.php',
+ 'Flow\\Parsoid\\Converter\\FilesystemCachingDecorator' => __DIR__ .
'/includes/Parsoid/Converter/FilesystemCachingDecorator.php',
+ 'Flow\\Parsoid\\Converter\\PHPConverter' => __DIR__ .
'/includes/Parsoid/Converter/PHPConverter.php',
+ 'Flow\\Parsoid\\Converter\\ParsoidConverter' => __DIR__ .
'/includes/Parsoid/Converter/ParsoidConverter.php',
'Flow\\Parsoid\\Extractor' => __DIR__ .
'/includes/Parsoid/Extractor.php',
'Flow\\Parsoid\\Extractor\\CategoryExtractor' => __DIR__ .
'/includes/Parsoid/Extractor/CategoryExtractor.php',
'Flow\\Parsoid\\Extractor\\ExtLinkExtractor' => __DIR__ .
'/includes/Parsoid/Extractor/ExtLinkExtractor.php',
diff --git a/container-test.php b/container-test.php
index 087627b..a8f8076 100644
--- a/container-test.php
+++ b/container-test.php
@@ -5,6 +5,8 @@
// need a testcase to get at the mocking methods.
$testCase = new Flow\Tests\FlowTestCase();
+// The default configuration of the SpamBlacklist extension reaches out to
+// meta to collect the blacklist. Rather than worying about this just turn it
off.
$container['controller.spamblacklist'] = $testCase->getMockBuilder(
'Flow\\SpamFilter\\SpamBlacklist' )
->disableOriginalConstructor()
->getMock();
@@ -13,4 +15,16 @@
->method( 'validate' )
->will( $testCase->returnValue( Status::newGood() ) );
+$converter = $container['content_converter'];
+if ( $converter instanceof Flow\Parsoid\Converter\ParsoidConverter ) {
+ // Decorate the content converter with a version that stores local
fixtures so we don't have
+ // external dependencies while testing.
+ $container['content_converter'] = $c->share( function( $c ) use (
$converter ) {
+ return new Flow\Parsoid\Converter\FilesystemCachingDecorator(
+ $converter,
+ __DIR__ . '/tests/phpunit/fixtures/parsoid'
+ );
+ } );
+}
+
return $container;
diff --git a/container.php b/container.php
index d64a2b0..6733512 100644
--- a/container.php
+++ b/container.php
@@ -1162,4 +1162,20 @@
$c['listener.editcount'] = $c->share( function( $c ) {
return new \Flow\Data\Listener\EditCountListener( $c['flow_actions'] );
} );
+
+$c['content_converter'] = $c->share( function( $c ) {
+ global $wgFlowParsoidURL, $wgFlowParsoidPrefix, $wgFlowParsoidTimeout,
$wgFlowParsoidForwardCookies;
+
+ if ( $wgFlowParsoidURL ) {
+ return new Flow\Parsoid\Converter\ParsoidConverter(
+ $wgFlowParsoidURL,
+ $wgFlowParsoidPrefix,
+ $wgFlowParsoidTimeout,
+ $wgFlowParsoidForwardCookies
+ );
+ } else {
+ return new Flow\Parsoid\Converter\PHPConverter();
+ }
+} );
+
return $c;
diff --git a/includes/Parsoid/ContentConverter.php
b/includes/Parsoid/ContentConverter.php
new file mode 100644
index 0000000..095927d
--- /dev/null
+++ b/includes/Parsoid/ContentConverter.php
@@ -0,0 +1,28 @@
+<?php
+
+namespace Flow\Parsoid;
+
+use Flow\Exception\NoParsoidException;
+use Flow\Exception\WikitextException;
+use Title;
+
+interface ContentConverter {
+ /**
+ * Convert from/to wikitext/html
+ *
+ * @param string $from Format of content to convert: html|wikitext
+ * @param string $to Format to convert to: html|wikitext
+ * @param string $content
+ * @param Title $title
+ * @return string
+ * @throws NoParsoidException When there are errors contacting parsoid
+ * @throws WikitextException When conversion is unsupported by the
converter
+ */
+ function convert( $from, $to, $content, Title $title );
+
+ /**
+ * @return array A list of resource loader modules that need to be
included
+ * with content converter by this converter.
+ */
+ function getRequiredModules();
+}
diff --git a/includes/Parsoid/Converter/FilesystemCachingDecorator.php
b/includes/Parsoid/Converter/FilesystemCachingDecorator.php
new file mode 100644
index 0000000..8f846bc
--- /dev/null
+++ b/includes/Parsoid/Converter/FilesystemCachingDecorator.php
@@ -0,0 +1,65 @@
+<?php
+
+namespace Flow\Parsoid\Converter;
+
+use Flow\Parsoid\ContentConverter;
+use Title;
+
+/**
+ * Primarily intended to be used from unit tests to remove external
+ * dependencies on parsoid. Decorates an existing ContentConverter to
+ * cache its values into a directory which can be commited to git.
+ *
+ * Does not handle updating the fixtures, every once in awhile
+ * developers will need to purge the fixture directory and run the
+ * tests from scratch.
+ *
+ * Be carefull to match your fixture directories to the converter
+ * being decorated. Instances decorating different converters must
+ * use separate directories
+ */
+class FilesystemCachingDecorator implements ContentConverter {
+
+ /**
+ * @var ContentConverter The converter being decorated
+ */
+ protected $converter;
+
+ /**
+ * @var string The directory where fixtures are stored
+ */
+ protected $fixtureDir;
+
+ /**
+ * @var ContentConverter $converter The converter being decorated
+ * @var string $fixtureDir The directory where fixtures are stored
+ */
+ public function __construct( ContentConverter $converter, $fixtureDir )
{
+ $this->converter = $converter;
+ $this->fixtureDir = $fixtureDir;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public function getRequiredModules() {
+ return $this->converter->getRequiredModules();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public function convert( $from, $to, $content, Title $title ) {
+ $key = md5( $from . $to . $content . $title->getPrefixedText()
);
+ $file = "{$this->fixtureDir}/$key";
+ if ( file_exists( $file ) ) {
+ $content = file_get_contents( $file );
+ } else {
+ $content = $this->converter->convert( $from, $to,
$content, $title );
+ file_put_contents( $file, $content );
+ }
+
+ return $content;
+ }
+}
+
diff --git a/includes/Parsoid/Converter/PHPConverter.php
b/includes/Parsoid/Converter/PHPConverter.php
new file mode 100644
index 0000000..ccbd93f
--- /dev/null
+++ b/includes/Parsoid/Converter/PHPConverter.php
@@ -0,0 +1,47 @@
+<?php
+
+namespace Flow\Parsoid\Converter;
+
+use Flow\Exception\WikitextException;
+use Flow\Parsoid\ContentConverter;
+use Title;
+
+class PHPConverter implements ContentConverter {
+
+ /**
+ * {@inheritDoc}
+ */
+ public function getRequiredModules() {
+ return array();
+ }
+
+ /**
+ * Convert from/to wikitext/html using Parser.
+ *
+ * This only supports wikitext to HTML.
+ *
+ * @param string $from Format of content to convert: wikitext
+ * @param string $to Format to convert to: html
+ * @param string $content
+ * @param Title $title
+ * @return string
+ * @throws WikitextException When the conversion is unsupported
+ */
+ public function convert( $from, $to, $content, Title $title ) {
+ if ( $from === $to || $content === '' ) {
+ return $content;
+ }
+ if ( $from !== 'wikitext' && $to !== 'html' ) {
+ throw new WikitextException( 'Parser only supports
wikitext to HTML conversion', 'process-wikitext' );
+ }
+
+ global $wgParser;
+
+ $options = new \ParserOptions;
+ $options->setTidy( true );
+ $options->setEditSection( false );
+
+ $output = $wgParser->parse( $content, $title, $options );
+ return $output->getText();
+ }
+}
diff --git a/includes/Parsoid/Converter/ParsoidConverter.php
b/includes/Parsoid/Converter/ParsoidConverter.php
new file mode 100644
index 0000000..2b955d4
--- /dev/null
+++ b/includes/Parsoid/Converter/ParsoidConverter.php
@@ -0,0 +1,104 @@
+<?php
+
+namespace Flow\Parsoid\Converter;
+
+use Flow\Exception\NoParsoidException;
+use Flow\Exception\WikitextException;
+use Flow\Parsoid\ContentConverter;
+use Flow\Parsoid\Utils;
+use RequestContext;
+use Title;
+use User;
+
+class ParsoidConverter implements ContentConverter {
+
+ public function __construct( $url, $prefix, $timeout, $forwardCookies )
{
+ $this->url = $url;
+ $this->prefix = $prefix;
+ $this->timeout = $timeout;
+ $this->forwardCookies = $forwardCookies;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public function getRequiredModules() {
+ return array( 'mediawiki.skinning.content.parsoid' );
+ }
+
+ /**
+ * Convert from/to wikitext/html via Parsoid.
+ *
+ * This will assume Parsoid is installed.
+ *
+ * @param string $from Format of content to convert: html|wikitext
+ * @param string $to Format to convert to: html|wikitext
+ * @param string $content
+ * @param Title $title
+ * @return string
+ * @throws NoParsoidException When parsoid configuration is not
available
+ * @throws WikitextException When conversion is unsupported
+ */
+ public function convert( $from, $to, $content, Title $title ) {
+ if ( $from == 'html' ) {
+ $from = 'html';
+ } elseif ( in_array( $from, array( 'wt', 'wikitext' ) ) ) {
+ $from = 'wt';
+ } else {
+ throw new WikitextException( 'Unknown source format: '
. $from, 'process-wikitext' );
+ }
+
+ $request = \MWHttpRequest::factory(
+ $this->url . '/' . $this->prefix . '/' .
$title->getPrefixedDBkey(),
+ array(
+ 'method' => 'POST',
+ 'postData' => wfArrayToCgi( array( $from =>
$content ) ),
+ 'body' => true,
+ 'timeout' => $this->timeout,
+ 'connectTimeout' => 'default',
+ )
+ );
+
+ if ( $this->forwardCookies && !User::isEveryoneAllowed( 'read'
) ) {
+ if ( PHP_SAPI === 'cli' ) {
+ // From the command line we need to generate a
cookie
+ $cookies =
Utils::generateForwardedCookieForCli();
+ } else {
+ $cookies =
RequestContext::getMain()->getRequest()->getHeader( 'Cookie' );
+ }
+ $request->setHeader( 'Cookie', $cookies );
+ }
+
+ $status = $request->execute();
+ if ( !$status->isOK() ) {
+ wfDebugLog( 'Flow', __METHOD__ . ': Failed contacting
parsoid: ' . $status->getMessage()->text() );
+ throw new NoParsoidException( 'Failed contacting
Parsoid', 'process-wikitext' );
+ }
+ $response = $request->getContent();
+
+ // HTML is wrapped in <body> tag, undo that.
+ // unless $response is empty
+ if ( $to == 'html' && $response ) {
+ /*
+ * Workaround because DOMDocument can't guess charset.
+ * Parsoid provides utf-8. Alternative "workarounds"
would be to
+ * provide the charset in $response, as either:
+ * * <?xml encoding="utf-8" ?>
+ * * <meta http-equiv="Content-Type"
content="text/html; charset=utf-8">
+ */
+ $response = mb_convert_encoding( $response,
'HTML-ENTITIES', 'UTF-8' );
+
+ $dom = Utils::createDOM( $response );
+ $body = $dom->getElementsByTagName( 'body' )->item(0);
+
+ $response = '';
+ foreach( $body->childNodes as $child ) {
+ $response .= $child->ownerDocument->saveHTML(
$child );
+ }
+ } elseif ( !in_array( $to, array( 'wt', 'wikitext' ) ) ) {
+ throw new WikitextException( "Unknown format requested:
" . $to, 'process-wikitext' );
+ }
+
+ return $response;
+ }
+}
diff --git a/includes/Parsoid/Utils.php b/includes/Parsoid/Utils.php
index d0b9a9d..aa8b712 100644
--- a/includes/Parsoid/Utils.php
+++ b/includes/Parsoid/Utils.php
@@ -28,18 +28,9 @@
* @throws InvalidDataException When $title does not exist
*/
public static function convert( $from, $to, $content, Title $title ) {
- if ( $from === $to || $content === '' ) {
- return $content;
- }
-
- try {
- self::parsoidConfig();
- } catch ( NoParsoidException $e ) {
- // If we have no parsoid config, fallback to the parser.
- return self::parser( $from, $to, $content, $title );
- }
-
- return self::parsoid( $from, $to, $content, $title );
+ /** @var Converter */
+ $converter = Container::get( 'content_converter' );
+ return $converter->convert( $from, $to, $content, $title );
}
/**
@@ -63,132 +54,6 @@
$lang = $lang ?: $wgLang;
return $lang->truncate( $plain, $truncateLength );
}
- }
-
- /**
- * Convert from/to wikitext/html via Parsoid.
- *
- * This will assume Parsoid is installed.
- *
- * @param string $from Format of content to convert: html|wikitext
- * @param string $to Format to convert to: html|wikitext
- * @param string $content
- * @param Title $title
- * @return string
- * @throws NoParsoidException When parsoid configuration is not
available
- * @throws WikitextException When conversion is unsupported
- */
- protected static function parsoid( $from, $to, $content, Title $title )
{
- list( $parsoidURL, $parsoidPrefix, $parsoidTimeout,
$parsoidForwardCookies ) = self::parsoidConfig();
-
- if ( $from == 'html' ) {
- $from = 'html';
- } elseif ( in_array( $from, array( 'wt', 'wikitext' ) ) ) {
- $from = 'wt';
- } else {
- throw new WikitextException( 'Unknown source format: '
. $from, 'process-wikitext' );
- }
-
- $request = \MWHttpRequest::factory(
- $parsoidURL . '/' . $parsoidPrefix . '/' .
$title->getPrefixedDBkey(),
- array(
- 'method' => 'POST',
- 'postData' => wfArrayToCgi( array( $from =>
$content ) ),
- 'body' => true,
- 'timeout' => $parsoidTimeout,
- 'connectTimeout' => 'default',
- )
- );
- if ( $parsoidForwardCookies && !User::isEveryoneAllowed( 'read'
) ) {
- if ( PHP_SAPI === 'cli' ) {
- // From the command line we need to generate a
cookie
- $cookies =
self::generateForwardedCookieForCli();
- } else {
- $cookies =
RequestContext::getMain()->getRequest()->getHeader( 'Cookie' );
- }
- $request->setHeader( 'Cookie', $cookies );
- }
- $status = $request->execute();
- if ( !$status->isOK() ) {
- wfDebugLog( 'Flow', __METHOD__ . ': Failed contacting
parsoid: ' . $status->getMessage()->text() );
- throw new NoParsoidException( 'Failed contacting
Parsoid', 'process-wikitext' );
- }
- $response = $request->getContent();
-
- // HTML is wrapped in <body> tag, undo that.
- // unless $response is empty
- if ( $to == 'html' && $response ) {
- /*
- * Workaround because DOMDocument can't guess charset.
- * Parsoid provides utf-8. Alternative "workarounds"
would be to
- * provide the charset in $response, as either:
- * * <?xml encoding="utf-8" ?>
- * * <meta http-equiv="Content-Type"
content="text/html; charset=utf-8">
- */
- $response = mb_convert_encoding( $response,
'HTML-ENTITIES', 'UTF-8' );
-
- $dom = self::createDOM( $response );
- $body = $dom->getElementsByTagName( 'body' )->item(0);
-
- $response = '';
- foreach( $body->childNodes as $child ) {
- $response .= $child->ownerDocument->saveHTML(
$child );
- }
- } elseif ( !in_array( $to, array( 'wt', 'wikitext' ) ) ) {
- throw new WikitextException( "Unknown format requested:
" . $to, 'process-wikitext' );
- }
-
- return $response;
- }
-
- /**
- * Convert from/to wikitext/html using Parser.
- *
- * This only supports wikitext to HTML.
- *
- * @param string $from Format of content to convert: wikitext
- * @param string $to Format to convert to: html
- * @param string $content
- * @param Title $title
- * @return string
- * @throws WikitextException When the conversion is unsupported
- */
- protected static function parser( $from, $to, $content, Title $title ) {
- if ( $from !== 'wikitext' && $to !== 'html' ) {
- throw new WikitextException( 'Parser only supports
wikitext to HTML conversion', 'process-wikitext' );
- }
-
- global $wgParser;
-
- $options = new \ParserOptions;
- $options->setTidy( true );
- $options->setEditSection( false );
-
- $output = $wgParser->parse( $content, $title, $options );
- return $output->getText();
- }
-
- /**
- * Returns Flow's Parsoid config. $wgFlowParsoid* variables are used to
- * specify how to connect to Parsoid.
- *
- * @return array Parsoid config, in array(URL, prefix, timeout,
forwardCookies) format
- * @throws NoParsoidException When parsoid is unconfigured
- */
- protected static function parsoidConfig() {
- global
- $wgFlowParsoidURL, $wgFlowParsoidPrefix,
$wgFlowParsoidTimeout, $wgFlowParsoidForwardCookies;
-
- if ( !$wgFlowParsoidURL ) {
- throw new NoParsoidException( 'Flow Parsoid
configuration is unavailable', 'process-wikitext' );
- }
-
- return array(
- $wgFlowParsoidURL,
- $wgFlowParsoidPrefix,
- $wgFlowParsoidTimeout,
- $wgFlowParsoidForwardCookies,
- );
}
/**
@@ -253,14 +118,11 @@
* @return bool
*/
public static function onFlowAddModules( OutputPage $out ) {
-
- try {
- self::parsoidConfig();
- // XXX We only need the Parsoid CSS if some content
being
- // rendered has getContentFormat() === 'html'.
- $out->addModules( 'mediawiki.skinning.content.parsoid'
);
- } catch ( NoParsoidException $e ) {
- // The module is only necessary when we are using
parsoid.
+ /** @var Converter */
+ $converter = Container::get( 'content_converter' );
+ $modules = $converter->getRequiredModules();
+ if ( $modules ) {
+ $out->addModules( $modules );
}
return true;
diff --git a/tests/phpunit/Parsoid/ReferenceExtractorTest.php
b/tests/phpunit/Parsoid/ReferenceExtractorTest.php
index e47e85c..54eb5ca 100644
--- a/tests/phpunit/Parsoid/ReferenceExtractorTest.php
+++ b/tests/phpunit/Parsoid/ReferenceExtractorTest.php
@@ -163,13 +163,9 @@
$reflProperty->setAccessible( true );
$extractors = $reflProperty->getValue( $referenceExtractor );
- $result = $reflMethod->invoke(
- $referenceExtractor,
- $factory,
- $extractors['post'],
- Utils::convert( 'wt', 'html', $wikitext,
Title::newFromText( $page ) )
- );
- $this->assertCount( 1, $result );
+ $html = Utils::convert( 'wt', 'html', $wikitext,
Title::newFromText( $page ) );
+ $result = $reflMethod->invoke( $referenceExtractor, $factory,
$extractors['post'], $html );
+ $this->assertCount( 1, $result, $html );
$result = reset( $result );
$this->assertInstanceOf( $expectedClass, $result, $description
);
--
To view, visit https://gerrit.wikimedia.org/r/191697
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I069d71ac1dad8cd8011b752cbb9f618718fab395
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits