jenkins-bot has submitted this change and it was merged.
Change subject: Revert "Bug 48917: Fix extracts" - breaks tests
......................................................................
Revert "Bug 48917: Fix extracts" - breaks tests
This reverts commit c8020c90476a9abdfd0c7d6502df06a0bc70aa49
Change-Id: I5ac8b28762a5bf00cee17fbf6f3431ba02c23b2c
---
M MobileFrontend.php
D includes/ExtractFormatter.php
M includes/HtmlFormatter.php
M includes/MobileFormatter.php
M includes/MobileFrontend.hooks.php
M includes/api/ApiQueryExtracts.php
D tests/ExtractFormatterTest.php
M tests/HtmlFormatterTest.php
8 files changed, 87 insertions(+), 147 deletions(-)
Approvals:
MaxSem: Verified; Looks good to me, approved
jenkins-bot: Verified
diff --git a/MobileFrontend.php b/MobileFrontend.php
index 34ef53d..64f245b 100644
--- a/MobileFrontend.php
+++ b/MobileFrontend.php
@@ -38,7 +38,6 @@
'MobileFrontendHooks' => 'MobileFrontend.hooks',
'DeviceDetection' => 'DeviceDetection',
- 'ExtractFormatter' => 'ExtractFormatter',
'HtmlFormatter' => 'HtmlFormatter',
'MobileContext' => 'MobileContext',
'MobileFormatter' => 'MobileFormatter',
diff --git a/includes/ExtractFormatter.php b/includes/ExtractFormatter.php
deleted file mode 100644
index 880f02c..0000000
--- a/includes/ExtractFormatter.php
+++ /dev/null
@@ -1,56 +0,0 @@
-<?php
-
-class ExtractFormatter extends HtmlFormatter {
- const SECTION_MARKER_START = "\1\2";
- const SECTION_MARKER_END = "\2\1";
-
- private $plainText;
-
- public function __construct( $text, $plainText ) {
- global $wgMFRemovableClasses;
-
- wfProfileIn( __METHOD__ );
- parent::__construct( HtmlFormatter::wrapHTML( $text ) );
- $this->plainText = $plainText;
-
- $this->removeImages();
- $this->useImgAlt( false );
- // @fixme: use rules from MobileFormatter?
- $this->remove( array( 'table', 'div', '.editsection',
'.mw-editsection', 'sup.reference', 'span.coordinates',
- 'span.geo-multi-punct', 'span.geo-nondefault',
'.noexcerpt', '.error' )
- );
- $this->remove( $wgMFRemovableClasses );
- if ( $plainText ) {
- $this->flattenAllTags();
- } else {
- $this->flatten( array( 'span', 'a' ) );
- }
- wfProfileOut( __METHOD__ );
- }
-
- public function getText( $dummy = null ) {
- wfProfileIn( __METHOD__ );
- $this->filterContent();
- $text = parent::getText();
- if ( $this->plainText ) {
- $text = html_entity_decode( $text );
- $text = str_replace( "\xC2\xA0", ' ', $text ); //
replace nbsp with space
- $text = str_replace( "\r", "\n", $text ); // for Windows
- $text = preg_replace( "/\n{3,}/", "\n\n", $text ); //
normalise newlines
- }
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- public function onHtmlReady( $html ) {
- wfProfileIn( __METHOD__ );
- if ( $this->plainText ) {
- $html = preg_replace( '/\s*(<h([1-6])\b)/i',
- "\n\n" . self::SECTION_MARKER_START . '$2' .
self::SECTION_MARKER_END . '$1' ,
- $html
- );
- }
- wfProfileOut( __METHOD__ );
- return $html;
- }
-}
\ No newline at end of file
diff --git a/includes/HtmlFormatter.php b/includes/HtmlFormatter.php
index 32dcd9f..472a52a 100644
--- a/includes/HtmlFormatter.php
+++ b/includes/HtmlFormatter.php
@@ -15,7 +15,6 @@
private $removeImages = false;
private $idWhitelist = array();
private $flattenRedLinks = false;
- private $useImgAlt = true;
/**
* Constructor
@@ -108,13 +107,6 @@
}
/**
- * @param bool $value
- */
- public function useImgAlt( $value ) {
- $this->useImgAlt = $value;
- }
-
- /**
* Checks whether specified element should not be removed due to
whitelist
* @param DOMElement $element: Element to check
* @return bool
@@ -157,7 +149,7 @@
$tagToRemoveNodes = $doc->getElementsByTagName(
$tagToRemove );
foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
if ( $tagToRemoveNode &&
$this->elementNotWhitelisted( $tagToRemoveNode ) ) {
- if ( $this->useImgAlt &&
$tagToRemoveNode->nodeName == 'img' ) {
+ if ( $tagToRemoveNode->nodeName ==
'img' ) {
$domElemsToReplace[] =
$tagToRemoveNode;
} else {
$domElemsToRemove[] =
$tagToRemoveNode;
@@ -179,33 +171,30 @@
$domElement->parentNode->replaceChild( $replacement,
$domElement );
}
- $this->removeElements( $domElemsToRemove );
+ foreach ( $domElemsToRemove as $domElement ) {
+ $domElement->parentNode->removeChild( $domElement );
+ }
// Elements with named IDs
- $domElemsToRemove = array();
foreach ( $removals['ID'] as $itemToRemove ) {
$itemToRemoveNode = $doc->getElementById( $itemToRemove
);
if ( $itemToRemoveNode ) {
- $domElemsToRemove[] = $itemToRemoveNode;
+ $itemToRemoveNode->parentNode->removeChild(
$itemToRemoveNode );
}
}
- $this->removeElements( $domElemsToRemove );
// CSS Classes
- $domElemsToRemove = array();
$xpath = new DOMXpath( $doc );
foreach ( $removals['CLASS'] as $classToRemove ) {
- $elements = $xpath->query( '//*[contains(@class, "' .
$classToRemove . '")]' );
+ $elements = $xpath->query( '//*[@class="' .
$classToRemove . '"]' );
/** @var $element DOMElement */
foreach ( $elements as $element ) {
- $classes = $element->getAttribute( 'class' );
- if ( preg_match( "/\b$classToRemove\b/",
$classes ) && $element->parentNode && $this->elementNotWhitelisted( $element )
) {
- $domElemsToRemove[] = $element;
+ if ( $element->parentNode &&
$this->elementNotWhitelisted( $element ) ) {
+ $element->parentNode->removeChild(
$element );
}
}
}
- $this->removeElements( $domElemsToRemove );
// Tags with CSS Classes
foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
@@ -244,13 +233,6 @@
}
}
wfProfileOut( __METHOD__ );
- }
-
- private function removeElements( array $elements ) {
- /** @var $element DOMElement */
- foreach ( $elements as $element ) {
- $element->parentNode->removeChild( $element );
- }
}
/**
diff --git a/includes/MobileFormatter.php b/includes/MobileFormatter.php
index 681a087..4d11a78 100644
--- a/includes/MobileFormatter.php
+++ b/includes/MobileFormatter.php
@@ -26,11 +26,19 @@
private $defaultItemsToRemove = array(
'table.toc',
+ 'div.stub',
'#search', // remove search form element from Special:Search
+ 'div.sister-project',
'div.magnify',
'.editsection', // FIXME: deprecate in favour of mw-editsection
'.mw-editsection', // Edit links in section headings
'span.t',
+ '.portal',
+ '#protected-icon',
+ '.boilerplate',
+ '#id-articulo-destacado', // FA star on eswiki, @todo: remove
class="metadata topicon" instead
+ '.hiddenStructure',
+ '.medialist',
'.mw-search-createlink',
'#ogg_player_1',
'#ogg_player_2',
diff --git a/includes/MobileFrontend.hooks.php
b/includes/MobileFrontend.hooks.php
index d55d944..c825408 100644
--- a/includes/MobileFrontend.hooks.php
+++ b/includes/MobileFrontend.hooks.php
@@ -506,12 +506,11 @@
$dir = dirname( dirname( __FILE__ ) ) . '/tests';
$files[] = "$dir/ApiParseExtenderTest.php";
- $files[] = "$dir/DeviceDetectionTest.php";
- $files[] = "$dir/ExtractFormatterTest.php";
- $files[] = "$dir/HtmlFormatterTest.php";
$files[] = "$dir/MobileContextTest.php";
- $files[] = "$dir/MobileFormatterTest.php";
$files[] = "$dir/MobileFrontendTest.php";
+ $files[] = "$dir/DeviceDetectionTest.php";
+ $files[] = "$dir/HtmlFormatterTest.php";
+ $files[] = "$dir/MobileFormatterTest.php";
$files[] = "$dir/modules/MFResourceLoaderModuleTest.php";
// special page tests
diff --git a/includes/api/ApiQueryExtracts.php
b/includes/api/ApiQueryExtracts.php
index 0bb8312..da5f711 100644
--- a/includes/api/ApiQueryExtracts.php
+++ b/includes/api/ApiQueryExtracts.php
@@ -1,6 +1,9 @@
<?php
class ApiQueryExtracts extends ApiQueryBase {
+ const SECTION_MARKER_START = "\1\2";
+ const SECTION_MARKER_END = "\2\1";
+
/**
* @var ParserOptions
*/
@@ -139,7 +142,7 @@
private function getFirstSection( $text, $plainText ) {
if ( $plainText ) {
- $regexp = '/^(.*?)(?=' .
ExtractFormatter::SECTION_MARKER_START . ')/s';
+ $regexp = '/^(.*?)(?=' . self::SECTION_MARKER_START .
')/s';
} else {
$regexp = '/^(.*?)(?=<h[1-6]\b)/s';
}
@@ -288,7 +291,7 @@
private function doSections( $text ) {
$text = preg_replace_callback(
- "/" . ExtractFormatter::SECTION_MARKER_START . '(\d)'.
ExtractFormatter::SECTION_MARKER_END . "(.*?)$/m",
+ "/" . self::SECTION_MARKER_START . '(\d)'.
self::SECTION_MARKER_END . "(.*?)$/m",
array( $this, 'sectionCallback' ),
$text
);
@@ -332,7 +335,7 @@
'intro' => false,
'plaintext' => false,
'sectionformat' => array(
- ApiBase::PARAM_TYPE => array( 'plain', 'wiki',
'raw' ),
+ ApiBase::PARAM_TYPE =>
ExtractFormatter::$sectionFormats,
ApiBase::PARAM_DFLT => 'wiki',
),
'continue' => array(
@@ -383,3 +386,58 @@
return __CLASS__ . ': $Id$';
}
}
+
+class ExtractFormatter extends HtmlFormatter {
+ private $plainText;
+ private $sectionFormat;
+
+ public static $sectionFormats = array(
+ 'plain',
+ 'wiki',
+ 'raw',
+ );
+
+ public function __construct( $text, $plainText, $sectionFormat ) {
+ wfProfileIn( __METHOD__ );
+ parent::__construct( HtmlFormatter::wrapHTML( $text ) );
+ $this->plainText = $plainText;
+ $this->sectionFormat = $sectionFormat;
+
+ $this->removeImages();
+ // @fixme: use rules from MobileFormatter?
+ $this->remove( array( 'table', 'div', '.editsection',
'.mw-editsection', 'sup.reference', 'span.coordinates',
+ 'span.geo-multi-punct', 'span.geo-nondefault',
'.noexcerpt', '.error' )
+ );
+ if ( $plainText ) {
+ $this->flattenAllTags();
+ } else {
+ $this->flatten( array( 'span', 'a' ) );
+ }
+ wfProfileOut( __METHOD__ );
+ }
+
+ public function getText( $dummy = null ) {
+ wfProfileIn( __METHOD__ );
+ $this->filterContent();
+ $text = parent::getText();
+ if ( $this->plainText ) {
+ $text = html_entity_decode( $text );
+ $text = str_replace( "\r", "\n", $text ); // for Windows
+ $text = preg_replace( "/\n{3,}/", "\n\n", $text ); //
normalise newlines
+ }
+ wfProfileOut( __METHOD__ );
+ return $text;
+ }
+
+ public function onHtmlReady( $html ) {
+ wfProfileIn( __METHOD__ );
+ if ( $this->plainText ) {
+ $html = preg_replace( '/\s*(<h([1-6])\b)/i',
+ "\n\n" . ApiQueryExtracts::SECTION_MARKER_START
. '$2' . ApiQueryExtracts::SECTION_MARKER_END . '$1' ,
+ $html
+ );
+ }
+ wfProfileOut( __METHOD__ );
+ return $html;
+ }
+}
\ No newline at end of file
diff --git a/tests/ExtractFormatterTest.php b/tests/ExtractFormatterTest.php
deleted file mode 100644
index c843735..0000000
--- a/tests/ExtractFormatterTest.php
+++ /dev/null
@@ -1,36 +0,0 @@
-<?php
-
-/**
- * @group MobileFrontend
- */
-class MF_ExtractFormatterTest extends MediaWikiTestCase {
- /**
- * @dataProvider provideExtracts
- */
- public function testExtracts( $expected, $wikiText, $plainText ) {
- $title = Title::newFromText( 'Test' );
- $po = new ParserOptions();
- $po->setEditSection( true );
- $parser = new Parser();
- $text = $parser->parse( $wikiText, $title, $po )->getText();
- $fmt = new ExtractFormatter( $text, $plainText );
- $fmt->remove( '.metadata' ); // Will be added via
$wgMFRemovableClasses on WMF
- $text = trim( $fmt->getText() );
- $this->assertEquals( $expected, $text );
- }
-
- public function provideExtracts() {
- $dutch = "'''Dutch''' (<span class=\"unicode haudio\"
style=\"white-space:nowrap;\"><span class=\"fn\">"
- .
"[[File:Loudspeaker.svg|11px|link=File:nl-Nederlands.ogg|About this
sound]] [[:Media:nl-Nederlands.ogg|''Nederlands'']]"
- . "</span> <small class=\"metadata audiolinkinfo\"
style=\"cursor:help;\">([[Wikipedia:Media help|<span style=\"cursor:help;\">"
- . "help</span>]]·[[:File:nl-Nederlands.ogg|<span
style=\"cursor:help;\">info</span>]])</small></span>) is a"
- . " [[West Germanic languages|West Germanic language]]
and the native language of most of the population of the [[Netherlands]]";
- return array(
- array(
- "Dutch ( Nederlands ) is a West Germanic
language and the native language of most of the population of the Netherlands",
- $dutch,
- true,
- ),
- );
- }
-}
\ No newline at end of file
diff --git a/tests/HtmlFormatterTest.php b/tests/HtmlFormatterTest.php
index a9a141b..b5fa9db 100644
--- a/tests/HtmlFormatterTest.php
+++ b/tests/HtmlFormatterTest.php
@@ -25,12 +25,8 @@
}
public function getHtmlData() {
- $removeImages = function( HtmlFormatter $f ) {
+ $disableImages = function( HtmlFormatter $f ) {
$f->removeImages();
- };
- $fullyRemoveImages = function( HtmlFormatter $f ) {
- $f->removeImages();
- $f->useImgAlt( false );
};
$removeTags = function( HtmlFormatter $f ) {
$f->remove( array( 'table', '.foo', '#bar', 'div.baz' )
);
@@ -47,21 +43,11 @@
array(
'<img src="/foo/bar.jpg">Blah</img>',
'<span class="mw-mf-image-replacement">['.
wfMessage( 'mobile-frontend-missing-image' ) .']</span>Blah',
- $removeImages,
- ),
- array(
- '<img src="/foo/bar.jpg" alt="Blah"/>',
- '<span
class="mw-mf-image-replacement">[Blah]</span>',
- $removeImages,
- ),
- array(
- '<img src="/foo/bar.jpg" alt="Blah"/>',
- '',
- $fullyRemoveImages,
+ $disableImages,
),
// basic tag removal
array(
- '<table><tr><td>foo</td></tr></table><div
class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
+ '<table><tr><td>foo</td></tr></table><div
class="foo">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div
class="baz"/>
<span class="baz">baz</span> <span class="foo" id="jedi">jedi</span>',
@@ -89,23 +75,23 @@
array(
'<img alt="picture of kitty" src="kitty.jpg">',
'<span class="mw-mf-image-replacement">[picture
of kitty]</span>',
- $removeImages,
+ $disableImages,
),
array(
'<img src="kitty.jpg">',
'<span class="mw-mf-image-replacement">[' .
wfMessage( 'mobile-frontend-missing-image' ) . ']</span>',
- $removeImages,
+ $disableImages,
),
array(
'<img alt src="kitty.jpg">',
'<span class="mw-mf-image-replacement">[' .
wfMessage( 'mobile-frontend-missing-image' ) . ']</span>',
- $removeImages,
+ $disableImages,
),
array(
'<img alt src="kitty.jpg">look at the cute
kitty!<img alt="picture of angry dog" src="dog.jpg">',
'<span class="mw-mf-image-replacement">[' .
wfMessage( 'mobile-frontend-missing-image' ) . ']</span>look at the cute
kitty!'.
'<span
class="mw-mf-image-replacement">[picture of angry dog]</span>',
- $removeImages,
+ $disableImages,
)
);
}
--
To view, visit https://gerrit.wikimedia.org/r/68487
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5ac8b28762a5bf00cee17fbf6f3431ba02c23b2c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MobileFrontend
Gerrit-Branch: master
Gerrit-Owner: MaxSem <[email protected]>
Gerrit-Reviewer: MaxSem <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits