[MediaWiki-commits] [Gerrit] Remove text extracts - change (mediawiki...MobileFrontend)

jenkins-bot (Code Review) Fri, 17 Jan 2014 13:35:45 -0800

jenkins-bot has submitted this change and it was merged.

Change subject: Remove text extracts
......................................................................



Remove text extracts

This functionality has been split off to Extension:TextExtracts
and deployed on WMF

Change-Id: Ib42a17084cff1e19553f1c21c9d53a90d5cac0ba
---
M MobileFrontend.php
M includes/MobileFrontend.hooks.php
D includes/api/ApiQueryExtracts.php
D includes/formatters/ExtractFormatter.php
D tests/ExtractFormatterTest.php
5 files changed, 0 insertions(+), 504 deletions(-)

Approvals:
  Jdlrobson: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/MobileFrontend.php b/MobileFrontend.php
index 894880d..733948f 100644
--- a/MobileFrontend.php
+++ b/MobileFrontend.php
@@ -51,14 +51,12 @@
        'WmlContext' => 'WmlContext',
        'WmlDeviceProperties' => 'DeviceDetection',
 
-       'ExtractFormatter' => 'formatters/ExtractFormatter',
        'MobileFormatter' => 'formatters/MobileFormatter',
        'MobileFormatterHTML' => 'formatters/MobileFormatterHTML',
        'MobileFormatterWML' => 'formatters/MobileFormatterWML',
 
        'ApiMobileView' => 'api/ApiMobileView',
        'ApiParseExtender' => 'api/ApiParseExtender',
-       'ApiQueryExtracts' => 'api/ApiQueryExtracts',
 
        'InlineDiffFormatter' => 'diff/InlineDiffFormatter',
        'InlineDifferenceEngine' => 'diff/InlineDifferenceEngine',
@@ -103,14 +101,12 @@
 
 $wgExtensionFunctions[] = 'efMobileFrontend_Setup';
 
-$wgAPIPropModules['extracts'] = 'ApiQueryExtracts';
 $wgAPIModules['mobileview'] = 'ApiMobileView';
 
 $wgHooks['APIGetAllowedParams'][] = 'ApiParseExtender::onAPIGetAllowedParams';
 $wgHooks['APIAfterExecute'][] = 'ApiParseExtender::onAPIAfterExecute';
 $wgHooks['APIGetParamDescription'][] = 
'ApiParseExtender::onAPIGetParamDescription';
 $wgHooks['APIGetDescription'][] = 'ApiParseExtender::onAPIGetDescription';
-$wgHooks['OpenSearchXml'][] = 'ApiQueryExtracts::onOpenSearchXml';
 
 $wgHooks['LinksUpdate'][] = 'MobileFrontendHooks::onLinksUpdate';
 
@@ -273,11 +269,6 @@
                'div.magnify',
                '.nomobile',
        ),
-       // Text extracts
-       'extracts' => array(
-               '.toc', 'table', 'div', '.mw-editsection', 'sup.reference', 
'span.coordinates',
-               'span.geo-multi-punct', 'span.geo-nondefault', '.noexcerpt', 
'.error', '.nomobile'
-       ),
 );
 
 /**
@@ -292,11 +283,6 @@
  *     );
  */
 $wgMFCustomLogos = array();
-
-/**
- * Whether this extension should provide its extracts to OpenSearchXml 
extension
- */
-$wgMFExtendOpenSearchXml = false;
 
 /**
  * Set to false to allow search engines to index your mobile pages. So far, 
Google seems
diff --git a/includes/MobileFrontend.hooks.php 
b/includes/MobileFrontend.hooks.php
index e22e2e2..a93e6d2 100644
--- a/includes/MobileFrontend.hooks.php
+++ b/includes/MobileFrontend.hooks.php
@@ -625,7 +625,6 @@
                $files[] = "$dir/ApiMobileViewTest.php";
                $files[] = "$dir/ApiParseExtenderTest.php";
                $files[] = "$dir/DeviceDetectionTest.php";
-               $files[] = "$dir/ExtractFormatterTest.php";
                $files[] = "$dir/MobileContextTest.php";
                $files[] = "$dir/MobileFormatterTest.php";
                $files[] = "$dir/MobileFrontendHooksTest.php";
diff --git a/includes/api/ApiQueryExtracts.php 
b/includes/api/ApiQueryExtracts.php
deleted file mode 100644
index 38ebfd8..0000000
--- a/includes/api/ApiQueryExtracts.php
+++ /dev/null
@@ -1,391 +0,0 @@
-<?php
-
-class ApiQueryExtracts extends ApiQueryBase {
-       /**
-        * @var ParserOptions
-        */
-       private $parserOptions;
-       private $params;
-
-       public function __construct( $query, $moduleName ) {
-               parent::__construct( $query, $moduleName, 'ex' );
-       }
-
-       public function execute() {
-               wfProfileIn( __METHOD__ );
-               $titles = $this->getPageSet()->getGoodTitles();
-               if ( count( $titles ) == 0 ) {
-                       wfProfileOut( __METHOD__ );
-                       return;
-               }
-               $isXml = $this->getMain()->isInternalMode() || 
$this->getMain()->getPrinter()->getFormat() == 'XML';
-               $result = $this->getResult();
-               $params = $this->params = $this->extractRequestParams();
-               $this->requireMaxOneParameter( $params, 'chars', 'sentences' );
-               $continue = 0;
-               $limit = intval( $params['limit'] );
-               if ( $limit > 1 && !$params['intro'] ) {
-                       $limit = 1;
-                       $result->setWarning( "exlimit was too large for a whole 
article extracts request, lowered to $limit" );
-               }
-               if ( isset( $params['continue'] ) ) {
-                       $continue = intval( $params['continue'] );
-                       if ( $continue < 0 || $continue > count( $titles ) ) {
-                               $this->dieUsageMsg( '_badcontinue' );
-                       }
-                       $titles = array_slice( $titles, $continue, null, true );
-               }
-               $count = 0;
-               /** @var Title $t */
-               foreach ( $titles as $id => $t ) {
-                       if ( ++$count > $limit ) {
-                               $this->setContinueEnumParameter( 'continue', 
$continue + $count - 1 );
-                               break;
-                       }
-                       $text = $this->getExtract( $t );
-                       $pageName = $t->getText();
-                       $text = $this->truncate( $text, $pageName );
-                       if ( $this->params['plaintext'] ) {
-                               $text = $this->doSections( $text );
-                       }
-
-                       if ( $isXml ) {
-                               $fit = $result->addValue( array( 'query', 
'pages', $id ), 'extract', array( '*' => $text ) );
-                       } else {
-                               $fit = $result->addValue( array( 'query', 
'pages', $id ), 'extract', $text );
-                       }
-                       if ( !$fit ) {
-                               $this->setContinueEnumParameter( 'continue', 
$continue + $count - 1 );
-                               break;
-                       }
-               }
-               wfProfileOut( __METHOD__ );
-       }
-
-       /**
-        * OpenSearchXml hook handler
-        * @param array $results
-        * @return bool
-        */
-       public static function onOpenSearchXml( &$results ) {
-               global $wgMFExtendOpenSearchXml;
-               if ( !$wgMFExtendOpenSearchXml || !count( $results ) ) {
-                       return true;
-               }
-               $pageIds = array_keys( $results );
-               $api = new ApiMain( new FauxRequest(
-                       array(
-                               'action' => 'query',
-                               'prop' => 'extracts',
-                               'explaintext' => true,
-                               'exintro' => true,
-                               'exlimit' => count( $results ),
-                               'pageids' => implode( '|', $pageIds ),
-                       ) )
-               );
-               $api->execute();
-               $data = $api->getResultData();
-               foreach ( $pageIds as $id ) {
-                       if ( isset( 
$data['query']['pages'][$id]['extract']['*'] ) ) {
-                               $results[$id]['extract'] = 
$data['query']['pages'][$id]['extract']['*'];
-                               $results[$id]['extract trimmed'] = false;
-                       }
-               }
-               return true;
-       }
-
-       /**
-        * Returns a processed, but not trimmed extract
-        * @param Title $title
-        * @return string
-        */
-       private function getExtract( Title $title ) {
-               wfProfileIn( __METHOD__ );
-               $page = WikiPage::factory( $title );
-
-               $introOnly = $this->params['intro'];
-               $text = $this->getFromCache( $page, $introOnly );
-               // if we need just first section, try retrieving full page and 
getting first section out of it
-               if ( $text === false && $introOnly ) {
-                       $text = $this->getFromCache( $page, false );
-                       if ( $text !== false ) {
-                               $text = $this->getFirstSection( $text, 
$this->params['plaintext'] );
-                       }
-               }
-               if ( $text === false ) {
-                       $text = $this->parse( $page );
-                       $text = $this->convertText( $text, $title, 
$this->params['plaintext'] );
-                       $this->setCache( $page, $text );
-               }
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       private function cacheKey( WikiPage $page, $introOnly ) {
-               return wfMemcKey( 'mf', 'extract', $page->getLatest(), 
$this->params['plaintext'], $introOnly );
-       }
-
-       private function getFromCache( WikiPage $page, $introOnly ) {
-               global $wgMemc;
-
-               $key = $this->cacheKey( $page, $introOnly );
-               return $wgMemc->get( $key );
-       }
-
-       private function setCache( WikiPage $page, $text ) {
-               global $wgMemc;
-
-               $key = $this->cacheKey( $page, $this->params['intro'] );
-               $wgMemc->set( $key, $text );
-       }
-
-       private function getFirstSection( $text, $plainText ) {
-               if ( $plainText ) {
-                       $regexp = '/^(.*?)(?=' . 
ExtractFormatter::SECTION_MARKER_START . ')/s';
-               } else {
-                       $regexp = '/^(.*?)(?=<h[1-6]\b)/s';
-               }
-               if ( preg_match( $regexp, $text, $matches ) ) {
-                       $text = $matches[0];
-               }
-               return $text;
-       }
-
-       /**
-        * Returns page HTML
-        * @param WikiPage $page
-        * @return string
-        */
-       private function parse( WikiPage $page ) {
-               wfProfileIn( __METHOD__ );
-               if ( !$this->parserOptions ) {
-                       $this->parserOptions = new ParserOptions( new User( 
'127.0.0.1' ) );
-               }
-               // first try finding full page in parser cache
-               if ( $page->isParserCacheUsed( $this->parserOptions, 0 ) ) {
-                       $pout = ParserCache::singleton()->get( $page, 
$this->parserOptions );
-                       if ( $pout ) {
-                               $text = $pout->getText();
-                               if ( $this->params['intro'] ) {
-                                       $text = $this->getFirstSection( $text, 
false );
-                               }
-                               wfProfileOut( __METHOD__ );
-                               return $text;
-                       }
-               }
-               $request = array(
-                       'action' => 'parse',
-                       'page' => $page->getTitle()->getPrefixedText(),
-                       'prop' => 'text'
-               );
-               if ( $this->params['intro'] ) {
-                       $request['section'] = 0;
-               }
-               // in case of cache miss, render just the needed section
-               $api = new ApiMain( new FauxRequest( $request ) );
-               $api->execute();
-               $data = $api->getResultData();
-               wfProfileOut( __METHOD__ );
-               return $data['parse']['text']['*'];
-       }
-
-       /**
-        * Converts page HTML into an extract
-        * @param string $text
-        * @return string 
-        */
-       private function convertText( $text ) {
-               wfProfileIn( __METHOD__ );
-               $fmt = new ExtractFormatter( $text, $this->params['plaintext'], 
$this->params['sectionformat'] );
-               $text = $fmt->getText();
-
-               wfProfileOut( __METHOD__ );
-               return trim( $text );
-       }
-
-       /**
-        * Truncate the given text to a certain number of characters or 
sentences
-        * @param string $text The text to truncate
-        * @param string $pageName Title of the page (for debugging)
-        * @return string
-        */
-       private function truncate( $text, $pageName ) {
-               if ( $this->params['chars'] ) {
-                       return $this->getFirstChars( $text, 
$this->params['chars'] );
-               } elseif ( $this->params['sentences'] ) {
-                       return $this->getFirstSentences( $text, 
$this->params['sentences'], $pageName );
-               }
-               return $text;
-       }
-
-       /**
-        * 
-        * @param string $text
-        * @param int $requestedLength
-        * @return string
-        */
-       private function getFirstChars( $text, $requestedLength ) {
-               wfProfileIn( __METHOD__ );
-               $length = mb_strlen( $text );
-               if ( $length <= $requestedLength ) {
-                       wfProfileOut( __METHOD__ );
-                       return $text;
-               }
-               $pattern = "#^.{{$requestedLength}}[\\w/]*>?#su";
-               preg_match( $pattern, $text, $m );
-               $text = $m[0];
-               // Fix possibly unclosed tags
-               $text = $this->tidy( $text );
-               $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       /**
-        *
-        * @param string $text
-        * @param int $requestedSentenceCount
-        * @param string $pageName Title of the page (for debugging)
-        * @return string
-        */
-       private function getFirstSentences( $text, $requestedSentenceCount, 
$pageName ) {
-               wfProfileIn( __METHOD__ );
-               // Based on code from OpenSearchXml by Brion Vibber
-               $endchars = array(
-                       '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII
-                       '。', // full-width ideographic full-stop
-                       '．', '！', '？', // double-width roman forms
-                       '｡', // half-width ideographic full stop
-                       );
-
-               $endgroup = implode( '|', $endchars );
-               $end = "(?:$endgroup)";
-               $sentence = ".+?$end+";
-               $regexp = "/^($sentence){1,{$requestedSentenceCount}}/u";
-               $matches = array();
-               $res = preg_match( $regexp, $text, $matches );
-               if( $res ) {
-                       $text = $matches[0];
-               } else {
-                       if ( $res === false ) {
-                               wfDebugLog( 'mobile', "Regular expresssion 
compilation failure. Page: $pageName; RegEx: $regexp" );
-                       }
-                       // Just return the first line
-                       $lines = explode( "\n", $text );
-                       $text = trim( $lines[0] );
-               }
-               $text = $this->tidy( $text );
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       /**
-        * A simple wrapper around tidy
-        * @param string $text
-        * @return string
-        */
-       private function tidy( $text ) {
-               global $wgUseTidy;
-
-               wfProfileIn( __METHOD__ );
-               if ( $wgUseTidy && !$this->params['plaintext'] ) {
-                       $text = trim ( MWTidy::tidy( $text ) );
-               }
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       private function doSections( $text ) {
-               $text = preg_replace_callback(
-                       "/" . ExtractFormatter::SECTION_MARKER_START . '(\d)'. 
ExtractFormatter::SECTION_MARKER_END . "(.*?)$/m",
-                       array( $this, 'sectionCallback' ),
-                       $text
-               );
-               return $text;
-       }
-
-       private function sectionCallback( $matches ) {
-               if ( $this->params['sectionformat'] == 'raw' ) {
-                       return $matches[0];
-               }
-               $func = __CLASS__ . 
"::doSection_{$this->params['sectionformat']}";
-               return call_user_func( $func, $matches[1], trim( $matches[2] ) 
);
-       }
-
-       private static function doSection_wiki( $level, $text ) {
-               $bars = str_repeat( '=', $level );
-               return "\n$bars $text $bars";
-       }
-
-       private static function doSection_plain( $level, $text ) {
-               return "\n$text";
-       }
-
-       public function getAllowedParams() {
-               return array(
-                       'chars' => array(
-                               ApiBase::PARAM_TYPE => 'integer',
-                               ApiBase::PARAM_MIN => 1,
-                       ),
-                       'sentences' => array(
-                               ApiBase::PARAM_TYPE => 'integer',
-                               ApiBase::PARAM_MIN => 1,
-                               ApiBase::PARAM_MAX => 10,
-                       ),
-                       'limit' => array(
-                               ApiBase::PARAM_DFLT => 1,
-                               ApiBase::PARAM_TYPE => 'limit',
-                               ApiBase::PARAM_MIN => 1,
-                               ApiBase::PARAM_MAX => 20,
-                               ApiBase::PARAM_MAX2 => 20,
-                       ),
-                       'intro' => false,
-                       'plaintext' => false,
-                       'sectionformat' => array(
-                               ApiBase::PARAM_TYPE => array( 'plain', 'wiki', 
'raw' ),
-                               ApiBase::PARAM_DFLT => 'wiki',
-                       ),
-                       'continue' => array(
-                               ApiBase::PARAM_TYPE => 'integer',
-                       ),
-               );
-       }
-
-       public function getParamDescription() {
-               return array(
-                       'chars' => 'How many characters to return, actual text 
returned might be slightly longer.',
-                       'sentences' => 'How many sentences to return',
-                       'limit' => 'How many extracts to return. ',
-                       'intro' => 'Return only content before the first 
section',
-                       'plaintext' => 'Return extracts as plaintext instead of 
limited HTML',
-                       'sectionformat' => array(
-                               'How to format sections in plaintext mode:',
-                               ' plain - No formatting',
-                               ' wiki - Wikitext-style formatting == like this 
==',
-                               " raw - This module's internal representation 
(secton titles prefixed with <ASCII 1><ASCII 2><section level><ASCII 2><ASCII 
1>",
-                       ),
-                       'continue' => 'When more results are available, use 
this to continue',
-               );
-       }
-
-       public function getDescription() {
-               return 'Returns plain-text or limited HTML extracts of the 
given page(s)';
-       }
-
-       public function getPossibleErrors() {
-               return array_merge( parent::getPossibleErrors(), array(
-                       array( 'code' => '_badcontinue', 'info' => 'Invalid 
continue param. You should pass the original value returned by the previous 
query' ),
-               ) );
-       }
-
-       public function getExamples() {
-               return array(
-                       
'api.php?action=query&prop=extracts&exchars=175&titles=Therion' => 'Get a 
175-character extract',
-               );
-       }
-
-
-       public function getHelpUrls() {
-               return 
'https://www.mediawiki.org/wiki/Extension:MobileFrontend#prop.3Dextracts';
-       }
-}
diff --git a/includes/formatters/ExtractFormatter.php 
b/includes/formatters/ExtractFormatter.php
deleted file mode 100644
index 11b2d16..0000000
--- a/includes/formatters/ExtractFormatter.php
+++ /dev/null
@@ -1,60 +0,0 @@
-<?php
-
-/**
- * Provides text-only or limited-HTML extracts of page HTML
- */
-class ExtractFormatter extends HtmlFormatter {
-       const SECTION_MARKER_START = "\1\2";
-       const SECTION_MARKER_END = "\2\1";
-
-       private $plainText;
-
-       /**
-        * @param string $text: Text to convert
-        * @param bool $plainText: Whether extract should be plaintext
-        */
-       public function __construct( $text, $plainText ) {
-               global $wgMFRemovableClasses;
-
-               wfProfileIn( __METHOD__ );
-               parent::__construct( HtmlFormatter::wrapHTML( $text ) );
-               $this->plainText = $plainText;
-
-               $this->setRemoveMedia( true );
-               $this->remove( $wgMFRemovableClasses['base'] );
-               $this->remove( $wgMFRemovableClasses['extracts'] );
-
-               if ( $plainText ) {
-                       $this->flattenAllTags();
-               } else {
-                       $this->flatten( array( 'span', 'a' ) );
-               }
-               wfProfileOut( __METHOD__ );
-       }
-
-       public function getText( $dummy = null ) {
-               wfProfileIn( __METHOD__ );
-               $this->filterContent();
-               $text = parent::getText();
-               if ( $this->plainText ) {
-                       $text = html_entity_decode( $text );
-                       $text = str_replace( "\xC2\xA0", ' ', $text ); // 
replace nbsp with space
-                       $text = str_replace( "\r", "\n", $text ); // for Windows
-                       $text = preg_replace( "/\n{3,}/", "\n\n", $text ); // 
normalise newlines
-               }
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       public function onHtmlReady( $html ) {
-               wfProfileIn( __METHOD__ );
-               if ( $this->plainText ) {
-                       $html = preg_replace( '/\s*(<h([1-6])\b)/i',
-                               "\n\n" . self::SECTION_MARKER_START . '$2' . 
self::SECTION_MARKER_END . '$1' ,
-                               $html
-                       );
-               }
-               wfProfileOut( __METHOD__ );
-               return $html;
-       }
-}
diff --git a/tests/ExtractFormatterTest.php b/tests/ExtractFormatterTest.php
deleted file mode 100644
index 8dd73cf..0000000
--- a/tests/ExtractFormatterTest.php
+++ /dev/null
@@ -1,38 +0,0 @@
-<?php
-
-/**
- * @group MobileFrontend
- * @group Broken
- * Disabled for now due to Jenkins weirdness
- */
-class MF_ExtractFormatterTest extends MediaWikiTestCase {
-       /**
-        * @dataProvider provideExtracts
-        */
-       public function testExtracts( $expected, $wikiText, $plainText ) {
-               $title = Title::newFromText( 'Test' );
-               $po = new ParserOptions();
-               $po->setEditSection( true );
-               $parser = new Parser();
-               $text = $parser->parse( $wikiText, $title, $po )->getText();
-               $fmt = new ExtractFormatter( $text, $plainText );
-               $fmt->remove( '.metadata' ); // Will be added via 
$wgMFRemovableClasses on WMF
-               $text = trim( $fmt->getText() );
-               $this->assertEquals( $expected, $text );
-       }
-
-       public function provideExtracts() {
-               $dutch = "'''Dutch''' (<span class=\"unicode haudio\" 
style=\"white-space:nowrap;\"><span class=\"fn\">"
-                       . 
"[[File:Loudspeaker.svg|11px|link=File:nl-Nederlands.ogg|About this 
sound]]&nbsp;[[:Media:nl-Nederlands.ogg|''Nederlands'']]"
-                       . "</span>&nbsp;<small class=\"metadata audiolinkinfo\" 
style=\"cursor:help;\">([[Wikipedia:Media help|<span style=\"cursor:help;\">"
-                       . "help</span>]]·[[:File:nl-Nederlands.ogg|<span 
style=\"cursor:help;\">info</span>]])</small></span>) is a"
-                       . " [[West Germanic languages|West Germanic language]] 
and the native language of most of the population of the [[Netherlands]]";
-               return array(
-                       array(
-                               "Dutch ( Nederlands ) is a West Germanic 
language and the native language of most of the population of the Netherlands",
-                               $dutch,
-                               true,
-                       ),
-               );
-       }
-}
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/107496
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib42a17084cff1e19553f1c21c9d53a90d5cac0ba
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MobileFrontend
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] Remove text extracts - change (mediawiki...MobileFrontend)

Reply via email to