https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114161
Revision: 114161
Author: maxsem
Date: 2012-03-19 18:17:29 +0000 (Mon, 19 Mar 2012)
Log Message:
-----------
Follow-up r114129:
* Fixed leftovers from the excerpts --> extracts rename
* ...including the broken OpenSearchXml hook handler
* Split exlength into exchars and exsentences for better control over the text being returned
Modified Paths:
--------------
trunk/extensions/MobileFrontend/MobileFrontend.php
trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php
Modified: trunk/extensions/MobileFrontend/MobileFrontend.php
===================================================================
--- trunk/extensions/MobileFrontend/MobileFrontend.php 2012-03-19 18:09:40 UTC
(rev 114160)
+++ trunk/extensions/MobileFrontend/MobileFrontend.php 2012-03-19 18:17:29 UTC
(rev 114161)
@@ -132,7 +132,7 @@
$wgHooks['APIAfterExecute'][] = 'ApiParseExtender::onAPIAfterExecute';
$wgHooks['APIGetParamDescription'][] =
'ApiParseExtender::onAPIGetParamDescription';
$wgHooks['APIGetDescription'][] = 'ApiParseExtender::onAPIGetDescription';
-$wgHooks['OpenSearchXml'][] = 'ApiQueryExcerpts::onOpenSearchXml';
+$wgHooks['OpenSearchXml'][] = 'ApiQueryExtracts::onOpenSearchXml';
function efMobileFrontend_Setup() {
global $wgExtMobileFrontend, $wgHooks;
@@ -174,6 +174,6 @@
}
/**
- * Whether this extension should provide its excerpts to OpenSearchXml
extension
+ * Whether this extension should provide its extracts to OpenSearchXml
extension
*/
$wgMFExtendOpenSearchXml = false;
Modified: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php
===================================================================
--- trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php 2012-03-19
18:09:40 UTC (rev 114160)
+++ trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php 2012-03-19
18:17:29 UTC (rev 114161)
@@ -21,9 +21,10 @@
wfProfileOut( __METHOD__ );
return;
}
- $isXml = $this->getMain()->getPrinter()->getFormat() == 'XML';
+ $isXml = $this->getMain()->isInternalMode() ||
$this->getMain()->getPrinter()->getFormat() == 'XML';
$result = $this->getResult();
$params = $this->params = $this->extractRequestParams();
+ $this->requireMaxOneParameter( $params, 'chars', 'sentences' );
$continue = 0;
$limit = intval( $params['limit'] );
if ( $limit > 1 && !$params['intro'] ) {
@@ -45,9 +46,8 @@
break;
}
$text = $this->getExtract( $t );
- if ( isset( $params['length'] ) ) {
- $text = $this->trimText( $text );
- }
+ $text = $this->truncate( $text );
+
if ( $isXml ) {
$fit = $result->addValue( array( 'query',
'pages', $id ), 'extract', array( '*' => $text ) );
} else {
@@ -74,7 +74,7 @@
$api = new ApiMain( new FauxRequest(
array(
'action' => 'query',
- 'prop' => 'excerpts',
+ 'prop' => 'extracts',
'explaintext' => true,
'exlimit' => count( $results ),
'pageids' => implode( '|', $pageIds ),
@@ -83,8 +83,8 @@
$api->execute();
$data = $api->getResultData();
foreach ( $pageIds as $id ) {
- if ( isset( $data['query']['pages'][$id]['excerpts'][0]
) ) {
- $results[$id]['extract'] =
$data['query']['pages'][$id]['extract'][0];
+ if ( isset(
$data['query']['pages'][$id]['extract']['*'] ) ) {
+ $results[$id]['extract'] =
$data['query']['pages'][$id]['extract']['*'];
$results[$id]['extract trimmed'] = false;
}
}
@@ -92,7 +92,7 @@
}
/**
- * Returns a processed, but not trimmed excerpt
+ * Returns a processed, but not trimmed extract
* @param Title $title
* @return string
*/
@@ -188,10 +188,8 @@
}
/**
- * Converts page HTML into an excerpt
+ * Converts page HTML into an extract
* @param string $text
- * @param Title $title
- * @param bool $plainText
* @return string
*/
private function convertText( $text ) {
@@ -203,16 +201,22 @@
return trim( $text );
}
+ private function truncate( $text ) { // shortens $text according to the request's 'chars' or 'sentences' parameter
+ if ( $this->params['chars'] ) { // a character limit is checked first
+ return $this->getFirstChars( $text,
$this->params['chars'] );
+ } elseif ( $this->params['sentences'] ) { // otherwise fall back to a sentence limit
+ return $this->getFirstSentences( $text,
$this->params['sentences'] );
+ }
+ return $text; // neither limit was given: the text is returned unchanged
+ }
+
/**
*
* @param string $text
* @param int $requestedLength
- * @param bool $plainText
* @return string
*/
- private function trimText( $text, $requestedLength, $plainText ) {
- global $wgUseTidy;
-
+ private function getFirstChars( $text, $requestedLength ) {
wfProfileIn( __METHOD__ );
$length = mb_strlen( $text );
if ( $length <= $requestedLength ) {
@@ -223,20 +227,69 @@
preg_match( $pattern, $text, $m );
$text = $m[0];
// Fix possibly unclosed tags
- if ( $wgUseTidy && !$plainText ) {
+ $text = $this->tidy( $text );
+ $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
+ wfProfileOut( __METHOD__ );
+ return $text;
+ }
+
+ /**
+ * Returns the first sentences of the text, or only its first line when no sentence end is found
+ * @param string $text
+ * @param int $requestedSentenceCount
+ * @return string
+ */
+ private function getFirstSentences( $text, $requestedSentenceCount ) {
+ wfProfileIn( __METHOD__ );
+ // Based on code from OpenSearchXml by Brion Vibber
+ $endchars = array(
+ '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII
+ '。', // full-width ideographic full-stop
+ '．', '！', '？', // double-width roman forms
+ '｡', // half-width ideographic full stop
+ );
+
+ $endgroup = implode( '|', $endchars );
+ $end = "(?:$endgroup)";
+ $sentence = ".+?$end+";
+ $regexp = "/^($sentence){{$requestedSentenceCount}}/u";
+ if( preg_match( $regexp, $text, $matches ) ) {
+ $text = $matches[0]; // no early return: tidy() and wfProfileOut() below must still run
+ } else {
+ // No sentence boundary matched: fall back to the first line
+ $lines = explode( "\n", $text );
+ $text = trim( $lines[0] );
+ }
+ $text = $this->tidy( $text );
+ wfProfileOut( __METHOD__ );
+ return $text;
+ }
+
+ /**
+ * A simple wrapper around tidy
+ * @param string $text
+ */
+ private function tidy( $text ) {
+ global $wgUseTidy;
+
+ wfProfileIn( __METHOD__ );
+ if ( $wgUseTidy && !$this->params['plaintext'] ) {
$text = trim ( MWTidy::tidy( $text ) );
}
- $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
wfProfileOut( __METHOD__ );
return $text;
}
public function getAllowedParams() {
return array(
- 'length' => array(
+ 'chars' => array(
ApiBase::PARAM_TYPE => 'integer',
ApiBase::PARAM_MIN => 1,
),
+ 'sentences' => array(
+ ApiBase::PARAM_TYPE => 'integer',
+ ApiBase::PARAM_MIN => 1,
+ ),
'limit' => array(
ApiBase::PARAM_DFLT => 1,
ApiBase::PARAM_TYPE => 'limit',
@@ -258,7 +311,8 @@
public function getParamDescription() {
return array(
- 'length' => 'How many characters to return, actual text
returned might be slightly longer.',
+ 'chars' => 'How many characters to return, actual text
returned might be slightly longer.',
+ 'sentences' => 'How many sentences to return',
'limit' => 'How many extracts to return. ',
'intro' => 'Return only content before the first
section',
'plaintext' => 'Return extracts as plaintext instead of
limited HTML',
@@ -284,7 +338,7 @@
public function getExamples() {
return array(
-
'api.php?action=query&prop=extracts&exlength=175&titles=Therion' => 'Get a
175-character extract',
+
'api.php?action=query&prop=extracts&exchars=175&titles=Therion' => 'Get a
175-character extract',
);
}
@@ -329,8 +383,8 @@
$text = parent::getText();
if ( $this->plainText ) {
$text = html_entity_decode( $text );
- $text = str_replace( "\r", "\n", $text );
- $text = preg_replace( "/\n{3,}/", "\n\n", $text );
+ $text = str_replace( "\r", "\n", $text ); // for Windows
+ $text = preg_replace( "/\n{3,}/", "\n\n", $text ); //
normalise newlines
$text = preg_replace_callback(
"/" . ApiQueryExtracts::SECTION_MARKER_START .
'(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m",
array( $this, 'sectionCallback' ),
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs