jenkins-bot has submitted this change and it was merged.
Change subject: Implement interwiki searches
......................................................................
Implement interwiki searches
Bug: 44420
Change-Id: I15c8f53184b3383c24fa8c513fd952eb22b05923
---
M CirrusSearch.php
M includes/CirrusSearch.php
A includes/InterwikiSearcher.php
M includes/Result.php
M includes/ResultSet.php
M includes/ResultsType.php
M includes/Searcher.php
7 files changed, 174 insertions(+), 8 deletions(-)
Approvals:
Manybubbles: Looks good to me, approved
jenkins-bot: Verified
diff --git a/CirrusSearch.php b/CirrusSearch.php
index 789a65a..b3af04b 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -204,6 +204,11 @@
// Should Cirrus show the score?
$wgCirrusSearchShowScore = false;
+// CirrusSearch interwiki searching
+// Keys are the interwiki prefixes, values are the base names of the indexes to search.
+// Results are cached for InterwikiSearcher::CACHE_TIME seconds.
+$wgCirrusSearchInterwikiSources = array();
+
$includes = __DIR__ . "/includes/";
$buildDocument = $includes . 'BuildDocument/';
/**
@@ -223,6 +228,8 @@
$wgAutoloadClasses['CirrusSearch\LinksUpdateJob'] = $includes .
'LinksUpdateJob.php';
$wgAutoloadClasses['CirrusSearch\LinksUpdateSecondaryJob'] = $includes .
'LinksUpdateSecondaryJob.php';
$wgAutoloadClasses['CirrusSearch\FullTextResultsType'] = $includes .
'ResultsType.php';
+$wgAutoloadClasses['CirrusSearch\InterwikiResultsType'] = $includes .
'ResultsType.php';
+$wgAutoloadClasses['CirrusSearch\InterwikiSearcher'] = $includes .
'InterwikiSearcher.php';
$wgAutoloadClasses['CirrusSearch\Job'] = $includes . 'Job.php';
$wgAutoloadClasses['CirrusSearch\MappingConfigBuilder'] = $includes .
'MappingConfigBuilder.php';
$wgAutoloadClasses['CirrusSearch\MassIndexJob'] = $includes .
'MassIndexJob.php';
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index 85f4c19..3d0114e 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -1,5 +1,6 @@
<?php
+use CirrusSearch\InterwikiSearcher;
use CirrusSearch\Searcher;
/**
@@ -94,8 +95,15 @@
// so we must unwrap all OK statuses. Note that $status can be
"good" and still contain null
// since that is interpreted as no results.
if ( $status->isOK() ) {
- return $status->getValue();
+ $result = $status->getValue();
+ $interwiki = new InterwikiSearcher( $this->offset,
$this->limit, $this->namespaces, $user );
+ $interwikiResult = $interwiki->getInterwikiResults(
$term );
+ if ( $interwikiResult ) {
+ $result->setInterwikiResults( $interwikiResult
);
+ }
+ return $result;
}
+
return $status;
}
diff --git a/includes/InterwikiSearcher.php b/includes/InterwikiSearcher.php
new file mode 100644
index 0000000..e0c16aa
--- /dev/null
+++ b/includes/InterwikiSearcher.php
@@ -0,0 +1,82 @@
+<?php
+
+namespace CirrusSearch;
+
+/**
+ * Performs searches using Elasticsearch -- on interwikis!
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+class InterwikiSearcher extends Searcher {
+	/** How long to cache results for, in seconds (2 hours) */
+	const CACHE_TIME = 7200;
+
+	/**
+	 * @var array interwiki mappings to search (interwiki prefix => index to
+	 *  search), copied from $wgCirrusSearchInterwikiSources
+	 */
+	private $interwikis;
+
+	/**
+	 * Constructor
+	 * @param int $offset Offset the results by this much
+	 * @param int $limit Limit the results to this many
+	 * @param array $namespaces Namespace numbers to search
+	 * @param mixed $user Passed through unchanged to Searcher::__construct()
+	 */
+	public function __construct( $offset, $limit, $namespaces, $user ) {
+		global $wgCirrusSearchInterwikiSources;
+		parent::__construct( $offset, $limit, $namespaces, $user );
+		$this->interwikis = $wgCirrusSearchInterwikiSources;
+		// Only allow core namespaces. We can't be sure any others exist
+		$this->namespaces = array_filter( $namespaces, function( $v ) {
+			return $v <= 15;
+		} );
+	}
+
+	/**
+	 * Fetch interwiki search results, from the cache if they are there.
+	 *
+	 * @param string $term Search term to look for
+	 * @return ResultSet|null The results, or null when there is nothing to
+	 *  search, the search failed, or the search returned nothing
+	 */
+	public function getInterwikiResults( $term ) {
+		global $wgMemc;
+
+		// Return early if we can
+		if ( !$this->interwikis || !$term ) {
+			return null;
+		}
+
+		// NOTE(review): the key only varies on the configured prefixes and the
+		// term, so searches that differ only in namespaces, offset or limit
+		// share one cache entry -- confirm that is intended.
+		$key = wfMemcKey(
+			'cirrus',
+			'interwiki',
+			implode( ':', array_keys( $this->interwikis ) ),
+			md5( $term )
+		);
+
+		$cached = $wgMemc->get( $key );
+		if ( $cached ) {
+			return $cached;
+		}
+
+		$this->setExplicitIndexes( array_values( $this->interwikis ) );
+		$this->setResultsType( new InterwikiResultsType( $this->interwikis ) );
+		$status = $this->searchText( $term, false, false );
+		if ( !$status->isOK() ) {
+			// Failures are not cached, and are reported as null rather than
+			// leaking whatever falsy value the cache lookup handed back.
+			return null;
+		}
+
+		$res = $status->getValue();
+		$wgMemc->set( $key, $res, self::CACHE_TIME );
+		return $res;
+	}
+}
diff --git a/includes/Result.php b/includes/Result.php
index 22ddcf3..4c57b89 100644
--- a/includes/Result.php
+++ b/includes/Result.php
@@ -32,19 +32,24 @@
private $byteSize;
private $score;
private $timestamp;
+ private $interwiki;
/**
* Build the result.
* @param $results \Elastica\ResultSet containing all search results
+ * @param $result \Elastica\Result containing the given search result
+ * @param $interwikis array Map of interwiki prefix => index base name, used to find this result's interwiki prefix, if any
* @param $result \Elastic\Result containing information about the
result this class should represent
*/
- public function __construct( $results, $result ) {
+ public function __construct( $results, $result, $interwikis = array() )
{
global $wgCirrusSearchShowScore;
- $this->mTitle = Title::makeTitle( $result->namespace,
$result->title );
+ $this->maybeSetInterwiki( $result, $interwikis );
+ $this->mTitle = Title::makeTitle( $result->namespace,
$result->title, '', $this->interwiki );
if ( $this->getTitle()->getNamespace() == NS_FILE ) {
$this->mImage = wfFindFile( $this->mTitle );
}
+
$data = $result->getData();
// TODO remove ternary once text.word_count is available
everywhere
$this->wordCount = isset( $data['text.word_count'] ) ?
$data['text.word_count'] : $result->text_words;
@@ -183,6 +188,18 @@
return str_replace( $markers, '', $highlighted );
}
+	/**
+	 * Work out which interwiki prefix, if any, this result belongs to and
+	 * store it in $this->interwiki.
+	 *
+	 * A result belongs to a prefix when the name of the index it came from is
+	 * that prefix's index base name followed by an underscore. If several
+	 * entries match, the last one wins; if none match, the prefix is ''.
+	 *
+	 * @param $result \Elastica\Result result to inspect; only getIndex() is used
+	 * @param $interwikis array interwiki prefix => index base name
+	 */
+	private function maybeSetInterwiki( $result, $interwikis ) {
+		$index = $result->getIndex();
+		$iw = '';
+		foreach ( $interwikis as $interwiki => $indexBase ) {
+			// Compare against "<base>_" in one go. Reading
+			// $index[strlen( $indexBase )] directly is out of range when the
+			// index name is exactly the base name.
+			$prefix = $indexBase . '_';
+			if ( strncmp( $index, $prefix, strlen( $prefix ) ) === 0 ) {
+				$iw = $interwiki;
+			}
+		}
+		$this->interwiki = $iw;
+	}
+
public function getTitleSnippet( $terms ) {
return $this->titleSnippet;
}
@@ -226,4 +243,8 @@
public function isFileMatch() {
return $this->isFileMatch;
}
+
+ /**
+ * @return string interwiki prefix stored by maybeSetInterwiki() during construction, '' if no prefix matched
+ */
+ public function getInterwikiPrefix() {
+ return $this->interwiki;
+ }
}
diff --git a/includes/ResultSet.php b/includes/ResultSet.php
index 4f863c1..de2c197 100644
--- a/includes/ResultSet.php
+++ b/includes/ResultSet.php
@@ -24,12 +24,14 @@
class ResultSet extends SearchResultSet {
private $result, $hits, $totalHits, $suggestionQuery,
$suggestionSnippet;
private $searchContainedSyntax;
+ private $interwikiMap, $interwikiResults;
- public function __construct( $suggestPrefixes, $suggestSuffixes, $res,
$searchContainedSyntax ) {
+ public function __construct( $suggestPrefixes, $suggestSuffixes, $res,
$searchContainedSyntax, $interwikis = array() ) {
$this->result = $res;
$this->searchContainedSyntax = $searchContainedSyntax;
$this->hits = $res->count();
$this->totalHits = $res->getTotalHits();
+ $this->interwikiMap = $interwikis;
$suggestion = $this->findSuggestion();
if ( $suggestion && !
$this->resultContainsFullyHighlightedMatch() ) {
$this->suggestionQuery = $suggestion[ 'text' ];
@@ -137,12 +139,17 @@
$current = $this->result->current();
if ( $current ) {
$this->result->next();
- return new Result( $this->result, $current );
+ return new Result( $this->result, $current,
$this->interwikiMap );
}
return false;
}
+ /**
+ * Attach interwiki search results to this result set.
+ * @param $res mixed value that getInterwikiResults() should hand back
+ */
+ public function setInterwikiResults( $res ) {
+ $this->interwikiResults = $res;
+ }
+
+ /**
+ * @return mixed whatever was passed to setInterwikiResults(), or null if it was never called
+ */
public function getInterwikiResults() {
+ return $this->interwikiResults;
}
public function searchContainedSyntax() {
diff --git a/includes/ResultsType.php b/includes/ResultsType.php
index 2ad81e9..1681977 100644
--- a/includes/ResultsType.php
+++ b/includes/ResultsType.php
@@ -128,3 +128,21 @@
return $fields;
}
}
+
+class InterwikiResultsType extends TitleResultsType {
+	/**
+	 * @var array interwiki prefix mappings, handed to every ResultSet this type builds
+	 */
+	private $interwikiMap;
+
+	/**
+	 * Constructor
+	 * @param array $interwikis interwiki prefix mappings to remember
+	 */
+	public function __construct( $interwikis ) {
+		$this->interwikiMap = $interwikis;
+	}
+
+	/**
+	 * Wrap a search result in a ResultSet that also carries the interwiki mappings.
+	 * The first four arguments are forwarded to the ResultSet constructor untouched.
+	 * @return ResultSet
+	 */
+	public function transformElasticsearchResult( $suggestPrefixes, $suggestSuffixes, $result, $searchContainedSyntax ) {
+		$map = $this->interwikiMap;
+		return new ResultSet( $suggestPrefixes, $suggestSuffixes, $result, $searchContainedSyntax, $map );
+	}
+}
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 2579d8c..a82bdcb 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -50,14 +50,17 @@
* @var integer search offset
*/
private $offset;
+
/**
* @var integer maximum number of result
*/
private $limit;
+
/**
* @var array(integer) namespaces in which to search
*/
- private $namespaces;
+ protected $namespaces;
+
/**
* @var ResultsType|null type of results. null defaults to
FullTextResultsType
*/
@@ -141,6 +144,11 @@
private $searchContainedSyntax = false;
/**
+ * @var array indexes to use, if not the default
+ */
+ private $explicitIndexes;
+
+ /**
* Constructor
* @param int $offset Offset the results by this much
* @param int $limit Limit the results to this many
@@ -171,6 +179,13 @@
*/
public function setSort( $sort ) {
$this->sort = $sort;
+ }
+
+ /**
+ * Search these indexes instead of the default one. When the search is set
+ * up, the first entry is used to pick the page type and the remaining
+ * entries are added as extra indexes.
+ * @param array $idxs Indexes to use, explicitly
+ */
+ public function setExplicitIndexes( $idxs ) {
+ $this->explicitIndexes = $idxs;
}
/**
@@ -718,8 +733,16 @@
}
// Setup the search
- $search = Connection::getPageType( $this->indexBaseName,
$this->pickIndexTypeFromNamespaces() )
- ->createSearch( $query, $queryOptions );
+ if ( $this->explicitIndexes ) {
+ $baseName = array_shift( $this->explicitIndexes );
+ $extraIndexes = $this->explicitIndexes;
+ $pageType = Connection::getPageType( $baseName );
+
+ } else {
+ $pageType = Connection::getPageType(
$this->indexBaseName,
+ $this->pickIndexTypeFromNamespaces() );
+ }
+ $search = $pageType->createSearch( $query, $queryOptions );
foreach ( $extraIndexes as $i ) {
$search->addIndex( $i );
}
--
To view, visit https://gerrit.wikimedia.org/r/105986
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I15c8f53184b3383c24fa8c513fd952eb22b05923
Gerrit-PatchSet: 10
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Chad <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits