jenkins-bot has submitted this change and it was merged.

Change subject: Implement interwiki searches
......................................................................


Implement interwiki searches

Bug: 44420
Change-Id: I15c8f53184b3383c24fa8c513fd952eb22b05923
---
M CirrusSearch.php
M includes/CirrusSearch.php
A includes/InterwikiSearcher.php
M includes/Result.php
M includes/ResultSet.php
M includes/ResultsType.php
M includes/Searcher.php
7 files changed, 174 insertions(+), 8 deletions(-)

Approvals:
  Manybubbles: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/CirrusSearch.php b/CirrusSearch.php
index 789a65a..b3af04b 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -204,6 +204,11 @@
 // Should Cirrus show the score?
 $wgCirrusSearchShowScore = false;
 
+// CirrusSearch interwiki searching
+// Keys are interwiki prefixes; values are the base index names to search.
+// Results are cached.
+$wgCirrusSearchInterwikiSources = array();
+
 $includes = __DIR__ . "/includes/";
 $buildDocument = $includes . 'BuildDocument/';
 /**
@@ -223,6 +228,8 @@
 $wgAutoloadClasses['CirrusSearch\LinksUpdateJob'] = $includes . 
'LinksUpdateJob.php';
 $wgAutoloadClasses['CirrusSearch\LinksUpdateSecondaryJob'] = $includes . 
'LinksUpdateSecondaryJob.php';
 $wgAutoloadClasses['CirrusSearch\FullTextResultsType'] = $includes . 
'ResultsType.php';
+$wgAutoloadClasses['CirrusSearch\InterwikiResultsType'] = $includes . 
'ResultsType.php';
+$wgAutoloadClasses['CirrusSearch\InterwikiSearcher'] = $includes . 
'InterwikiSearcher.php';
 $wgAutoloadClasses['CirrusSearch\Job'] = $includes . 'Job.php';
 $wgAutoloadClasses['CirrusSearch\MappingConfigBuilder'] = $includes . 
'MappingConfigBuilder.php';
 $wgAutoloadClasses['CirrusSearch\MassIndexJob'] = $includes . 
'MassIndexJob.php';
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index 85f4c19..3d0114e 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -1,5 +1,6 @@
 <?php
 
+use CirrusSearch\InterwikiSearcher;
 use CirrusSearch\Searcher;
 
 /**
@@ -94,8 +95,15 @@
                // so we must unwrap all OK statuses.  Note that $status can be 
"good" and still contain null
                // since that is interpreted as no results.
                if ( $status->isOK() ) {
-                       return $status->getValue();
+                       $result = $status->getValue();
+                       $interwiki = new InterwikiSearcher( $this->offset, 
$this->limit, $this->namespaces, $user );
+                       $interwikiResult = $interwiki->getInterwikiResults( 
$term );
+                       if ( $interwikiResult ) {
+                               $result->setInterwikiResults( $interwikiResult 
);
+                       }
+                       return $result;
                }
+
                return $status;
        }
 
diff --git a/includes/InterwikiSearcher.php b/includes/InterwikiSearcher.php
new file mode 100644
index 0000000..e0c16aa
--- /dev/null
+++ b/includes/InterwikiSearcher.php
@@ -0,0 +1,82 @@
+<?php
+
+namespace CirrusSearch;
+
+/**
+ * Performs searches using Elasticsearch -- on interwikis! 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+class InterwikiSearcher extends Searcher {
+       /** How long to cache results for, 2 hours */
+       const CACHE_TIME = 7200;
+
+       /**
+        * @var array interwiki mappings to search
+        */
+       private $interwikis;
+
+       /**
+        * Constructor
+        * @param int $offset Offset the results by this much
+        * @param int $limit Limit the results to this many
+        * @param array $namespaces Namespace numbers to search
+        * @param mixed $user Forwarded as-is to the parent Searcher constructor
+        */
+       public function __construct( $offset, $limit, $namespaces, $user ) {
+               global $wgCirrusSearchInterwikiSources;
+               parent::__construct( $offset, $limit, $namespaces, $user );
+               $this->interwikis = $wgCirrusSearchInterwikiSources;
+               // Only allow core namespaces. We can't be sure any others exist
+               $this->namespaces = array_filter( $namespaces, function( $v ) {
+                       return $v <= 15;
+               } );
+       }
+
+       /**
+        * Fetch interwiki search results, using the cached copy when there is one
+        * @param string $term Search term to look for
+        * @return ResultSet|null
+        */
+       public function getInterwikiResults( $term ) {
+               global $wgMemc;
+
+               // Return early if we can
+               if ( !$this->interwikis || !$term ) {
+                       return;
+               }
+
+               $key = wfMemcKey(
+                       'cirrus',
+                       'interwiki',
+                       implode( ':', array_keys( $this->interwikis ) ),
+                       md5( $term )
+               );
+
+               $res = $wgMemc->get( $key );
+               if ( !$res ) {
+                       $this->setExplicitIndexes( array_values( 
$this->interwikis ) );
+                       $this->setResultsType( new InterwikiResultsType( 
$this->interwikis ) );
+                       $results = $this->searchText( $term, false, false );
+                       if ( $results->isOk() ) {
+                               $res = $results->getValue();
+                               $wgMemc->set( $key, $res, self::CACHE_TIME );
+                       }
+               }
+
+               return $res;
+       }
+}
diff --git a/includes/Result.php b/includes/Result.php
index 22ddcf3..4c57b89 100644
--- a/includes/Result.php
+++ b/includes/Result.php
@@ -32,19 +32,24 @@
        private $byteSize;
        private $score;
        private $timestamp;
+       private $interwiki;
 
        /**
         * Build the result.
         * @param $results \Elastica\ResultSet containing all search results
+        * @param $result \Elastica\Result containing the given search result
+        * @param $interwikis array mapping interwiki prefix to index base name
         * @param $result \Elastic\Result containing information about the 
result this class should represent
         */
-       public function __construct( $results, $result ) {
+       public function __construct( $results, $result, $interwikis = array() ) 
{
                global $wgCirrusSearchShowScore;
 
-               $this->mTitle = Title::makeTitle( $result->namespace, 
$result->title );
+               $this->maybeSetInterwiki( $result, $interwikis );
+               $this->mTitle = Title::makeTitle( $result->namespace, 
$result->title, '', $this->interwiki );
                if ( $this->getTitle()->getNamespace() == NS_FILE ) {
                        $this->mImage = wfFindFile( $this->mTitle );
                }
+
                $data = $result->getData();
                // TODO remove ternary once text.word_count is available 
everywhere
                $this->wordCount = isset( $data['text.word_count'] ) ? 
$data['text.word_count'] : $result->text_words;
@@ -183,6 +188,18 @@
                return str_replace( $markers, '', $highlighted );
        }
 
+       private function maybeSetInterwiki( $result, $interwikis ) {
+               $iw = '';
+               array_walk( $interwikis, function( $indexBase, $interwiki ) use 
( $result, &$iw ) {
+                       $index = $result->getIndex();
+                       $pos = strpos( $index, $indexBase );
+                       if ( $pos === 0 && $index[strlen( $indexBase )] == '_' 
) {
+                               $iw = $interwiki;
+                       }
+               } );
+               $this->interwiki = $iw;
+       }
+
        public function getTitleSnippet( $terms ) {
                return $this->titleSnippet;
        }
@@ -226,4 +243,8 @@
        public function isFileMatch() {
                return $this->isFileMatch;
        }
+
+       public function getInterwikiPrefix() {
+               return $this->interwiki;
+       }
 }
diff --git a/includes/ResultSet.php b/includes/ResultSet.php
index 4f863c1..de2c197 100644
--- a/includes/ResultSet.php
+++ b/includes/ResultSet.php
@@ -24,12 +24,14 @@
 class ResultSet extends SearchResultSet {
        private $result, $hits, $totalHits, $suggestionQuery, 
$suggestionSnippet;
        private $searchContainedSyntax;
+       private $interwikiMap, $interwikiResults;
 
-       public function __construct( $suggestPrefixes, $suggestSuffixes, $res, 
$searchContainedSyntax ) {
+       public function __construct( $suggestPrefixes, $suggestSuffixes, $res, 
$searchContainedSyntax, $interwikis = array() ) {
                $this->result = $res;
                $this->searchContainedSyntax = $searchContainedSyntax;
                $this->hits = $res->count();
                $this->totalHits = $res->getTotalHits();
+               $this->interwikiMap = $interwikis;
                $suggestion = $this->findSuggestion();
                if ( $suggestion && ! 
$this->resultContainsFullyHighlightedMatch() ) {
                        $this->suggestionQuery = $suggestion[ 'text' ];
@@ -137,12 +139,17 @@
                $current = $this->result->current();
                if ( $current ) {
                        $this->result->next();
-                       return new Result( $this->result, $current );
+                       return new Result( $this->result, $current, 
$this->interwikiMap );
                }
                return false;
        }
 
+       public function setInterwikiResults( $res ) {
+               $this->interwikiResults = $res;
+       }
+
        public function getInterwikiResults() {
+               return $this->interwikiResults;
        }
 
        public function searchContainedSyntax() {
diff --git a/includes/ResultsType.php b/includes/ResultsType.php
index 2ad81e9..1681977 100644
--- a/includes/ResultsType.php
+++ b/includes/ResultsType.php
@@ -128,3 +128,21 @@
                return $fields;
        }
 }
+
+class InterwikiResultsType extends TitleResultsType {
+       /**
+        * @var array interwiki prefix mappings
+        */
+       private $prefixes;
+
+       /**
+        * Constructor
+        */
+       public function __construct( $interwikis ) {
+               $this->prefixes = $interwikis;
+       }
+
+       public function transformElasticsearchResult( $suggestPrefixes, 
$suggestSuffixes, $result, $searchContainedSyntax ) {
+               return new ResultSet( $suggestPrefixes, $suggestSuffixes, 
$result, $searchContainedSyntax, $this->prefixes );
+       }
+}
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 2579d8c..a82bdcb 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -50,14 +50,17 @@
         * @var integer search offset
         */
        private $offset;
+
        /**
         * @var integer maximum number of result
         */
        private $limit;
+
        /**
         * @var array(integer) namespaces in which to search
         */
-       private $namespaces;
+       protected $namespaces;
+
        /**
         * @var ResultsType|null type of results.  null defaults to 
FullTextResultsType
         */
@@ -141,6 +144,11 @@
        private $searchContainedSyntax = false;
 
        /**
+        * @var array indexes to use, if not the default
+        */
+       private $explicitIndexes;
+
+       /**
         * Constructor
         * @param int $offset Offset the results by this much
         * @param int $limit Limit the results to this many
@@ -171,6 +179,13 @@
         */
        public function setSort( $sort ) {
                $this->sort = $sort;
+       }
+
+       /**
+        * @param array $idxs Indexes to use, explicitly
+        */
+       public function setExplicitIndexes( $idxs ) {
+               $this->explicitIndexes = $idxs;
        }
 
        /**
@@ -718,8 +733,16 @@
                }
 
                // Setup the search
-               $search = Connection::getPageType( $this->indexBaseName, 
$this->pickIndexTypeFromNamespaces() )
-                       ->createSearch( $query, $queryOptions );
+               if ( $this->explicitIndexes ) {
+                       $baseName = array_shift( $this->explicitIndexes );
+                       $extraIndexes = $this->explicitIndexes;
+                       $pageType = Connection::getPageType( $baseName );
+                               
+               } else {
+                       $pageType = Connection::getPageType( 
$this->indexBaseName,
+                               $this->pickIndexTypeFromNamespaces() );
+               }
+               $search = $pageType->createSearch( $query, $queryOptions );
                foreach ( $extraIndexes as $i ) {
                        $search->addIndex( $i );
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/105986
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I15c8f53184b3383c24fa8c513fd952eb22b05923
Gerrit-PatchSet: 10
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Chad <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to