DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/237693

Change subject: Implement searchSuggestions from SearchEngine
......................................................................

Implement searchSuggestions from SearchEngine

Returns completion suggester results if enabled; falls back to the default
implementation otherwise.
(I6fd3a7e in core is needed)

Bug: T112028
Change-Id: Ida9b9f89043f503a68d50e3f10046dd102b9a2ff
---
M CirrusSearch.php
M includes/CirrusSearch.php
M includes/Searcher.php
3 files changed, 112 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/93/237693/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index 0a624ca..5a6fc16 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -694,6 +694,17 @@
 $wgCirrusSearchCompletionSettings = 
$wgCirrusSearchCompletionProfiles['default'];
 
 /**
+ * Use the completion suggester as the default implementation for 
searchSuggestions
+ * used by OpenSearch API.
+ * You have to build the completion suggester index with the maintenance script
+ * updateSuggesterIndex.php. The suggester only supports queries to the main
+ * namespace. PrefixSearch will be used in all other cases.
+ *
+ * NOTE: This is an experimental API
+ */
+$wgCirrusSearchUseCompletionSuggester = false;
+
+/**
  * Profile for geo context search as you type suggestion (completion 
suggestion)
  * (see profiles/SuggestProfiles.php for more details.)
  *
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index e03436e..b2702ed 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -218,6 +218,99 @@
                return $status->isOk() ? $status->getValue() : $status;
        }
 
+       /**
+        * This implementation will run the completion suggester if it's 
enabled and if the
+        * query is for NS_MAIN. Falls back to the SearchEngine default implementation 
otherwise.
+        *
+        * {@inheritDoc}
+        *
+        */
+       public function searchSuggestions( $search ) {
+
+               $config = ConfigFactory::getDefaultInstance()->makeConfig( 
'CirrusSearch' );
+               if ( !$config->getElement( 'CirrusSearchUseCompletionSuggester' 
) ) {
+                       // Completion suggester is not enabled, fall back to 
default implementation
+                       return parent::searchSuggestions( $search );
+               }
+
+               // This will certainly exclude some queries that could run on NS_MAIN,
+               // but handling them would require importing most of the NS detection
+               // code from PrefixSearch.
+
+               $probableNsLookup = strpos( ':', $search ) !== false;
+               if ( $probableNsLookup
+                               || count( $this->namespaces ) != 1
+                               || $this->namespaces[0] !== NS_MAIN ) {
+                       // Fallback to prefix search if we are not on content 
namespace
+                       //return parent::searchSuggestions( $search );
+               }
+
+               $context = RequestContext::getMain();
+               $user = $context->getUser();
+               // offset is omitted, suggestions do not support scrolling 
results
+               $searcher = new Searcher( 0, $this->limit, $config, 
$this->namespaces, $user, $this->indexBaseName );
+
+               $response = $searcher->suggest( $search );
+               if ( !$response->isOK() ) {
+                       // Errors will be logged, let's try the exact db match
+                       $suggestions = array();
+               } else {
+                       $suggestions = $response->getValue();
+               }
+
+               // if the content language has variants, try to retrieve 
fallback results
+               $fallbackLimit = $this->limit - count( $suggestions );
+               if ( $fallbackLimit > 0 ) {
+                       global $wgContLang;
+
+                       $fallbackSearches = 
$wgContLang->autoConvertToAllVariants( $search );
+                       $fallbackSearches = array_diff( array_unique( 
$fallbackSearches ), array( $search ) );
+
+                       foreach ( $fallbackSearches as $fbs ) {
+                               // @todo: verify that searcher is re-usable and 
add a limit setter.
+                               $searcher = new Searcher( 0, $fallbackLimit, 
$config, $this->namespaces, $user, $this->indexBaseName );
+                               $fallbackResponse = $searcher->suggest( $search 
);
+                               $pageIds = array_columns( $suggestions, 
'pageId' );
+
+                               // Same page can be returned (fuzzy suggestions)
+                               foreach( $fallbackSuggestions as $s ) {
+                                       if ( !in_array ( $s['pageId'], $pageIds 
) ) {
+                                               $suggestions[] = $s;
+                                       }
+                               }
+
+                               $fallbackLimit -= $this->limit - count( 
$results );
+
+                               if ( $fallbackLimit == 0 ) {
+                                       break;
+                               }
+                       }
+               }
+
+               $results = array();
+               foreach ( $suggestions as $s ) {
+                       // Rescorer wants strings...
+                       $t = $s['title'];
+                       $results[] = $t->getPrefixedText();
+               }
+
+               // now we can trim
+               $search = trim( $search );
+
+               // Rescore results with an exact title match
+               // @todo: can we optimize this rescorer? we should have the 
resolved title's id
+               // from the backend, we could maybe remove some 
getRedirectTarget resolution
+               $rescorer = new SearchExactMatchRescorer();
+               $results = $rescorer->rescore( $search, $this->namespaces, 
$results, $this->limit );
+
+               // Opensearch wants titles...
+               $titles = array_map( 'Title::newFromText', $results );
+               $lb = new LinkBatch( $titles );
+               $lb->setCaller( __METHOD__ );
+               $lb->execute();
+               return $titles;
+       }
+
        private function moreLikeThis( $term, $searcher, $options ) {
                // Expand titles chasing through redirects
                $titles = array();
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 34e45fc..2bd05fa 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -5,6 +5,7 @@
 use Elastica;
 use Category;
 use CirrusSearch;
+use CirrusSearch\BuildDocument\SuggestBuilder;
 use CirrusSearch\Extra\Filter\SourceRegex;
 use CirrusSearch\Search\Escaper;
 use CirrusSearch\Search\Filters;
@@ -775,7 +776,13 @@
         * @return Status
         */
        public function suggest( $text, $context = null ) {
-               $this->term = $text;
+               // Do not remove spaces at the end, the user might tell us he 
finished writing a word
+               $this->term = ltrim( $text );
+
+               if ( mb_strlen( $this->term ) > 
SuggestBuilder::MAX_INPUT_LENGTH ) {
+                       // Trim the query otherwise we won't find results
+                       $this->term = mb_substr( $this->term, 0, 
SuggestBuilder::MAX_INPUT_LENGTH );
+               }
 
                $suggest = array( 'text' => $text );
                $queryLen = mb_strlen( trim( $text ) ); // Avoid cheating with 
spaces

-- 
To view, visit https://gerrit.wikimedia.org/r/237693
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ida9b9f89043f503a68d50e3f10046dd102b9a2ff
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to