DCausse has uploaded a new change for review. https://gerrit.wikimedia.org/r/237693
Change subject: Implement searchSuggestions from SearchEngine ...................................................................... Implement searchSuggestions from SearchEngine Returns completion suggester results if enabled, fallback to default implementation otherwise. (I6fd3a7e in core is needed) Bug: T112028 Change-Id: Ida9b9f89043f503a68d50e3f10046dd102b9a2ff --- M CirrusSearch.php M includes/CirrusSearch.php M includes/Searcher.php 3 files changed, 112 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/93/237693/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index 0a624ca..5a6fc16 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -694,6 +694,17 @@ $wgCirrusSearchCompletionSettings = $wgCirrusSearchCompletionProfiles['default']; /** + * Use the completion suggester as the default implementation for searchSuggestions + * used by OpenSearch API. + * You have to build the completion suggester index with the maintenance script + * updateSuggesterIndex.php. The suggester only supports queries to the main + * namespace. PrefixSearch will be used in all other cases. + * + * NOTE: This is an experimental API + */ +$wgCirrusSearchUseCompletionSuggester = false; + +/** * Profile for geo context search as you type suggestion (completion suggestion) * (see profiles/SuggestProfiles.php for more details.) * diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php index e03436e..b2702ed 100644 --- a/includes/CirrusSearch.php +++ b/includes/CirrusSearch.php @@ -218,6 +218,99 @@ return $status->isOk() ? $status->getValue() : $status; } + /** + * This implementation will run the completion suggester if it's enabled and if the + * query is for NS_MAIN. Fallback to SearchEngine default implementation otherwise. 
+ * + * {@inheritDoc} + * + */ + public function searchSuggestions( $search ) { + + $config = ConfigFactory::getDefaultInstance()->makeConfig( 'CirrusSearch' ); + if ( !$config->getElement( 'CirrusSearchUseCompletionSuggester' ) ) { + // Completion suggester is not enabled, fallback to default implementation + return parent::searchSuggestions( $search ); + } + + // We will certainly ignore queries that could run on NS_MAIN + // but we would have to import most of the NS detection code + // from PrefixSearch. + + $probableNsLookup = strpos( ':', $search ) !== false; + if ( $probableNsLookup + || count( $this->namespaces ) != 1 + || $this->namespaces[0] !== NS_MAIN ) { + // Fallback to prefix search if we are not on content namespace + //return parent::searchSuggestions( $search ); + } + + $context = RequestContext::getMain(); + $user = $context->getUser(); + // offset is omitted, suggestions do not support scrolling results + $searcher = new Searcher( 0, $this->limit, $config, $this->namespaces, $user, $this->indexBaseName ); + + $response = $searcher->suggest( $search ); + if ( !$response->isOK() ) { + // Errors will be logged, let's try the exact db match + $suggestions = array(); + } else { + $suggestions = $response->getValue(); + } + + // if the content language has variants, try to retrieve fallback results + $fallbackLimit = $this->limit - count( $suggestions ); + if ( $fallbackLimit > 0 ) { + global $wgContLang; + + $fallbackSearches = $wgContLang->autoConvertToAllVariants( $search ); + $fallbackSearches = array_diff( array_unique( $fallbackSearches ), array( $search ) ); + + foreach ( $fallbackSearches as $fbs ) { + // @todo: verify that searcher is re-usable and add a limit setter. 
+ $searcher = new Searcher( 0, $fallbackLimit, $config, $this->namespaces, $user, $this->indexBaseName ); + $fallbackResponse = $searcher->suggest( $search ); + $pageIds = array_columns( $suggestions, 'pageId' ); + + // Same page can be returned (fuzzy suggestions) + foreach( $fallbackSuggestions as $s ) { + if ( !in_array ( $s['pageId'], $pageIds ) ) { + $suggestions[] = $s; + } + } + + $fallbackLimit -= $this->limit - count( $results ); + + if ( $fallbackLimit == 0 ) { + break; + } + } + } + + $results = array(); + foreach ( $suggestions as $s ) { + // Rescorer wants strings... + $t = $s['title']; + $results[] = $t->getPrefixedText(); + } + + // now we can trim + $search = trim( $search ); + + // Rescore results with an exact title match + // @todo: can we optimize this rescorer? we should have the resolved title's id + // from the backend, we could maybe remove some getRedirectTarget resolution + $rescorer = new SearchExactMatchRescorer(); + $results = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit ); + + // Opensearch wants titles... 
+ $titles = array_map( 'Title::newFromText', $results ); + $lb = new LinkBatch( $titles ); + $lb->setCaller( __METHOD__ ); + $lb->execute(); + return $titles; + } + private function moreLikeThis( $term, $searcher, $options ) { // Expand titles chasing through redirects $titles = array(); diff --git a/includes/Searcher.php b/includes/Searcher.php index 34e45fc..2bd05fa 100644 --- a/includes/Searcher.php +++ b/includes/Searcher.php @@ -5,6 +5,7 @@ use Elastica; use Category; use CirrusSearch; +use CirrusSearch\BuildDocument\SuggestBuilder; use CirrusSearch\Extra\Filter\SourceRegex; use CirrusSearch\Search\Escaper; use CirrusSearch\Search\Filters; @@ -775,7 +776,13 @@ * @return Status */ public function suggest( $text, $context = null ) { - $this->term = $text; + // Do not remove spaces at the end, the user might tell us he finished writing a word + $this->term = ltrim( $text ); + + if ( mb_strlen( $this->term ) > SuggestBuilder::MAX_INPUT_LENGTH ) { + // Trim the query otherwise we won't find results + $this->term = mb_substr( $this->term, 0, SuggestBuilder::MAX_INPUT_LENGTH ); + } $suggest = array( 'text' => $text ); $queryLen = mb_strlen( trim( $text ) ); // Avoid cheating with spaces -- To view, visit https://gerrit.wikimedia.org/r/237693 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ida9b9f89043f503a68d50e3f10046dd102b9a2ff Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: DCausse <dcau...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits