jenkins-bot has submitted this change and it was merged.
Change subject: Add Completion Suggester as a Beta Feature
......................................................................
Add Completion Suggester as a Beta Feature
Override default opensearch for searchSuggest if the beta feature is enabled.
Bug: T112028
Depends-On: I35aece88333a65f6b1f55f7a87e2d14de4f5bea7
Change-Id: Ida9b9f89043f503a68d50e3f10046dd102b9a2ff
---
M CirrusSearch.php
M autoload.php
M i18n/en.json
M i18n/qqq.json
M includes/Api/Suggest.php
M includes/BuildDocument/SuggestBuilder.php
M includes/CirrusSearch.php
A includes/CompletionSuggester.php
M includes/Hooks.php
A includes/Search/SearchSuggestion.php
A includes/Search/SearchSuggestionSet.php
M includes/Searcher.php
M profiles/SuggestProfiles.php
A resources/ext.cirrus.suggest.js
A resources/images/cirrus-beta-ltr.svg
A resources/images/cirrus-beta-rtl.svg
M tests/browser/features/step_definitions/search_steps.rb
M tests/browser/features/suggest_api.feature
M tests/browser/features/support/hooks.rb
M tests/jenkins/FullyFeaturedConfig.php
A tests/unit/Search/SearchSuggestionSetTest.php
M tests/unit/SuggestBuilderTest.php
22 files changed, 1,518 insertions(+), 292 deletions(-)
Approvals:
Cindy-the-browser-test-bot: Looks good to me, but someone else must approve
EBernhardson: Looks good to me, approved
jenkins-bot: Verified
diff --git a/CirrusSearch.php b/CirrusSearch.php
index f625e16..6745153 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -745,6 +745,16 @@
$wgCirrusSearchCompletionSettings =
$wgCirrusSearchCompletionProfiles['default'];
/**
+ * Use the completion suggester as the default implementation for
searchSuggestions.
+ * You have to build the completion suggester index with the maintenance script
+ * updateSuggesterIndex.php. The suggester only supports queries to the main
+ * namespace. PrefixSearch will be used in all other cases.
+ *
+ * NOTE: This is an experimental API
+ */
+$wgCirrusSearchUseCompletionSuggester = false;
+
+/**
* Profile for geo context search as you type suggestion (completion
suggestion)
* (see profiles/SuggestProfiles.php for more details.)
*
@@ -868,6 +878,9 @@
$wgHooks[ 'TitleMoveComplete' ][] = 'CirrusSearch\Hooks::onTitleMoveComplete';
$wgHooks[ 'UnitTestsList' ][] = 'CirrusSearch\Hooks::onUnitTestsList';
$wgHooks[ 'ShowSearchHitTitle' ][] =
'CirrusSearch\Hooks::onShowSearchHitTitle';
+$wgHooks[ 'GetBetaFeaturePreferences' ][] =
'CirrusSearch\Hooks::getBetaFeaturePreferences';
+$wgHooks[ 'BeforePageDisplay' ][] = 'CirrusSearch\Hooks::onBeforePageDisplay';
+
/**
* i18n
*/
@@ -903,6 +916,24 @@
$wgConfigRegistry['CirrusSearch'] =
'CirrusSearch\SearchConfig::newFromGlobals';
/**
+ * Completion Suggester Beta Feature
+ */
+$wgResourceModules += array(
+ "ext.cirrus" => array(
+ 'scripts' => array(
+ 'resources/ext.cirrus.suggest.js'
+ ),
+ 'dependencies' => array(
+ 'mediawiki.searchSuggest'
+ ),
+ 'styles' => array(),
+ 'messages' => array(),
+ 'remoteExtPath' => 'CirrusSearch',
+ 'localBasePath' => __DIR__,
+ )
+);
+
+/**
* Jenkins configuration required to get all the browser tests passing cleanly.
*
* @todo re-enable the code below if/when browser tests are enabled again
diff --git a/autoload.php b/autoload.php
index 600b5a3..ddfdd74 100644
--- a/autoload.php
+++ b/autoload.php
@@ -26,6 +26,7 @@
'CirrusSearch\\CheckIndexes' => __DIR__ .
'/maintenance/checkIndexes.php',
'CirrusSearch\\CirrusIsSetup' => __DIR__ .
'/maintenance/cirrusNeedsToBeBuilt.php',
'CirrusSearch\\ClusterSettings' => __DIR__ .
'/includes/ClusterSettings.php',
+ 'CirrusSearch\\CompletionSuggester' => __DIR__ .
'/includes/CompletionSuggester.php',
'CirrusSearch\\Connection' => __DIR__ . '/includes/Connection.php',
'CirrusSearch\\DataSender' => __DIR__ . '/includes/DataSender.php',
'CirrusSearch\\Dump' => __DIR__ . '/includes/Dump.php',
@@ -103,6 +104,8 @@
'CirrusSearch\\Search\\ResultSet' => __DIR__ .
'/includes/Search/ResultSet.php',
'CirrusSearch\\Search\\ResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
'CirrusSearch\\Search\\SearchContext' => __DIR__ .
'/includes/Search/SearchContext.php',
+ 'CirrusSearch\\Search\\SearchSuggestion' => __DIR__ .
'/includes/Search/SearchSuggestion.php',
+ 'CirrusSearch\\Search\\SearchSuggestionSet' => __DIR__ .
'/includes/Search/SearchSuggestionSet.php',
'CirrusSearch\\Search\\SearchTextBaseQueryBuilder' => __DIR__ .
'/includes/Search/SearchTextQueryBuilders.php',
'CirrusSearch\\Search\\SearchTextCommonTermsQueryBuilder' => __DIR__ .
'/includes/Search/SearchTextQueryBuilders.php',
'CirrusSearch\\Search\\SearchTextQueryBuilder' => __DIR__ .
'/includes/Search/SearchTextQueryBuilders.php',
diff --git a/i18n/en.json b/i18n/en.json
index f06ff1f..7e6ba48 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -26,5 +26,7 @@
"cirrussearch-give-feedback": "Give us your feedback",
"cirrussearch-morelikethis-settings": " #<!-- leave this line exactly
as it is --> <pre>\n# This message lets you configure the settings of the
\"more like this\" feature.\n# Changes to this take effect immediately.\n# The
syntax is as follows:\n# * Everything from a \"#\" character to the end of
the line is a comment.\n# * Every non-blank line is the setting name followed
by a \":\" character followed by the setting value\n# The settings are:\n# *
min_doc_freq (integer): Minimum number of documents (per shard) that need a
term for it to be considered.\n# * max_doc_freq (integer): Maximum number of
documents (per shard) that have a term for it to be considered.\n#
High frequency terms are generally \"stop words\".\n# * max_query_terms
(integer): Maximum number of terms to be considered. This value is limited to
$wgCirrusSearchMoreLikeThisMaxQueryTermsLimit (100).\n# * min_term_freq
(integer): Minimum number of times the term appears in the input to doc to be
considered. For small fields (title) this value should be 1.\n# *
percent_terms_to_match (float 0 to 1): The percentage of terms to match on.
Defaults to 0.3 (30 percent).\n# * min_word_len (integer): Minimal length of
a term to be considered. Defaults to 0.\n# * max_word_len (integer): The
maximum word length above which words will be ignored. Defaults to unbounded
(0).\n# * fields (comma separated list of values): These are the fields to
use. Allowed fields are title, text, auxiliary_text, opening_text, headings and
all.\n# * use_fields (true|false) : Tell the \"more like this\" query to use
only the field data. Defaults to false: the system will extract the content of
the text field to build the query.\n# Examples of good lines:\n#
min_doc_freq:2\n# max_doc_freq:20000\n# max_query_terms:25\n#
min_term_freq:2\n# percent_terms_to_match:0.3\n# min_word_len:2\n#
max_word_len:40\n# fields:text,opening_text\n# use_fields:true\n# </pre> <!--
leave this line exactly as it is -->",
"cirrussearch-didyoumean-settings": " #<!-- leave this line exactly as
it is --> <pre>\n# This message lets you configure the settings of the \"Did
you mean\" suggestions.\n# See also
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html\n#
Changes to this take effect immediately.\n# The syntax is as follows:\n# *
Everything from a \"#\" character to the end of the line is a comment.\n# *
Every non-blank line is the setting name followed by a \":\" character followed
by the setting value\n# The settings are :\n# * max_errors (integer): the
maximum number of terms that will be considered misspelled in order to be
corrected. 1 or 2.\n# * confidence (float): The confidence level defines a
factor applied to the input phrases score which is used as a threshold for
other suggestion candidates. Only candidates that score higher than the
threshold will be included in the result. For instance a confidence level of
1.0 will only return suggestions that score higher than the input phrase. If
set to 0.0 the best candidate are returned.\n# * min_doc_freq (float 0 to 1):
The minimal threshold in number of documents a suggestion should appear in.\n#
High frequency terms are generally \"stop words\".\n# *
max_term_freq (float 0 to 1): The maximum threshold in number of documents in
which a term can exist in order to be included.\n# * prefix_length (integer):
The minimal number of prefix characters that must match a term in order to be a
suggestion.\n# * suggest_mode (missing, popular, always): The suggest mode
controls the way suggestions are included.\n# Examples of good lines:\n#
max_errors:2\n# confidence:2.0\n# max_term_freq:0.5\n# min_doc_freq:0.01\n#
prefix_length:2\n# suggest_mode:always\n#\n# </pre> <!-- leave this line
exactly as it is -->",
- "cirrussearch-query-too-long": "Search request is longer than the
maximum allowed length. ($1 > $2)"
+ "cirrussearch-query-too-long": "Search request is longer than the
maximum allowed length. ($1 > $2)",
+ "cirrussearch-completionsuggester-pref": "Completion suggester",
+ "cirrussearch-completionsuggester-desc": "New algorithm for search as
you type. Once enabled the search box at the top right corner will use the
Completion Suggester."
}
diff --git a/i18n/qqq.json b/i18n/qqq.json
index 2899dcf..1c4d880 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -34,5 +34,7 @@
"cirrussearch-give-feedback": "Used as text for an feedback link shown
at the end of Special:Search result
([[mw:Extension:CirrusSearch|$wgCirrusSearchFeedbackLink]])",
"cirrussearch-morelikethis-settings": "Settings for the More Like This
query.\n\n\"More Like This\" is the English name of the feature. The feature is
described at [[:mw:Help:CirrusSearch#Special prefixes]]. The prefix
\"morelike\" cannot be translated anywhere, but the full name of the feature
\"More Like This\" can be translated.\n\nDon't translate technical names like
min_doc_freq, max_query_terms, true|false, field names title, text,
auxiliary_text, opening_text, headings, all etc.\n\nFor a definition of
\"stopwords\" see [[:w:en:Stop words|Stop words in Wikipedia]].",
"cirrussearch-didyoumean-settings": "Settings for the \"Did You Mean?\"
suggestions.\n\n\"Did You Mean?\" is the English name of the feature and can be
translated. This feature is described at
[[:mw:Help:CirrusSearch#Did_you_mean]].\n\nDon't translate technical names like
max_errors, confidence, max_term_freq, min_doc_freq and suggest_mode.\n\n\"Stop
words\" are words that are explicitly excluded from searching and statistics,
usually because they are too frequent to be useful and meaningful. See
[[:w:en:Stop words|stop words]] in the English Wikipedia.",
- "cirrussearch-query-too-long": "Error message shown to users when their
queries are too long. \n\nParameters:\n* $1 - length of current query in
characters\n* $2 - maximum query length, in characters."
+ "cirrussearch-query-too-long": "Error message shown to users when their
queries are too long. \n\nParameters:\n* $1 - length of current query in
characters\n* $2 - maximum query length, in characters.",
+ "cirrussearch-completionsuggester-pref": "Name of the beta feature,
this should not be translated.",
+ "cirrussearch-completionsuggester-desc": "Description of the beta
feature. \"top right\" should be translated to \"top left\" for rtl languages."
}
diff --git a/includes/Api/Suggest.php b/includes/Api/Suggest.php
index 7bca856..a455a3d 100644
--- a/includes/Api/Suggest.php
+++ b/includes/Api/Suggest.php
@@ -2,6 +2,7 @@
namespace CirrusSearch\Api;
use CirrusSearch\Searcher;
+use CirrusSearch;
use RequestContext;
/**
@@ -27,47 +28,31 @@
public function execute() {
$context = RequestContext::getMain();
$user = $context->getUser();
- $conn = $this->getCirrusConnection();
- $searcher = new Searcher( $conn, 0, $this->getParameter(
'limit' ), null, null, $user );
+ $cirrus = new CirrusSearch();
+ $cirrus->setNamespaces( array ( NS_MAIN ) );
+ $limit = $this->getParameter( 'limit' );
+ $cirrus->setLimitOffset( $limit );
$queryText = $this->getParameter( 'text' );
if ( !$queryText ) {
return;
}
- $contextString = $this->getParameter( 'context' );
- if( $contextString ) {
- $context = @json_decode( $contextString, true );
- /*
- * Validate the context, must be in the form of:
- * {
- * name: { foo: bar, baz: qux }
- * name2: { foo: bar, baz: qux }
- * }
- *
- */
- if( !is_array( $context )) {
- $context = null;
- } else {
- foreach( $context as $name => $ctx ) {
- if ( !is_array( $ctx ) ) {
- $this->dieUsage( "Bad context
element $name", 'cirrus-badcontext' );
- }
- }
- }
- } else {
- $context = null;
- }
+ $suggestions = $cirrus->searchSuggestions( $queryText );
- // TODO: add passing context here,
- // see
https://www.elastic.co/guide/en/elasticsearch/reference/current/suggester-context.html
- $result = $searcher->suggest( $queryText, $context );
- if($result->isOK()) {
- $this->getResult()->addValue( null, 'suggest',
$result->getValue() );
- } else {
- $this->getResult()->addValue( null, "error",
$result->getErrorsArray());
- }
+ // Use the same cache options used by OpenSearch
+ $this->getMain()->setCacheMaxAge( $this->getConfig()->get(
'SearchSuggestCacheExpiry' ) );
+ $this->getMain()->setCacheMode( 'public' );
+
+ $this->getResult()->addValue( null, 'suggest',
+ $suggestions->map( function( $sugg ) {
+ return array(
+ 'text' => $sugg->getText(),
+ 'url' => $sugg->getURL(),
+ 'score' => $sugg->getScore(),
+ );
+ } ) );
}
public function getAllowedParams() {
@@ -76,12 +61,11 @@
ApiBase::PARAM_TYPE => 'string',
ApiBase::PARAM_REQUIRED => true,
),
- 'context' => array(
- ApiBase::PARAM_TYPE => 'string',
- ),
'limit' => array(
- ApiBase::PARAM_TYPE => 'integer',
+ ApiBase::PARAM_TYPE => 'limit',
ApiBase::PARAM_DFLT => 5,
+ ApiBase::PARAM_MAX => 20,
+ ApiBase::PARAM_MAX2 => 50,
),
);
}
diff --git a/includes/BuildDocument/SuggestBuilder.php
b/includes/BuildDocument/SuggestBuilder.php
index 0ec199c..9e61ab7 100644
--- a/includes/BuildDocument/SuggestBuilder.php
+++ b/includes/BuildDocument/SuggestBuilder.php
@@ -42,6 +42,16 @@
const REDIRECT_DISCOUNT = 0.1;
/**
+ * Redirect suggestion type
+ */
+ const REDIRECT_SUGGESTION = 'r';
+
+ /**
+ * Title suggestion type
+ */
+ const TITLE_SUGGESTION = 't';
+
+ /**
* Number of common prefix chars a redirect must share with the title
to be
* promoted as a title suggestion.
* This is useful not to promote Eraq as a title suggestion for Iraq
@@ -131,7 +141,7 @@
foreach ( $title['variants'] as $variant ) {
$inputs[] = $this->prepareInput( $variant );
}
- $output = $id . ":t:" . $title['text'];
+ $output = self::encodeTitleOutput( $id, $title['text'] );
return $this->buildSuggestion( $output, $inputs, $location,
$score );
}
@@ -329,4 +339,62 @@
// longer strings
return levenshtein( $a, $b );
}
+
+	/**
+	 * Build the output string stored for a title suggestion.
+	 *
+	 * The fields are joined with ':' in this order: page id, the
+	 * TITLE_SUGGESTION type marker, title text.
+	 *
+	 * @param int $id pageId
+	 * @param string $title
+	 * @return string the encoded output
+	 */
+	public static function encodeTitleOutput( $id, $title ) {
+		return implode( ':', array( $id, self::TITLE_SUGGESTION, $title ) );
+	}
+
+	/**
+	 * Build the output string stored for a redirect suggestion.
+	 *
+	 * Only the page id and the REDIRECT_SUGGESTION type marker are encoded,
+	 * separated by ':'; no title text is included.
+	 *
+	 * @param int $id pageId
+	 * @return string the encoded output
+	 */
+	public static function encodeRedirectOutput( $id ) {
+		return "{$id}:" . self::REDIRECT_SUGGESTION;
+	}
+
+	/**
+	 * Decode a suggestion output.
+	 *
+	 * The result is an array with the following keys:
+	 * id: the pageId (as found in the encoded string)
+	 * type: either REDIRECT_SUGGESTION or TITLE_SUGGESTION
+	 * text (only for TITLE_SUGGESTION): the title text
+	 *
+	 * @param string $output text value returned by a suggest query
+	 * @return array|null the decoded parts, or null if the output is not
+	 *  properly encoded
+	 */
+	public static function decodeOutput( $output ) {
+		if ( $output == null ) {
+			return null;
+		}
+
+		// At most 3 pieces so that a ':' inside the title text is preserved.
+		$pieces = explode( ':', $output, 3 );
+		$pieceCount = count( $pieces );
+		if ( $pieceCount < 2 ) {
+			// Ignore broken output
+			return null;
+		}
+
+		list( $pageId, $kind ) = $pieces;
+
+		if ( $kind === self::REDIRECT_SUGGESTION ) {
+			return array(
+				'id' => $pageId,
+				'type' => self::REDIRECT_SUGGESTION,
+			);
+		}
+
+		// A title suggestion is only valid when the title text is present.
+		if ( $kind === self::TITLE_SUGGESTION && $pieceCount === 3 ) {
+			return array(
+				'id' => $pageId,
+				'type' => self::TITLE_SUGGESTION,
+				'text' => $pieces[2]
+			);
+		}
+
+		// Unknown type marker, or title suggestion without text
+		return null;
+	}
}
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index 1451ee3..c0a0bd3 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -4,7 +4,10 @@
use CirrusSearch\InterwikiSearcher;
use CirrusSearch\Search\FullTextResultsType;
use CirrusSearch\Searcher;
+use CirrusSearch\CompletionSuggester;
use CirrusSearch\Search\ResultSet;
+use CirrusSearch\Search\SearchSuggestion;
+use CirrusSearch\Search\SearchSuggestionSet;
use CirrusSearch\SearchConfig;
/**
@@ -345,6 +348,153 @@
return $status->isOk() ? $status->getValue() : $status;
}
+	/**
+	 * This implementation will run the completion suggester if it's enabled and if the
+	 * query is for NS_MAIN. Fallback to the PrefixSearch based implementation otherwise
+	 * (see searchSuggestionsPrefixSearchFallback), including when the query cannot be
+	 * parsed as a title.
+	 *
+	 * The flow is:
+	 *  1. decide whether the completion suggester applies (config flag or the
+	 *     cirrusUseCompletionSuggester=yes request parameter, NS_MAIN only);
+	 *  2. run the suggester for the query, then for the language variants of
+	 *     the query while there is still room under the limit;
+	 *  3. preload the suggested titles and rescore with an exact title match.
+	 *
+	 * @param string $search the user query
+	 * @return SearchSuggestionSet the suggestions
+	 */
+	public function searchSuggestions( $search ) {
+		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'CirrusSearch' );
+		$useCompletionSuggester = $config->getElement( 'CirrusSearchUseCompletionSuggester' );
+
+		$context = RequestContext::getMain();
+		$request = $context->getRequest();
+
+		// Allow experimentation with query parameters
+		if ( $request && $request->getVal( 'cirrusUseCompletionSuggester' ) === 'yes' ) {
+			$useCompletionSuggester = true;
+		}
+
+		if ( !$useCompletionSuggester ) {
+			// Completion suggester is not enabled, fallback to
+			// default implementation
+			return $this->searchSuggestionsPrefixSearchFallback( $search );
+		}
+
+		// We use Title to extract namespace from a Title string
+		// We append a random letter behind just in case the search
+		// string ends with ':'.
+		$title = Title::newFromText( $search . "A" );
+		// Title::newFromText() returns null when the text is not a valid
+		// title (e.g. it contains forbidden characters); in that case we
+		// cannot tell the namespace, so let prefix search deal with it
+		// instead of calling a method on null.
+		if ( $title === null
+			|| $title->getNamespace() != NS_MAIN
+			|| count( $this->namespaces ) != 1
+			|| reset( $this->namespaces ) != NS_MAIN ) {
+			// Fallback to prefix search if we are not on content namespace
+			return $this->searchSuggestionsPrefixSearchFallback( $search );
+		}
+
+		$user = $context->getUser();
+		// offset is omitted, searchSuggestion does not support
+		// scrolling results
+		$suggester = new CompletionSuggester( $this->connection, $this->limit,
+			$config, $this->namespaces, $user, $this->indexBaseName );
+
+		$response = $suggester->suggest( $search );
+		$suggestions = SearchSuggestionSet::emptySuggestionSet();
+		if ( $response->isOK() ) {
+			// Errors will be logged, let's try the exact db match
+			$suggestions = $response->getValue();
+		}
+
+		// if the content language has variants, try to retrieve fallback results
+		$fallbackLimit = $this->limit - $suggestions->getSize();
+
+		// Copied from PrefixSearch
+		// @todo: verify if this is really needed, if variants are
+		// close enough fuzzy suggestions could already cover this
+		// usecase.
+		if ( $fallbackLimit > 0 ) {
+			global $wgContLang;
+
+			$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
+			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), array( $search ) );
+
+			foreach ( $fallbackSearches as $fbs ) {
+				// Ask only for what is still missing, so that several
+				// variants cannot push the set above $this->limit.
+				$suggester->setLimit( $fallbackLimit );
+				$fallbackResponse = $suggester->suggest( $fbs );
+				if ( !$fallbackResponse->isOK() ) {
+					continue;
+				}
+				$pageIds = $suggestions->map( function( $sugg ) {
+					return $sugg->getSuggestedTitleID();
+				} );
+
+				$fallbackSuggestions = $fallbackResponse->getValue();
+				// Same page can be returned (fuzzy suggestions)
+				foreach ( $fallbackSuggestions->getSuggestions() as $s ) {
+					if ( !in_array( $s->getSuggestedTitleID(), $pageIds ) ) {
+						$suggestions->addSuggestion( $s );
+					}
+				}
+
+				$fallbackLimit = $this->limit - $suggestions->getSize();
+
+				if ( $fallbackLimit <= 0 ) {
+					break;
+				}
+			}
+		}
+
+		// preload the titles with LinkBatch
+		$titles = $suggestions->map( function( $sugg ) {
+			return $sugg->getSuggestedTitle();
+		} );
+		$lb = new LinkBatch( $titles );
+		$lb->setCaller( __METHOD__ );
+		$lb->execute();
+
+		$results = $suggestions->map( function( $sugg ) {
+			return $sugg->getSuggestedTitle()->getPrefixedText();
+		} );
+
+		// now we can trim
+		$search = trim( $search );
+
+		// Rescore results with an exact title match
+		$rescorer = new SearchExactMatchRescorer();
+		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
+
+		if ( count( $rescoredResults ) > 0 ) {
+			if ( !in_array( reset( $rescoredResults ), $results ) ) {
+				// If the first result is not in the previous array it
+				// means that we found a new exact match
+				$exactTitle = Title::newFromText( reset( $rescoredResults ) );
+				$exactMatch = new SearchSuggestion();
+				$exactMatch->setText( $exactTitle->getPrefixedText() );
+				$exactMatch->setSuggestedTitle( $exactTitle, true );
+				$exactMatch->setScore( 0 );
+				$suggestions->insertBestSuggestion( $exactMatch );
+				$suggestions->shrink( $this->limit );
+			} else {
+				// if the first result is not the same we need to rescore
+				if ( reset( $rescoredResults ) != reset( $results ) ) {
+					$rescoredIndex = array_search( reset( $rescoredResults ), $results );
+					$suggestions->rescore( $rescoredIndex );
+				}
+			}
+		}
+
+		return $suggestions;
+	}
+
+	/**
+	 * Produce suggestions with TitlePrefixSearch instead of the completion
+	 * suggester.
+	 *
+	 * searchSuggestions() relies on this when:
+	 * - the completion suggester is not enabled
+	 * - the query is for a namespace not covered by the completion suggester
+	 * - the Special: namespace
+	 *
+	 * @param string $search the user query
+	 * @return SearchSuggestionSet the suggestions (an empty set when the
+	 *  prefix search finds nothing)
+	 */
+	private function searchSuggestionsPrefixSearchFallback( $search ) {
+		$prefixSearch = new TitlePrefixSearch();
+		$matches = $prefixSearch->searchWithVariants( $search, $this->limit, $this->namespaces );
+
+		return $matches
+			? SearchSuggestionSet::fromTitles( $matches )
+			: SearchSuggestionSet::emptySuggestionSet();
+	}
+
private function moreLikeThis( $term, $searcher, $options ) {
// Expand titles chasing through redirects
$titles = array();
diff --git a/includes/CompletionSuggester.php b/includes/CompletionSuggester.php
new file mode 100644
index 0000000..e0edd4d
--- /dev/null
+++ b/includes/CompletionSuggester.php
@@ -0,0 +1,390 @@
+<?php
+
+namespace CirrusSearch;
+
+use Elastica;
+use CirrusSearch;
+use CirrusSearch\BuildDocument\SuggestBuilder;
+use CirrusSearch\Search\SearchContext;
+use CirrusSearch\Search\SearchSuggestion;
+use CirrusSearch\Search\SearchSuggestionSet;
+use ConfigFactory;
+use MediaWiki\Logger\LoggerFactory;
+use Title;
+use User;
+use Elastica\Request;
+use Elastica\Exception\ResponseException;
+
+/**
+ * Performs search as you type queries using Completion Suggester.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/**
+ * Completion Suggester Searcher
+ *
+ * NOTES:
+ * The CompletionSuggester is built on top of the ElasticSearch Completion
+ * Suggester.
+ *
(https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html).
+ *
+ * This class is used at query time, see
+ * CirrusSearch\BuildDocument\SuggestBuilder for index time logic.
+ *
+ * Document model: Cirrus documents are indexed with 2 suggestions:
+ *
+ * 1. The title suggestion (and close redirects).
+ * This helps to avoid displaying redirects with typos (e.g. Albert Enstein,
+ * Unietd States) where we make the assumption that if the redirect is close
+ * enough it's likely a typo and it's preferable to display the canonical
title.
+ * This decision is made at index-time in
SuggestBuilder::extractTitleAndSimilarRedirects.
+ *
+ * 2. The redirect suggestions
+ * Because the same canonical title can be returned twice we support
fetch_limit_factor
+ * in suggest profiles to fetch more than what the user asked. Because the list
of redirects
+ * can be very large we cannot store all of them in the index (see
limitations). We run a second
+ * pass query on the main cirrus index to fetch them, then we try to detect
which one is the closest
+ * to the user query (see Util::chooseBestRedirect).
+ *
+ * LIMITATIONS:
+ * A number of hacks are required in Cirrus to workaround some limitations in
+ * the elasticsearch completion suggester implementation:
+ * - It is a _suggest API, unlike classic "query then fetch" there is no fetch
+ * phase here.
+ * - Payloads are stored in memory within the FST: we try to avoid them, but
+ * this forces us to implement a second pass query to fetch redirect titles
+ * from the cirrus main index.
+ * - Fuzzy suggestions are ranked by index-time score: we allow to set
+ * 'discount' param in the suggest profile (profiles/SuggestProfiles.php).
The
+ * default profile includes a fuzzy and non-fuzzy suggestion query. This is
to
+ * avoid having fuzzy suggestions ranked higher than exact suggestion.
+ * - The suggestion string cannot be expanded to more than 255 strings at
+ * index time: we limit the number of generated tokens in the analysis config
+ * (see includes/Maintenance/SuggesterAnalysisConfigBuilder.php) but we can't
+ * workaround this problem for geosuggestion (suggestions will be prepended
by
+ * geohash prefixes, one per precision step)
+ *
+ * @todo: investigate new features in elasticsearch completion suggester v2 to
remove
+ * some workarounds (https://github.com/elastic/elasticsearch/issues/10746).
+ */
+class CompletionSuggester extends ElasticsearchIntermediary {
+ /**
+ * @var string term to search.
+ */
+ private $term;
+
+ /**
+ * @var integer maximum number of result
+ */
+ private $limit;
+
+ /**
+ * @var string index base name to use
+ */
+ private $indexBaseName;
+
+ /**
+ * Search environment configuration
+ * @var SearchConfig
+ * Specified as public because of closures. When we move to a non-ancient
PHP version, this can be made protected.
+ */
+ public $config;
+
+ /**
+ * @var SearchContext
+ */
+ private $searchContext;
+
+	/**
+	 * Constructor
+	 *
+	 * @param Connection $conn connection handed to the parent intermediary
+	 * @param int $limit Limit the results to this many
+	 * @param SearchConfig|null $config Configuration settings; when null the
+	 *  'CirrusSearch' config is built through ConfigFactory
+	 * @param int[]|null $namespaces Array of namespace numbers to search or
+	 *  null to search all namespaces.
+	 * @param User|null $user user for which this search is being performed.
+	 *  Attached to slow request logs.
+	 * @param string|boolean $index Base name for index to search from; any
+	 *  falsy value selects the wiki id reported by $config
+	 */
+	public function __construct( Connection $conn, $limit, SearchConfig $config = null, array $namespaces = null,
+		User $user = null, $index = false ) {
+
+		if ( $config === null ) {
+			// @todo connection has an embedded config ... reuse that? somehow should
+			// at least ensure they are the same.
+			$config = ConfigFactory::getDefaultInstance()->makeConfig( 'CirrusSearch' );
+		}
+
+		$slowSearchThreshold = $config->get( 'CirrusSearchSlowSearch' );
+		parent::__construct( $conn, $user, $slowSearchThreshold );
+
+		$this->config = $config;
+		$this->limit = $limit;
+		if ( $index ) {
+			$this->indexBaseName = $index;
+		} else {
+			$this->indexBaseName = $config->getWikiId();
+		}
+		$this->searchContext = new SearchContext( $this->config, $namespaces );
+	}
+
+	/**
+	 * Produce a set of completion suggestions for text using _suggest
+	 * See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
+	 *
+	 * One completion request is built per configured suggest profile whose
+	 * min_query_len/max_query_len accept the query. When a numeric geo
+	 * context (lat/lon) is given, the geo context profiles are used instead.
+	 * All requests are sent in a single _suggest call run inside the
+	 * 'CirrusSearch-Search' pool counter.
+	 *
+	 * WARNING: experimental API
+	 *
+	 * @param string $text Search term
+	 * @param array|null $context optional context; only
+	 *  $context['geo']['lat'] and $context['geo']['lon'] are read
+	 * @return Status
+	 */
+	public function suggest( $text, $context = null ) {
+		// Do not remove spaces at the end, the user might tell us he finished writing a word
+		$this->term = ltrim( $text );
+
+		if ( mb_strlen( $this->term ) > SuggestBuilder::MAX_INPUT_LENGTH ) {
+			// Trim the query otherwise we won't find results
+			$this->term = mb_substr( $this->term, 0, SuggestBuilder::MAX_INPUT_LENGTH );
+		}
+
+		// Send the prepared term, not the raw input: the raw text would
+		// bypass the length cap applied just above.
+		$suggest = array( 'text' => $this->term );
+		$queryLen = mb_strlen( trim( $text ) ); // Avoid cheating with spaces
+		$queryType = "comp_suggest";
+
+		$profiles = $this->config->get( 'CirrusSearchCompletionSettings' );
+		if ( $context != null && isset( $context['geo']['lat'] ) && isset( $context['geo']['lon'] )
+			&& is_numeric( $context['geo']['lat'] ) && is_numeric( $context['geo']['lon'] )
+		) {
+			$profiles = $this->prepareGeoContextSuggestProfiles( $context );
+			$queryType = "comp_suggest_geo";
+		}
+
+		foreach ( $profiles as $name => $config ) {
+			// Skip profiles that do not apply to a query of this length
+			if ( $config['min_query_len'] > $queryLen ) {
+				continue;
+			}
+			if ( isset( $config['max_query_len'] ) && $queryLen > $config['max_query_len'] ) {
+				continue;
+			}
+			$field = $config['field'];
+			$suggest[$name] = array(
+				'completion' => array(
+					'field' => $field,
+					// Over-fetch: the same page can come back several times
+					'size' => $this->limit * $config['fetch_limit_factor']
+				)
+			);
+			if ( isset( $config['fuzzy'] ) ) {
+				$suggest[$name]['completion']['fuzzy'] = $config['fuzzy'];
+			}
+			if ( isset( $config['context'] ) ) {
+				$suggest[$name]['completion']['context'] = $config['context'];
+			}
+		}
+
+		$queryOptions = array();
+		$queryOptions[ 'timeout' ] = $this->config->getElement( 'CirrusSearchSearchShardTimeout', 'default' );
+		$this->connection->setTimeout( $queryOptions[ 'timeout' ] );
+
+		$index = $this->connection->getIndex( $this->indexBaseName, Connection::TITLE_SUGGEST_TYPE );
+		// The log keeps the text exactly as the user typed it
+		$logContext = array(
+			'query' => $text,
+			'queryType' => $queryType,
+		);
+		$searcher = $this;
+		$limit = $this->limit;
+		$result = Util::doPoolCounterWork(
+			'CirrusSearch-Search',
+			$this->user,
+			function() use( $searcher, $index, $suggest, $logContext, $queryOptions,
+					$profiles, $text, $limit ) {
+				$description = "{queryType} search for '{query}'";
+				$searcher->start( $description, $logContext );
+				try {
+					$result = $index->request( "_suggest", Request::POST, $suggest, $queryOptions );
+					if ( $result->isOk() ) {
+						$result = $searcher->postProcessSuggest( $text, $result,
+							$profiles, $limit );
+						return $searcher->success( $result );
+					}
+					// NOTE(review): on a non-OK response the raw response
+					// object is returned as is rather than a Status;
+					// confirm that callers only rely on isOK() here.
+					return $result;
+				} catch ( \Elastica\Exception\ExceptionInterface $e ) {
+					return $searcher->failure( $e );
+				}
+			}
+		);
+		return $result;
+	}
+
+	/**
+	 * Prepare the list of suggest requests used for geo context suggestions.
+	 *
+	 * This method merges $this->config->get( 'CirrusSearchCompletionSettings' )
+	 * and $this->config->get( 'CirrusSearchCompletionGeoContextSettings' ):
+	 * every suggest profile listed in a geo profile's 'with' entry is copied,
+	 * its field gets the geo field suffix appended, its discount is multiplied
+	 * by the geo discount and a location context is attached. The resulting
+	 * profiles are keyed "<suggest profile name>-<geo profile name>".
+	 *
+	 * @param array $context user's geo context; $context['geo']['lat'] and
+	 *  $context['geo']['lon'] are read
+	 * @return array of suggest request profiles
+	 */
+	private function prepareGeoContextSuggestProfiles( $context ) {
+		$merged = array();
+		foreach ( $this->config->get( 'CirrusSearchCompletionGeoContextSettings' ) as $geoName => $geoProfile ) {
+			foreach ( $this->config->get( 'CirrusSearchCompletionSettings' ) as $suggestName => $suggestProfile ) {
+				if ( in_array( $suggestName, $geoProfile['with'] ) ) {
+					// $suggestProfile is a by-value copy, the configured
+					// profile itself is left untouched.
+					$suggestProfile['field'] .= $geoProfile['field_suffix'];
+					$suggestProfile['discount'] *= $geoProfile['discount'];
+					$location = array(
+						'lat' => $context['geo']['lat'],
+						'lon' => $context['geo']['lon'],
+						'precision' => $geoProfile['precision']
+					);
+					$suggestProfile['context'] = array( 'location' => $location );
+					$merged[$suggestName . '-' . $geoName] = $suggestProfile;
+				}
+			}
+		}
+		return $merged;
+	}
+
+ /**
+ * merge top level multi-queries and resolve returned pageIds into
Title objects.
+ *
+ * WARNING: experimental API
+ *
+ * @param string $query the user query
+ * @param \Elastica\Response $response Response from elasticsearch
_suggest api
+ * @param array $profiles the suggestion profiles
+ * @param int $limit Maximum suggestions to return, -1 for unlimited
+ * @return SearchSuggestionSet a set of Suggestions
+ */
+	protected function postProcessSuggest( $query, \Elastica\Response $response, $profiles, $limit = -1 ) {
+		$this->logContext['elasticTookMs'] = intval( $response->getQueryTime() * 1000 );
+		$data = $response->getData();
+		// '_shards' is not one of the named suggest requests, drop it before
+		// iterating over the per-request results.
+		unset( $data['_shards'] );
+
+		// Merge the results of every suggest request, keeping only the best
+		// scored suggestion for each page ID.
+		$suggestions = array();
+		foreach ( $data as $name => $results ) {
+			$discount = $profiles[$name]['discount'];
+			foreach ( $results as $suggested ) {
+				foreach ( $suggested['options'] as $suggest ) {
+					$output = SuggestBuilder::decodeOutput( $suggest['text'] );
+					if ( $output === null ) {
+						// Ignore broken output
+						continue;
+					}
+					$pageId = $output['id'];
+					$type = $output['type'];
+
+					$score = $discount * $suggest['score'];
+					if ( !isset( $suggestions[$pageId] ) ||
+						$score > $suggestions[$pageId]->getScore()
+					) {
+						$suggestion = new SearchSuggestion( null, null, $score, null, $pageId );
+						// If it's a title suggestion we have the text
+						if ( $type === SuggestBuilder::TITLE_SUGGESTION ) {
+							$suggestion->setText( $output['text'] );
+						}
+						$suggestions[$pageId] = $suggestion;
+					}
+				}
+			}
+		}
+
+		// Sort by descending score. The comparator must return an integer:
+		// PHP casts a float return value to int, so returning the raw score
+		// difference would treat any two scores less than 1 apart as equal.
+		uasort( $suggestions, function ( $a, $b ) {
+			$scoreA = $a->getScore();
+			$scoreB = $b->getScore();
+			if ( $scoreA == $scoreB ) {
+				return 0;
+			}
+			return $scoreB > $scoreA ? 1 : -1;
+		} );
+
+		$this->logContext['hitsTotal'] = count( $suggestions );
+
+		if ( $limit > 0 ) {
+			// Keys are page IDs and are needed by the redirect pass below,
+			// so they must be preserved.
+			$suggestions = array_slice( $suggestions, 0, $limit, true );
+		}
+
+		$this->logContext['hitsReturned'] = count( $suggestions );
+		$this->logContext['hitsOffset'] = 0;
+
+		// we must fetch redirect data for redirect suggestions
+		$missingText = array();
+		foreach ( $suggestions as $id => $suggestion ) {
+			if ( $suggestion->getText() === null ) {
+				$missingText[] = $id;
+			}
+		}
+
+		if ( !empty( $missingText ) ) {
+			// Experimental.
+			//
+			// Second pass query to fetch redirects.
+			// It's not clear if it's the best option, this will slow down the whole query
+			// when we hit a redirect suggestion.
+			// Other option would be to encode redirects as a payload resulting in a
+			// very big index...
+
+			// XXX: we support only the content index
+			$type = $this->connection->getPageType( $this->indexBaseName, Connection::CONTENT_INDEX_TYPE );
+			// NOTE: we are already in a poolCounterWork
+			// Multi get is not supported by elastica
+			$redirResponse = null;
+			try {
+				$redirResponse = $type->request( '_mget', 'GET',
+					array( 'ids' => $missingText ),
+					array( '_source_include' => 'redirect' ) );
+				if ( $redirResponse->isOk() ) {
+					$this->logContext['elasticTook2PassMs'] = intval( $redirResponse->getQueryTime() * 1000 );
+					$docs = $redirResponse->getData();
+					$docs = $docs['docs'];
+					foreach ( $docs as $doc ) {
+						$id = $doc['_id'];
+						// Skip documents without redirects, and any document
+						// that does not map back to a pending suggestion.
+						if ( empty( $doc['_source']['redirect'] ) || !isset( $suggestions[$id] ) ) {
+							continue;
+						}
+						$text = Util::chooseBestRedirect( $query, $doc['_source']['redirect'] );
+						$suggestions[$id]->setText( $text );
+					}
+				} else {
+					LoggerFactory::getInstance( 'CirrusSearch' )->warning(
+						'Unable to fetch redirects for suggestion {query} with results {ids} : {error}',
+						array( 'query' => $query,
+							'ids' => serialize( $missingText ),
+							'error' => $redirResponse->getError() ) );
+				}
+			} catch ( \Elastica\Exception\ExceptionInterface $e ) {
+				// Best effort: suggestions left without text are dropped below.
+				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
+					'Unable to fetch redirects for suggestion {query} with results {ids} : {error}',
+					array( 'query' => $query,
+						'ids' => serialize( $missingText ),
+						'error' => $this->extractMessage( $e ) ) );
+			}
+		}
+
+		$retval = array();
+		foreach ( $suggestions as $suggestion ) {
+			if ( $suggestion->getText() === null ) {
+				// We were unable to find a text to display
+				// Maybe a page with redirects when we built the suggester index
+				// but now without redirects?
+				continue;
+			}
+			// Populate the SearchSuggestion object: namespace 0 title, and let
+			// the suggestion generate its URL from that title.
+			$suggestion->setSuggestedTitle( Title::makeTitle( 0, $suggestion->getText() ), true );
+			$retval[] = $suggestion;
+		}
+
+		return new SearchSuggestionSet( $retval );
+	}
+
+ /**
+ * Set the max number of results to extract.
+ * @param int $limit
+ */
+	public function setLimit( $limit ) {
+		// Plain setter: the value is stored as given, without validation or casting.
+		$this->limit = $limit;
+	}
+}
diff --git a/includes/Hooks.php b/includes/Hooks.php
index cabf483..e42128f 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -655,7 +655,9 @@
* @return bool
*/
public static function onResourceLoaderGetConfigVars( &$vars ) {
- global $wgCirrusSearchEnableSearchLogging,
$wgCirrusSearchFeedbackLink;
+ global $wgCirrusSearchEnableSearchLogging,
+ $wgCirrusSearchFeedbackLink,
+ $wgCirrusSearchUseCompletionSuggester;
$vars += array(
'wgCirrusSearchEnableSearchLogging' =>
$wgCirrusSearchEnableSearchLogging,
@@ -685,4 +687,45 @@
$query["wprov"] = $wgCirrusSearchInterwikiProv;
}
}
+
+ /**
+ * Activate Completion Suggester as a Beta Feature if available
+ * @param User $user
+ * @param array &$pref beta feature prefs
+ * @return boolean
+ */
+	public static function getBetaFeaturePreferences( User $user, &$pref ) {
+		global $wgCirrusSearchUseCompletionSuggester, $wgExtensionAssetsPath;
+
+		// Nothing to register when the completion suggester is disabled.
+		if ( !$wgCirrusSearchUseCompletionSuggester ) {
+			return true;
+		}
+
+		$imagePath = "$wgExtensionAssetsPath/CirrusSearch/resources/images";
+		$pref['cirrussearch-completionsuggester'] = array(
+			'label-message' => 'cirrussearch-completionsuggester-pref',
+			'desc-message' => 'cirrussearch-completionsuggester-desc',
+			'info-link' => '//mediawiki.org/wiki/Special:MyLanguage/Extension:CirrusSearch/CompletionSuggester',
+			'discussion-link' => '//mediawiki.org/wiki/Special:MyLanguage/Extension_talk:CirrusSearch/CompletionSuggester',
+			'screenshot' => array(
+				'ltr' => "$imagePath/cirrus-beta-ltr.svg",
+				// Right-to-left layouts get the mirrored screenshot.
+				'rtl' => "$imagePath/cirrus-beta-rtl.svg",
+			)
+		);
+		return true;
+	}
+
+ /**
+ * @param \OutputPage $out
+ * @param \Skin $skin
+ * @return boolean
+ */
+	public static function onBeforePageDisplay( \OutputPage &$out, \Skin &$skin ) {
+		global $wgCirrusSearchUseCompletionSuggester;
+
+		// The module is only relevant when the suggester is enabled and the
+		// BetaFeatures class is available.
+		if ( !$wgCirrusSearchUseCompletionSuggester || !class_exists( '\BetaFeatures' ) ) {
+			return true;
+		}
+
+		if ( \BetaFeatures::isFeatureEnabled( $GLOBALS['wgUser'], 'cirrussearch-completionsuggester' ) ) {
+			// We use the js extension only for testing the suggest-api
+			$out->addModules( array( 'ext.cirrus' ) );
+		}
+		return true;
+	}
}
diff --git a/includes/Search/SearchSuggestion.php
b/includes/Search/SearchSuggestion.php
new file mode 100644
index 0000000..ec46971
--- /dev/null
+++ b/includes/Search/SearchSuggestion.php
@@ -0,0 +1,161 @@
+<?php
+
+namespace CirrusSearch\Search;
+
+use Title;
+
+/**
+ * Search suggestion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+/**
+ * A search suggestion
+ *
+ */
+class SearchSuggestion {
+	/**
+	 * @var string|null text of the suggestion
+	 */
+	private $text;
+
+	/**
+	 * @var string|null URL of the suggestion
+	 */
+	private $url;
+
+	/**
+	 * @var Title|null title this suggestion is based on, if any
+	 */
+	private $suggestedTitle;
+
+	/**
+	 * NOTE: even if suggestedTitle is a redirect, suggestedTitleID
+	 * is the ID of the target page.
+	 * @var int|null ID of the suggested title
+	 */
+	private $suggestedTitleID;
+
+	/**
+	 * @var float|null score of the suggestion
+	 */
+	private $score;
+
+	/**
+	 * Build a suggestion; every field is optional and can be set later
+	 * through the matching setter.
+	 * @param string|null $text the suggestion text
+	 * @param string|null $url the suggestion URL
+	 * @param float $score the suggestion score (defaults to 0)
+	 * @param Title|null $suggestedTitle the suggested title
+	 * @param int|null $suggestedTitleID the suggested title ID
+	 */
+	public function __construct( $text = null, $url = null, $score = 0, Title $suggestedTitle = null, $suggestedTitleID = null ) {
+		$this->text = $text;
+		$this->url = $url;
+		$this->score = $score;
+		$this->suggestedTitle = $suggestedTitle;
+		$this->suggestedTitleID = $suggestedTitleID;
+	}
+
+	/**
+	 * @return string|null the suggestion text, null when not set
+	 */
+	public function getText() {
+		return $this->text;
+	}
+
+	/**
+	 * @param string $text the suggestion text
+	 */
+	public function setText( $text ) {
+		$this->text = $text;
+	}
+
+	/**
+	 * Title object when this suggestion is based on a title.
+	 * @return Title|null null if the suggestion is not a Title
+	 */
+	public function getSuggestedTitle() {
+		return $this->suggestedTitle;
+	}
+
+	/**
+	 * Set the suggested title and optionally derive the URL from it.
+	 * @param Title|null $title
+	 * @param boolean $generateURL set to true to generate the URL based
+	 *  on this Title (defaults to false); has no effect when $title is null
+	 */
+	public function setSuggestedTitle( Title $title = null, $generateURL = false ) {
+		$this->suggestedTitle = $title;
+		if ( $title === null || !$generateURL ) {
+			// Leave the current URL untouched.
+			return;
+		}
+		$this->url = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
+	}
+
+	/**
+	 * Title ID when this suggestion is based on a title.
+	 * @return int|null null if the suggestion is not a Title
+	 */
+	public function getSuggestedTitleID() {
+		return $this->suggestedTitleID;
+	}
+
+	/**
+	 * @param int|null $suggestedTitleID the suggested title ID
+	 */
+	public function setSuggestedTitleID( $suggestedTitleID = null ) {
+		$this->suggestedTitleID = $suggestedTitleID;
+	}
+
+	/**
+	 * @return float the suggestion score
+	 */
+	public function getScore() {
+		return $this->score;
+	}
+
+	/**
+	 * @param float $score the suggestion score
+	 */
+	public function setScore( $score ) {
+		$this->score = $score;
+	}
+
+	/**
+	 * Suggestion URL, can be the link to the Title or maybe in the
+	 * future a link to the search results for this search suggestion.
+	 * @return string|null the suggestion URL, null when not set
+	 */
+	public function getURL() {
+		return $this->url;
+	}
+
+	/**
+	 * @param string $url the suggestion URL
+	 */
+	public function setURL( $url ) {
+		$this->url = $url;
+	}
+}
diff --git a/includes/Search/SearchSuggestionSet.php
b/includes/Search/SearchSuggestionSet.php
new file mode 100644
index 0000000..b8376a1
--- /dev/null
+++ b/includes/Search/SearchSuggestionSet.php
@@ -0,0 +1,156 @@
+<?php
+
+namespace CirrusSearch\Search;
+
+/**
+ * Search suggestion sets
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+/**
+ * A set of SearchSuggestions
+ */
+class SearchSuggestionSet {
+	/**
+	 * @var SearchSuggestion[] suggestions, best score first, indexed from 0
+	 */
+	private $suggestions;
+
+	/**
+	 * Builds a new set of suggestions.
+	 *
+	 * NOTE: the array should be sorted by score (higher is better),
+	 * SearchSuggestionSet will not try to re-order this input array.
+	 * Providing an unsorted input array is a mistake and will lead to
+	 * unexpected behaviors.
+	 *
+	 * @param SearchSuggestion[] $suggestions (must be sorted by score)
+	 */
+	public function __construct( array $suggestions ) {
+		// Drop the caller's keys so positions are always 0..n-1.
+		$this->suggestions = array_values( $suggestions );
+	}
+
+	/**
+	 * @return SearchSuggestion[] the suggestions in this set
+	 */
+	public function getSuggestions() {
+		return $this->suggestions;
+	}
+
+	/**
+	 * Call array_map on the suggestions array
+	 * @param callback $callback
+	 * @return array
+	 */
+	public function map( $callback ) {
+		return array_map( $callback, $this->suggestions );
+	}
+
+	/**
+	 * Add a new suggestion at the end.
+	 * If the score of the new suggestion is greater than or equal to the
+	 * worst one, the new suggestion score will be updated (worst - 1).
+	 *
+	 * @param SearchSuggestion $suggestion
+	 */
+	public function addSuggestion( SearchSuggestion $suggestion ) {
+		if ( $this->getSize() > 0 && $suggestion->getScore() >= $this->getWorstScore() ) {
+			$suggestion->setScore( $this->getWorstScore() - 1 );
+		}
+		$this->suggestions[] = $suggestion;
+	}
+
+	/**
+	 * Move the suggestion at index $key to the first position.
+	 * Does nothing when there is no suggestion at that index.
+	 *
+	 * @param int $key position of the suggestion to promote
+	 */
+	public function rescore( $key ) {
+		$removed = array_splice( $this->suggestions, $key, 1 );
+		if ( empty( $removed ) ) {
+			// Nothing was removed (index past the end of the list): there
+			// is no suggestion to re-insert.
+			return;
+		}
+		$this->insertBestSuggestion( $removed[0] );
+	}
+
+	/**
+	 * Add a new suggestion at the top. If the new suggestion score
+	 * is lower than or equal to the best one its score will be updated
+	 * (best + 1).
+	 * @param SearchSuggestion $suggestion
+	 */
+	public function insertBestSuggestion( SearchSuggestion $suggestion ) {
+		if ( $this->getSize() > 0 && $suggestion->getScore() <= $this->getBestScore() ) {
+			$suggestion->setScore( $this->getBestScore() + 1 );
+		}
+		array_unshift( $this->suggestions, $suggestion );
+	}
+
+	/**
+	 * @return float the best score in this suggestion set, 0 if the set is empty
+	 */
+	public function getBestScore() {
+		if ( empty( $this->suggestions ) ) {
+			return 0;
+		}
+		// The first element is the best one (the list is kept best first).
+		return reset( $this->suggestions )->getScore();
+	}
+
+	/**
+	 * @return float the worst score in this set, 0 if the set is empty
+	 */
+	public function getWorstScore() {
+		if ( empty( $this->suggestions ) ) {
+			return 0;
+		}
+		// The last element is the worst one (the list is kept best first).
+		return end( $this->suggestions )->getScore();
+	}
+
+	/**
+	 * @return int the number of suggestions in this set
+	 */
+	public function getSize() {
+		return count( $this->suggestions );
+	}
+
+	/**
+	 * Remove any extra elements in the suggestions set
+	 * @param int $limit the max size of this set.
+	 */
+	public function shrink( $limit ) {
+		if ( count( $this->suggestions ) > $limit ) {
+			$this->suggestions = array_slice( $this->suggestions, 0, $limit );
+		}
+	}
+
+	/**
+	 * Builds a new set of suggestion based on a title array.
+	 * Useful when using a backend that supports only Titles.
+	 *
+	 * NOTE: Suggestion scores will be generated: the first title gets
+	 * count( $titles ) and each following title one less.
+	 *
+	 * @param Title[] $titles
+	 * @return SearchSuggestionSet
+	 */
+	public static function fromTitles( array $titles ) {
+		$suggestions = array();
+		$score = count( $titles );
+		foreach ( $titles as $title ) {
+			$suggestions[] = new SearchSuggestion(
+				$title->getPrefixedText(),
+				wfExpandUrl( $title->getFullURL(), PROTO_CURRENT ),
+				$score--,
+				$title
+			);
+		}
+		return new SearchSuggestionSet( $suggestions );
+	}
+
+	/**
+	 * @return SearchSuggestionSet an empty suggestion set
+	 */
+	public static function emptySuggestionSet() {
+		return new SearchSuggestionSet( array() );
+	}
+}
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 455c4ae..945d577 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -749,251 +749,6 @@
}
/**
- * Produce a set of completion suggestions for text using _suggest
- * See
https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
- *
- * WARNING: experimental API
- *
- * @param string $text Search term
- * @return Status
- */
- public function suggest( $text, $context = null ) {
- $this->term = $text;
-
- $suggest = array( 'text' => $text );
- $queryLen = mb_strlen( trim( $text ) ); // Avoid cheating with
spaces
- $profile = $this->config->get( 'CirrusSearchCompletionSettings'
);
-
- if ( $context != null && isset( $context['geo']['lat'] ) &&
isset( $context['geo']['lon'] )
- && is_numeric( $context['geo']['lat'] ) && is_numeric(
$context['geo']['lon'] )
- ) {
- $profile = $this->prepareGeoContextSuggestProfile(
$context );
- $description = "geo suggest query for {query}";
- }
-
- foreach ( $profile as $name => $config ) {
- if ( $config['min_query_len'] > $queryLen ) {
- continue;
- }
- if ( isset( $config['max_query_len'] ) && $queryLen >
$config['max_query_len'] ) {
- continue;
- }
- $field = $config['field'];
- $suggest[$name] = array(
- 'completion' => array(
- 'field' => $field,
- 'size' => $this->limit *
$config['fetch_limit_factor']
- )
- );
- if ( isset( $config['fuzzy'] ) ) {
- $suggest[$name]['completion']['fuzzy'] =
$config['fuzzy'];
- }
- if ( isset( $config['context'] ) ) {
- $suggest[$name]['completion']['context'] =
$config['context'];
- }
- }
-
- $queryOptions = array();
- $queryOptions[ 'timeout' ] = $this->config->getElement(
'CirrusSearchSearchShardTimeout', 'default' );
- $this->connection->setTimeout( $queryOptions[ 'timeout' ] );
-
- $index = $this->connection->getIndex( $this->indexBaseName,
Connection::TITLE_SUGGEST_TYPE );
- $logContext = array(
- 'query' => $text,
- 'queryType' => 'comp_suggest'
- );
- $searcher = $this;
- $limit = $this->limit;
- $result = Util::doPoolCounterWork(
- 'CirrusSearch-Search',
- $this->user,
- function() use( $searcher, $index, $suggest,
$logContext, $queryOptions,
- $profile, $text , $limit ) {
- $description = "{queryType} search for
'{query}'";
- $searcher->start( $description, $logContext );
- try {
- $result = $index->request( "_suggest",
Request::POST, $suggest, $queryOptions );
- if( $result->isOk() ) {
- $result =
$searcher->postProcessSuggest( $text, $result,
- $profile, $limit );
- return $searcher->success(
$result );
- }
- return $result;
- } catch (
\Elastica\Exception\ExceptionInterface $e ) {
- return $searcher->failure( $e );
- }
- }
- );
- return $result;
- }
-
- /**
- * prepare the list of suggest requests used for geo context suggestions
- * This method will merge $this->config->get(
'CirrusSearchCompletionSettings and
- * $this->config->get( 'CirrusSearchCompletionGeoContextSettings
- * @param array $context user's geo context
- * @return array of suggest request profiles
- */
- private function prepareGeoContextSuggestProfile( $context ) {
- $profiles = array();
- foreach ( $this->config->get(
'CirrusSearchCompletionGeoContextSettings' ) as $geoname => $geoprof ) {
- foreach ( $this->config->get(
'CirrusSearchCompletionSettings' ) as $sugname => $sugprof ) {
- if ( !in_array( $sugname, $geoprof['with'] ) ) {
- continue;
- }
- $profile = $sugprof;
- $profile['field'] .= $geoprof['field_suffix'];
- $profile['discount'] *= $geoprof['discount'];
- $profile['context'] = array(
- 'location' => array(
- 'lat' => $context['geo']['lat'],
- 'lon' => $context['geo']['lon'],
- 'precision' =>
$geoprof['precision']
- )
- );
- $profiles["$sugname-$geoname"] = $profile;
- }
- }
- return $profiles;
- }
-
- /**
- * merge top level multi-queries and resolve returned pageIds into
Title objects.
- *
- * WARNING: experimental API
- *
- * @param string $query the user query
- * @param \Elastica\Response $response Response from elasticsearch
_suggest api
- * @param array $profile the suggestion profile
- * @param int $limit Maximum suggestions to return, -1 for unlimited
- * @return Title[] List of suggested titles
- */
- protected function postProcessSuggest( $query, \Elastica\Response
$response, $profile, $limit = -1 ) {
- $this->logContext['elasticTookMs'] = intval(
$response->getQueryTime() * 1000 );
- $data = $response->getData();
- unset( $data['_shards'] );
-
- $suggestions = array();
- foreach ( $data as $name => $results ) {
- $discount = $profile[$name]['discount'];
- foreach ( $results as $suggested ) {
- foreach ( $suggested['options'] as $suggest ) {
- $output = explode( ':',
$suggest['text'], 3 );
- if ( sizeof ( $output ) < 2 ) {
- // Ignore broken output
- continue;
- }
- $pageId = $output[0];
- $type = $output[1];
-
- $score = $discount * $suggest['score'];
- if ( !isset( $suggestions[$pageId] ) ||
- $score >
$suggestions[$pageId]['score']
- ) {
- $suggestion = array(
- 'score' => $score,
- 'pageId' => $pageId
- );
- // If it's a title suggestion
we have the text
- if ( $type === 't' && sizeof(
$output ) == 3 ) {
-
$suggestion['text'] = $output[2];
- }
- $suggestions[$pageId] =
$suggestion;
- }
- }
- }
- }
-
- // simply sort by existing scores
- uasort( $suggestions, function ( $a, $b ) {
- return $b['score'] - $a['score'];
- } );
-
- $this->logContext['hitsTotal'] = count( $suggestions );
-
- if ( $limit > 0 ) {
- $suggestions = array_slice( $suggestions, 0, $limit,
true );
- }
-
- $this->logContext['hitsReturned'] = count( $suggestions );
- $this->logContext['hitsOffset'] = 0;
-
- // we must fetch redirect data for redirect suggestions
- $missingText = array();
- foreach ( $suggestions as $id => $suggestion ) {
- if ( !isset( $suggestion['text'] ) ) {
- $missingText[] = $id;
- }
- }
-
- if ( !empty ( $missingText ) ) {
- // Experimental.
- //
- // Second pass query to fetch redirects.
- // It's not clear if it's the best option, this will
slowdown the whole query
- // when we hit a redirect suggestion.
- // Other option would be to encode redirects as a
payload resulting in a
- // very big index...
-
- // XXX: we support only the content index
- $type = $this->connection->getPageType(
$this->indexBaseName, Connection::CONTENT_INDEX_TYPE );
- // NOTE: we are already in a poolCounterWork
- // Multi get is not supported by elastica
- $redirResponse = null;
- try {
- $redirResponse = $type->request( '_mget', 'GET',
- array( 'ids' => $missingText ),
- array( '_source_include' => 'redirect'
) );
- if ( $redirResponse->isOk() ) {
- $this->logContext['elasticTook2PassMs']
= intval( $redirResponse->getQueryTime() * 1000 );
- $docs = $redirResponse->getData();
- $docs = $docs['docs'];
- foreach ( $docs as $doc ) {
- $id = $doc['_id'];
- if ( !isset(
$doc['_source']['redirect'] )
- || empty(
$doc['_source']['redirect'] )
- ) {
- continue;
- }
- $text =
Util::chooseBestRedirect( $query, $doc['_source']['redirect'] );
- $suggestions[$id]['text'] =
$text;
- }
- } else {
- LoggerFactory::getInstance(
'CirrusSearch' )->warning(
- 'Unable to fetch redirects for
suggestion {query} with results {ids} : {error}',
- array( 'query' => $query,
- 'ids' => serialize(
$missingText ),
- 'error' =>
$redirResponse->getError() ) );
- }
- } catch ( \Elastica\Exception\ExceptionInterface $e ) {
- LoggerFactory::getInstance( 'CirrusSearch'
)->warning(
- 'Unable to fetch redirects for
suggestion {query} with results {ids} : {error}',
- array( 'query' => $query,
- 'ids' => serialize(
$missingText ),
- 'error' =>
$this->extractMessage( $e ) ) );
- }
- }
-
- $retval = array();
- foreach ( $suggestions as $suggestion ) {
- if ( !isset( $suggestion['text'] ) ) {
- // We were unable to find a text to display
- // Maybe a page with redirects when we built
the suggester index
- // but now without redirects?
- continue;
- }
- $retval[] = array(
- // XXX: we run the suggester for namespace 0
for now
- 'title' => Title::makeTitle( 0,
$suggestion['text'] ),
- 'pageId' => $suggestion['pageId'],
- 'score' => $suggestion['score'],
- );
- }
-
- return $retval;
- }
-
- /**
* Builds a match query against $field for $title. $title is munged to
make title matching better more
* intuitive for users.
* @param string $field field containing the title
diff --git a/profiles/SuggestProfiles.php b/profiles/SuggestProfiles.php
index 6e32bd8..2ee0f32 100644
--- a/profiles/SuggestProfiles.php
+++ b/profiles/SuggestProfiles.php
@@ -28,17 +28,20 @@
$wgCirrusSearchCompletionProfiles = array(
// Default profile
'default' => array(
+ // Defines the list of suggest queries to run in the same
request.
// key is the name of the suggestion request
'plain' => array(
// Field to request
'field' => 'suggest',
- // Fire the request only if the user query has
min_query_len chars
+ // Fire the request only if the user query has
min_query_len chars.
+ // See max_query_len to limit on max.
'min_query_len' => 0,
// Discount result scores for this request
// Useful to discount fuzzy request results
'discount' => 1.0,
- // Fetch more result than the limit
- // It's possible to have the same page multiple times.
+ // Fetch more results than the limit
+ // It's possible to have the same page multiple times
+ // (title and redirect suggestion).
// Requesting more than the limit helps to display the
correct number
// of suggestions
'fetch_limit_factor' => 2,
diff --git a/resources/ext.cirrus.suggest.js b/resources/ext.cirrus.suggest.js
new file mode 100644
index 0000000..b2c15da
--- /dev/null
+++ b/resources/ext.cirrus.suggest.js
@@ -0,0 +1,16 @@
+( function ( $, mw ) {
+ $( function() {
+ // Override default opensearch
+ mw.searchSuggest.request = function ( api, query, response,
maxRows ) {
+ return api.get( {
+ action: 'cirrus-suggest',
+ text: query,
+ limit: maxRows
+ } ).done( function ( data ) {
+ response( $.map( data.suggest, function (
suggestion ) {
+ return suggestion.text;
+ } ) );
+ } );
+ };
+ } );
+}( jQuery, mediaWiki ) );
diff --git a/resources/images/cirrus-beta-ltr.svg
b/resources/images/cirrus-beta-ltr.svg
new file mode 100644
index 0000000..ad7ca29
--- /dev/null
+++ b/resources/images/cirrus-beta-ltr.svg
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg width="263px" height="161px" viewBox="0 0 263 161" version="1.1"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sketch="http://www.bohemiancoding.com/sketch/ns">
+ <title>CirrusBeta</title>
+ <description>Created with Sketch
(http://www.bohemiancoding.com/sketch)</description>
+ <defs></defs>
+ <g id="Page-1" stroke="none" stroke-width="1" fill="none"
fill-rule="evenodd" sketch:type="MSPage">
+ <path d="M-0.04296875,0 L262.813802,0 L262.813802,152.139292
L250.700521,161.951823 L236.796875,151.46875 L222.88151,161.186198
L210.27474,151.308594 L196.753906,161.532552 L183.371094,151.445312
L170.528646,161.199219 L155.871094,151.641927 L142.93099,161.692708
L130.571615,151.46875 L117.138021,161.445312 L104.061198,151.394531
L91.1953125,161.445312 L76.6132812,151.97526 L63.1041667,161.61849
L50.0898438,151.445312 L38.4088542,161.16276 L24.2591146,151.445312
L13.046875,161.10026 L-0.375,151.445312 L-0.04296875,0 Z" id="Path-17"
stroke="#E5E5E5" fill="#FFFFFF" sketch:type="MSShapeGroup"></path>
+ <rect id="Rectangle-16" fill="#0E73FC" sketch:type="MSShapeGroup"
x="55" y="26" width="16" height="16" rx="1"></rect>
+ <rect id="Rectangle-8" fill="#D8D8D8" sketch:type="MSShapeGroup" x="8"
y="5" width="32" height="6"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="75" y="26" width="53" height="7" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="75" y="41" width="73" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="75" y="38" width="94" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="75" y="35" width="94" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="55" y="48" width="63" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="55" y="53" width="114" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="55" y="61" width="25" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#0E73FC" sketch:type="MSShapeGroup"
x="55" y="66" width="114" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#4A4A4A" sketch:type="MSShapeGroup"
x="55" y="74" width="36" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#4A4A4A" sketch:type="MSShapeGroup"
x="55" y="79" width="114" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#D0D0D0" sketch:type="MSShapeGroup"
x="55" y="88" width="42" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#D0D0D0" sketch:type="MSShapeGroup"
x="55" y="93" width="114" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#D0D0D0" sketch:type="MSShapeGroup"
x="55" y="102" width="20" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill="#D0D0D0" sketch:type="MSShapeGroup"
x="55" y="107" width="114" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill-opacity="0.5" fill="#D4D4D4"
sketch:type="MSShapeGroup" x="55" y="117" width="25" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill-opacity="0.5" fill="#D4D4D4"
sketch:type="MSShapeGroup" x="55" y="122" width="114" height="1" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill-opacity="0.5" fill="#D4D4D4"
sketch:type="MSShapeGroup" x="55" y="131" width="56" height="3" rx="1"></rect>
+ <rect id="Rectangle-8-copy" fill-opacity="0.5" fill="#D4D4D4"
sketch:type="MSShapeGroup" x="55" y="136" width="114" height="1" rx="1"></rect>
+ <path d="M10,36 C10,28.268 16.268,22 24,22 C31.732,22 38,28.268 38,36
C38,43.732 31.732,50 24,50 C16.268,50 10,43.732 10,36 L10,36 Z" id="Ellipse"
fill="#E5E5E5" sketch:type="MSShapeGroup"></path>
+ <path d="M37,59 L37,54 L12,54 L12,59 L37,59 L37,59 Z" id="Shape"
fill="#E5E5E5" sketch:type="MSShapeGroup"></path>
+ <path d="M37,107 L37,72 L12,72 L12,107 L37,107 L37,107 Z" id="Shape"
fill="#E5E5E5" sketch:type="MSShapeGroup"></path>
+ <path d="M250,5 L257.51,5 L257.51,11 L250,11 L250,5 L250,5 Z"
id="Shape" fill="#E5E5E5" sketch:type="MSShapeGroup"></path>
+ <path d="M235,5 L247.51,5 L247.51,11 L235,11 L235,5 L235,5 Z M235,5
L247.51,5 L247.51,11 L235,11 L235,5 L235,5 Z" id="Shape" fill="#E5E5E5"
sketch:type="MSShapeGroup"></path>
+ <path d="M1,14.5 L261,14.5" id="Shape" stroke="#E5E5E5" fill="#E5E5E5"
sketch:type="MSShapeGroup"></path>
+ <path d="M147,6 L147,10 L55,10 L55,6 L147,6 L147,6 Z M148,5 L54,5
L54,11 L148,11 L148,5 L148,5 Z" id="Shape" fill="#E5E5E5"
sketch:type="MSShapeGroup"></path>
+ <path d="M247.51,5 L247.51,11 L235,11 L235,5 L247.51,5 Z" id="Path"
fill="#D8D8D8" sketch:type="MSShapeGroup"></path>
+ <path d="M231.51,5 L231.51,11 L219,11 L219,5 L231.51,5 Z" id="Path"
fill="#D8D8D8" sketch:type="MSShapeGroup"></path>
+ <path d="M58,7 L58,9 L56,9 L56,7 L58,7 Z" id="Path-copy"
fill="#D8D8D8" sketch:type="MSShapeGroup"></path>
+ <g id="Oval-1-+-Path-13" sketch:type="MSLayerGroup"
transform="translate(59.000000, 30.000000)" stroke="#FFFFFF" stroke-width="2">
+ <path d="M2.66666667,5.33333333 C4.13942607,5.33333333
5.33333333,4.13942607 5.33333333,2.66666667 C5.33333333,1.19390726 4.13942607,0
2.66666667,0 C1.19390726,0 0,1.19390726 0,2.66666667 C0,4.13942607
1.19390726,5.33333333 2.66666667,5.33333333 Z" id="Oval-1"
sketch:type="MSShapeGroup"></path>
+ <path d="M4.55724437,4.51560974 L7.98807804,7.94644341"
id="Path-13" sketch:type="MSShapeGroup"></path>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/resources/images/cirrus-beta-rtl.svg
b/resources/images/cirrus-beta-rtl.svg
new file mode 100644
index 0000000..02bad52
--- /dev/null
+++ b/resources/images/cirrus-beta-rtl.svg
@@ -0,0 +1,252 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ version="1.1"
+ width="263"
+ height="161"
+ viewBox="0 0 263 161"
+ id="svg2">
+ <title
+ id="title4">CirrusBeta</title>
+ <metadata
+ id="metadata70">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title>CirrusBeta</dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <description
+ id="description6">Created with Sketch
(http://www.bohemiancoding.com/sketch)</description>
+ <defs
+ id="defs8" />
+ <g
+ transform="matrix(-1,0,0,1,262.4388,0)"
+ id="Page-1"
+ style="fill:none;stroke:none">
+ <path
+ d="M -0.04296875,0 262.8138,0 l 0,152.13929 -12.11328,9.81253
-13.90365,-10.48307 -13.91536,9.71745 -12.60677,-9.87761 -13.52083,10.22396
-13.38282,-10.08724 -12.84244,9.75391 -14.65756,-9.55729 -12.9401,10.05078
-12.35938,-10.22396 -13.43359,9.97656 -13.07682,-10.05078 -12.865888,10.05078
-14.582031,-9.47005 -13.509114,9.64323 -13.014323,-10.17318 -11.68099,9.71745
-14.149739,-9.71745 -11.21224,9.65495 L -0.375,151.44531 -0.04296875,0 z"
+ id="Path-17"
+ style="fill:#ffffff;stroke:#e5e5e5" />
+ <rect
+ width="16"
+ height="16"
+ rx="1"
+ x="55"
+ y="26"
+ id="Rectangle-16"
+ style="fill:#0e73fc" />
+ <rect
+ width="32"
+ height="6"
+ x="8"
+ y="5"
+ id="Rectangle-8"
+ style="fill:#d8d8d8" />
+ <rect
+ width="53"
+ height="7"
+ rx="1"
+ x="75"
+ y="26"
+ id="Rectangle-8-copy"
+ style="fill:#0e73fc" />
+ <rect
+ width="73"
+ height="1"
+ rx="1"
+ x="75"
+ y="41"
+ id="rect15"
+ style="fill:#0e73fc" />
+ <rect
+ width="94"
+ height="1"
+ rx="1"
+ x="75"
+ y="38"
+ id="rect17"
+ style="fill:#0e73fc" />
+ <rect
+ width="94"
+ height="1"
+ rx="1"
+ x="75"
+ y="35"
+ id="rect19"
+ style="fill:#0e73fc" />
+ <rect
+ width="63"
+ height="3"
+ rx="1"
+ x="55"
+ y="48"
+ id="rect21"
+ style="fill:#0e73fc" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="53"
+ id="rect23"
+ style="fill:#0e73fc" />
+ <rect
+ width="25"
+ height="3"
+ rx="1"
+ x="55"
+ y="61"
+ id="rect25"
+ style="fill:#0e73fc" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="66"
+ id="rect27"
+ style="fill:#0e73fc" />
+ <rect
+ width="36"
+ height="3"
+ rx="1"
+ x="55"
+ y="74"
+ id="rect29"
+ style="fill:#4a4a4a" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="79"
+ id="rect31"
+ style="fill:#4a4a4a" />
+ <rect
+ width="42"
+ height="3"
+ rx="1"
+ x="55"
+ y="88"
+ id="rect33"
+ style="fill:#d0d0d0" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="93"
+ id="rect35"
+ style="fill:#d0d0d0" />
+ <rect
+ width="20"
+ height="3"
+ rx="1"
+ x="55"
+ y="102"
+ id="rect37"
+ style="fill:#d0d0d0" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="107"
+ id="rect39"
+ style="fill:#d0d0d0" />
+ <rect
+ width="25"
+ height="3"
+ rx="1"
+ x="55"
+ y="117"
+ id="rect41"
+ style="fill:#d4d4d4;fill-opacity:0.5" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="122"
+ id="rect43"
+ style="fill:#d4d4d4;fill-opacity:0.5" />
+ <rect
+ width="56"
+ height="3"
+ rx="1"
+ x="55"
+ y="131"
+ id="rect45"
+ style="fill:#d4d4d4;fill-opacity:0.5" />
+ <rect
+ width="114"
+ height="1"
+ rx="1"
+ x="55"
+ y="136"
+ id="rect47"
+ style="fill:#d4d4d4;fill-opacity:0.5" />
+ <path
+ d="m 10,36 c 0,-7.732 6.268,-14 14,-14 7.732,0 14,6.268 14,14 0,7.732
-6.268,14 -14,14 -7.732,0 -14,-6.268 -14,-14 l 0,0 z"
+ id="Ellipse"
+ style="fill:#e5e5e5" />
+ <path
+ d="m 37,59 0,-5 -25,0 0,5 25,0 0,0 z"
+ id="Shape"
+ style="fill:#e5e5e5" />
+ <path
+ d="m 37,107 0,-35 -25,0 0,35 25,0 0,0 z"
+ id="path51"
+ style="fill:#e5e5e5" />
+ <path
+ d="m 250,5 7.51,0 0,6 -7.51,0 0,-6 0,0 z"
+ id="path53"
+ style="fill:#e5e5e5" />
+ <path
+ d="m 235,5 12.51,0 0,6 -12.51,0 0,-6 0,0 z m 0,0 12.51,0 0,6 -12.51,0
0,-6 0,0 z"
+ id="path55"
+ style="fill:#e5e5e5" />
+ <path
+ d="m 1,14.5 260,0"
+ id="path57"
+ style="fill:#e5e5e5;stroke:#e5e5e5" />
+ <path
+ d="m 147,6 0,4 -92,0 0,-4 92,0 0,0 z m 1,-1 -94,0 0,6 94,0 0,-6 0,0 z"
+ id="path59"
+ style="fill:#e5e5e5" />
+ <path
+ d="m 247.51,5 0,6 -12.51,0 0,-6 12.51,0 z"
+ id="Path"
+ style="fill:#d8d8d8" />
+ <path
+ d="m 231.51,5 0,6 -12.51,0 0,-6 12.51,0 z"
+ id="path62"
+ style="fill:#d8d8d8" />
+ <path
+ d="m 58,7 0,2 -2,0 0,-2 2,0 z"
+ id="Path-copy"
+ style="fill:#d8d8d8" />
+ <g
+ transform="translate(59,30)"
+ id="Oval-1-+-Path-13"
+ style="stroke:#ffffff;stroke-width:2">
+ <path
+ d="m 2.6666667,5.3333333 c 1.4727594,0 2.6666666,-1.1939072
2.6666666,-2.6666666 C 5.3333333,1.1939073 4.1394261,0 2.6666667,0 1.1939073,0
0,1.1939073 0,2.6666667 0,4.1394261 1.1939073,5.3333333 2.6666667,5.3333333 z"
+ id="Oval-1" />
+ <path
+ d="M 4.5572444,4.5156097 7.988078,7.9464434"
+ id="Path-13" />
+ </g>
+ </g>
+</svg>
diff --git a/tests/browser/features/step_definitions/search_steps.rb
b/tests/browser/features/step_definitions/search_steps.rb
index c05a4cf..49b7f4b 100644
--- a/tests/browser/features/step_definitions/search_steps.rb
+++ b/tests/browser/features/step_definitions/search_steps.rb
@@ -82,12 +82,12 @@
Then(/^the API should produce list containing (.*)/) do |term|
found = false
@api_result["suggest"].each do |el|
- found = true if el["title"] == term
+ found = true if el["text"] == term
end
found.should == true
end
Then(/^the API should produce list starting with (.*)/) do |term|
- @api_result["suggest"][0]["title"].should == term
+ @api_result["suggest"][0]["text"].should == term
end
Then(/^the API should produce list of length (\d+)/) do |length|
@api_result["suggest"].length.should == length.to_i
diff --git a/tests/browser/features/suggest_api.feature
b/tests/browser/features/suggest_api.feature
index 7390a8d..4e87eb8 100644
--- a/tests/browser/features/suggest_api.feature
+++ b/tests/browser/features/suggest_api.feature
@@ -41,7 +41,7 @@
| max | Max Eisenhardt |
| magnetu | Magneto |
- Scenario Outline: Search prefers exact match over
+ Scenario Outline: Search prefers exact match over fuzzy match
When I ask suggestion API for <term>
Then the API should produce list starting with <suggested>
Examples:
@@ -50,7 +50,24 @@
| mai | Main Page |
| eis | Eisenhardt, Max |
+ Scenario Outline: Search prefers exact db match over partial prefix match
+ When I ask suggestion API at most 2 items for <term>
+ Then the API should produce list starting with <first>
+ And the API should produce list containing <other>
+ Examples:
+ | term | first | other |
+ | Ic | Iceman | Ice |
+ | Ice | Ice | Iceman |
+
Scenario: Ordering & limit
When I ask suggestion API at most 1 item for x-m
Then the API should produce list starting with X-Men
And the API should produce list of length 1
+
+ Scenario Outline: Search falls back to prefix search if namespace is provided
+ When I ask suggestion API for <term>
+ Then the API should produce list starting with <suggested>
+ Examples:
+ | term | suggested |
+ | Special: | Special:ActiveUsers |
+ | Special:Act | Special:ActiveUsers |
diff --git a/tests/browser/features/support/hooks.rb
b/tests/browser/features/support/hooks.rb
index 9ae9776..91be10b 100644
--- a/tests/browser/features/support/hooks.rb
+++ b/tests/browser/features/support/hooks.rb
@@ -639,6 +639,11 @@
And a page named Max Eisenhardt exists with contents #REDIRECT
[[Magneto]]
And a page named Eisenhardt, Max exists with contents #REDIRECT
[[Magneto]]
And a page named Magnetu exists with contents #REDIRECT [[Magneto]]
+ And a page named Ice exists with contents It's cold.
+ And a page named Iceman exists with contents Iceman (Robert "Bobby"
Drake) is a fictional superhero appearing in American comic books published by
Marvel Comics and is...
+ And a page named Ice Man (Marvel Comics) exists with contents
#REDIRECT [[Iceman]]
+ And a page named Ice-Man (comics books) exists with contents #REDIRECT
[[Iceman]]
+ And a page named Ultimate Iceman exists with contents #REDIRECT
[[Iceman]]
And I reindex suggestions
)
suggest = true
diff --git a/tests/jenkins/FullyFeaturedConfig.php
b/tests/jenkins/FullyFeaturedConfig.php
index 0580524..dd841ec 100644
--- a/tests/jenkins/FullyFeaturedConfig.php
+++ b/tests/jenkins/FullyFeaturedConfig.php
@@ -37,6 +37,8 @@
$wgCirrusSearchWikimediaExtraPlugin[ 'field_value_factor_with_default' ] =
true;
$wgCirrusSearchWikimediaExtraPlugin[ 'id_hash_mod_filter' ] = true;
+$wgCirrusSearchUseCompletionSuggester = true;
+
$wgJobQueueAggregator = array(
'class' => 'JobQueueAggregatorRedis',
'redisServer' => 'localhost',
diff --git a/tests/unit/Search/SearchSuggestionSetTest.php
b/tests/unit/Search/SearchSuggestionSetTest.php
new file mode 100644
index 0000000..54062da
--- /dev/null
+++ b/tests/unit/Search/SearchSuggestionSetTest.php
@@ -0,0 +1,100 @@
+<?php
+
+namespace CirrusSearch\Search;
+
+/**
+ * Test for SearchSuggestionSet.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+class SearchSuggestionSetTest extends \PHPUnit_Framework_TestCase {
+	/**
+	 * Test that adding a new suggestion at the end
+	 * will keep proper score ordering.
+	 *
+	 * The assertions below pin that a suggestion appended with a score that
+	 * is not lower than the current worst one ends up with worst score - 1.
+	 */
+	public function testAppend() {
+		$set = SearchSuggestionSet::emptySuggestionSet();
+		$this->assertEquals( 0, $set->getSize() );
+		$set->addSuggestion( new SearchSuggestion( null, null, 3 ) );
+		$this->assertEquals( 3, $set->getWorstScore() );
+		$this->assertEquals( 3, $set->getBestScore() );
+
+		// Created with score 4 but appended after a 3: expected to become 2.
+		$suggestion = new SearchSuggestion( null, null, 4 );
+		$set->addSuggestion( $suggestion );
+		$this->assertEquals( 2, $set->getWorstScore() );
+		$this->assertEquals( 3, $set->getBestScore() );
+		$this->assertEquals( 2, $suggestion->getScore() );
+
+		// Created with score 2, which ties the current worst: expected to become 1.
+		$suggestion = new SearchSuggestion( null, null, 2 );
+		$set->addSuggestion( $suggestion );
+		$this->assertEquals( 1, $set->getWorstScore() );
+		$this->assertEquals( 3, $set->getBestScore() );
+		$this->assertEquals( 1, $suggestion->getScore() );
+
+		$this->assertScoresBestFirst( $set );
+	}
+
+	/**
+	 * Test that adding a new best suggestion will keep proper score
+	 * ordering.
+	 *
+	 * The assertions below pin that a suggestion inserted as best with a
+	 * missing or too low score ends up with best score + 1.
+	 */
+	public function testInsertBest() {
+		$set = SearchSuggestionSet::emptySuggestionSet();
+		$this->assertEquals( 0, $set->getSize() );
+		$set->insertBestSuggestion( new SearchSuggestion( null, null, 3 ) );
+		$this->assertEquals( 3, $set->getWorstScore() );
+		$this->assertEquals( 3, $set->getBestScore() );
+
+		// Already better than the current best: score is kept.
+		$suggestion = new SearchSuggestion( null, null, 4 );
+		$set->insertBestSuggestion( $suggestion );
+		$this->assertEquals( 3, $set->getWorstScore() );
+		$this->assertEquals( 4, $set->getBestScore() );
+		$this->assertEquals( 4, $suggestion->getScore() );
+
+		// No score at all: expected to become best + 1.
+		$suggestion = new SearchSuggestion( null, null, null );
+		$set->insertBestSuggestion( $suggestion );
+		$this->assertEquals( 3, $set->getWorstScore() );
+		$this->assertEquals( 5, $set->getBestScore() );
+		$this->assertEquals( 5, $suggestion->getScore() );
+
+		// Lower than the current best: expected to become best + 1.
+		$suggestion = new SearchSuggestion( null, null, 2 );
+		$set->insertBestSuggestion( $suggestion );
+		$this->assertEquals( 3, $set->getWorstScore() );
+		$this->assertEquals( 6, $set->getBestScore() );
+		$this->assertEquals( 6, $suggestion->getScore() );
+
+		$this->assertScoresBestFirst( $set );
+	}
+
+	/**
+	 * Test that shrink() truncates the set to the requested size,
+	 * including down to an empty set.
+	 */
+	public function testShrink() {
+		$set = SearchSuggestionSet::emptySuggestionSet();
+		for ( $i = 0; $i < 100; $i++ ) {
+			$set->addSuggestion( new SearchSuggestion() );
+		}
+		$set->shrink( 10 );
+		$this->assertEquals( 10, $set->getSize() );
+
+		$set->shrink( 0 );
+		$this->assertEquals( 0, $set->getSize() );
+	}
+
+	/**
+	 * Assert that the suggestions of a set are listed from best (highest)
+	 * to worst (lowest) score.
+	 *
+	 * The previous check compared the scores with an asort()ed copy.
+	 * asort() keeps the keys and assertEquals() matches arrays key by key,
+	 * so that comparison could never fail. Both arrays are re-indexed here
+	 * so that the comparison is positional.
+	 *
+	 * @param SearchSuggestionSet $set
+	 */
+	private function assertScoresBestFirst( SearchSuggestionSet $set ) {
+		$scores = array_values(
+			$set->map( function( $s ) { return $s->getScore(); } )
+		);
+		$expected = $scores;
+		// rsort() sorts descending and assigns fresh 0..n-1 keys.
+		rsort( $expected );
+		$this->assertEquals( $expected, $scores );
+	}
+}
diff --git a/tests/unit/SuggestBuilderTest.php
b/tests/unit/SuggestBuilderTest.php
index 416d7f8..176ca73 100644
--- a/tests/unit/SuggestBuilderTest.php
+++ b/tests/unit/SuggestBuilderTest.php
@@ -285,4 +285,47 @@
$coord = $builder->findPrimaryCoordinates( $doc );
$this->assertNull( $coord, "No coord if none is on earth." );
}
+
+	/**
+	 * Decoding an encoded suggestion output must give back the expected
+	 * value (null for the data sets whose input is not a valid encoding).
+	 *
+	 * @dataProvider provideOutputEncoder
+	 * @param array|null $expected expected result of the decoding
+	 * @param string|null $encoded output handed to SuggestBuilder::decodeOutput()
+	 */
+	public function testOutputEncoder( $expected, $encoded ) {
+		$decoded = SuggestBuilder::decodeOutput( $encoded );
+		$this->assertEquals( $expected, $decoded );
+	}
+
+	/**
+	 * Data sets for testOutputEncoder: each named entry pairs the expected
+	 * decoded value with the encoded output to decode.
+	 *
+	 * @return array[]
+	 */
+	public function provideOutputEncoder() {
+		$pageId = 123;
+		$titleText = 'This is a title';
+
+		$cases = array();
+
+		// Title suggestion: expected to decode to id, type and text.
+		$cases['title'] = array(
+			array(
+				'id' => $pageId,
+				'type' => SuggestBuilder::TITLE_SUGGESTION,
+				'text' => $titleText,
+			),
+			SuggestBuilder::encodeTitleOutput( $pageId, $titleText ),
+		);
+
+		// Redirect suggestion: expected to decode to id and type only.
+		$cases['redirect'] = array(
+			array(
+				'id' => $pageId,
+				'type' => SuggestBuilder::REDIRECT_SUGGESTION,
+			),
+			SuggestBuilder::encodeRedirectOutput( $pageId ),
+		);
+
+		// Inputs that are expected to decode to null.
+		$undecodable = array(
+			'Garbage' => 'Garbage',
+			'Broken title' => '123:t',
+			'Partial encoding' => '123:',
+			'null output' => null,
+		);
+		foreach ( $undecodable as $label => $encoded ) {
+			$cases[$label] = array( null, $encoded );
+		}
+
+		return $cases;
+	}
}
--
To view, visit https://gerrit.wikimedia.org/r/237693
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ida9b9f89043f503a68d50e3f10046dd102b9a2ff
Gerrit-PatchSet: 16
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
Gerrit-Reviewer: Cindy-the-browser-test-bot <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: MaxSem <[email protected]>
Gerrit-Reviewer: Smalyshev <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits