EBernhardson has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/235133

Change subject: Completion suggester : improve precision
......................................................................

Completion suggester : improve precision

* Splits suggestions into title suggestions with similar redirects
  and redirect suggestions
* With redirect suggestions a second pass query is required to fetch
  the text to display
* Added more config options with SuggestProfiles
* Added support for geo context suggestions
* Improved precision with post-search re-scoring
* Moved complex configuration profiles into 'profiles' folder

NOTE: this is experimental and indexing strategy may change.

Change-Id: I37953179d3f10036344fe16bf31da3fd04a7c075
(cherry picked from commit fa5e1385a41ff9ad72a7c72bf876bf7236f81b74)
---
M CirrusSearch.php
M includes/BuildDocument/SuggestBuilder.php
M includes/BuildDocument/SuggestScoring.php
M includes/ElasticsearchIntermediary.php
M includes/Maintenance/SuggesterAnalysisConfigBuilder.php
M includes/Maintenance/SuggesterMappingConfigBuilder.php
M includes/Searcher.php
M includes/Util.php
M maintenance/updateSuggesterIndex.php
A profiles/PhraseSuggesterProfiles.php
A profiles/SuggestProfiles.php
M tests/browser/features/suggest_api.feature
M tests/browser/features/support/hooks.rb
A tests/unit/SuggestBuilderTest.php
M tests/unit/SuggestScoringTest.php
M tests/unit/UtilTest.php
16 files changed, 1,000 insertions(+), 214 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/33/235133/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index 14738e1..eb2aa2f 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -21,6 +21,9 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 
+require_once __DIR__ . "/profiles/SuggestProfiles.php";
+require_once __DIR__ . "/profiles/PhraseSuggesterProfiles.php";
+
 $wgExtensionCredits['other'][] = array(
        'path'           => __FILE__,
        'name'           => 'CirrusSearch',
@@ -265,117 +268,8 @@
 // (This is the minimal value)
 $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit = 2;
 
-// Phrase suggester profiles (Did you mean)
-$wgCirrusSearchPhraseSuggestProfiles = array(
-       // This is the default settings
-       'default' => array(
-               // The suggest mode used by the phrase suggester
-               // can be :
-               //  * missing: Only suggest terms in the suggest text that
-               //             aren’t in the index.
-               //  * popular: Only suggest suggestions that occur in more docs
-               //             then the original suggest text term.
-               //  * always: Suggest any matching suggestions based on terms
-               //            in the suggest text.
-               'mode' => 'always',
-
-               // Confidence level required to suggest new phrases.
-               // See confidence on 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html
-               'confidence' => 2.0,
-
-               // Maximum number of terms that we ask phrase suggest to 
correct.
-               // See max_errors on 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html
-               'max_errors' => 2,
-
-               // the likelihood of a term being a misspelled even if the term 
exists in the dictionary.
-               'real_word_error_likelihood' => 0.95,
-
-               // The max term freq used by the phrase suggester.  The maximum
-               // threshold in number of documents a suggest text token can
-               // exist in order to be included. Can be a relative percentage
-               // number (e.g 0.4) or an absolute // number to represent
-               // document frequencies. If an value higher than 1 is specified
-               // then fractional can not be specified. Defaults to 0.01f.  If
-               // a term appears in more then half the docs then don't try to
-               // correct it.  This really shouldn't kick in much because we're
-               // not looking for misspellings.  We're looking for phrases that
-               // can be might off.  Like "noble prize" ->  "nobel prize".  In
-               // any case, the default was 0.01 which way too frequently
-               // decided not to correct some terms.
-               'max_term_freq' => 0.5,
-
-               // The max doc freq (shard level) used by the phrase suggester
-               // The minimal threshold in number of documents a suggestion
-               // should appear in.  This can be specified as an absolute
-               // number or as a relative percentage of number of documents.
-               // This can improve quality by only suggesting high frequency
-               // terms. Defaults to 0f and is not enabled. If a value higher
-               // than 1 is specified then the number cannot be fractional. The
-               // shard level document frequencies are used for this option.
-               // NOTE: this value is ignored if mode is "always"
-               'min_doc_freq' => 0.0,
-
-               // The prefix length used by the phrase suggester The number of
-               // minimal prefix characters that must match in order be a
-               // candidate suggestions. Defaults to 1. Increasing this number
-               // improves spellcheck performance.  Usually misspellings don’t
-               // occur in the beginning of terms.
-               'prefix_length' => 2,
-
-               // Checks each suggestion against a specified query to prune
-               // suggestions for which no matching docs exist in the index.
-               'collate' => false,
-
-               // Controls the minimum_should_match option used by the collate
-               // query.
-               'collate_minimum_should_match' => '3<66%',
-
-               // Smoothing model See
-               // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html
-               'smoothing_model' => array(
-                       'stupid_backoff' => array(
-                               'discount' => 0.4
-                       )
-               ),
-       ),
-       // The 'strict' settings will try to avoid displaying weird suggestions.
-       // (suited for small size wikis)
-       'strict' => array(
-               'mode' => 'always',
-               'confidence' => 2.0,
-               'max_errors' => 2,
-               'real_word_error_likelihood' => 0.95,
-               'max_term_freq' => 0.5,
-               'min_doc_freq' => 0.0,
-               'prefix_length' => 2,
-               'collate' => true,
-               'collate_minimum_should_match' => '3<66%',
-               'smoothing_model' => array(
-                       'laplace' => array(
-                               'alpha' => 0.3
-                       )
-               )
-       ),
-       // Alternative settings, confidence set to 1 but with laplace smoothing
-       'alternative' => array(
-               'mode' => 'always',
-               'confidence' => 1.0,
-               'max_errors' => 2,
-               'real_word_error_likelihood' => 0.95,
-               'max_term_freq' => 0.5,
-               'min_doc_freq' => 0.0,
-               'prefix_length' => 2,
-               'collate' => false,
-               'collate_minimum_should_match' => '3<66%',
-               'smoothing_model' => array(
-                       'laplace' => array(
-                               'alpha' => 0.3
-                       )
-               )
-       )
-);
-
 // Set the Phrase suggester settings using the default profile.
+// see profiles/PhraseSuggesterProfiles.php
 $wgCirrusSearchPhraseSuggestSettings = 
$wgCirrusSearchPhraseSuggestProfiles['default'];
 
 // Look for suggestions in the article text?  Changing this from false to true 
will
@@ -792,20 +686,20 @@
 $wgCirrusSearchUserTesting = array();
 
 /**
- * Settings for completion suggestion options.
- * See CirrusSearch\BuildDocument\SuggestBuilder and CirrusSearch\Searcher
- * fields - set of suggestion fields to use
- * fuzzy - fuzziness configuration (false for no fuzziness)
- * See also: 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html
+ * Profile for search as you type suggestion (completion suggestion)
+ * (see profiles/SuggestProfiles.php for more details.)
+ *
+ * NOTE: This is an experimental API
  */
-$wgCirrusSearchCompletionSettings = array(
-       "fields" => array(
-               "suggest", "suggest-stop"
-       ),
-       "fuzzy" => array(
-               "fuzziness" => 2
-       ),
-);
+$wgCirrusSearchCompletionSettings = 
$wgCirrusSearchCompletionProfiles['default'];
+
+/**
+ * Profile for geo context search as you type suggestion (completion 
suggestion)
+ * (see profiles/SuggestProfiles.php for more details.)
+ *
+ * NOTE: This is an experimental API
+ */
+$wgCirrusSearchCompletionGeoContextSettings = 
$wgCirrusSearchCompletionGeoContextProfiles['default'];
 
 $includes = __DIR__ . "/includes/";
 $apiDir = $includes . 'Api/';
diff --git a/includes/BuildDocument/SuggestBuilder.php 
b/includes/BuildDocument/SuggestBuilder.php
index 4188095..9570b2f 100644
--- a/includes/BuildDocument/SuggestBuilder.php
+++ b/includes/BuildDocument/SuggestBuilder.php
@@ -23,9 +23,23 @@
 
 /**
  * Builder used to create suggester docs
+ * NOTE: Experimental
  */
 class SuggestBuilder {
+       /**
+        * We limit the input to 50 chars
+        */
        const MAX_INPUT_LENGTH = 50;
+
+       /**
+        * The acceptable edit distance to group similar strings
+        */
+       const GROUP_ACCEPTABLE_DISTANCE = 2;
+
+       /**
+        * Discount suggestions based on redirects
+        */
+       const REDIRECT_DISCOUNT = 0.1;
 
        /**
         * @var SuggestScoringMethod the scoring function
@@ -33,10 +47,16 @@
        private $scoringMethod;
 
        /**
+        * @var boolean builds geo contextualized suggestions
+        */
+       private $withGeo;
+
+       /**
         * @param SuggestScoringMethod $scoringMethod the scoring function to 
use
         */
-       public function __construct( SuggestScoringMethod $scoringMethod ) {
+       public function __construct( SuggestScoringMethod $scoringMethod, 
$withGeo = true ) {
                $this->scoringMethod = $scoringMethod;
+               $this->withGeo = $withGeo;
        }
 
        /**
@@ -45,39 +65,131 @@
         * @return array a set of suggest documents
         */
        public function build( $id, $inputDoc ) {
+               if( !isset( $inputDoc['title'] ) ) {
+                       // Bad doc, nothing to do here.
+                       return array();
+               }
                $score = $this->scoringMethod->score( $inputDoc );
-               $inputs = $this->buildInputs( $inputDoc );
+
+               // We support only earth and the primary/first coordinates...
+               $location = $this->findPrimaryCoordinates( $inputDoc );
+
+               $suggestions = $this->extractTitleAndSimilarRedirects( 
$inputDoc );
+               $docs[] = $this->buildTitleSuggestion( $id, 
$suggestions['group'], $location, $score );
+               if ( !empty( $suggestions['candidates'] ) ) {
+                       $docs[] = $this->buildRedirectsSuggestion( $id, 
$suggestions['candidates'],
+                               $location, $score );
+               }
+               return $docs;
+       }
+
+       /**
+        * Inspects the 'coordinates' index and returns the first 'earth' coordinates 
flagged as 'primary'
+        * or the first 'earth' coordinates if no primaries are found.
+        * @param array $inputDoc the input doc
+        * @return array with 'lat' and 'lon' or null
+        */
+       public function findPrimaryCoordinates( $inputDoc ) {
+               if ( !isset( $inputDoc['coordinates'] ) || !is_array( 
$inputDoc['coordinates'] ) ) {
+                       return null;
+               }
+
+               $first = null;
+               foreach( $inputDoc['coordinates'] as $coord ) {
+                       if ( isset( $coord['globe'] ) && $coord['globe'] == 
'earth' && isset( $coord['coord'] ) ) {
+                               if ( $first === null ) {
+                                       $first = $coord['coord'];
+                               }
+                               if ( isset( $coord['primary'] ) && 
$coord['primary'] ) {
+                                       return $coord['coord'];
+                               }
+                       }
+               }
+               return $first;
+       }
+
+       /**
+        * Builds the 'title' suggestion.
+        * The output is encoded as pageId:t:Title.
+        * NOTE: the client will be able to display Title encoded in the output 
when searching.
+        *
+        * @param int $id the page id
+        * @param array $title the title in 'text' and an array of similar 
redirects in 'variants'
+        * @param array $location the geo coordinates or null if unavailable
+        * @param int $score the weight of the suggestion
+        * @return array the suggestion document
+        */
+       private function buildTitleSuggestion( $id, $title, $location, $score ) 
{
+               $inputs = array( $this->prepareInput( $title['text'] ) );
+               foreach ( $title['variants'] as $variant ) {
+                       $inputs[] = $this->prepareInput( $variant );
+               }
+               $output = $id . ":t:" . $title['text'];
+               return $this->buildSuggestion( $output, $inputs, $location, 
$score );
+       }
+
+       /**
+        * Builds the 'redirects' suggestion.
+        * The output is encoded as pageId:r
+        * The score will be discounted by the REDIRECT_DISCOUNT factor.
+        * NOTE: the client will have to fetch the doc redirects when searching
+        * and choose the best one to display. This is because we are unable
+        * to make this decision at index time.
+        *
+        * @param int $id the page id
+        * @param string[] $redirects the redirects
+        * @param array $location the geo coordinates or null if unavailable
+        * @param int $score the weight of the suggestion
+        * @return array the suggestion document
+        */
+       private function buildRedirectsSuggestion( $id, $redirects, $location, 
$score ) {
+               $inputs = array();
+               foreach ( $redirects as $redirect ) {
+                       $inputs[] = $this->prepareInput( $redirect );
+               }
+               $output = $id . ":r";
+               $score = (int) ( $score * self::REDIRECT_DISCOUNT );
+               return $this->buildSuggestion( $output, $inputs, $location, 
$score );
+       }
+
+       /**
+        * Builds a suggestion document.
+        *
+        * @param string $output the suggestion output
+        * @param array $inputs the suggestion inputs
+        * @param array $location the geo coordinates or null if unavailable
+        * @param int $score the weight of the suggestion
+        * @return array a doc ready to be indexed in the completion suggester
+        */
+       private function buildSuggestion( $output, $inputs, $location, $score ) 
{
                $doc = array(
                        'suggest' => array (
                                'input' => $inputs,
-                               'output' => $id,
+                               'output' => $output,
                                'weight' => $score
                        ),
                        'suggest-stop' => array (
                                'input' => $inputs,
-                               'output' => $id,
+                               'output' => $output,
                                'weight' => $score
                        )
                );
 
-               // We support only earth and we take the first coordinate 
only...
-               if ( isset ( $inputDoc['coordinates'][0]['globe'] ) && 
$inputDoc['coordinates'][0]['globe'] === 'earth' ) {
-                       $location = array( 'location' => 
$inputDoc['coordinates'][0]['coord'] );
-
+               if ( $this->withGeo && $location !== null ) {
                        $doc['suggest-geo'] = array(
                                'input' => $inputs,
-                               'output' => $id,
+                               'output' => $output,
                                'weight' => $score,
-                               'context' => $location
+                               'context' => array( 'location' => $location )
                        );
                        $doc['suggest-stop-geo'] = array(
                                'input' => $inputs,
-                               'output' => $id,
+                               'output' => $output,
                                'weight' => $score,
-                               'context' => $location
+                               'context' => array( 'location' => $location )
                        );
                }
-               return array( $doc );
+               return $doc;
        }
 
        /**
@@ -85,10 +197,10 @@
         * @return array list of prepared suggestions that should
         *  resolve to the document.
         */
-       public function buildInputs( array $input ) {
-               $inputs = array( $this->prepareInput( $input['title'] ) );
-               foreach ( $input['redirect'] as $redir ) {
-                       $inputs[] = $this->prepareInput( $redir['title'] );
+       public function buildInputs( $input ) {
+               $inputs = array( $this->prepareInput( $input['text'] ) );
+               foreach ( $input['variants'] as $variant ) {
+                       $inputs[] = $this->prepareInput( $variant );
                }
                return $inputs;
        }
@@ -104,4 +216,91 @@
                }
                return $input;
        }
+
+       /**
+        * Extracts title with redirects that are very close.
+        * It will allow to make one suggestion with title as the
+        * output and title + similar redirects as the inputs.
+        * It can be useful to avoid displaying redirects created
+        * to handle typos.
+        *
+        * e.g. :
+        *   title: Giraffe
+        *   redirects: Girafe, Girraffe, Mating Giraffes
+        * will output
+        *   - 'group' : { 'text': 'Giraffe', 'variants': ['Girafe', 
'Girraffe'] }
+        *   - 'candidates' : ['Mating Giraffes']
+        *
+        * It would be nice to do this for redirects but we have no way to 
decide
+        * which redirect is a typo and this technique would simply take the 
first
+        * redirect in the list.
+        *
+        * @return array mixed 'group' key contains the group with the
+        *         lead and its variants and 'candidates' contains the remaining
+        *         candidates that were not close enough to $groupHead.
+        */
+       public function extractTitleAndSimilarRedirects( $doc ) {
+               $redirects = array();
+               if ( isset( $doc['redirect'] ) ) {
+                       foreach( $doc['redirect'] as $redir ) {
+                               $redirects[] = $redir['title'];
+                       }
+               }
+               return $this->extractSimilars( $doc['title'], $redirects, true 
);
+       }
+
+       /**
+        * Extracts from $candidates the values that are "similar" to $groupHead
+        *
+        * @param string $groupHead string the group "head"
+        * @param array $candidates array of string the candidates
+        * @param boolean $checkVariants if the candidate does not match the 
groupHead try to match a variant
+        * @return array 'group' key contains the group with the
+        *         head and its variants and 'candidates' contains the remaining
+        *         candidates that were not close enough to $groupHead.
+        */
+       private function extractSimilars( $groupHead, $candidates, 
$checkVariants = false ) {
+               $group = array(
+                       'text' => $groupHead,
+                       'variants' => array()
+               );
+               $newCandidates = array();
+               foreach( $candidates as $c ) {
+                       $distance = $this->distance( $groupHead, $c );
+                       if( $distance > self::GROUP_ACCEPTABLE_DISTANCE && 
$checkVariants ) {
+                               // Run a second pass over the variants
+                               foreach ( $group['variants'] as $v ) {
+                                       $distance = $this->distance( $v, $c );
+                                       if ( $distance <= 
self::GROUP_ACCEPTABLE_DISTANCE ) {
+                                               break;
+                                       }
+                               }
+                       }
+                       if ( $distance <= self::GROUP_ACCEPTABLE_DISTANCE ) {
+                               $group['variants'][] = $c;
+                       } else {
+                               $newCandidates[] = $c;
+                       }
+               }
+
+               return array(
+                       'group' => $group,
+                       'candidates' => $newCandidates
+               );
+       }
+
+       /**
+        * Computes the edit distance between $a and $b.
+        * @param string $a
+        * @param string $b
+        * @return integer the edit distance between a and b
+        */
+       private function distance( $a, $b ) {
+               $a = $this->prepareInput( $a );
+               $b = $this->prepareInput( $b );
+               $a = mb_strtolower( $a );
+               $b = mb_strtolower( $b );
+
+               return levenshtein( $a, $b );
+       }
 }
diff --git a/includes/BuildDocument/SuggestScoring.php 
b/includes/BuildDocument/SuggestScoring.php
index b025b72..fb9c4df 100644
--- a/includes/BuildDocument/SuggestScoring.php
+++ b/includes/BuildDocument/SuggestScoring.php
@@ -72,7 +72,20 @@
 }
 
 /**
- * Score that tries to reflect the quality of a page
+ * Score that tries to reflect the quality of a page.
+ * NOTE: Experimental
+ *
+ * This score makes the assumption that bigger is better.
+ *
+ * Small cities/villages which have a high number of incoming links because they
+ * link to each other ( see 
https://en.wikipedia.org/wiki/Villefort,_Loz%C3%A8re )
+ * will be discounted correctly because other variables are very low.
+ *
+ * On the other hand, some pages like List will sometimes get a very high but 
unjustified
+ * score.
+ *
+ * The boost templates feature might help but it's a System message that is 
not necessarily
+ * configured by wiki admins.
  */
 class QualityScore implements SuggestScoringMethod {
        // TODO: move these constants into a cirrus profile
@@ -90,7 +103,7 @@
        const REDIRECT_WEIGHT = 0.1;
 
        // The final score will be in the range [0, SCORE_RANGE]
-       const SCORE_RANGE = 100000;
+       const SCORE_RANGE = 10000000;
 
        /**
         * Template boosts configured by the mediawiki admin.
diff --git a/includes/ElasticsearchIntermediary.php 
b/includes/ElasticsearchIntermediary.php
index 2066df4..3512fac 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -50,7 +50,7 @@
        /**
         * @var array map of search request stats to log about the current 
search request
         */
-       private $logContext = array();
+       protected $logContext = array();
 
        /**
         * @var int how many millis a request through this intermediary needs 
to take before it counts as slow.
diff --git a/includes/Maintenance/SuggesterAnalysisConfigBuilder.php 
b/includes/Maintenance/SuggesterAnalysisConfigBuilder.php
index 9f632c9..eaf9ecc 100644
--- a/includes/Maintenance/SuggesterAnalysisConfigBuilder.php
+++ b/includes/Maintenance/SuggesterAnalysisConfigBuilder.php
@@ -56,15 +56,15 @@
                                ),
                                "asciifolding_preserve" => array(
                                        "type" => "asciifolding",
-                                       "preserve_original" => "true",
+                                       "preserve_original" => "false",
                                ),
                                "icu_normalizer" => array(
                                        "type" => "icu_normalizer",
                                        "name" => "nfkc_cf"
                                ),
-                               "50_token_limit" => array(
+                               "token_limit" => array(
                                        "type" => "limit",
-                                       "max_token_count" => "50"
+                                       "max_token_count" => "20"
                                )
                        ),
                        'analyzer' => array(
@@ -75,21 +75,18 @@
                                                "lowercase",
                                                "stop_filter",
                                                "asciifolding_preserve",
-                                               "50_token_limit"
+                                               "token_limit"
                                        ),
                                        "tokenizer" => "standard"
                                ),
-                               // We do not use ascii_folding when searching
-                               // Using ascii folding when searching will 
increase recall
-                               // but could be annoying for the user who makes 
effort to write
-                               // diacritics.
                                "stop_analyzer_search" => array(
                                        "type" => "custom",
                                        "filter" => array(
                                                "standard",
                                                "lowercase",
                                                "stop_filter_search",
-                                               "50_token_limit"
+                                               "asciifolding_preserve",
+                                               "token_limit"
                                        ),
                                        "tokenizer" => "standard"
                                ),
@@ -99,7 +96,7 @@
                                                "standard",
                                                "icu_normalizer",
                                                "asciifolding_preserve",
-                                               "50_token_limit"
+                                               "token_limit"
                                        ),
                                        "tokenizer" => "standard"
                                ),
@@ -108,7 +105,8 @@
                                        "filter" => array(
                                                "standard",
                                                "icu_normalizer",
-                                               "50_token_limit"
+                                               "asciifolding_preserve",
+                                               "token_limit"
                                        ),
                                        "tokenizer" => "standard"
                                )
diff --git a/includes/Maintenance/SuggesterMappingConfigBuilder.php 
b/includes/Maintenance/SuggesterMappingConfigBuilder.php
index 880a998..8303693 100644
--- a/includes/Maintenance/SuggesterMappingConfigBuilder.php
+++ b/includes/Maintenance/SuggesterMappingConfigBuilder.php
@@ -37,7 +37,7 @@
                $geoContext = array(
                        'location' => array(
                                'type' => 'geo',
-                               'precision' => array('1km', '10km', '100km'),
+                               'precision' => array( 6, 4, 3 ), // ~ 1km, 
10km, 100km
                                'neighbors' => true,
                        )
                );
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 59269dc..a95590e 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -777,8 +777,10 @@
        /**
         * Produce a set of completion suggestions for text using _suggest
         * See 
https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
+        *
+        * WARNING: experimental API
+        *
         * @param string $text Search term
-        * @param array $context Context, see 
https://www.elastic.co/guide/en/elasticsearch/reference/current/suggester-context.html
         * @return Status
         */
        public function suggest( $text, $context = null ) {
@@ -787,26 +789,32 @@
                $this->term = $text;
 
                $suggest = array( 'text' => $text );
-               foreach ( $wgCirrusSearchCompletionSettings[ 'fields' ] as 
$field ) {
-                       $suggest[$field] = array(
+               $queryLen = mb_strlen( $text );
+               $profile = $wgCirrusSearchCompletionSettings;
+
+               if ( $context != null && isset( $context['geo']['lat'] ) && 
isset( $context['geo']['lon'] )
+                       && is_numeric( $context['geo']['lat'] ) && is_numeric( 
$context['geo']['lon'] )
+               ) {
+                       $profile = $this->prepareGeoContextSuggestProfile( 
$context );
+                       $description = "geo suggest query for {query}";
+               }
+
+               foreach ( $profile as $name => $config ) {
+                       if ( $config['min_query_len'] > $queryLen ) {
+                               continue;
+                       }
+                       $field = $config['field'];
+                       $suggest[$name] = array(
                                'completion' => array(
                                        'field' => $field,
+                                       'size' => $this->limit * 
$config['fetch_limit_factor']
                                )
                        );
-                       if ( $context ) {
-                               $suggest[$field]['completion']['context'] = 
$context;
+                       if ( isset( $config['fuzzy'] ) ) {
+                               $suggest[$name]['completion']['fuzzy'] = 
$config['fuzzy'];
                        }
-
-                       if( is_array( $wgCirrusSearchCompletionSettings[ 
'fuzzy' ] ) ) {
-                               $suggest[$field."-fuzzy"] = array(
-                                       'completion' => array(
-                                               'field' => $field,
-                                               'fuzzy' => 
$wgCirrusSearchCompletionSettings[ 'fuzzy' ],
-                                       )
-                               );
-                               if ( $context ) {
-                                       
$suggest[$field."-fuzzy"]['completion']['context'] = $context;
-                               }
+                       if ( isset( $config['context'] ) ) {
+                               $suggest[$name]['completion']['context'] = 
$config['context'];
                        }
                }
 
@@ -815,86 +823,196 @@
                Connection::setTimeout( $wgCirrusSearchSearchShardTimeout[ 
'default' ] );
 
                $index = Connection::getIndex( $this->indexBaseName, 
Connection::TITLE_SUGGEST_TYPE );
-               $description = "completion suggest query for {query}";
                $logContext = array(
                        'query' => $text,
+                       'queryType' => 'comp_suggest'
                );
                $searcher = $this;
+               $limit = $this->limit;
                $result = Util::doPoolCounterWork(
                        'CirrusSearch-Search',
                        $this->user,
-                       function() use( $searcher, $index, $description, 
$suggest, $logContext, $queryOptions ) {
+                       function() use( $searcher, $index, $suggest, 
$logContext, $queryOptions,
+                                       $profile, $text , $limit ) {
+                               $description = "{queryType} search for 
'{query}'";
                                $searcher->start( $description, $logContext );
                                try {
-                                       return $index->request( "_suggest", 
Request::POST, $suggest, $queryOptions );
+                                       $result = $index->request( "_suggest", 
Request::POST, $suggest, $queryOptions );
+                                       if( $result->isOk() ) {
+                                               $result = 
$searcher->postProcessSuggest( $text, $result,
+                                                       $profile, $limit );
+                                               return $searcher->success( 
$result );
+                                       }
+                                       return $result;
                                } catch ( 
\Elastica\Exception\ExceptionInterface $e ) {
                                        return $searcher->failure( $e );
                                }
                        }
                );
-               if( $result->isOk() ) {
-                       $result = $this->postProcessSuggest( $result, 
$this->limit );
-                       return $this->success( $result );
-               }
                return $result;
+       }
+
+       /**
+        * Prepare the list of suggest requests used for geo context suggestions.
+        * This method will merge $wgCirrusSearchCompletionSettings and
+        * $wgCirrusSearchCompletionGeoContextSettings
+        * @param array $context user's geo context, with ['geo']['lat'] and ['geo']['lon']
+        * @return array suggest request profiles keyed by "<suggest name>-<geo name>"
+        */
+       private function prepareGeoContextSuggestProfile( $context ) {
+               global $wgCirrusSearchCompletionSettings,
+                       $wgCirrusSearchCompletionGeoContextSettings;
+               $profiles = array();
+               foreach ( $wgCirrusSearchCompletionGeoContextSettings as 
$geoname => $geoprof ) {
+                       foreach ( $wgCirrusSearchCompletionSettings as $sugname 
=> $sugprof ) {
+                               if ( !in_array( $sugname, $geoprof['with'] ) ) {
+                                       continue;
+                               }
+                               $profile = $sugprof;
+                               $profile['field'] .= $geoprof['field_suffix'];
+                               $profile['discount'] *= $geoprof['discount'];
+                               $profile['context'] = array(
+                                       'location' => array(
+                                               'lat' => $context['geo']['lat'],
+                                               'lon' => $context['geo']['lon'],
+                                               'precision' => 
$geoprof['precision']
+                                       )
+                               );
+                               $profiles["$sugname-$geoname"] = $profile;
+                       }
+               }
+               return $profiles;
        }
 
        /**
         * merge top level multi-queries and resolve returned pageIds into 
Title objects.
         *
+        * WARNING: experimental API
+        *
+        * @param string $query the user query
         * @param \Elastica\Response $response Response from elasticsearch 
_suggest api
+        * @param array $profile the suggestion profile
         * @param int $limit Maximum suggestions to return, -1 for unlimited
         * @return Title[] List of suggested titles
         */
-       protected function postProcessSuggest( \Elastica\Response $response, 
$limit = -1 ) {
+       protected function postProcessSuggest( $query, \Elastica\Response 
$response, $profile, $limit = -1 ) {
                $data = $response->getData();
                unset( $data['_shards'] );
 
                $suggestions = array();
                foreach ( $data as $name => $results  ) {
-                       foreach ( $results as $suggested ) {
+                       $discount = $profile[$name]['discount'];
+                       foreach ( $results  as $suggested ) {
                                foreach ( $suggested['options'] as $suggest ) {
-                                       $pageId = $suggest['text'];
+                                       $output = explode( ':', 
$suggest['text'], 3 );
+                                       if ( sizeof ( $output ) < 2 ) {
+                                               // Ignore broken output
+                                               continue;
+                                       }
+                                       $pageId = $output[0];
+                                       $type = $output[1];
+
+                                       $score = $discount * $suggest['score'];
                                        if ( !isset( $suggestions[$pageId] ) ||
-                                               $suggest['score'] > 
$suggestions[$pageId]['score']
+                                               $score > 
$suggestions[$pageId]['score']
                                        ) {
-                                               $suggestions[$pageId] = 
$suggest;
+                                               $suggestion = array(
+                                                       'score' => $score,
+                                                       'pageId' => $pageId
+                                               );
+                                               // If it's a title suggestion 
we have the text
+                                               if ( $type === 't' && sizeof( 
$output ) == 3 ) {
+                                                               
$suggestion['text'] = $output[2];
+                                               }
+                                               $suggestions[$pageId] = 
$suggestion;
                                        }
                                }
                        }
                }
 
                // simply sort by existing scores
-               usort( $suggestions, function ( $a, $b ) {
+               uasort( $suggestions, function ( $a, $b ) {
                        return $b['score'] - $a['score'];
                } );
 
+               $this->logContext['hitsTotal'] = count( $suggestions );
+
                if ( $limit > 0 ) {
-                       $suggestions = array_slice( $suggestions, 0, $limit );
+                       $suggestions = array_slice( $suggestions, 0, $limit, 
true );
                }
 
-               // suggest currently returns page ids, we need to resolve those 
now
-               $pageIds = array();
-               foreach ( $suggestions as $suggestion ) {
-                       $pageIds[] = $suggestion['text'];
+               $this->logContext['hitsReturned'] = count( $suggestions );
+               $this->logContext['hitsOffset'] = 0;
+
+               // we must fetch redirect data for redirect suggestions
+               $missingText = array();
+               foreach ( $suggestions as $id => $suggestion ) {
+                       if ( !isset( $suggestion['text'] ) ) {
+                               $missingText[] = $id;
+                       }
                }
 
-               // doesn't guarantee to maintain order
-               $unsortedTitles = Title::newFromIDs( $pageIds );
-               $byId = array();
-               foreach ( $unsortedTitles as $title ) {
-                       $byId[$title->getArticleID()] = $title;
+               if ( !empty ( $missingText ) ) {
+                       // Experimental.
+                       //
+                       // Second pass query to fetch redirects.
+                       // It's not clear if it's the best option: this will 
slow down the whole query
+                       // when we hit a redirect suggestion.
+                       // Other option would be to encode redirects as a 
payload resulting in a
+                       // very big index...
+
+                       // XXX: we support only the content index
+                       $type = Connection::getPageType( $this->indexBaseName, 
Connection::CONTENT_INDEX_TYPE );
+                       // NOTE: we are already in a poolCounterWork
+                       // Multi get is not supported by elastica
+                       $redirResponse = null;
+                       try {
+                               $redirResponse = $type->request( '_mget', 'GET',
+                                       array( 'ids' => $missingText ),
+                                       array( '_source_include' => 'redirect' 
) );
+                               if ( $redirResponse->isOk() ) {
+                                       $docs = $redirResponse->getData();
+                                       $docs = $docs['docs'];
+                                       foreach ( $docs as $doc ) {
+                                               $id = $doc['_id'];
+                                               if ( !isset( 
$doc['_source']['redirect'] )
+                                                       || empty( 
$doc['_source']['redirect'] )
+                                               ) {
+                                                       continue;
+                                               }
+                                               $text = 
Util::chooseBestRedirect( $query, $doc['_source']['redirect'] );
+                                               $suggestions[$id]['text'] = 
$text;
+                                       }
+                               } else {
+                                       LoggerFactory::getInstance( 
'CirrusSearch' )->warning(
+                                               'Unable to fetch redirects for 
suggestion {query} with results {ids} : {error}',
+                                               array( 'query' => $query,
+                                                       'ids' => serialize( 
$missingText ),
+                                                       'error' => 
$redirResponse->getError() ) );
+                               }
+                       } catch ( \Elastica\Exception\ExceptionInterface $e ) {
+                               LoggerFactory::getInstance( 'CirrusSearch' 
)->warning(
+                                       'Unable to fetch redirects for 
suggestion {query} with results {ids} : {error}',
+                                       array( 'query' => $query,
+                                               'ids' => serialize( 
$missingText ),
+                                               'error' => 
$this->extractMessage( $e ) ) );
+                       }
                }
 
                $retval = array();
                foreach ( $suggestions as $suggestion ) {
-                       $pageId = $suggestion['text'];
-                       if ( isset( $byId[$pageId] ) ) {
-                               $retval[] = array(
-                                       'title' => (string)$byId[$pageId],
-                                       'score' => $suggestion['score'],
-                               );
+                       if ( !isset( $suggestion['text'] ) ) {
+                               // We were unable to find a text to display
+                               // Maybe a page with redirects when we built 
the suggester index
+                               // but now without redirects?
+                               continue;
                        }
+                       $retval[] = array(
+                               // XXX: we run the suggester for namespace 0 
for now
+                               'title' => Title::makeTitle( 0, 
$suggestion['text'] ),
+                               'pageId' => $suggestion['pageId'],
+                               'score' => $suggestion['score'],
+                       );
                }
 
                return $retval;
diff --git a/includes/Util.php b/includes/Util.php
index 08f1872..5550609 100644
--- a/includes/Util.php
+++ b/includes/Util.php
@@ -338,4 +338,35 @@
                $lines = array_filter( $lines );               // Remove empty 
lines
                return $lines;
        }
+
+       /**
+        * Tries to identify the best redirect by finding the redirect whose title
+        * (cut to the query length) has the smallest edit distance to the user query.
+        * @param string $userQuery the user query
+        * @param array $redirects the list of redirects, each with a 'title' key
+        * @return string|null the best redirect title, null if $redirects is empty
+        */
+       public static function chooseBestRedirect( $userQuery, $redirects ) {
+               $userQuery = mb_strtolower( $userQuery );
+               $len = mb_strlen( $userQuery );
+               $bestDistance = INF;
+               $best = null;
+
+               foreach( $redirects as $redir ) {
+                       $text = $redir['title'];
+                       if ( mb_strlen( $text ) > $len ) {
+                               $text = mb_substr( $text, 0, $len );
+                       }
+                       $text = mb_strtolower( $text );
+                       $distance = levenshtein( $text, $userQuery );
+                       if ( $distance == 0 ) {
+                               return $redir['title'];
+                       }
+                       if ( $distance < $bestDistance ) {
+                               $bestDistance = $distance;
+                               $best = $redir['title'];
+                       }
+               }
+               return $best;
+       }
 }
diff --git a/maintenance/updateSuggesterIndex.php 
b/maintenance/updateSuggesterIndex.php
index 4df6f0d..106cbd2 100644
--- a/maintenance/updateSuggesterIndex.php
+++ b/maintenance/updateSuggesterIndex.php
@@ -80,6 +80,12 @@
         */
        private $availablePlugins;
 
+
+       /**
+        * @var boolean index geo contextualized suggestions
+        */
+       private $withGeo;
+
        public function __construct() {
                parent::__construct();
                $this->addDescription( "Create a new suggester index." );
@@ -94,6 +100,7 @@
                        'of moving a shard this can time out.  This will retry 
the attempt after some backoff ' .
                        'rather than failing the whole reindex process.  
Defaults to 5.', false, true );
                $this->addOption( 'optimize', 'Optimize the index to 1 segment. 
Defaults to false.', false, false );
+               $this->addOption( 'with-geo', 'Build geo contextualized 
suggestions. Defaults to false.', false, false );
                $this->addOption( 'scoringMethod', 'The scoring method to use 
when computing suggestion weights. ' .
                        'Detauls to quality.', false, true );
        }
@@ -116,6 +123,7 @@
                $this->indexRetryAttempts = $this->getOption( 
'reindexRetryAttempts', 5 );
 
                $this->optimizeIndex = $this->getOption( 'optimize', false );
+               $this->withGeo = $this->getOption( 'with-geo', false );
 
                $utils = new ConfigUtils( $this->getClient(), $this);
 
@@ -212,7 +220,7 @@
 
                $scoreMethodName = $this->getOption( 'scoringMethod', 'quality' 
);
                $this->scoreMethod = 
SuggestScoringMethodFactory::getScoringMethod( $scoreMethodName, 
$totalDocsInIndex );
-               $builder = new SuggestBuilder( $this->scoreMethod );
+               $builder = new SuggestBuilder( $this->scoreMethod, 
$this->withGeo );
 
                $docsDumped = 0;
                $this->output( "Indexing $totalDocsToDump documents 
($totalDocsInIndex in the index)\n" );
diff --git a/profiles/PhraseSuggesterProfiles.php 
b/profiles/PhraseSuggesterProfiles.php
new file mode 100644
index 0000000..885e900
--- /dev/null
+++ b/profiles/PhraseSuggesterProfiles.php
@@ -0,0 +1,131 @@
+<?php
+
+/**
+ * CirrusSearch - List of profiles for "Did you mean" suggestions
+ *
+ * Set $wgSearchType to 'CirrusSearch'
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+$wgCirrusSearchPhraseSuggestProfiles = array(
+       // These are the default settings
+       'default' => array(
+               // The suggest mode used by the phrase suggester
+               // can be :
+               //  * missing: Only suggest terms in the suggest text that
+               //             aren’t in the index.
+               //  * popular: Only suggest suggestions that occur in more docs
+               //             than the original suggest text term.
+               //  * always: Suggest any matching suggestions based on terms
+               //            in the suggest text.
+               'mode' => 'always',
+
+               // Confidence level required to suggest new phrases.
+               // See confidence on 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html
+               'confidence' => 2.0,
+
+               // Maximum number of terms that we ask phrase suggest to 
correct.
+               // See max_errors on 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html
+               'max_errors' => 2,
+
+               // The likelihood of a term being misspelled even if the term 
exists in the dictionary.
+               'real_word_error_likelihood' => 0.95,
+
+               // The max term freq used by the phrase suggester.  The maximum
+               // threshold in number of documents a suggest text token can
+               // exist in, in order to be included. Can be a relative percentage
+               // number (e.g 0.4) or an absolute number to represent
+               // document frequencies. If a value higher than 1 is specified
+               // then it cannot be fractional. Defaults to 0.01f.  If
+               // a term appears in more than half the docs then don't try to
+               // correct it.  This really shouldn't kick in much because we're
+               // not looking for misspellings.  We're looking for phrases that
+               // might be off.  Like "noble prize" ->  "nobel prize".  In
+               // any case, the default was 0.01 which way too frequently
+               // decided not to correct some terms.
+               'max_term_freq' => 0.5,
+
+               // The min doc freq (shard level) used by the phrase suggester.
+               // The minimal threshold in number of documents a suggestion
+               // should appear in.  This can be specified as an absolute
+               // number or as a relative percentage of number of documents.
+               // This can improve quality by only suggesting high frequency
+               // terms. Defaults to 0f and is not enabled. If a value higher
+               // than 1 is specified then the number cannot be fractional. The
+               // shard level document frequencies are used for this option.
+               // NOTE: this value is ignored if mode is "always"
+               'min_doc_freq' => 0.0,
+
+               // The prefix length used by the phrase suggester.  The minimal
+               // number of prefix characters that must match in order to be a
+               // candidate suggestion. Defaults to 1. Increasing this number
+               // improves spellcheck performance.  Usually misspellings don’t
+               // occur in the beginning of terms.
+               'prefix_length' => 2,
+
+               // Checks each suggestion against a specified query to prune
+               // suggestions for which no matching docs exist in the index.
+               'collate' => false,
+
+               // Controls the minimum_should_match option used by the collate
+               // query.
+               'collate_minimum_should_match' => '3<66%',
+
+               // Smoothing model See
+               // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html
+               'smoothing_model' => array(
+                       'stupid_backoff' => array(
+                               'discount' => 0.4
+                       )
+               ),
+       ),
+       // The 'strict' settings will try to avoid displaying weird suggestions.
+       // (suited for small size wikis)
+       'strict' => array(
+               'mode' => 'always',
+               'confidence' => 2.0,
+               'max_errors' => 2,
+               'real_word_error_likelihood' => 0.95,
+               'max_term_freq' => 0.5,
+               'min_doc_freq' => 0.0,
+               'prefix_length' => 2,
+               'collate' => true,
+               'collate_minimum_should_match' => '3<66%',
+               'smoothing_model' => array(
+                       'laplace' => array(
+                               'alpha' => 0.3
+                       )
+               )
+       ),
+       // Alternative settings, confidence set to 1 but with laplace smoothing
+       'alternative' => array(
+               'mode' => 'always',
+               'confidence' => 1.0,
+               'max_errors' => 2,
+               'real_word_error_likelihood' => 0.95,
+               'max_term_freq' => 0.5,
+               'min_doc_freq' => 0.0,
+               'prefix_length' => 2,
+               'collate' => false,
+               'collate_minimum_should_match' => '3<66%',
+               'smoothing_model' => array(
+                       'laplace' => array(
+                               'alpha' => 0.3
+                       )
+               )
+       )
+);
diff --git a/profiles/SuggestProfiles.php b/profiles/SuggestProfiles.php
new file mode 100644
index 0000000..94b94c4
--- /dev/null
+++ b/profiles/SuggestProfiles.php
@@ -0,0 +1,107 @@
+<?php
+
+/**
+ * CirrusSearch - List of profiles for search as you type suggestions
+ * (Completion suggester)
+ *
+ * Set $wgSearchType to 'CirrusSearch'
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/**
+ *
+ * See CirrusSearch\BuildDocument\SuggestBuilder and CirrusSearch\Searcher
+ * See also: 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html
+ */
+$wgCirrusSearchCompletionProfiles = array(
+       // Default profile
+       'default' => array(
+               // The key is the name of the suggestion request
+               'plain' => array(
+                       // Field to request
+                       'field' => 'suggest',
+                       // Fire the request only if the user query has at least 
min_query_len chars
+                       'min_query_len' => 0,
+                       // Discount result scores for this request
+                       // Useful to discount fuzzy request results
+                       'discount' => 1.0,
+                       // Fetch more results than the limit
+                       // It's possible to have the same page multiple times.
+                       // Requesting more than the limit helps to display the 
correct number
+                       // of suggestions
+                       'fetch_limit_factor' => 2,
+               ),
+               'plain_stop' => array(
+                       'field' => 'suggest-stop',
+                       'min_query_len' => 0,
+                       'discount' => 0.1,
+                       'fetch_limit_factor' => 2,
+               ),
+               'plain_fuzzy' => array(
+                       'field' => 'suggest',
+                       'min_query_len' => 3,
+                       'discount' => 0.005,
+                       'fetch_limit_factor' => 2,
+                       'fuzzy' => array(
+                               'fuzziness' => 'AUTO', // sent as-is as the completion "fuzzy" options
+                               'prefix_length' => 0,
+                               'unicode_aware' => true,
+                       )
+               ),
+               'plain_stop_fuzzy' => array(
+                       'field' => 'suggest-stop',
+                       'min_query_len' => 3,
+                       'discount' => 0.001,
+                       'fetch_limit_factor' => 2,
+                       'fuzzy' => array(
+                               'fuzziness' => 'AUTO',
+                               'prefix_length' => 0,
+                               'unicode_aware' => true,
+                       )
+               )
+       )
+);
+
+/**
+ * List of profiles for geo context suggestions
+ */
+$wgCirrusSearchCompletionGeoContextProfiles = array(
+       'default' => array(
+               'geo-1km' => array(
+                       'field_suffix' => '-geo',
+                       // Discount applied to the score; this value will be 
multiplied
+                       // by the discount from 
$wgCirrusSearchCompletionProfiles
+                       'discount' => 1.0,
+                       'precision' => 6,
+                       // List of requests to run with this precision
+                       // must be a valid name from the active 
$wgCirrusSearchCompletionProfiles
+                       'with' => array( 'plain', 'plain_stop', 'plain_fuzzy', 
'plain_stop_fuzzy' )
+               ),
+               'geo-10km' => array(
+                       'field_suffix' => '-geo',
+                       'discount' => 0.5,
+                       'precision' => 4,
+                       'with' => array( 'plain', 'plain_stop', 'plain_fuzzy' )
+               ),
+               'geo-100km' => array(
+                       'field_suffix' => '-geo',
+                       'discount' => 0.2,
+                       'precision' => 3,
+                       'with' => array( 'plain', 'plain_stop' )
+               )
+       )
+);
diff --git a/tests/browser/features/suggest_api.feature 
b/tests/browser/features/suggest_api.feature
index c4b203f..7390a8d 100644
--- a/tests/browser/features/suggest_api.feature
+++ b/tests/browser/features/suggest_api.feature
@@ -25,10 +25,32 @@
       Then the API should produce empty list
 
   Scenario: Ordering
-       When I ask suggestion API for x-m
-         Then the API should produce list starting with X-Men
+    When I ask suggestion API for x-m
+      Then the API should produce list starting with X-Men
+
+  Scenario: Fuzzy
+    When I ask suggestion API for xmen
+      Then the API should produce list starting with X-Men
+
+  Scenario Outline: Search redirects shows the best redirect
+    When I ask suggestion API for <term>
+      Then the API should produce list containing <suggested>
+  Examples:
+    |   term      |    suggested      |
+    | eise        | Eisenhardt, Max   |
+    | max         | Max Eisenhardt    |
+    | magnetu     | Magneto           |
+
+  Scenario Outline: Search prefers exact match over
+    When I ask suggestion API for <term>
+      Then the API should produce list starting with <suggested>
+  Examples:
+    |   term      |    suggested      |
+    | max         | Max Eisenhardt    |
+    | mai         | Main Page         |
+    | eis         | Eisenhardt, Max   |
 
   Scenario: Ordering & limit
-       When I ask suggestion API at most 1 item for x-m
-         Then the API should produce list starting with X-Men
-         And the API should produce list of length 1
\ No newline at end of file
+    When I ask suggestion API at most 1 item for x-m
+      Then the API should produce list starting with X-Men
+      And the API should produce list of length 1
diff --git a/tests/browser/features/support/hooks.rb 
b/tests/browser/features/support/hooks.rb
index 796bff9..33f087f 100644
--- a/tests/browser/features/support/hooks.rb
+++ b/tests/browser/features/support/hooks.rb
@@ -617,6 +617,10 @@
       Given a page named X-Men exists with contents The X-Men are a fictional 
team of superheroes
         And a page named Xavier, Charles exists with contents Professor 
Charles Francis Xavier (also known as Professor X) is the founder of [[X-Men]]
         And a page named X-Force exists with contents X-Force is a fictional 
team of of [[X-Men]]
+        And a page named Magneto exists with contents Magneto is a fictional 
character appearing in American comic books
+        And a page named Max Eisenhardt exists with contents #REDIRECT 
[[Magneto]]
+        And a page named Eisenhardt, Max exists with contents #REDIRECT 
[[Magneto]]
+        And a page named Magnetu exists with contents #REDIRECT [[Magneto]]
         And I reindex suggestions
     )
     suggest = true
diff --git a/tests/unit/SuggestBuilderTest.php 
b/tests/unit/SuggestBuilderTest.php
new file mode 100644
index 0000000..e361631
--- /dev/null
+++ b/tests/unit/SuggestBuilderTest.php
@@ -0,0 +1,245 @@
+<?php
+
+namespace CirrusSearch;
+
+use CirrusSearch\BuildDocument\SuggestBuilder;
+use CirrusSearch\BuildDocument\SuggestScoringMethodFactory;
+
+/**
+ * Unit tests for SuggestBuilder: suggestion building and primary coordinate selection.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+class SuggestBuilderTest extends \MediaWikiTestCase {
+       public function testEinstein() {
+               $builder = new SuggestBuilder( 
SuggestScoringMethodFactory::getScoringMethod( 'incomingLinks', 1 ) );
+               $score = 10;
+               $redirScore = (int) ( $score * 
SuggestBuilder::REDIRECT_DISCOUNT );
+               $doc = array(
+                       'title' => 'Albert Einstein',
+                       'redirect' => array(
+                               array( 'title' => "Albert Enstein", 'namespace' 
=> 0 ),
+                               array( 'title' => "Albert Einsten", 'namespace' 
=> 0 ),
+                               array( 'title' => 'Albert Einstine', 
'namespace' => 0 ),
+                               array( 'title' => "Enstein", 'namespace' => 0 ),
+                               array( 'title' => "Einstein", 'namespace' => 0 
),
+                       ),
+                       'incoming_links' => $score
+               );
+               $expected = array(
+                       array(
+                               'suggest' => array(
+                                       'input' => array( 'Albert Einstein', 
'Albert Enstein',
+                                               'Albert Einsten', 'Albert 
Einstine' ),
+                                       'output' => '1:t:Albert Einstein',
+                                       'weight' => $score
+                               ),
+                               'suggest-stop' => array(
+                                       'input' => array( 'Albert Einstein', 
'Albert Enstein',
+                                               'Albert Einsten', 'Albert 
Einstine' ),
+                                       'output' => '1:t:Albert Einstein',
+                                       'weight' => $score
+                               )
+                       ),
+                       array(
+                               'suggest' => array(
+                                       'input' => array( 'Enstein', 'Einstein' 
),
+                                       'output' => '1:r',
+                                       'weight' => $redirScore
+                               ),
+                               'suggest-stop' => array(
+                                       'input' => array( 'Enstein', 'Einstein' 
),
+                                       'output' => '1:r',
+                                       'weight' => $redirScore
+                               )
+                       )
+               );
+
+               $suggestions = $builder->build( 1, $doc );
+               $this->assertSame( $expected, $suggestions );
+       }
+
+       public function testUlm() {
+               $builder = new SuggestBuilder( 
SuggestScoringMethodFactory::getScoringMethod( 'incomingLinks', 1 ) );
+               $score = 10;
+               $redirScore = (int) ( $score * 
SuggestBuilder::REDIRECT_DISCOUNT );
+               $doc = array(
+                       'title' => 'Ulm',
+                       'redirect' => array(
+                               array( 'title' => 'UN/LOCODE:DEULM', 
'namespace' => 0 ),
+                               array( 'title'=> 'Ulm, Germany', 'namespace' => 
0 ),
+                               array( 'title' => "Ulm displaced persons camp", 
'namespace' => 0 ),
+                               array( 'title' => "Söflingen", 'namespace' => 0 
),
+                       ),
+                       'coordinates' => array(
+                               array(
+                                       'coord' => array(
+                                               'lat' => 48.3985,
+                                               'lon' => 9.9918
+                                       ),
+                                       'region' => "BW",
+                                       'dim' => 10000,
+                                       'name' => "",
+                                       'primary' => true,
+                                       'type' => "city",
+                                       'globe' => "earth",
+                                       'country' => "DE"
+                               )
+                       ),
+                       'incoming_links' => $score
+               );
+
+               $expected = array(
+                       array(
+                               'suggest' => array(
+                                       'input' => array( 'Ulm' ),
+                                       'output' => '1:t:Ulm',
+                                       'weight' => $score
+                               ),
+                               'suggest-stop' => array(
+                                       'input' => array( 'Ulm' ),
+                                       'output' => '1:t:Ulm',
+                                       'weight' => $score
+                               ),
+                               'suggest-geo' => array(
+                                       'input' => array( 'Ulm' ),
+                                       'output' => '1:t:Ulm',
+                                       'weight' => $score,
+                                       'context' => array(
+                                               'location' => array(
+                                                       'lat' => 48.3985,
+                                                       'lon' => 9.9918
+                                               )
+                                       )
+                               ),
+                               'suggest-stop-geo' => array(
+                                       'input' => array( 'Ulm' ),
+                                       'output' => '1:t:Ulm',
+                                       'weight' => $score,
+                                       'context' => array(
+                                               'location' => array(
+                                                       'lat' => 48.3985,
+                                                       'lon' => 9.9918
+                                               )
+                                       )
+                               )
+                       ),
+                       array(
+                               'suggest' => array(
+                                       'input' => array( 'UN/LOCODE:DEULM', 
'Ulm, Germany',
+                                               'Ulm displaced persons camp', 
'Söflingen' ),
+                                       'output' => '1:r',
+                                       'weight' => $redirScore
+                               ),
+                               'suggest-stop' => array(
+                                       'input' => array( 'UN/LOCODE:DEULM', 
'Ulm, Germany',
+                                               'Ulm displaced persons camp', 
'Söflingen' ),
+                                       'output' => '1:r',
+                                       'weight' => $redirScore
+                               ),
+                               'suggest-geo' => array(
+                                       'input' => array( 'UN/LOCODE:DEULM', 
'Ulm, Germany',
+                                               'Ulm displaced persons camp', 
'Söflingen' ),
+                                       'output' => '1:r',
+                                       'weight' => $redirScore,
+                                       'context' => array(
+                                               'location' => array(
+                                                       'lat' => 48.3985,
+                                                       'lon' => 9.9918
+                                               )
+                                       )
+                               ),
+                               'suggest-stop-geo' => array(
+                                       'input' => array( 'UN/LOCODE:DEULM', 
'Ulm, Germany',
+                                               'Ulm displaced persons camp', 
'Söflingen' ),
+                                       'output' => '1:r',
+                                       'weight' => $redirScore,
+                                       'context' => array(
+                                               'location' => array(
+                                                       'lat' => 48.3985,
+                                                       'lon' => 9.9918
+                                               )
+                                       )
+                               )
+                       )
+               );
+               $suggestions = $builder->build( 1, $doc );
+               $this->assertSame( $expected, $suggestions );
+       }
+
+       public function testMultipleCoordinates() {
+               $doc = array(
+                       'coordinates' => array(
+                               array(
+                                       'coord' => array(
+                                               'lat' => 0.70777777777778,
+                                               'lon' => -50.089444444444
+                                       ),
+                                       'region' => null,
+                                       'dim' => 10000,
+                                       'name' => "",
+                                       'primary' => true,
+                                       'type' => "river",
+                                       'globe' => "earth",
+                                       'country' => "BR"
+                               ),
+                               array(
+                                       'coord' => array(
+                                               'lat' => -15.518055555556,
+                                               'lon' => -71.765277777778
+                                       ),
+                                       'region' => null,
+                                       'dim' => 10000,
+                                       'name' => "",
+                                       'primary' => false,
+                                       'type' => "river",
+                                       'globe' => "earth",
+                                       'country' => "BR"
+                               )
+                       )
+               );
+
+               $builder = new SuggestBuilder( 
SuggestScoringMethodFactory::getScoringMethod( 'incomingLinks', 1 ) );
+               $coord = $builder->findPrimaryCoordinates( $doc );
+               $expected = array( 'lat' => 0.70777777777778, 'lon' => 
-50.089444444444 );
+               $this->assertSame( $expected, $coord );
+
+               $doc['coordinates'][1]['primary'] = true;
+               $coord = $builder->findPrimaryCoordinates( $doc );
+               $expected = array( 'lat' => 0.70777777777778, 'lon' => 
-50.089444444444 );
+               $this->assertSame( $expected, $coord, "With two primaries coord 
we choose the first one" );
+
+               $doc['coordinates'][0]['primary'] = false;
+               $coord = $builder->findPrimaryCoordinates( $doc );
+               $expected = array( 'lat' => -15.518055555556, 'lon' => 
-71.765277777778 );
+               $this->assertSame( $expected, $coord, "Choose primary coord 
even if it's not the first one." );
+
+               $doc['coordinates'][1]['primary'] = false;
+               $coord = $builder->findPrimaryCoordinates( $doc );
+               $expected = array( 'lat' => 0.70777777777778, 'lon' => 
-50.089444444444 );
+               $this->assertSame( $expected, $coord, "Choose first coord if 
there's no primary." );
+
+               $doc['coordinates'][0]['primary'] = true;
+               $doc['coordinates'][0]['globe'] = 'Magrathea';
+               $coord = $builder->findPrimaryCoordinates( $doc );
+               $expected = array( 'lat' => -15.518055555556, 'lon' => 
-71.765277777778 );
+               $this->assertSame( $expected, $coord, "Choose first coord on 
earth." );
+
+               $doc['coordinates'][1]['globe'] = 'Magrathea';
+               $coord = $builder->findPrimaryCoordinates( $doc );
+               $this->assertNull( $coord, "No coord if none is on earth." );
+       }
+}
diff --git a/tests/unit/SuggestScoringTest.php 
b/tests/unit/SuggestScoringTest.php
index c2bf342..493878d 100644
--- a/tests/unit/SuggestScoringTest.php
+++ b/tests/unit/SuggestScoringTest.php
@@ -37,7 +37,7 @@
                        $this->assertGreaterThanOrEqual( 0, $score, 
"scoreNormL2 cannot produce a score lower than 0" );
                }
 
-               # Edges
+               // Edges
                $score = $qs->scoreNorm( 1, 1 );
                $this->assertLessThanOrEqual( 1, $score, "scoreNorm cannot 
produce a score greater than 1" );
                $this->assertGreaterThanOrEqual( 0, $score, "scoreNorm cannot 
produce a score lower than 0" );
@@ -66,13 +66,13 @@
                        if ( $boost > 1 ) {
                                $this->assertGreaterThan( $score, $res, "With a 
boost ($boost) greater than 1 the boosted score must be greater than the 
original." );
                        } else if ( $boost < 1 ) {
-                               $this->assertLessThan( $score, $res, "With a 
boost ($boost) lesser than 1 the boosted score must be lesser than the 
original." );
+                               $this->assertLessThan( $score, $res, "With a 
boost ($boost) less than 1 the boosted score must be less than the original." );
                        } else {
                                $this->assertEquals( $score, $res, "When boost 
is 1 the score remains unchanged." );
                        }
                }
                for( $i = 1; $i < 1000; $i++ ) {
-                       # The same boost value must keep original score ordering
+                       // The same boost value must keep original score 
ordering
                        $score1 = 0.1;
                        $score2 = 0.5;
 
@@ -87,7 +87,7 @@
                        $this->assertGreaterThan( $res1, $res2, "A boost cannot 
'overboost' a score" );
                }
 
-               # Edges
+               // Edges
                $res = $qs->boost( 1, 1 );
                $this->assertEquals( $res, 1, "When boost is 1 the score 
remains unchanged." );
                $res = $qs->boost( 1, 0 );
@@ -123,7 +123,7 @@
                $this->assertGreaterThan( $score, $res, "A good doc gets a 
better score" );
 
                $res = $qs->boostTemplates( $badDoc, $score );
-               $this->assertLessThan( $score, $res, "A good doc gets a lower 
score" );
+               $this->assertLessThan( $score, $res, "A bad doc gets a lower 
score" );
 
                $res = $qs->boostTemplates( $mixedDoc, $score );
                $this->assertEquals( $score, $res, "A mixed doc gets the same 
score");
@@ -227,7 +227,7 @@
                        $this->assertLessThan( QualityScore::SCORE_RANGE, 
$qs->score( $page ), "Score is always lower than " . QualityScore::SCORE_RANGE 
);
                }
 
-               # Edges
+               // Edges
                $page = array(
                        'incoming_links' => $maxDocs * 
QualityScore::INCOMING_LINKS_MAX_DOCS_FACTOR,
                        'external_link' => array_fill( 0, 
QualityScore::EXTERNAL_LINKS_NORM, null ),
@@ -251,7 +251,7 @@
                $page = array();
                $this->assertEquals( 0, $qs->score( $page ), "Score of a broken 
article is 0" );
 
-               # A very small wiki
+               // A very small wiki
                $qs = new QualityScore( 1 );
                $page = array(
                        'incoming_links' => 1,
@@ -263,7 +263,7 @@
                );
                $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( 
$page ), "With very small wiki the highest score is also " . 
QualityScore::SCORE_RANGE );
 
-               # The scoring function should not fail with 0 page
+               // The scoring function should not fail with 0 page
                $qs = new QualityScore( 0 );
                $page = array(
                        'incoming_links' => 1,
diff --git a/tests/unit/UtilTest.php b/tests/unit/UtilTest.php
index bd325db..91d25e3 100644
--- a/tests/unit/UtilTest.php
+++ b/tests/unit/UtilTest.php
@@ -257,4 +257,20 @@
                $this->assertEquals( 6, $calls );
                $this->assertEquals( 5, $errorCallbackCalls );
        }
+
+       public function testChooseBestRedirect() {
+               $convert = function( $x ) {
+                       $redirect = array();
+                       foreach( $x as $t ) {
+                               $redirect[] = array( 'title' => $t, 'namespace' 
=> 0 );
+                       }
+                       return $redirect;
+               };
+               $input = $convert( array( 'Al. Einstein', 'Albert Einstein', 
'A. Einstein', 'Einstein, Albert' ) );
+               $this->assertEquals( 'Al. Einstein', Util::chooseBestRedirect( 
'a', $input ) );
+               $this->assertEquals( 'Al. Einstein', Util::chooseBestRedirect( 
'al', $input ) );
+               $this->assertEquals( 'Albert Einstein', 
Util::chooseBestRedirect( 'albet', $input ) );
+               $this->assertEquals( 'Einstein, Albert', 
Util::chooseBestRedirect( 'Einstein', $input ) );
+               $this->assertEquals( 'Einstein, Albert', 
Util::chooseBestRedirect( 'Ens', $input ) );
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/235133
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I37953179d3f10036344fe16bf31da3fd04a7c075
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: wmf/1.26wmf20
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to