jenkins-bot has submitted this change and it was merged.

Change subject: Optionally enable sloppy prefix matching
......................................................................


Optionally enable sloppy prefix matching

This prefix matching has tons of slop:  "m p" matches "Main Page".

Bug: 54974
Change-Id: Ib7decd6fdb98e08240507b1248a7f7f2761b52f4
---
M CirrusSearch.php
M includes/CirrusSearchAnalysisConfigBuilder.php
M includes/CirrusSearchMappingConfigBuilder.php
M includes/CirrusSearchSearcher.php
4 files changed, 41 insertions(+), 5 deletions(-)

Approvals:
  Chad: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/CirrusSearch.php b/CirrusSearch.php
index cd2fba4..0515b3a 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -51,6 +51,15 @@
 // set to 1 for some redundancy, if not 2 for more redundancy.
 $wgCirrusSearchContentReplicaCount = array( 'content' => 0, 'general' => 0 );
 
+// Is it ok if the prefix starts on any word in the title or just the first 
word?
+// Defaults to false (first word only) because that is the wikipedia behavior 
and so
+// what we expect users to expect.  Does not affect the prefix: search filter 
or
+// url parameter - that always starts with the first word.  false -> true will 
break
+// prefix searching until an in place reindex is complete.  true -> false is 
fine
+// any time and you can then go false -> true if you haven't run an in place 
reindex
+// since the change.
+$wgCirrusSearchPrefixSearchStartsWithAnyWord = false;
+
 // When searching for a phrase how many words not searched for can be in the 
phrase
 // before it doesn't match. If I search for "like yellow candy" then 
phraseSlop of 0
 // won't match "like brownish yellow candy" but phraseSlop of 1 will.
diff --git a/includes/CirrusSearchAnalysisConfigBuilder.php 
b/includes/CirrusSearchAnalysisConfigBuilder.php
index 7a4bf00..73c359d 100644
--- a/includes/CirrusSearchAnalysisConfigBuilder.php
+++ b/includes/CirrusSearchAnalysisConfigBuilder.php
@@ -69,6 +69,11 @@
                                        'tokenizer' => 'prefix',
                                        'filter' => array( 'lowercase' )
                                ),
+                               'word_prefix' => array(
+                                       'type' => 'custom',
+                                       'tokenizer' => 'standard',
+                                       'filter' => array( 'standard', 
'lowercase', 'prefix_ngram_filter' ),
+                               ),
                                'lowercase_keyword' => array(
                                        'type' => 'custom',
                                        'tokenizer' => 'no_splitting',
@@ -85,11 +90,14 @@
                                'lowercase' => array(
                                        'type' => 'lowercase',
                                ),
-
                                'aggressive_splitting' => array(
                                        'type' => 'word_delimiter',
                                        'stem_english_possessive' => 'false', 
// No need
-                               )
+                               ),
+                               'prefix_ngram_filter' => array(
+                                       'type' => 'edgeNGram',
+                                       'max_gram' => 
CirrusSearchSearcher::MAX_PREFIX_SEARCH,
+                               ),
                        ),
                        'tokenizer' => array(
                                'prefix' => array(
@@ -98,7 +106,7 @@
                                ),
                                'no_splitting' => array( // Just grab the whole 
term.
                                        'type' => 'keyword',
-                               )
+                               ),
                        )
                );
        }
@@ -108,6 +116,7 @@
         */
        private function customize( $config ) {
                global $wgCirrusSearchUseAggressiveSplitting;
+
                switch ( $this->language ) {
                // Please add languages in alphabetical order.
                case 'el':
diff --git a/includes/CirrusSearchMappingConfigBuilder.php 
b/includes/CirrusSearchMappingConfigBuilder.php
index a62bdc6..2187d75 100644
--- a/includes/CirrusSearchMappingConfigBuilder.php
+++ b/includes/CirrusSearchMappingConfigBuilder.php
@@ -31,9 +31,15 @@
         * @return array the mapping config
         */
        public function buildConfig() {
+               global $wgCirrusSearchPrefixSearchStartsWithAnyWord;
                global $wgCirrusSearchPhraseUseText;
                // Note never to set something as type='object' here because 
that isn't returned by elasticsearch
                // and is inferred anyway.
+
+               $titleExtraAnalyzers = array( 'suggest', 'prefix' );
+               if ( $wgCirrusSearchPrefixSearchStartsWithAnyWord ) {
+                       $titleExtraAnalyzers[] = 'word_prefix';
+               }
 
                $textExtraAnalyzers = array();
                if ( $wgCirrusSearchPhraseUseText ) {
@@ -42,7 +48,7 @@
 
                return array(
                        'properties' => array(
-                               'title' => $this->buildStringField( 'title', 
array( 'suggest', 'prefix' ) ),
+                               'title' => $this->buildStringField( 'title', 
$titleExtraAnalyzers ),
                                'text' => $this->buildStringField( 'text', 
$textExtraAnalyzers ),
                                'category' => 
$this->buildLowercaseKeywordField(),
                                'heading' => $this->buildStringField( 'heading' 
),
diff --git a/includes/CirrusSearchSearcher.php 
b/includes/CirrusSearchSearcher.php
index c227915..86f2d93 100644
--- a/includes/CirrusSearchSearcher.php
+++ b/includes/CirrusSearchSearcher.php
@@ -84,6 +84,7 @@
         * @param array(string) of titles
         */
        public function prefixSearch( $search ) {
+               global $wgCirrusSearchPrefixSearchStartsWithAnyWord;
                $requestLength = strlen( $search );
                if ( $requestLength > self::MAX_PREFIX_SEARCH ) {
                        throw new UsageException( 'Prefix search requset was 
longer longer than the maximum allowed length.' .
@@ -91,7 +92,18 @@
                }
                wfDebugLog( 'CirrusSearch', "Prefix searching:  $search" );
 
-               $this->filters[] = $this->buildPrefixFilter( $search );
+               if ( $wgCirrusSearchPrefixSearchStartsWithAnyWord ) {
+                       $match = new \Elastica\Query\Match();
+                       $match->setField( 'title.word_prefix', array(
+                               'query' => $search,
+                               'analyzer' => 'plain',
+                               'operator' => 'and',
+                       ) );
+                       $this->filters[] = new \Elastica\Filter\Query( $match );
+               } else {
+                       $this->filters[] = $this->buildPrefixFilter( $search );
+               }
+
                $this->description = "prefix search for '$search'";
                $this->buildFullTextResults = false;
                return $this->search();

-- 
To view, visit https://gerrit.wikimedia.org/r/90150
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib7decd6fdb98e08240507b1248a7f7f2761b52f4
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to