jenkins-bot has submitted this change and it was merged.

Change subject: Switch incategory to simple matching.
......................................................................


Switch incategory to simple matching.

incategory is no longer a search like other searches - it now only
ignores case and accents.  Otherwise, you have to specify the category
exactly.  This prevents the problem where you would search for a
category like "movie" and get stuff in the "movie production techniques"
category.

Bug: 53415
Change-Id: Ib03fc923af8d8b8f4e4a0bc0f71fa3592f309fe9
---
M CirrusSearch.php
M CirrusSearchAnalysisConfigBuilder.php
M CirrusSearchMappingConfigBuilder.php
M CirrusSearchSearcher.php
M CirrusSearchUpdater.php
5 files changed, 27 insertions(+), 9 deletions(-)

Approvals:
  Chad: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/CirrusSearch.php b/CirrusSearch.php
index fbaa01f..a363f15 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -114,6 +114,7 @@
 $wgAutoloadClasses['Elastica\Filter\Bool'] = $elasticaDir . 'Filter/Bool.php';
 $wgAutoloadClasses['Elastica\Filter\Prefix'] = $elasticaDir . 
'Filter/Prefix.php';
 $wgAutoloadClasses['Elastica\Filter\Query'] = $elasticaDir . 
'Filter/Query.php';
+$wgAutoloadClasses['Elastica\Filter\Term'] = $elasticaDir . 'Filter/Term.php';
 $wgAutoloadClasses['Elastica\Filter\Terms'] = $elasticaDir . 
'Filter/Terms.php';
 $wgAutoloadClasses['Elastica\Index\Settings'] = $elasticaDir . 
'Index/Settings.php';
 $wgAutoloadClasses['Elastica\Index\Status'] = $elasticaDir . 
'Index/Status.php';
diff --git a/CirrusSearchAnalysisConfigBuilder.php 
b/CirrusSearchAnalysisConfigBuilder.php
index ab5b4e3..3538e0e 100644
--- a/CirrusSearchAnalysisConfigBuilder.php
+++ b/CirrusSearchAnalysisConfigBuilder.php
@@ -60,7 +60,12 @@
                                        'tokenizer' => 'prefix',
                                        'filter' => array( 'lowercase' )
                                ),
-                               'prefix_query' => array(
+                               'prefix_query' => array( // TODO remove this 
after lowercase_keyword is fully deployed
+                                       'type' => 'custom',
+                                       'tokenizer' => 'no_splitting',
+                                       'filter' => array( 'lowercase' )
+                               ),
+                               'lowercase_keyword' => array(
                                        'type' => 'custom',
                                        'tokenizer' => 'no_splitting',
                                        'filter' => array( 'lowercase' )
@@ -109,9 +114,11 @@
                                'type' => 'stemmer',
                                'language' => 'possessive_english',
                        );
-                       // Add asciifolding to the prefix queries
+                       // Add asciifolding to the prefix queries and 
incategory filters
                        $config[ 'analyzer' ][ 'prefix' ][ 'filter' ][] = 
'asciifolding';
+                       // TODO remove prefix_query after lowercase_keyword is 
fully deployed
                        $config[ 'analyzer' ][ 'prefix_query' ][ 'filter' ][] = 
'asciifolding';
+                       $config[ 'analyzer' ][ 'lowercase_keyword' ][ 'filter' 
][] = 'asciifolding';
                        break;
                case 'tr':
                        $config[ 'filter' ][ 'lowercase' ][ 'language' ] = 
'turkish';
diff --git a/CirrusSearchMappingConfigBuilder.php 
b/CirrusSearchMappingConfigBuilder.php
index 7011ffd..ae6da50 100644
--- a/CirrusSearchMappingConfigBuilder.php
+++ b/CirrusSearchMappingConfigBuilder.php
@@ -38,7 +38,7 @@
                        'properties' => array(
                                'title' => $this->buildStringField( 'title', 
array( 'suggest', 'prefix' ), true ),
                                'text' => $this->buildStringField( 'text', 
array( 'suggest' ), true ),
-                               'category' => $this->buildStringField(),
+                               'category' => 
$this->buildLowercaseKeywordField(),
                                'redirect' => array(
                                        'properties' => array(
                                                'title' => 
$this->buildStringField( 'title', null, true )
@@ -57,7 +57,7 @@
        }
 
        /**
-        * Build a string field.
+        * Build a string field that does standard analysis for the language.
         * @param $name string|null Name of the field.  Required if extra is 
not false.
         * @param $extra array|null Extra analyzers for this field beyond the 
basic string type.  If not falsy the
         *              field will be a multi_field.
@@ -85,4 +85,12 @@
                return $field;
        }
 
+       /**
+        * Create a string field that only lower cases and does ascii folding 
(if enabled) for the language.
+        * @return array definition of the field
+        */
+       private function buildLowercaseKeywordField() {
+               return array( 'type' => 'string', 'analyzer' => 
'lowercase_keyword' );
+       }
+
 }
diff --git a/CirrusSearchSearcher.php b/CirrusSearchSearcher.php
index d0b6ec1..af0fa19 100644
--- a/CirrusSearchSearcher.php
+++ b/CirrusSearchSearcher.php
@@ -52,7 +52,7 @@
                $match = new \Elastica\Query\Match();
                $match->setField( 'title.prefix', array(
                        'query' => substr( $search, 0, self::MAX_PREFIX_SEARCH 
),
-                       'analyzer' => 'prefix_query'
+                       'analyzer' => 'prefix_query'  // TODO switch this to 
lowercase_keyword after it is fully deployed
                ) );
                $mainFilter->addMust( new \Elastica\Filter\Query( $match ) );
                $query->setFilter( $mainFilter );
@@ -129,11 +129,12 @@
                        '/(?<key>[^ ]+):(?<value>(?:"[^"]+")|(?:[^ "]+)) ?/',
                        function ( $matches ) use ( &$filters, 
&$extraQueryStrings ) {
                                $key = $matches['key'];
-                               $value = trim( $matches['value'], '"' );
+                               $value = $matches['value'];  // Note that if 
the user supplied quotes they are not removed
                                switch ( $key ) {
                                        case 'incategory':
-                                               $filters[] = new 
\Elastica\Filter\Query( new \Elastica\Query\Field(
-                                                       'category', 
CirrusSearchSearcher::fixupQueryString( $value ) ) );
+                                               $match = new 
\Elastica\Query\Match();
+                                               $match->setFieldQuery( 
'category', trim( $value, '"' ) );
+                                               $filters[] = new 
\Elastica\Filter\Query( $match );
                                                return '';
                                        case 'prefix':
                                                return "$value* ";
diff --git a/CirrusSearchUpdater.php b/CirrusSearchUpdater.php
index 42b7683..4fb4af6 100644
--- a/CirrusSearchUpdater.php
+++ b/CirrusSearchUpdater.php
@@ -172,7 +172,8 @@
 
                $categories = array();
                foreach ( $parserOutput->getCategories() as $key => $value ) {
-                       $categories[] = $key;
+                       $category = Category::newFromName( $key );
+                       $categories[] = $category->getTitle()->getText();
                }
 
                $backlinkCache = new BacklinkCache( $title );

-- 
To view, visit https://gerrit.wikimedia.org/r/82037
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib03fc923af8d8b8f4e4a0bc0f71fa3592f309fe9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to