Manybubbles has uploaded a new change for review.
https://gerrit.wikimedia.org/r/82037
Change subject: Switch incategory to simple matching.
......................................................................
Switch incategory to simple matching.
incategory is no longer a search like other searches - it now only
ignores case and accents. Otherwise, you have to specify the category
exactly. This prevents the problem where you would search for a
category like "movie" and get stuff in the "movie production techniques"
category.
Bug: 53415
Change-Id: Ib03fc923af8d8b8f4e4a0bc0f71fa3592f309fe9
---
M CirrusSearch.php
M CirrusSearchAnalysisConfigBuilder.php
M CirrusSearchMappingConfigBuilder.php
M CirrusSearchSearcher.php
M CirrusSearchUpdater.php
5 files changed, 27 insertions(+), 9 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch
refs/changes/37/82037/1
diff --git a/CirrusSearch.php b/CirrusSearch.php
index fbaa01f..a363f15 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -114,6 +114,7 @@
$wgAutoloadClasses['Elastica\Filter\Bool'] = $elasticaDir . 'Filter/Bool.php';
$wgAutoloadClasses['Elastica\Filter\Prefix'] = $elasticaDir .
'Filter/Prefix.php';
$wgAutoloadClasses['Elastica\Filter\Query'] = $elasticaDir .
'Filter/Query.php';
+$wgAutoloadClasses['Elastica\Filter\Term'] = $elasticaDir . 'Filter/Term.php';
$wgAutoloadClasses['Elastica\Filter\Terms'] = $elasticaDir .
'Filter/Terms.php';
$wgAutoloadClasses['Elastica\Index\Settings'] = $elasticaDir .
'Index/Settings.php';
$wgAutoloadClasses['Elastica\Index\Status'] = $elasticaDir .
'Index/Status.php';
diff --git a/CirrusSearchAnalysisConfigBuilder.php
b/CirrusSearchAnalysisConfigBuilder.php
index ab5b4e3..3538e0e 100644
--- a/CirrusSearchAnalysisConfigBuilder.php
+++ b/CirrusSearchAnalysisConfigBuilder.php
@@ -60,7 +60,12 @@
'tokenizer' => 'prefix',
'filter' => array( 'lowercase' )
),
- 'prefix_query' => array(
+ 'prefix_query' => array( // TODO remove this
after lowercase_keyword is fully deployed
+ 'type' => 'custom',
+ 'tokenizer' => 'no_splitting',
+ 'filter' => array( 'lowercase' )
+ ),
+ 'lowercase_keyword' => array(
'type' => 'custom',
'tokenizer' => 'no_splitting',
'filter' => array( 'lowercase' )
@@ -109,9 +114,11 @@
'type' => 'stemmer',
'language' => 'possessive_english',
);
- // Add asciifolding to the prefix queries
+ // Add asciifolding to the prefix queries and
incategory filters
$config[ 'analyzer' ][ 'prefix' ][ 'filter' ][] =
'asciifolding';
+ // TODO remove prefix_query after lowercase_keyword is
fully deployed
$config[ 'analyzer' ][ 'prefix_query' ][ 'filter' ][] =
'asciifolding';
+ $config[ 'analyzer' ][ 'lowercase_keyword' ][ 'filter'
][] = 'asciifolding';
break;
case 'tr':
$config[ 'filter' ][ 'lowercase' ][ 'language' ] =
'turkish';
diff --git a/CirrusSearchMappingConfigBuilder.php
b/CirrusSearchMappingConfigBuilder.php
index 7011ffd..ae6da50 100644
--- a/CirrusSearchMappingConfigBuilder.php
+++ b/CirrusSearchMappingConfigBuilder.php
@@ -38,7 +38,7 @@
'properties' => array(
'title' => $this->buildStringField( 'title',
array( 'suggest', 'prefix' ), true ),
'text' => $this->buildStringField( 'text',
array( 'suggest' ), true ),
- 'category' => $this->buildStringField(),
+ 'category' =>
$this->buildLowercaseKeywordField(),
'redirect' => array(
'properties' => array(
'title' =>
$this->buildStringField( 'title', null, true )
@@ -57,7 +57,7 @@
}
/**
- * Build a string field.
+ * Build a string field that does standard analysis for the language.
* @param $name string|null Name of the field. Required if extra is
not false.
* @param $extra array|null Extra analyzers for this field beyond the
basic string type. If not falsy the
* field will be a multi_field.
@@ -85,4 +85,12 @@
return $field;
}
+ /**
+ * Create a string field that only lower cases and does ascii folding
(if enabled) for the language.
+ * @return array definition of the field
+ */
+ private function buildLowercaseKeywordField() {
+ return array( 'type' => 'string', 'analyzer' =>
'lowercase_keyword' );
+ }
+
}
diff --git a/CirrusSearchSearcher.php b/CirrusSearchSearcher.php
index d0b6ec1..af0fa19 100644
--- a/CirrusSearchSearcher.php
+++ b/CirrusSearchSearcher.php
@@ -52,7 +52,7 @@
$match = new \Elastica\Query\Match();
$match->setField( 'title.prefix', array(
'query' => substr( $search, 0, self::MAX_PREFIX_SEARCH
),
- 'analyzer' => 'prefix_query'
+ 'analyzer' => 'prefix_query' // TODO switch this to
lowercase_keyword after it is fully deployed
) );
$mainFilter->addMust( new \Elastica\Filter\Query( $match ) );
$query->setFilter( $mainFilter );
@@ -129,11 +129,12 @@
'/(?<key>[^ ]+):(?<value>(?:"[^"]+")|(?:[^ "]+)) ?/',
function ( $matches ) use ( &$filters,
&$extraQueryStrings ) {
$key = $matches['key'];
- $value = trim( $matches['value'], '"' );
+ $value = $matches['value']; // Note that if
the user supplied quotes they are not removed
switch ( $key ) {
case 'incategory':
- $filters[] = new
\Elastica\Filter\Query( new \Elastica\Query\Field(
- 'category',
CirrusSearchSearcher::fixupQueryString( $value ) ) );
+ $match = new
\Elastica\Query\Match();
+ $match->setFieldQuery(
'category', trim( $value, '"' ) );
+ $filters[] = new
\Elastica\Filter\Query( $match );
return '';
case 'prefix':
return "$value* ";
diff --git a/CirrusSearchUpdater.php b/CirrusSearchUpdater.php
index 42b7683..4fb4af6 100644
--- a/CirrusSearchUpdater.php
+++ b/CirrusSearchUpdater.php
@@ -172,7 +172,8 @@
$categories = array();
foreach ( $parserOutput->getCategories() as $key => $value ) {
- $categories[] = $key;
+ $category = Category::newFromName( $key );
+ $categories[] = $category->getTitle()->getText();
}
$backlinkCache = new BacklinkCache( $title );
--
To view, visit https://gerrit.wikimedia.org/r/82037
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib03fc923af8d8b8f4e4a0bc0f71fa3592f309fe9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits