Tjones has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/346168 )

Change subject: Enable Ukrainian Elastic/Morfologik Language Analyzer
......................................................................

Enable Ukrainian Elastic/Morfologik Language Analyzer

Add config to enable the Morfologik Ukrainian analyzer when the analysis-ukrainian plugin is present.

Remove the Ukrainian-specific apostrophe mappings (T146358) from the Russian analyzer config.

Change-Id: Ib1027a12a314ac96620a9bdd4e555043ecda22dc
---
M includes/Maintenance/AnalysisConfigBuilder.php
1 file changed, 2 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/68/346168/1

diff --git a/includes/Maintenance/AnalysisConfigBuilder.php 
b/includes/Maintenance/AnalysisConfigBuilder.php
index 1735928..a3e9859 100644
--- a/includes/Maintenance/AnalysisConfigBuilder.php
+++ b/includes/Maintenance/AnalysisConfigBuilder.php
@@ -746,7 +746,7 @@
 
                        $config[ 'char_filter' ][ 'near_space_flattener' ][ 
'mappings' ][] = '\u0301=>'; // T102298
 
-                       // The Russian analyzer is also used for Ukrainian and 
Rusyn for now, so processing that's
+                       // The Russian analyzer is also used for Rusyn for now, 
so processing that's
                        // very specific to Russian should be separated out
                        if ($this->language == 'ru') {
                                // T124592 fold ё=>е and Ё=>Е, precomposed or 
with combining diacritic
@@ -759,13 +759,6 @@
                                $config[ 'char_filter' ][ 
'near_space_flattener' ][ 'mappings' ][] = '\u0401=>\u0415';
                                $config[ 'char_filter' ][ 
'near_space_flattener' ][ 'mappings' ][] = '\u0435\u0308=>\u0435';
                                $config[ 'char_filter' ][ 
'near_space_flattener' ][ 'mappings' ][] = '\u0415\u0308=>\u0415';
-                       }
-
-                       // Ukrainian uses the Russian analyzer for now, but we 
want some Ukrainian-specific processing
-                       if ($this->language == 'uk') {
-                               // T146358 map right quote and modifier letter 
apostrophe to apostrophe
-                               $config[ 'char_filter' ][ 'russian_charfilter' 
][ 'mappings' ][] = '\u02BC=>\u0027';
-                               $config[ 'char_filter' ][ 'russian_charfilter' 
][ 'mappings' ][] = '\u2019=>\u0027';
                        }
 
                        // Drop acute stress marks and fold ё=>е everywhere
@@ -1018,6 +1011,7 @@
                // current version of elasticsearch:
                'elasticsearch-analysis-hebrew' => [ 'he' => 'hebrew' ],
                // TODO Hebrew requires some special query handling....
+               'analysis-ukrainian' => [ 'uk' => 'ukrainian' ],
        ];
 
        /**

-- 
To view, visit https://gerrit.wikimedia.org/r/346168
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib1027a12a314ac96620a9bdd4e555043ecda22dc
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Tjones <tjo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to