Tjones has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/357299 )
Change subject: Enable Hebrew Analysis
......................................................................
Enable Hebrew Analysis
Update the config to enable HebMorph (the "analysis-hebrew" plugin) if it
is available, and configure the analysis elements for the text and
text_search analyzers.
Update the AnalysisConfigBuilder tests.
Bug: T162741
Change-Id: Ice9ffc7a35d879d857659311aae8dd9d01576189
---
M includes/Maintenance/AnalysisConfigBuilder.php
M tests/unit/Maintenance/AnalysisConfigBuilderTest.php
M tests/unit/fixtures/languageAnalysis/he.config
M tests/unit/fixtures/languageAnalysis/he.expected
4 files changed, 23 insertions(+), 16 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/99/357299/1
diff --git a/includes/Maintenance/AnalysisConfigBuilder.php
b/includes/Maintenance/AnalysisConfigBuilder.php
index 19c816e..7818250 100644
--- a/includes/Maintenance/AnalysisConfigBuilder.php
+++ b/includes/Maintenance/AnalysisConfigBuilder.php
@@ -695,11 +695,12 @@
$config[ 'filter' ][ 'lowercase' ][ 'language' ] =
'greek';
break;
case 'hebrew':
- // If the hebrew plugin kicked us over to the hebrew
analyzer use its companion
- // analyzer for queries.
- if ( $config[ 'analyzer' ][ 'text_search' ][ 'type' ]
=== 'hebrew' ) {
- $config[ 'analyzer' ][ 'text_search' ][ 'type'
] = 'hebrew_exact';
- }
+ $config[ 'analyzer' ][ 'text' ] = [
+ 'type' => 'custom',
+ 'tokenizer' => 'hebrew',
+ 'filter' => [ 'niqqud', 'hebrew_lemmatizer',
'lowercase', 'asciifolding' ],
+ ];
+ $config[ 'analyzer' ][ 'text_search' ] = $config[
'analyzer' ][ 'text' ];
break;
case 'italian':
$config[ 'filter' ][ 'italian_elision' ] = [
@@ -1038,8 +1039,7 @@
'analysis-kuromoji' => [ 'ja' => 'kuromoji' ],
'analysis-smartcn' => [ 'zh-hans' => 'smartcn' ],
'analysis-stconvert,analysis-smartcn' => [ 'zh' => 'chinese' ],
- 'elasticsearch-analysis-hebrew' => [ 'he' => 'hebrew' ],
- // TODO Hebrew requires some special query handling....
+ 'analysis-hebrew' => [ 'he' => 'hebrew' ],
'analysis-ukrainian' => [ 'uk' => 'ukrainian' ],
];
diff --git a/tests/unit/Maintenance/AnalysisConfigBuilderTest.php
b/tests/unit/Maintenance/AnalysisConfigBuilderTest.php
index aed0e82..921ce51 100644
--- a/tests/unit/Maintenance/AnalysisConfigBuilderTest.php
+++ b/tests/unit/Maintenance/AnalysisConfigBuilderTest.php
@@ -394,7 +394,7 @@
]);
$plugins = [
'analysis-stempel', 'analysis-kuromoji',
- 'analysis-smartcn', 'elasticsearch-analysis-hebrew',
+ 'analysis-smartcn', 'analysis-hebrew',
'analysis-ukrainian', 'analysis-stconvert'
];
$builder = new AnalysisConfigBuilder( $langCode, $plugins,
$config );
diff --git a/tests/unit/fixtures/languageAnalysis/he.config
b/tests/unit/fixtures/languageAnalysis/he.config
index 2c63c08..0967ef4 100644
--- a/tests/unit/fixtures/languageAnalysis/he.config
+++ b/tests/unit/fixtures/languageAnalysis/he.config
@@ -1,2 +1 @@
-{
-}
+{}
diff --git a/tests/unit/fixtures/languageAnalysis/he.expected
b/tests/unit/fixtures/languageAnalysis/he.expected
index 745240c..f374709 100644
--- a/tests/unit/fixtures/languageAnalysis/he.expected
+++ b/tests/unit/fixtures/languageAnalysis/he.expected
@@ -1,15 +1,23 @@
{
"analyzer": {
"text": {
- "type": "hebrew",
- "char_filter": [
- "word_break_helper"
+ "type": "custom",
+ "tokenizer": "hebrew",
+ "filter": [
+ "niqqud",
+ "hebrew_lemmatizer",
+ "lowercase",
+ "asciifolding"
]
},
"text_search": {
- "type": "hebrew_exact",
- "char_filter": [
- "word_break_helper"
+ "type": "custom",
+ "tokenizer": "hebrew",
+ "filter": [
+ "niqqud",
+ "hebrew_lemmatizer",
+ "lowercase",
+ "asciifolding"
]
},
"plain": {
--
To view, visit https://gerrit.wikimedia.org/r/357299
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ice9ffc7a35d879d857659311aae8dd9d01576189
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Tjones <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits