jenkins-bot has submitted this change and it was merged.

Change subject: Turn on textcat based language detection for search
......................................................................


Turn on textcat based language detection for search

Enable it on the wikis for which we have built up high-quality sets of
language-detection models. More wikis will follow in the future as
model sets are prepared for them.

Change-Id: Ifca63c340d343b4f35b84d04fe5359988d121497
---
M wmf-config/CirrusSearch-common.php
M wmf-config/InitialiseSettings.php
2 files changed, 26 insertions(+), 55 deletions(-)

Approvals:
  Thcipriani: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/wmf-config/CirrusSearch-common.php 
b/wmf-config/CirrusSearch-common.php
index e7bb4bb..e20cbea 100644
--- a/wmf-config/CirrusSearch-common.php
+++ b/wmf-config/CirrusSearch-common.php
@@ -123,16 +123,14 @@
 }
 
 // Configuration for initial test deployment of inline interwiki search via
-// language detection on the search terms. With EnableAltLanguage set to false
-// this is only available with a special query string (cirrusAltLanguage=yes)
-$wgCirrusSearchEnableAltLanguage = $wmgCirrusSearchEnableAltLanguage;
-$wgCirrusSearchInterwikiProv = 'iwsw1';
+// language detection on the search terms.
 
 $wgCirrusSearchWikiToNameMap = $wmgCirrusSearchWikiToNameMap;
 $wgCirrusSearchLanguageToWikiMap = $wmgCirrusSearchLanguageToWikiMap;
 
-// will be overridden by UserTesting triggers, but we need to set the default.
-$wgCirrusSearchTextcatLanguages = [];
+$wgCirrusSearchEnableAltLanguage = $wmgCirrusSearchEnableAltLanguage;
+$wgCirrusSearchLanguageDetectors = $wmgCirrusSearchLanguageDetectors;
+$wgCirrusSearchTextcatLanguages = $wmgCirrusSearchTextcatLanguages;
 $wgCirrusSearchTextcatModel = "$IP/vendor/wikimedia/textcat/LM-query";
 
 $wgHooks['CirrusSearchMappingConfig'][] = function( array &$config, 
$mappingConfigBuilder ) {
diff --git a/wmf-config/InitialiseSettings.php 
b/wmf-config/InitialiseSettings.php
index 3416ef4..52c8ad3 100644
--- a/wmf-config/InitialiseSettings.php
+++ b/wmf-config/InitialiseSettings.php
@@ -16534,59 +16534,32 @@
        'default' => false,
 ],
 
-
 'wmgCirrusSearchUserTesting' => [
-       // Only being utilized on some wiki's, but because this requires a 
special query parameter
-       // to trigger it is safe to use everywhere rather than duplicate the 
config.
-       'default' => [
-               'textcat1' => [
-                       // Test only activated via query string trigger
-                       'sampleRate' => 0,
-                       // variables applied to all buckets
-                       'globals' => [
-                               'wgCirrusSearchInterwikiThreshold' => 3,
-                               'wgCirrusSearchLanguageDetectors' => [
-                                       'textcat' => 
'CirrusSearch\\LanguageDetector\\TextCat',
-                               ],
-                       ],
-                       'buckets' => [
-                               // control bucket. setup threshold and detector 
but don't turn on the
-                               // language query
-                               'a' => [
-                                       'trigger' => 'textcat2:a',
-                                       'globals' => [
-                                               
'wgCirrusSearchEnableAltLanguage' => false,
-                                       ],
-                               ],
-                               // test bucket. try textcat and append results 
when < 3 results found
-                               'b' => [
-                                       'trigger' => 'textcat2:b',
-                                       'globals' => [
-                                               
'wgCirrusSearchEnableAltLanguage' => true,
-                                               'wgCirrusSearchInterwikiProv' 
=> 'iwsw8',
-                                       ],
-                               ],
-                               // test bucket. try accept-language, fallback 
to textcat
-                               'c' => [
-                                       'trigger' => 'textcat2:c',
-                                       'globals' => [
-                                               
'wgCirrusSearchEnableAltLanguage' => true,
-                                               'wgCirrusSearchInterwikiProv' 
=> 'iwsw9',
-                                               
'wgCirrusSearchLanguageDetectors' => [
-                                                       'accept-lang' => 
'CirrusSearch\\LanguageDetector\\HttpAccept',
-                                                       'textcat' => 
'CirrusSearch\\LanguageDetector\\TextCat',
-                                               ],
-                                       ],
-                               ],
-                       ],
-               ],
-       ],
+       'default' => [],
 ],
 
-'wgCirrusSearchTextcatLanguages' => [
+'wmgCirrusSearchLanguageDetectors' => [
+       'default' => [],
+       'enwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'dewiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'eswiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'itwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'frwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+],
+
+'wmgCirrusSearchEnableAltLanguage' => [
+       'default' => false,
+       'enwiki' => true,
+       'dewiki' => true,
+       'eswiki' => true,
+       'itwiki' => true,
+       'frwiki' => true,
+],
+
+'wmgCirrusSearchTextcatLanguages' => [
        'default' => [
-                       'en', 'es', 'zh', 'pt', 'ar', 'ru', 'fa', 'ko',
-                       'bn', 'bg', 'hi', 'el', 'ja', 'ta', 'th', 'he'
+               'en', 'es', 'zh', 'pt', 'ar', 'ru', 'fa', 'ko',
+               'bn', 'bg', 'hi', 'el', 'ja', 'ta', 'th', 'he'
        ],
        'frwiki' => [
                'fr', 'en', 'ar', 'ru', 'zh', 'th', 'el', 'hy',

-- 
To view, visit https://gerrit.wikimedia.org/r/301369
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ifca63c340d343b4f35b84d04fe5359988d121497
Gerrit-PatchSet: 2
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: Thcipriani <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to