EBernhardson has uploaded a new change for review.
https://gerrit.wikimedia.org/r/301369
Change subject: Turn on textcat based language detection for search
......................................................................
Turn on textcat based language detection for search
Enable it on the wikis for which we have built high-quality sets of
language-detection models. More wikis will follow in the future as
their sets of models are prepared.
Change-Id: Ifca63c340d343b4f35b84d04fe5359988d121497
---
M wmf-config/CirrusSearch-common.php
M wmf-config/InitialiseSettings.php
2 files changed, 26 insertions(+), 55 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config
refs/changes/69/301369/1
diff --git a/wmf-config/CirrusSearch-common.php
b/wmf-config/CirrusSearch-common.php
index e7bb4bb..e20cbea 100644
--- a/wmf-config/CirrusSearch-common.php
+++ b/wmf-config/CirrusSearch-common.php
@@ -123,16 +123,14 @@
}
// Configuration for initial test deployment of inline interwiki search via
-// language detection on the search terms. With EnableAltLanguage set to false
-// this is only available with a special query string (cirrusAltLanguage=yes)
-$wgCirrusSearchEnableAltLanguage = $wmgCirrusSearchEnableAltLanguage;
-$wgCirrusSearchInterwikiProv = 'iwsw1';
+// language detection on the search terms.
$wgCirrusSearchWikiToNameMap = $wmgCirrusSearchWikiToNameMap;
$wgCirrusSearchLanguageToWikiMap = $wmgCirrusSearchLanguageToWikiMap;
-// will be overridden by UserTesting triggers, but we need to set the default.
-$wgCirrusSearchTextcatLanguages = [];
+$wgCirrusSearchEnableAltLanguage = $wmgCirrusSearchEnableAltLanguage;
+$wgCirrusSearchLanguageDetectors = $wmgCirrusSearchLanguageDetectors;
+$wgCirrusSearchTextcatLanguages = $wmgCirrusSearchTextcatLanguages;
$wgCirrusSearchTextcatModel = "$IP/vendor/wikimedia/textcat/LM-query";
$wgHooks['CirrusSearchMappingConfig'][] = function( array &$config,
$mappingConfigBuilder ) {
diff --git a/wmf-config/InitialiseSettings.php
b/wmf-config/InitialiseSettings.php
index bd47195..43deefe 100644
--- a/wmf-config/InitialiseSettings.php
+++ b/wmf-config/InitialiseSettings.php
@@ -16555,59 +16555,32 @@
'default' => false,
],
-
'wmgCirrusSearchUserTesting' => [
- // Only being utilized on some wiki's, but because this requires a
special query parameter
- // to trigger it is safe to use everywhere rather than duplicate the
config.
- 'default' => [
- 'textcat1' => [
- // Test only activated via query string trigger
- 'sampleRate' => 0,
- // variables applied to all buckets
- 'globals' => [
- 'wgCirrusSearchInterwikiThreshold' => 3,
- 'wgCirrusSearchLanguageDetectors' => [
- 'textcat' =>
'CirrusSearch\\LanguageDetector\\TextCat',
- ],
- ],
- 'buckets' => [
- // control bucket. setup threshold and detector
but don't turn on the
- // language query
- 'a' => [
- 'trigger' => 'textcat2:a',
- 'globals' => [
-
'wgCirrusSearchEnableAltLanguage' => false,
- ],
- ],
- // test bucket. try textcat and append results
when < 3 results found
- 'b' => [
- 'trigger' => 'textcat2:b',
- 'globals' => [
-
'wgCirrusSearchEnableAltLanguage' => true,
- 'wgCirrusSearchInterwikiProv'
=> 'iwsw8',
- ],
- ],
- // test bucket. try accept-language, fallback
to textcat
- 'c' => [
- 'trigger' => 'textcat2:c',
- 'globals' => [
-
'wgCirrusSearchEnableAltLanguage' => true,
- 'wgCirrusSearchInterwikiProv'
=> 'iwsw9',
-
'wgCirrusSearchLanguageDetectors' => [
- 'accept-lang' =>
'CirrusSearch\\LanguageDetector\\HttpAccept',
- 'textcat' =>
'CirrusSearch\\LanguageDetector\\TextCat',
- ],
- ],
- ],
- ],
- ],
- ],
+ 'default' => [],
],
-'wgCirrusSearchTextcatLanguages' => [
+'wmgCirrusSearchLanguageDetectors' => [
+ 'default' => [],
+ 'enwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+ 'dewiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+ 'eswiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+ 'itwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+ 'frwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+],
+
+'wmgCirrusSearchEnableAltLanguage' => [
+ 'default' => false,
+ 'enwiki' => true,
+ 'dewiki' => true,
+ 'eswiki' => true,
+ 'itwiki' => true,
+ 'frwiki' => true,
+],
+
+'wmgCirrusSearchTextcatLanguages' => [
'default' => [
- 'en', 'es', 'zh', 'pt', 'ar', 'ru', 'fa', 'ko',
- 'bn', 'bg', 'hi', 'el', 'ja', 'ta', 'th', 'he'
+ 'en', 'es', 'zh', 'pt', 'ar', 'ru', 'fa', 'ko',
+ 'bn', 'bg', 'hi', 'el', 'ja', 'ta', 'th', 'he'
],
'frwiki' => [
'fr', 'en', 'ar', 'ru', 'zh', 'th', 'el', 'hy',
--
To view, visit https://gerrit.wikimedia.org/r/301369
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifca63c340d343b4f35b84d04fe5359988d121497
Gerrit-PatchSet: 1
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits