DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/324738

Change subject: [cirrus] enable BM25 on all but wikis with spaceless languages
......................................................................

[cirrus] enable BM25 on all but wikis with spaceless languages

Bug: T152092
Change-Id: I6c30dc6ab75b2e05ba9294641e9d1d661ffb4d61
---
M tests/cirrusTest.php
M wmf-config/InitialiseSettings.php
2 files changed, 66 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config 
refs/changes/38/324738/1

diff --git a/tests/cirrusTest.php b/tests/cirrusTest.php
index dcf048e..f135f87 100644
--- a/tests/cirrusTest.php
+++ b/tests/cirrusTest.php
@@ -102,16 +102,29 @@
                $wgConf = $this->loadWgConf( $wmfRealm );
 
                list( $site, $lang ) = $wgConf->siteFromDB( $wgDBname );
-               $globals = $wgConf->getAll( $wgDBname, $dbSuffix, array(
-                               'lang' => $lang,
-                               'docRoot' => '/dev/null',
-                               'site' => $site,
-                               'stdlogo' => 'file://dev/null',
-                       ),
-                       // Not sure if it's the right way to enable the 
wikipedia -> enwiki resolution
-                       array( $site )
-               );
+               $wikiTags = [];
+               foreach ( [ 'private', 'fishbowl', 'special', 'closed', 'flow', 
'flaggedrevs', 'small', 'medium',
+                               'large', 'wikimania', 'wikidata', 
'wikidataclient', 'visualeditor-nondefault',
+                               'commonsuploads', 'nonbetafeatures', 'group0', 
'group1', 'group2', 'wikipedia', 'nonglobal',
+                               'wikitech', 'nonecho', 'mobilemainpagelegacy', 
'clldefault', 'nowikidatadescriptiontaglines',
+                               'top6-wikipedia'
+                       ] as $tag ) {
+                       $dblist = MWWikiversions::readDbListFile( $tag );
+                       if ( in_array( $wgDBname, $dblist ) ) {
+                               $wikiTags[] = $tag;
+                       }
+               }
 
+               $dbSuffix = ( $site === 'wikipedia' ) ? 'wiki' : $site;
+               $confParams = [
+                       'lang'    => $lang,
+                       'docRoot' => $_SERVER['DOCUMENT_ROOT'],
+                       'site'    => $site,
+                       'stdlogo' => "//" ,
+               ];
+               // Add a per-language tag as well
+               $wikiTags[] = $wgConf->get( 'wgLanguageCode', $wgDBname, 
$dbSuffix, $confParams, $wikiTags );
+               $globals = $wgConf->getAll( $wgDBname, $dbSuffix, $confParams, 
$wikiTags );
                extract( $globals );
 
                // variables that would have been setup elsewhere, perhaps in 
mediawiki
@@ -239,4 +252,23 @@
                        $this->assertGreaterThanOrEqual( $numServers - 3, 
$totalShards );
                }
        }
+
+       public static function provideSimilarityByLanguage() {
+               return [
+                       'zhwiki' => [ 'zhwiki', 'wiki', 'default' ],
+                       'zh_min_nanwikisource' => [ 'zh_min_nanwikisource', 
'wikisource', 'default' ],
+                       'zh_classicalwiki' => [ 'zh_classicalwiki', 'wiki', 
'default' ],
+                       'thwiktionary' => [ 'thwiktionary', 'wiktionary', 
'default' ],
+                       'zh_yuewiki' => [ 'zh_yuewiki', 'wiki', 'default' ],
+                       'enwiki' => [ 'enwiki', 'wiki', 'wmf_defaults' ],
+                       'frwiktionary' => [ 'frwiktionary', 'wiktionary', 
'wmf_defaults' ],
+               ];
+       }
+       /**
+        * @dataProvider provideSimilarityByLanguage
+        */
+       public function testSimilarityByLanguage( $wiki, $type, 
$expectedSimilarity ) {
+               $config = $this->loadCirrusConfig( 'production', $wiki, $type );
+               $this->assertEquals( 
$config['wmgCirrusSearchSimilarityProfile'], $expectedSimilarity );
+       }
 }
diff --git a/wmf-config/InitialiseSettings.php 
b/wmf-config/InitialiseSettings.php
index 7868e70..cc32475 100644
--- a/wmf-config/InitialiseSettings.php
+++ b/wmf-config/InitialiseSettings.php
@@ -16677,17 +16677,31 @@
 ],
 
 'wmgCirrusSearchSimilarityProfile' => [
-       'default' => 'default',
-       'enwiki' => 'wmf_defaults',
-       'arwiki' => 'wmf_defaults',
-       'dewiki' => 'wmf_defaults',
-       'eswiki' => 'wmf_defaults',
-       'frwiki' => 'wmf_defaults',
-       'itwiki' => 'wmf_defaults',
-       'nlwiki' => 'wmf_defaults',
-       'ruwiki' => 'wmf_defaults',
-       'plwiki' => 'wmf_defaults',
-       'ptwiki' => 'wmf_defaults',
+       'default' => 'wmf_defaults',
+       // Uses the lang tag, list of spaceless languages
+       // (see 
https://www.mediawiki.org/wiki/User:TJones_(WMF)/Notes/Spaceless_Writing_Systems_and_Wiki-Projects)
+       "bo" => "default",
+       "dz" => "default",
+       "gan" => "default",
+       "ja" => "default",
+       "km" => "default",
+       "lo" => "default",
+       "my" => "default",
+       "th" => "default",
+       "wuu" => "default",
+       "zh" => "default",
+       "zh-classical" => "default", // should be deprecated in favor of lzh
+       "lzh" => "default", // zh-classical
+       "zh-yue" => "default", // should be deprecated in favor of yue
+       "yue" => "default", // zh_yue
+       // The languages listed below may use mixed scripts
+       "bug" => "default",
+       "cdo" => "default",
+       "cr" => "default",
+       "hak" => "default",
+       "jv" => "default",
+       "zh-min-nan" => "default", // needs to support deprecated language code 
(e.g. zh_min_nanwikisource)
+       "nan" => "default", // e.g. zh_min_nan
 ],
 
 'wmgCirrusSearchRescoreProfile' => [

-- 
To view, visit https://gerrit.wikimedia.org/r/324738
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6c30dc6ab75b2e05ba9294641e9d1d661ffb4d61
Gerrit-PatchSet: 1
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to