DCausse has uploaded a new change for review. https://gerrit.wikimedia.org/r/324738
Change subject: [cirrus] enable BM25 on all but wikis with spaceless languages ...................................................................... [cirrus] enable BM25 on all but wikis with spaceless languages Bug: T152092 Change-Id: I6c30dc6ab75b2e05ba9294641e9d1d661ffb4d61 --- M tests/cirrusTest.php M wmf-config/InitialiseSettings.php 2 files changed, 66 insertions(+), 20 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config refs/changes/38/324738/1 diff --git a/tests/cirrusTest.php b/tests/cirrusTest.php index dcf048e..f135f87 100644 --- a/tests/cirrusTest.php +++ b/tests/cirrusTest.php @@ -102,16 +102,29 @@ $wgConf = $this->loadWgConf( $wmfRealm ); list( $site, $lang ) = $wgConf->siteFromDB( $wgDBname ); - $globals = $wgConf->getAll( $wgDBname, $dbSuffix, array( - 'lang' => $lang, - 'docRoot' => '/dev/null', - 'site' => $site, - 'stdlogo' => 'file://dev/null', - ), - // Not sure if it's the right way to enable the wikipedia -> enwiki resolution - array( $site ) - ); + $wikiTags = []; + foreach ( [ 'private', 'fishbowl', 'special', 'closed', 'flow', 'flaggedrevs', 'small', 'medium', + 'large', 'wikimania', 'wikidata', 'wikidataclient', 'visualeditor-nondefault', + 'commonsuploads', 'nonbetafeatures', 'group0', 'group1', 'group2', 'wikipedia', 'nonglobal', + 'wikitech', 'nonecho', 'mobilemainpagelegacy', 'clldefault', 'nowikidatadescriptiontaglines', + 'top6-wikipedia' + ] as $tag ) { + $dblist = MWWikiversions::readDbListFile( $tag ); + if ( in_array( $wgDBname, $dblist ) ) { + $wikiTags[] = $tag; + } + } + $dbSuffix = ( $site === 'wikipedia' ) ? 
'wiki' : $site; + $confParams = [ + 'lang' => $lang, + 'docRoot' => $_SERVER['DOCUMENT_ROOT'], + 'site' => $site, + 'stdlogo' => "//" , + ]; + // Add a per-language tag as well + $wikiTags[] = $wgConf->get( 'wgLanguageCode', $wgDBname, $dbSuffix, $confParams, $wikiTags ); + $globals = $wgConf->getAll( $wgDBname, $dbSuffix, $confParams, $wikiTags ); extract( $globals ); // variables that would have been setup elsewhere, perhaps in mediawiki @@ -239,4 +252,23 @@ $this->assertGreaterThanOrEqual( $numServers - 3, $totalShards ); } } + + public static function provideSimilarityByLanguage() { + return [ + 'zhwiki' => [ 'zhwiki', 'wiki', 'default' ], + 'zh_min_nanwikisource' => [ 'zh_min_nanwikisource', 'wikisource', 'default' ], + 'zh_classicalwiki' => [ 'zh_classicalwiki', 'wiki', 'default' ], + 'thwiktionary' => [ 'thwiktionary', 'wiktionary', 'default' ], + 'zh_yuewiki' => [ 'zh_yuewiki', 'wiki', 'default' ], + 'enwiki' => [ 'enwiki', 'wiki', 'wmf_defaults' ], + 'frwiktionary' => [ 'frwiktionary', 'wiktionary', 'wmf_defaults' ], + ]; + } + /** + * @dataProvider provideSimilarityByLanguage + */ + public function testSimilarityByLanguage( $wiki, $type, $expectedSimilarity ) { + $config = $this->loadCirrusConfig( 'production', $wiki, $type ); + $this->assertEquals( $config['wmgCirrusSearchSimilarityProfile'], $expectedSimilarity ); + } } diff --git a/wmf-config/InitialiseSettings.php b/wmf-config/InitialiseSettings.php index 7868e70..cc32475 100644 --- a/wmf-config/InitialiseSettings.php +++ b/wmf-config/InitialiseSettings.php @@ -16677,17 +16677,31 @@ ], 'wmgCirrusSearchSimilarityProfile' => [ - 'default' => 'default', - 'enwiki' => 'wmf_defaults', - 'arwiki' => 'wmf_defaults', - 'dewiki' => 'wmf_defaults', - 'eswiki' => 'wmf_defaults', - 'frwiki' => 'wmf_defaults', - 'itwiki' => 'wmf_defaults', - 'nlwiki' => 'wmf_defaults', - 'ruwiki' => 'wmf_defaults', - 'plwiki' => 'wmf_defaults', - 'ptwiki' => 'wmf_defaults', + 'default' => 'wmf_defaults', + // Uses the lang tag, 
list of spaceless languages + // (see https://www.mediawiki.org/wiki/User:TJones_(WMF)/Notes/Spaceless_Writing_Systems_and_Wiki-Projects) + "bo" => "default", + "dz" => "default", + "gan" => "default", + "ja" => "default", + "km" => "default", + "lo" => "default", + "my" => "default", + "th" => "default", + "wuu" => "default", + "zh" => "default", + "zh-classical" => "default", // should be deprecated in favor of lzh + "lzh" => "default", // zh-classical + "zh-yue" => "default", // should be deprecated in favor of yue + "yue" => "default", // zh_yue + // This list below are languages that may use use mixed scripts + "bug" => "default", + "cdo" => "default", + "cr" => "default", + "hak" => "default", + "jv" => "default", + "zh-min-nan" => "default", // needs to support deprecated language code (e.g. zh_min_nanwikisource) + "nan" => "default", // e.g. zh_min_nan ], 'wmgCirrusSearchRescoreProfile' => [ -- To view, visit https://gerrit.wikimedia.org/r/324738 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I6c30dc6ab75b2e05ba9294641e9d1d661ffb4d61 Gerrit-PatchSet: 1 Gerrit-Project: operations/mediawiki-config Gerrit-Branch: master Gerrit-Owner: DCausse <dcau...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits