jenkins-bot has submitted this change and it was merged.

Change subject: [cirrus] switch cirrus BM25 A/B test config to ja, zh, th
......................................................................


[cirrus] switch cirrus BM25 A/B test config to ja, zh, th

Simplified the A/B test config by using profiles defined
in cirrus.
Removed the bucket with the reverse field.

Bug: T147508
Bug: T147495
Change-Id: Idba31a76eee37f242e01630d83a3d2de912806cc
---
M wmf-config/CirrusSearch-common.php
1 file changed, 11 insertions(+), 264 deletions(-)

Approvals:
  EBernhardson: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/wmf-config/CirrusSearch-common.php b/wmf-config/CirrusSearch-common.php
index b7b3466..20271f0 100644
--- a/wmf-config/CirrusSearch-common.php
+++ b/wmf-config/CirrusSearch-common.php
@@ -30,200 +30,21 @@
 }
 $wgCirrusSearchWriteClusters = $wmgCirrusSearchWriteClusters;
 $wgCirrusSearchClusterOverrides = $wmgCirrusSearchClusterOverrides;
+// TODO: remove, transitional config hack to support
+// var name change and avoid warnings with interwiki
+// (textcat) searches
+$wgCirrusSearchFullTextClusterOverrides = $wmgCirrusSearchClusterOverrides;
 
 # Enable user testing
 $wgCirrusSearchUserTesting = $wmgCirrusSearchUserTesting;
 
-# BM25 A/B test, enabled only on enwiki to avoid conflicts with
-# with TextCat language detection
-# UserTesting requires that a var exists in $GLOBALS before setting it
-# All extra vars needed to customize rescore weights. These must be defined
-# at the top level so textcat can still attempt to fetch them when building
-# an other-wiki query.
-$wgCirrusSearchPageViewsW = 1.0;
-$wgCirrusSearchPageViewsK = 1.0;
-$wgCirrusSearchPageViewsA = 1.0;
-$wgCirrusSearchIncLinksW = 1.0;
-$wgCirrusSearchIncLinksK = 1.0;
-$wgCirrusSearchIncLinksA = 1.0;
-$wgCirrusSearchIncLinksAloneW = 1.0;
-$wgCirrusSearchIncLinksAloneK = 1.0;
-$wgCirrusSearchIncLinksAloneA = 1.0;
-
-if ( $wgDBname === 'enwiki' ) {
-       $wgCirrusSearchUserTesting['bm25'] = [
+// BM25 A/B test for ja, zh and th
+if ( in_array( $wgDBname, ['jawiki', 'zhwiki', 'thwiki'] ) ) {
+       $wgCirrusSearchUserTesting['bm25sc'] = [
                'sampleRate' => 0,
-               'globals' => [
-                       'wgCirrusSearchBoostTemplates' => [],
-                       'wgCirrusSearchRescoreProfiles' => 
$wgCirrusSearchRescoreProfiles + [
-                               'wsum_inclinks' => [
-                                       'supported_namespaces' => 'all',
-                                       'rescore' => [
-                                               [
-                                                       'window' => 8192,
-                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
-                                                       'query_weight' => 1.0,
-                                                       'rescore_query_weight' 
=> 1.0,
-                                                       'score_mode' => 'total',
-                                                       'type' => 
'function_score',
-                                                       'function_chain' => 
'wsum_inclinks'
-                                               ],
-                                               [
-                                                       'window' => 8192,
-                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
-                                                       'query_weight' => 1.0,
-                                                       'rescore_query_weight' 
=> 1.0,
-                                                       'score_mode' => 
'multiply',
-                                                       'type' => 
'function_score',
-                                                       'function_chain' => 
'optional_chain'
-                                               ],
-                                       ],
-                               ],
-                               'wsum_inclinks_pv' => [
-                                       'supported_namespaces' => 'content',
-                                       'fallback_profile' => 'wsum_inclinks',
-                                       'rescore' => [
-                                               [
-                                                       'window' => 8192,
-                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
-                                                       'query_weight' => 1.0,
-                                                       'rescore_query_weight' 
=> 1.0,
-                                                       'score_mode' => 'total',
-                                                       'type' => 
'function_score',
-                                                       'function_chain' => 
'wsum_inclinks_pv'
-                                               ],
-                                               [
-                                                       'window' => 8192,
-                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
-                                                       'query_weight' => 1.0,
-                                                       'rescore_query_weight' 
=> 1.0,
-                                                       'score_mode' => 
'multiply',
-                                                       'type' => 
'function_score',
-                                                       'function_chain' => 
'optional_chain'
-                                               ],
-                                       ],
-                               ],
-                       ],
-                       'wgCirrusSearchRescoreFunctionScoreChains' => 
$wgCirrusSearchRescoreFunctionScoreChains + [
-                               'wsum_inclinks' => [
-                                       'functions' => [
-                                               [
-                                                       'type' => 'satu',
-                                                       'weight' => [
-                                                               'value' => 1.2,
-                                                               
'config_override' => 'CirrusSearchIncLinksAloneW',
-                                                               
'uri_param_override' => 'cirrusIncLinksAloneW',
-                                                       ],
-                                                       'params' => [
-                                                               'field' => 
'incoming_links',
-                                                               'k' => [
-                                                                       'value' 
=> 10,
-                                                                       
'config_override' => 'CirrusSearchIncLinksAloneK',
-                                                                       
'uri_param_override' => 'cirrusIncLinksAloneK',
-                                                               ],
-                                                               'a' => [
-                                                                       'value' 
=> 1,
-                                                                       
'config_override' => 'CirrusSearchIncLinksAloneA',
-                                                                       
'uri_param_override' => 'cirrusIncLinksAloneA',
-                                                               ]
-                                                       ],
-                                               ],
-                                       ],
-                               ],
-                               'wsum_inclinks_pv' => [
-                                       'score_mode' => 'sum',
-                                       'boost_mode' => 'sum',
-                                       'functions' => [
-                                               [
-                                                       'type' => 'satu',
-                                                       'weight' => [
-                                                               'value' => 1.8,
-                                                               
'config_override' => 'CirrusSearchPageViewsW',
-                                                               
'uri_param_override' => 'cirrusPageViewsW',
-                                                       ],
-                                                       'params' => [
-                                                               'field' => 
'popularity_score',
-                                                               'k' => [
-                                                                       'value' 
=> 0.0000007,
-                                                                       
'config_override' => 'CirrusSearchPageViewsK',
-                                                                       
'uri_param_override' => 'cirrusPageViewsK',
-                                                               ],
-                                                               'a' => [
-                                                                       'value' 
=> 1,
-                                                                       
'config_override' => 'CirrusSearchPageViewsA',
-                                                                       
'uri_param_override' => 'cirrusPageViewsA',
-                                                               ],
-                                                       ],
-                                               ],
-                                               [
-                                                       'type' => 'satu',
-                                                       'weight' => [
-                                                               'value' => 0.6,
-                                                               
'config_override' => 'CirrusSearchIncLinksW',
-                                                               
'uri_param_override' => 'cirrusIncLinkssW',
-                                                       ],
-                                                       'params' => [
-                                                               'field' => 
'incoming_links',
-                                                               'k' => [
-                                                                       'value' 
=> 10,
-                                                                       
'config_override' => 'CirrusSearchIncLinksK',
-                                                                       
'uri_param_override' => 'cirrusIncLinksK',
-                                                               ],
-                                                               'a' => [
-                                                                       'value' 
=> 1,
-                                                                       
'config_override' => 'CirrusSearchIncLinksA',
-                                                                       
'uri_param_override' => 'cirrusIncLinksA',
-                                                               ],
-                                                       ],
-                                               ],
-                                       ],
-                               ],
-                       ],
-                       'wgCirrusSearchFullTextQueryBuilderProfiles' => 
$wgCirrusSearchFullTextQueryBuilderProfiles + [
-                               'perfield_builder' => [
-                                       'builder_class' => 
\CirrusSearch\Query\FullTextSimpleMatchQueryBuilder::class,
-                                       'settings' => [
-                                               'default_min_should_match' => 
'1',
-                                               'default_query_type' => 
'most_fields',
-                                               'default_stem_weight' => 3.0,
-                                               'fields' => [
-                                                       'title' => 0.3,
-                                                       'redirect.title' => [
-                                                               'boost' => 0.27,
-                                                               'in_dismax' => 
'redirects_or_shingles'
-                                                       ],
-                                                       'suggest' => [
-                                                               'is_plain' => 
true,
-                                                               'boost' => 0.20,
-                                                               'in_dismax' => 
'redirects_or_shingles',
-                                                       ],
-                                                       'category' => 0.05,
-                                                       'heading' => 0.05,
-                                                       'text' => [
-                                                               'boost' => 0.6,
-                                                               'in_dismax' => 
'text_and_opening_text',
-                                                       ],
-                                                       'opening_text' => [
-                                                               'boost' => 0.5,
-                                                               'in_dismax' => 
'text_and_opening_text',
-                                                       ],
-                                                       'auxiliary_text' => 
0.05,
-                                                       'file_text' => 0.5,
-                                               ],
-                                               'phrase_rescore_fields' => [
-                                                       // very low (don't 
forget it's multiplied by 10 by default)
-                                                       // Use the all field to 
avoid loading positions on another field,
-                                                       // score is roughly the 
same when used on text
-                                                       'all' => 0.03,
-                                                       'all.plain' => 0.05,
-                                               ],
-                                       ],
-                               ],
-                       ],
-               ],
+               'globals' => [],
                'buckets' => [
                        // Prod settings on eqiad
-                       // nDCG@5 0.2772 (enwiki scores excluded)
                        'control' => [
                                'trigger' => 'bm25:control',
                                'globals' => [
@@ -232,111 +53,37 @@
                                ],
                        ],
                        // BM25+allfield and QueryString, inclinks as a sum
-                       // nDCG@5 0.2689 (enwiki scores excluded)
                        'bm25_allfield' => [
                                'trigger' => 'bm25:allfield',
                                'globals' => [
                                        'wgCirrusSearchDefaultCluster' => 
'codfw',
                                        
'wgCirrusSearchFullTextQueryBuilderProfile' => 'default',
-                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
-                                               'build' => true,
-                                               'use' => false,
-                                       ],
                                        
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
-                                       // set only here because only needed 
for reindexing
-                                       'wgCirrusSearchSimilarityProfile' => [
-                                               'similarity' => [
-                                                       'arrays' => [
-                                                               'type' => 
'BM25',
-                                                               'k1' => 1.2,
-                                                               'b' => 0.3,
-                                                       ],
-                                                       'text' => [
-                                                               'type' => 
'BM25',
-                                                               'k1' => 1.2,
-                                                               'b' => 0.75,
-                                                       ],
-                                               ],
-                                               'fields' => [
-                                                       '__default__' => 'text',
-                                                       'category' => 'arrays',
-                                                       'heading' => 'arrays',
-                                                       'redirect.title' => 
'arrays',
-                                                       'suggest' => 'arrays',
-                                               ],
-                                       ],
+                                       'wgCirrusSearchSimilarityProfile' => 
'wmf_defaults',
                                        'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks',
-                                       'wgCirrusSearchIncLinksAloneW' => 1.3,
-                                       'wgCirrusSearchIncLinksAloneK' => 30,
-                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
                                ]
                        ],
                        // BM25, perfield and SimpleMatch Query builder, inclinks as a sum
-                       // nDCG@5 0.3371 (enwiki scores excluded)
                        'bm25_inclinks' => [
                                'trigger' => 'bm25:inclinks',
                                'globals' => [
                                        'wgCirrusSearchDefaultCluster' => 
'codfw',
                                        
'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
                                        
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
-                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
-                                               'build' => true,
-                                               'use' => false,
-                                       ],
+                                       'wgCirrusSearchSimilarityProfile' => 
'wmf_defaults',
                                        'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks',
-                                       'wgCirrusSearchIncLinksAloneW' => 6.5,
-                                       'wgCirrusSearchIncLinksAloneK' => 30,
-                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
                                ]
                        ],
                        // BM25, perfield and SimpleMatch Query builder, inclinks+pop score as a sum
-                       // nDCG@5 0.3368 (enwiki scores excluded)
                        'bm25_inclinks_pv' => [
                                'trigger' => 'bm25:inclinks_pv',
                                'globals' => [
                                        'wgCirrusSearchDefaultCluster' => 
'codfw',
                                        
'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
                                        
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
-                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
-                                               'build' => true,
-                                               'use' => false,
-                                       ],
+                                       'wgCirrusSearchSimilarityProfile' => 
'wmf_defaults',
                                        'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks_pv',
-                                       'wgCirrusSearchPageViewsW' => 1.5,
-                                       'wgCirrusSearchPageViewsK' => 8E-6,
-                                       'wgCirrusSearchPageViewsA' => 0.8,
-                                       'wgCirrusSearchIncLinksW' => 5.0,
-                                       'wgCirrusSearchIncLinksK' => 30,
-                                       'wgCirrusSearchIncLinksA' => 0.7,
-                                       'wgCirrusSearchIncLinksAloneW' => 6.5,
-                                       'wgCirrusSearchIncLinksAloneK' => 30,
-                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
                                ]
-                       ],
-                       // BM25, perfield and SimpleMatch Query builder, 
inclinks+pop score as a sum
-                       // nDCG@5 0.3368 (enwiki scores excluded)
-                       // Reverse field enabled for DYM
-                       'bm25_inclinks_pv_rev' => [
-                               'trigger' => 'bm25:inclinks_pv_rev',
-                               'globals' => [
-                                       'wgCirrusSearchDefaultCluster' => 
'codfw',
-                                       
'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
-                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
-                                               'build' => true,
-                                               'use' => true,
-                                       ],
-                                       
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
-                                       'wgCirrusSearchPageViewsW' => 1.5,
-                                       'wgCirrusSearchPageViewsK' => 8E-6,
-                                       'wgCirrusSearchPageViewsA' => 0.8,
-                                       'wgCirrusSearchIncLinksW' => 5.0,
-                                       'wgCirrusSearchIncLinksK' => 30,
-                                       'wgCirrusSearchIncLinksA' => 0.7,
-                                       'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks_pv',
-                                       'wgCirrusSearchIncLinksAloneW' => 6.5,
-                                       'wgCirrusSearchIncLinksAloneK' => 30,
-                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
-                               ],
                        ],
                ],
        ];

-- 
To view, visit https://gerrit.wikimedia.org/r/315250
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idba31a76eee37f242e01630d83a3d2de912806cc
Gerrit-PatchSet: 4
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>
Gerrit-Reviewer: DCausse <dcau...@wikimedia.org>
Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Florianschmidtwelzow <florian.schmidt.stargatewis...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to