EBernhardson has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/370497 )

Change subject: Enable max token count for phrase rescore on zh lang wikis
......................................................................

Enable max token count for phrase rescore on zh lang wikis

Bug: T169498
Change-Id: Ia9d36d04400e010855e79750aa77262126187fc9
---
M tests/cirrusTest.php
M wmf-config/CirrusSearch-common.php
M wmf-config/InitialiseSettings.php
3 files changed, 12 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config 
refs/changes/97/370497/1

diff --git a/tests/cirrusTest.php b/tests/cirrusTest.php
index 648eef4..646e0cb 100644
--- a/tests/cirrusTest.php
+++ b/tests/cirrusTest.php
@@ -243,6 +243,7 @@
                                        'wmgCirrusSearchSimilarityProfile' => 
'wmf_defaults',
                                        'wmgCirrusSearchRescoreProfile' => 
'wsum_inclinks',
                                        
'wmgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
+                                       'wmgCirrusSearchMaxPhraseTokens' => 10,
                                ],
                        ],
                        'zh_min_nanwikisource' => [ 'zh_min_nanwikisource', 
'wikisource',
diff --git a/wmf-config/CirrusSearch-common.php 
b/wmf-config/CirrusSearch-common.php
index b440d43..6215815 100644
--- a/wmf-config/CirrusSearch-common.php
+++ b/wmf-config/CirrusSearch-common.php
@@ -257,6 +257,9 @@
 // Configure extra index settings set during index creation
 $wgCirrusSearchExtraIndexSettings = $wmgCirrusSearchExtraIndexSettings;
 
+// Limit on the number of tokens we will run phrase rescores with
+$wgCirrusSearchMaxPhraseTokens = $wmgCirrusSearchMaxPhraseTokens;
+
 # Load per realm specific configuration, either:
 # - CirrusSearch-labs.php
 # - CirrusSearch-production.php
diff --git a/wmf-config/InitialiseSettings.php 
b/wmf-config/InitialiseSettings.php
index bea92dc..cef61ec 100644
--- a/wmf-config/InitialiseSettings.php
+++ b/wmf-config/InitialiseSettings.php
@@ -18382,6 +18382,14 @@
 ],
 // @} end of wmgCirrusSearchRecycleCompletionSuggesterIndex
 
+// Disable phrase rescore on zh queries with too many tokens.
+// Bandaid for T169498, should be removed when a proper
+// fix is determined
+'wmgCirrusSearchMaxPhraseTokens' => [
+       'default' => null,
+       'zh' => 10,
+],
+
 // Configure ICU Folding, 'default': controlled by cirrus
 // 'no': disable, 'yes': force
 'wmgCirrusSearchUseIcuFolding' => [

-- 
To view, visit https://gerrit.wikimedia.org/r/370497
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia9d36d04400e010855e79750aa77262126187fc9
Gerrit-PatchSet: 1
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to