DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/317512

Change subject: Add wgContentNamespaces to the list of vars loaded by 
SearchConfig
......................................................................

Add wgContentNamespaces to the list of vars loaded by SearchConfig

This is our best bet to explain the errors that started to spam the
logs when we activated BM25 on the top10 wikipedias.
It was the first time that a rescore profile was used with the value
'content' as supported_namespaces, which tells the RescoreBuilder to
inspect wgContentNamespaces.
Unfortunately, when running a secondTry query triggered by TextCat
language detection, we load an external wiki config via SearchConfig.
This class uses a prefix filter for cirrus vars and a whitelist
approach for non-cirrus ones, and wgContentNamespaces was not on that
whitelist.

This patch adds 'wgContentNamespaces' to this whitelist.

Bug: T148840
Change-Id: If7452e0d12dd0cb3d3fed36106708e5a0f95c41d
(cherry picked from commit 47bd6747a1537ab85d0443396c06e92ab4a674c8)
---
M includes/SearchConfig.php
A tests/unit/SearchConfigTest.php
2 files changed, 50 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/12/317512/1

diff --git a/includes/SearchConfig.php b/includes/SearchConfig.php
index 5576d96..89d73cf 100644
--- a/includes/SearchConfig.php
+++ b/includes/SearchConfig.php
@@ -14,6 +14,13 @@
        // Constants for referring to various config values. Helps prevent 
fat-fingers
        const INDEX_BASE_NAME = 'CirrusSearchIndexBaseName';
        const PREFIX_IDS = 'CirrusSearchPrefixIds';
+       const CIRRUS_VAR_PREFIX = 'wgCirrus';
+
+       /** @static string[] non cirrus vars to load when loading external wiki 
config */
+       private static $nonCirrusVars = [
+               'wgLanguageCode',
+               'wgContentNamespaces',
+       ];
 
        /**
         * Override settings
@@ -53,6 +60,14 @@
 
        /**
         * Create new search config for current or other wiki.
+        * NOTE: if loading another wiki config the list of variables extracted
+        * is:
+        *   - all globals with a prefix 'wgCirrus'
+        *   - all non cirrus vars defined in self::$nonCirrusVars
+        * Make sure to update this array when new vars are needed or you may 
encounter
+        * issues when running queries on external wiki such as TextCat lang 
detection
+        * see CirrusSearch::searchTextSecondTry().
+        *
         * @param string|null $overrideWiki Interwiki link name for wiki
         * @param string|null $overrideName DB name for the wiki
         */
@@ -61,7 +76,7 @@
                if ( $overrideWiki && $overrideName ) {
                        $this->wikiId = $overrideName;
                        if ( $this->wikiId != wfWikiID() ) {
-                               $this->source = new \HashConfig( 
$this->getConfigVars( $overrideName, 'wgCirrus' ) );
+                               $this->source = new \HashConfig( 
$this->getConfigVars( $overrideName, self::CIRRUS_VAR_PREFIX ) );
                                $this->prefix = 'wg';
                                // Re-create language object
                                $this->source->set( 'wgContLang', 
\Language::factory( $this->source->get( 'wgLanguageCode' ) ) );
@@ -74,11 +89,14 @@
 
        /**
         * Get search config vars from other wiki's config
+        *
+        * Public for unit test purpose only.
+        *
         * @param string $wiki Target wiki
         * @param string $prefix Cirrus variables prefix
         * @return array
         */
-       private function getConfigVars( $wiki, $prefix ) {
+       public function getConfigVars( $wiki, $prefix ) {
                global $wgConf;
 
                $cirrusVars = array_filter( array_keys($GLOBALS),
@@ -89,7 +107,7 @@
                                        return strncmp( $key, $prefix, 
strlen($prefix) ) === 0;
                                }
                );
-               $cirrusVars[] = 'wgLanguageCode';
+               $cirrusVars = array_merge( $cirrusVars, self::$nonCirrusVars );
                // Hack to work around https://phabricator.wikimedia.org/T111441
                putenv( 'REQUEST_METHOD' );
                return $wgConf->getConfig( $wiki, $cirrusVars );
@@ -304,4 +322,12 @@
                        $this->writableClusters = $this->availableClusters;
                }
        }
+
+       /**
+        * for unit tests purpose only
+        * @return string[] list of "non-cirrus" var names
+        */
+       public static function getNonCirrusConfigVarNames() {
+               return self::$nonCirrusVars;
+       }
 }
diff --git a/tests/unit/SearchConfigTest.php b/tests/unit/SearchConfigTest.php
new file mode 100644
index 0000000..24f11c3
--- /dev/null
+++ b/tests/unit/SearchConfigTest.php
@@ -0,0 +1,21 @@
+<?php
+
+namespace CirrusSearch;
+
+class SearchConfigTest extends \MediaWikiTestCase {
+       public function testInterWikiConfig() {
+               $config = new SearchConfig();
+               $config = new \HashConfig( $config->getConfigVars(wfWikiID(), 
SearchConfig::CIRRUS_VAR_PREFIX) );
+               $prefix = SearchConfig::CIRRUS_VAR_PREFIX;
+               foreach( $GLOBALS as $n => $v ) {
+                       if ( $v === null ) {
+                               continue;
+                       }
+                       if ( strncmp( $n, $prefix, strlen( $prefix ) ) == 0
+                               || in_array( $n, 
SearchConfig::getNonCirrusConfigVarNames() )
+                       ) {
+                               $this->assertEquals( $v, $config->get( $n ), 
"Var $n" );
+                       }
+               }
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/317512
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If7452e0d12dd0cb3d3fed36106708e5a0f95c41d
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: wmf/1.28.0-wmf.22
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to