jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/377498 )

Change subject: Put default search namespaces in content index
......................................................................


Put default search namespaces in content index

Part of the benefit of splitting our data between content and
general indices, besides frequency counts, is better performance
from querying a much smaller dataset. Many wikis, however,
configure their default search namespaces to include more than
just content namespaces. Adjust our handling so that all
namespaces searched by default are included in the content
index.

After deployment, this will require a run of the saneitizer on
all wikis whose wgNamespacesToBeSearchedDefault includes
namespaces beyond their content namespaces.

Change-Id: Iba8b8e1f204958ccdf2cab562dc34e0008fe97ea
---
M includes/Api/ConfigDump.php
M includes/Connection.php
M includes/Search/RescoreBuilders.php
M includes/SearchConfig.php
M tests/unit/ConnectionTest.php
M tests/unit/RescoreBuilderTest.php
M tests/unit/SearcherTest.php
7 files changed, 49 insertions(+), 15 deletions(-)

Approvals:
  Smalyshev: Looks good to me, approved
  Cindy-the-browser-test-bot: Looks good to me, but someone else must approve
  jenkins-bot: Verified
  DCausse: Looks good to me, but someone else must approve



diff --git a/includes/Api/ConfigDump.php b/includes/Api/ConfigDump.php
index 14dd3ef..aba3420 100644
--- a/includes/Api/ConfigDump.php
+++ b/includes/Api/ConfigDump.php
@@ -122,6 +122,7 @@
                'CirrusSearchMaxPhraseTokens',
                'LanguageCode',
                'ContentNamespaces',
+               'NamespacesToBeSearchedDefault',
        ];
 
        public function execute() {
diff --git a/includes/Connection.php b/includes/Connection.php
index 128985d..c2a4da0 100644
--- a/includes/Connection.php
+++ b/includes/Connection.php
@@ -265,6 +265,10 @@
                if ( isset( $mappings[$namespace] ) ) {
                        return $mappings[$namespace];
                }
+               $defaultSearch = $this->config->get( 
'NamespacesToBeSearchedDefault' );
+               if ( isset( $defaultSearch[$namespace] ) && 
$defaultSearch[$namespace] ) {
+                       return self::CONTENT_INDEX_TYPE;
+               }
 
                return MWNamespace::isContent( $namespace ) ?
                        self::CONTENT_INDEX_TYPE : self::GENERAL_INDEX_TYPE;
@@ -282,14 +286,29 @@
                }
 
                $mappings = $this->config->get( 'CirrusSearchNamespaceMappings' 
);
-               $count = count( array_keys( $mappings, $indexType ) );
+               $inIndexType = [];
+               foreach ( $mappings as $ns => $type ) {
+                       if ( $indexType === $type ) {
+                               $inIndexType[$ns] = true;
+                       }
+               }
                if ( $indexType === self::CONTENT_INDEX_TYPE ) {
                        // The content namespace includes everything set in the 
mappings to content (count right now)
                        // Plus everything in wgContentNamespaces that isn't 
already in namespace mappings
                        $contentNamespaces = $this->config->get( 
'ContentNamespaces' );
-                       $count += count( array_diff( $contentNamespaces, 
array_keys( $mappings ) ) );
+                       foreach ( $contentNamespaces as $ns ) {
+                               if ( !isset( $mappings[$ns] ) ) {
+                                       $inIndexType[$ns] = true;
+                               }
+                       }
+                       $defaultSearch = $this->config->get( 
'NamespacesToBeSearchedDefault' );
+                       foreach ( $defaultSearch as $ns => $shouldSearch ) {
+                               if ( $shouldSearch && !isset( $mappings[$ns] ) 
) {
+                                       $inIndexType[$ns] = true;
+                               }
+                       }
                }
-               return $count;
+               return count( $inIndexType );
        }
 
        /**
diff --git a/includes/Search/RescoreBuilders.php 
b/includes/Search/RescoreBuilders.php
index 2650c52..16f5fbc 100644
--- a/includes/Search/RescoreBuilders.php
+++ b/includes/Search/RescoreBuilders.php
@@ -222,6 +222,13 @@
                                return true;
                        case 'content':
                                $profileNs = $this->context->getConfig()->get( 
'ContentNamespaces' );
+                               // Default search namespaces are also 
considered content
+                               $defaultSearch = 
$this->context->getConfig()->get( 'NamespacesToBeSearchedDefault' );
+                               foreach ( $defaultSearch as $ns => $isDefault ) 
{
+                                       if ( $isDefault ) {
+                                               $profileNs[] = $ns;
+                                       }
+                               }
                                break;
                        default:
                                throw new InvalidRescoreProfileException( 
"Invalid rescore profile: supported_namespaces should be 'all', 'content' or an 
array of namespaces" );
diff --git a/includes/SearchConfig.php b/includes/SearchConfig.php
index 4d49ddd..a44443c 100644
--- a/includes/SearchConfig.php
+++ b/includes/SearchConfig.php
@@ -20,6 +20,7 @@
        private static $nonCirrusVars = [
                'wgLanguageCode',
                'wgContentNamespaces',
+               'wgNamespacesToBeSearchedDefault',
        ];
 
        /**
diff --git a/tests/unit/ConnectionTest.php b/tests/unit/ConnectionTest.php
index 99f3e43..a2ee516 100644
--- a/tests/unit/ConnectionTest.php
+++ b/tests/unit/ConnectionTest.php
@@ -26,10 +26,11 @@
        /**
         * @dataProvider provideNamespacesInIndexType
         */
-       public function testNamespacesInIndexType( $contentNamespaces, 
$namespaceMappings, $indexType, $expected ) {
+       public function testNamespacesInIndexType( $contentNamespaces, 
$defaultSearchNamespaces, $namespaceMappings, $indexType, $expected ) {
                $config = new HashSearchConfig( [
                        'ContentNamespaces' => $contentNamespaces,
                        'CirrusSearchNamespaceMappings' => $namespaceMappings,
+                       'NamespacesToBeSearchedDefault' => 
$defaultSearchNamespaces,
                ], [ 'inherit' ] );
                $conn = new Connection( $config );
                $this->assertEquals( $expected, $conn->namespacesInIndexType( 
$indexType ) );
@@ -38,20 +39,24 @@
        public static function provideNamespacesInIndexType() {
                return [
                        // Standard:
-                       [ [ NS_MAIN ], [], 'content', 1 ],
-                       [ [ NS_MAIN ], [], 'general', false ],
+                       [ [ NS_MAIN ], [ NS_MAIN => true ], [], 'content', 1 ],
+                       [ [ NS_MAIN ], [ NS_MAIN => true ], [], 'general', 
false ],
 
                        // Commons:
-                       [ [ NS_MAIN ], [ NS_FILE => 'file' ], 'file', 1 ],
+                       [ [ NS_MAIN ], [ NS_MAIN => true ], [ NS_FILE => 'file' 
], 'file', 1 ],
 
                        // Funky:
-                       [ [ NS_MAIN ], [ NS_FILE => 'file', NS_FILE_TALK => 
'file' ], 'file', 2 ],
-                       [ [ NS_MAIN ], [ NS_FILE => 'file', NS_FILE_TALK => 
'file' ], 'conent', false ],
-                       [ [ NS_MAIN, NS_FILE ], [], 'content', 2 ],
-                       [ [ NS_MAIN, NS_FILE ], [ NS_FILE => 'file' ], 'file', 
1 ],
-                       [ [ NS_MAIN, NS_FILE ], [ NS_FILE => 'file' ], 
'content', 1 ],
-                       [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [ NS_FILE => 
'file' ], 'content', 2 ],
-                       [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [], 'content', 3 
],
+                       [ [ NS_MAIN ], [ NS_MAIN => true ], [ NS_FILE => 
'file', NS_FILE_TALK => 'file' ], 'file', 2 ],
+                       [ [ NS_MAIN ], [ NS_MAIN => true ], [ NS_FILE => 
'file', NS_FILE_TALK => 'file' ], 'conent', false ],
+                       [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true ], [], 
'content', 2 ],
+                       [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true ], [ NS_FILE 
=> 'file' ], 'file', 1 ],
+                       [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true ], [ NS_FILE 
=> 'file' ], 'content', 1 ],
+                       [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [ NS_MAIN => true 
], [ NS_FILE => 'file' ], 'content', 2 ],
+                       [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [ NS_MAIN => true 
], [], 'content', 3 ],
+                       [ [ NS_MAIN ], [ NS_MAIN => true, NS_FILE => true ], [ 
NS_FILE => 'file' ], 'content', 1 ],
+                       [ [ NS_MAIN ], [ NS_MAIN => true, NS_FILE => true ], [ 
NS_FILE => 'file' ], 'file', 1 ],
+                       [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true, NS_FILE => 
true ], [ NS_FILE => 'file' ], 'content', 1 ],
+                       [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true, NS_FILE => 
true ], [ NS_FILE => 'file' ], 'file', 1 ],
                ];
        }
 
diff --git a/tests/unit/RescoreBuilderTest.php 
b/tests/unit/RescoreBuilderTest.php
index 28730b4..d4abb56 100644
--- a/tests/unit/RescoreBuilderTest.php
+++ b/tests/unit/RescoreBuilderTest.php
@@ -265,6 +265,7 @@
                ];
                $profile = [
                        'ContentNamespaces' => [ 1, 2 ],
+                       'NamespacesToBeSearchedDefault' => [ 1 => true ],
                        'CirrusSearchRescoreProfiles' => [
                                'full' => [
                                        'supported_namespaces' => [ 0, 1 ],
diff --git a/tests/unit/SearcherTest.php b/tests/unit/SearcherTest.php
index 182eaaf..9795468 100644
--- a/tests/unit/SearcherTest.php
+++ b/tests/unit/SearcherTest.php
@@ -165,7 +165,7 @@
                                }
                        }
                        $this->assertEmpty( $notInApi, implode( ',', $notInApi 
) . " are exported from \CirrusSearch\Api\ConfigDump" );
-                       $this->assertEmpty( $notInSearchConfig, implode( ',', 
$notInApi ) . " are allowed in SearchConfig::getNonCirrusConfigVarNames()" );
+                       $this->assertEmpty( $notInSearchConfig, implode( ',', 
$notInSearchConfig ) . " are allowed in 
SearchConfig::getNonCirrusConfigVarNames()" );
                } finally {
                        SearchConfigUsageDecorator::resetUsedConfigKeys();
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/377498
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iba8b8e1f204958ccdf2cab562dc34e0008fe97ea
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Cindy-the-browser-test-bot <bernhardsone...@gmail.com>
Gerrit-Reviewer: DCausse <dcau...@wikimedia.org>
Gerrit-Reviewer: Gehel <guillaume.leder...@wikimedia.org>
Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: Tjones <tjo...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to