EBernhardson has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/319498

Change subject: [WIP] Script to generate wmgCirrusSearchInterwikiSources
......................................................................

[WIP] Script to generate wmgCirrusSearchInterwikiSources

CirrusSearch has a variable, $wgCirrusSearchInterwikiSources, that
lists the sister wikis that should be queried as part of Special:Search
and displayed in a sidebar. This generates the full map for
$wmgCirrusSearchInterwikiSources that will be included into
InitialiseSettings.php.

We are working up something so this can be more directly done, but
before that is complete we need to start running some load tests to get
an idea of other changes that need to be made. This should be a
reasonable stop-gap solution.

Change-Id: I7145b63a97ed6caf142df076d19486752af46426
---
A dumpCirrusInterwikiSources.php
1 file changed, 112 insertions(+), 0 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikimediaMaintenance 
refs/changes/98/319498/1

diff --git a/dumpCirrusInterwikiSources.php b/dumpCirrusInterwikiSources.php
new file mode 100644
index 0000000..01f9340
--- /dev/null
+++ b/dumpCirrusInterwikiSources.php
@@ -0,0 +1,112 @@
+<?php
+
+/**
+ * Build a map from each wiki's db name to the list of its sister sites' db names
+ * and the interwiki prefixes that are used.
+ *
+ * The output of this is intended to be used in the mediawiki-config
+ * repository as the value for 'wmgCirrusSearchInterwikiSources'
+ * in InitialiseSettings.php
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ * @ingroup Wikimedia
+ */
+require_once __DIR__ . '/WikimediaMaintenance.php';
+
+
+/**
+ * Maintenance script that prints, via var_export(), a map of
+ * wiki db name => [ sister wiki db name => interwiki prefix ].
+ */
+class DumpCirrusInterwikiSources extends Maintenance {
+
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Build CirrusSearch interwiki source map";
+       }
+
+       /**
+        * Evaluate a dblist expression and drop every wiki that is in the
+        * 'private' dblist.
+        *
+        * @param string $project dblist expression; 'wiki' is treated as an
+        *  alias of 'wikipedia'
+        * @return string[] Db names in $project that are not private
+        */
+       private function expandDbList( $project ) {
+               // The private list does not change during a run, evaluate it once.
+               static $private = null;
+               if ( $private === null ) {
+                       $private = MWWikiversions::evalDbListExpression( 'private' );
+               }
+               if ( $project === 'wiki' ) {
+                       $project = 'wikipedia';
+               }
+               $list = MWWikiversions::evalDbListExpression( $project );
+               return array_diff( $list, $private );
+       }
+
+       /**
+        * Find the interwiki prefix that $dbName uses to reach a given project.
+        *
+        * Scans the "$dbName:<prefix>" entries of $wgInterwikiCache and returns
+        * the first prefix whose target host has $targetDomain as its second
+        * dot-separated label.
+        *
+        * @param string $dbName Db name of the wiki the prefix is used on
+        * @param string $targetDomain Project to look for; 'wiki' is treated as
+        *  an alias of 'wikipedia'
+        * @return string|null The prefix, or null if none was found
+        */
+       private function findInterwikiPrefix( $dbName, $targetDomain ) {
+               global $wgInterwikiCache;
+
+               // Nothing to scan unless the cache is available as an array.
+               if ( !is_array( $wgInterwikiCache ) ) {
+                       return null;
+               }
+
+               if ( $targetDomain === 'wiki' ) {
+                       $targetDomain = 'wikipedia';
+               }
+
+               // Include the ':' separator in the comparison, otherwise 'enwiki'
+               // would also match the entries belonging to 'enwikibooks'.
+               $keyPrefix = $dbName . ':';
+               $keyPrefixLen = strlen( $keyPrefix );
+
+               foreach ( $wgInterwikiCache as $key => $value ) {
+                       if ( strncmp( $key, $keyPrefix, $keyPrefixLen ) !== 0 ) {
+                               continue;
+                       }
+                       $prefix = substr( $key, $keyPrefixLen );
+
+                       // The url is whatever follows the first space in the value.
+                       $valueParts = explode( ' ', $value, 2 );
+                       if ( !isset( $valueParts[1] ) ) {
+                               continue;
+                       }
+                       $host = parse_url( $valueParts[1], PHP_URL_HOST );
+                       if ( !is_string( $host ) ) {
+                               continue;
+                       }
+
+                       // For a host shaped like <lang>.<domain>.<tld> the project is
+                       // the second label.
+                       $hostParts = explode( '.', $host );
+                       if ( isset( $hostParts[1] ) && $hostParts[1] === $targetDomain ) {
+                               return $prefix;
+                       }
+               }
+               return null;
+       }
+
+       /**
+        * Build the map for every non-private wiki of every site known to the
+        * SiteMatrix and print it with var_export(). Inconsistencies between
+        * the matrix, the interwiki cache and the dblists are reported on STDERR
+        * and the affected pair is skipped.
+        */
+       public function execute() {
+               global $wgConf;
+
+               // Flipped so that db name membership is a cheap isset() check.
+               $all = array_flip( $this->expandDbList( "all" ) );
+
+               $matrix = new SiteMatrix();
+               $sites = $matrix->getSites();
+               $map = [];
+               foreach ( $sites as $baseSite ) {
+                       $wikis = $this->expandDbList( $baseSite );
+                       foreach ( $wikis as $dbName ) {
+                               list( $_, $lang ) = $wgConf->siteFromDB( $dbName );
+                               // NOTE(review): assumes a wiki without a resolvable language
+                               // has no per-language sister sites - confirm.
+                               if ( $lang === null ) {
+                                       continue;
+                               }
+                               foreach ( $sites as $sisterSite ) {
+                                       if ( $baseSite === $sisterSite ) {
+                                               continue;
+                                       }
+                                       if ( !$matrix->exist( $lang, $sisterSite ) ) {
+                                               continue;
+                                       }
+                                       $iwPrefix = $this->findInterwikiPrefix( $dbName, $sisterSite );
+                                       if ( $iwPrefix === null ) {
+                                               fwrite( STDERR, "Matrix reports site at $lang / $sisterSite, but no interwiki prefix found\n" );
+                                               continue;
+                                       }
+                                       $sisterDbName = strtr( $lang, '-', '_' ) . $sisterSite;
+                                       if ( !isset( $all[$sisterDbName] ) ) {
+                                               fwrite( STDERR, "Found prefix ($iwPrefix) from $dbName but no db ($sisterDbName)\n" );
+                                               continue;
+                                       }
+                                       $map[$dbName][$sisterDbName] = $iwPrefix;
+                               }
+                       }
+               }
+
+               // Sort both levels by key so the generated output is stable.
+               foreach ( array_keys( $map ) as $dbName ) {
+                       ksort( $map[$dbName] );
+               }
+               ksort( $map );
+               var_export( $map );
+       }
+}
+
+// Register the class with the maintenance runner so that invoking this
+// file from the command line actually executes it.
+$maintClass = 'DumpCirrusInterwikiSources';
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/319498
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7145b63a97ed6caf142df076d19486752af46426
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikimediaMaintenance
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to