jenkins-bot has submitted this change and it was merged. Change subject: (bug 51876) handle populates sites table interwiki ids correctly ......................................................................
(bug 51876) handle populates sites table interwiki ids correctly along with cleaning up the code, splitting it out of Utils... Change-Id: Ie55e2f126ba3c585a3f6719408460a7f3e2221f1 --- M lib/WikibaseLib.classes.php A lib/includes/sites/SiteMatrixParser.php A lib/includes/sites/SitesBuilder.php M lib/maintenance/populateSitesTable.php A lib/tests/phpunit/sites/SiteMatrixParserTest.php A lib/tests/phpunit/sites/SitesBuilderTest.php 6 files changed, 629 insertions(+), 28 deletions(-) Approvals: Daniel Kinzler: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/WikibaseLib.classes.php b/lib/WikibaseLib.classes.php index 09d3bec..6de6322 100644 --- a/lib/WikibaseLib.classes.php +++ b/lib/WikibaseLib.classes.php @@ -132,6 +132,10 @@ 'Wikibase\Lib\Serializers\SnakSerializer' => 'includes/serializers/SnakSerializer.php', 'Wikibase\Lib\Serializers\Unserializer' => 'includes/serializers/Unserializer.php', + // includes/sites + 'SiteMatrixParser' => 'includes/sites/SiteMatrixParser.php', + 'SitesBuilder' => 'includes/sites/SitesBuilder.php', + // includes/store 'Wikibase\ChunkCache' => 'includes/store/ChunkCache.php', 'Wikibase\ChunkAccess' => 'includes/store/ChunkAccess.php', diff --git a/lib/includes/sites/SiteMatrixParser.php b/lib/includes/sites/SiteMatrixParser.php new file mode 100644 index 0000000..df3020b --- /dev/null +++ b/lib/includes/sites/SiteMatrixParser.php @@ -0,0 +1,157 @@ +<?php + +/** + * Translates api sitematrix results json into an array of Site objects + * + * @licence GNU GPL v2+ + * + * @author Katie Filbert < aude.w...@gmail.com > + */ +class SiteMatrixParser { + + /** + * @var string + */ + protected $scriptPath; + + /** + * @var string + */ + protected $articlePath; + + /** + * @var boolean + */ + protected $expandGroup; + + /** + * @var string + */ + protected $stripProtocol; + + /** + * @param string $scriptPath (e.g. '/w/$1') + * @param string $articlePath (e.g. '/wiki/$1') + * @param string $stripProtocol + * @param boolean $expandGroup expands site matrix group codes from wiki to wikipedia + */ + public function __construct( $scriptPath, $articlePath, $stripProtocol, $expandGroup = true ) { + $this->scriptPath = $scriptPath; + $this->articlePath = $articlePath; + $this->stripProtocol = $stripProtocol; + $this->expandGroup = $expandGroup; + } + + /** + * @param string $json + * + * @return Site[] + */ + public function sitesFromJson( $json ) { + $specials = null; + + $data = json_decode( $json, true ); + + if ( !is_array( $data ) || !array_key_exists( 'sitematrix', $data ) ) { + throw new InvalidArgumentException( 'Cannot decode site matrix data.' ); + } + + if ( array_key_exists( 'specials', $data['sitematrix'] ) ) { + $specials = $data['sitematrix']['specials']; + unset( $data['sitematrix']['specials'] ); + } + + if ( array_key_exists( 'count', $data['sitematrix'] ) ) { + unset( $data['sitematrix']['count'] ); + } + + $groups = $data['sitematrix']; + + $sites = array(); + + foreach( $groups as $groupData ) { + $sites = array_merge( + $sites, + $this->getSitesFromLangGroup( $groupData ) + ); + } + + $sites = array_merge( + $sites, + $this->getSpecialSites( $specials ) + ); + + return $sites; + } + + /** + * @param array $specialSites + * + * @return Site[] + */ + protected function getSpecialSites( array $specialSites ) { + $sites = array(); + + foreach( $specialSites as $specialSite ) { + $site = $this->getSiteFromSiteData( $specialSite ); + $siteId= $site->getGlobalId(); + $sites[$siteId] = $this->getSiteFromSiteData( $specialSite ); + } + + return $sites; + } + + /** + * Gets an array of Site objects for all sites of the same language + * subdomain grouping used in the site matrix. + * + * @param array $langGroup + * + * @return Site[] + */ + protected function getSitesFromLangGroup( $langGroup ) { + $sites = array(); + + foreach( $langGroup['site'] as $siteData ) { + if ( !array_key_exists( 'code', $langGroup ) ) { + continue; + } + + $site = $this->getSiteFromSiteData( $siteData, $langGroup['code'], false ); + $site->setLanguageCode( $langGroup['code'] ); + $siteId = $site->getGlobalId(); + $sites[$siteId] = $site; + } + + return $sites; + } + + /** + * @param array $siteData + * @param string $langCode + * + * @return Site + */ + protected function getSiteFromSiteData( $siteData ) { + $site = new MediaWikiSite(); + $site->setGlobalId( $siteData['dbname'] ); + + // @note: expandGroup is specific to wikimedia site matrix sources + $siteGroup = ( $this->expandGroup && $siteData['code'] === 'wiki' ) + ? 'wikipedia' : $siteData['code']; + + $site->setGroup( $siteGroup ); + + $url = $siteData['url']; + + if ( $this->stripProtocol === 'stripProtocol' ) { + $url = preg_replace( '@^https?:@', '', $url ); + } + + $site->setFilePath( $url . $this->scriptPath ); + $site->setPagePath( $url . $this->articlePath ); + + return $site; + } + +} diff --git a/lib/includes/sites/SitesBuilder.php b/lib/includes/sites/SitesBuilder.php new file mode 100644 index 0000000..4857e50 --- /dev/null +++ b/lib/includes/sites/SitesBuilder.php @@ -0,0 +1,91 @@ +<?php + +/** + * Builds the site identifiers table + * + * @since 0.5 + * @note: this should move out of Wikibase + * + * @licence GNU GPL v2+ + * + * @author Katie Filbert < aude.w...@gmail.com > + */ +class SitesBuilder { + + /** + * @var SiteStore + */ + protected $store; + + public function __construct( SiteStore $store ) { + $this->store = $store; + } + + /** + * @param Site[] $sites + * @param string $siteGroup + * @param string $wikiId + */ + public function buildStore( array $sites, $siteGroup, $wikiId ) { + $sites = $this->addInterwikiIdsToGroup( $sites, $siteGroup, $wikiId ); + + $this->store->getSites( "nocache" ); + $this->store->saveSites( $sites ); + } + + /** + * @param Site[] $sites + * @param string $siteGroup + * @param string $wikiId + * + * @return Site[] + */ + public function addInterwikiIdsToGroup( array $sites, $siteGroup, $wikiId ) { + if ( $siteGroup !== null ) { + $sites = $this->addInterwikiIds( $sites, $siteGroup ); + } elseif ( is_string( $wikiId ) ) { + $siteGroup = $this->getSiteGroupFromWikiId( $sites, $wikiId ); + $sites = $this->addInterwikiIds( $sites, $siteGroup ); + } + + return $sites; + } + + /** + * @param Site[] $sites + * @param string $siteGroup + * + * @return Site[] + */ + protected function addInterwikiIds( array $sites, $siteGroup ) { + foreach( $sites as $site ) { + if( $site->getGroup() === $siteGroup ) { + $localId = $site->getLanguageCode(); + + if ( $localId ) { + $site->addNavigationId( $localId ); + $site->addInterwikiId( $localId ); + } + } + } + + return $sites; + } + + /** + * @param Site[] $sites + * @param string $wikiId + * + * @return string + */ + protected function getSiteGroupFromWikiId( $sites, $wikiId ) { + if ( !array_key_exists( $wikiId, $sites ) ) { + return null; + } + + $site = $sites[$wikiId]; + + return $site->getGroup(); + } + +} diff --git a/lib/maintenance/populateSitesTable.php b/lib/maintenance/populateSitesTable.php index 41050e1..080bb82 100644 --- a/lib/maintenance/populateSitesTable.php +++ b/lib/maintenance/populateSitesTable.php @@ -7,31 +7,14 @@ * Maintenance script for populating the Sites table from another wiki that runs the * SiteMatrix extension. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * * @since 0.1 - * @note: this should move into core, together with \Wikibase\Utils::insertDefaultSites - * - * @file - * @ingroup WikibaseLib + * @note: this should move out of Wikibase * * @licence GNU GPL v2+ * @author Daniel Kinzler + * @author Katie Filbert < aude.w...@gmail.com > */ -class PopulateSitesTable extends \Maintenance { +class PopulateSitesTable extends Maintenance { public function __construct() { $this->mDescription = 'Populate the sites table from another wiki that runs the SiteMatrix extension'; @@ -39,24 +22,68 @@ $this->addOption( 'strip-protocols', "Strip http/https from URLs to make them protocol relative." ); $this->addOption( 'load-from', "Full URL to the API of the wiki to fetch the site info from. " . "Default is https://meta.wikimedia.org/w/api.php", false, true ); + $this->addOption( 'script-path', 'Script path to use for wikis in the site matrix. ' + . ' (e.g. "/w/$1")', false, true ); + $this->addOption( 'article-path', 'Article path for wikis in the site matrix. ' + . ' (e.g. "/wiki/$1")', false, true ); + $this->addOption( 'site-group', 'Site group that this wiki is a member of. Used to populate ' + . ' local interwiki identifiers in the site identifiers table. If not set and --wiki' + . ' is set, the script will try to determine which site group the wiki is part of' + . ' and populate interwiki ids for sites in that group.', false, true ); + $this->addOption( 'no-expand-group', 'Do not expand site group codes in site matrix. ' + . ' By default, "wiki" is expanded to "wikipedia".' ); parent::__construct(); } public function execute() { - if ( !defined( 'WBL_VERSION' ) ) { - $this->output( "You need to have WikibaseLib enabled in order to use this maintenance script!\n\n" ); - exit; + $stripProtocols = $this->getOption( 'strip-protocols' ) ? "stripProtocol" : false; + $url = $this->getOption( 'load-from', 'https://meta.wikimedia.org/w/api.php' ); + $scriptPath = $this->getOption( 'script-path', '/w/$1' ); + $articlePath = $this->getOption( 'article-path', '/wiki/$1' ); + $expandGroup = !$this->getOption( 'no-expand-group', false ); + $siteGroup = $this->getOption( 'site-group' ); + $wikiId = $this->getOption( 'wiki' ); + + try { + $json = $this->getSiteMatrixData( $url ); + + $siteMatrixParser = new SiteMatrixParser( $scriptPath, $articlePath, + $stripProtocols, $expandGroup ); + + $sites = $siteMatrixParser->sitesFromJson( $json ); + + $store = SiteSQLStore::newInstance(); + $sitesBuilder = new SitesBuilder( $store ); + $sitesBuilder->buildStore( $sites, $siteGroup, $wikiId ); + + } catch ( MWException $e ) { + $this->output( $e->getMessage() ); } - $stripProtocols = $this->getOption( 'strip-protocols' ) ? "stripProtocol" : false; - $wiki = $this->getOption( 'load-from', 'https://meta.wikimedia.org/w/api.php' ); - - \Wikibase\Utils::insertSitesFrom( $wiki, $stripProtocols ); $this->output( "done.\n" ); } + /** + * @param string $url + * + * @throws MWException + * + * @return string + */ + protected function getSiteMatrixData( $url ) { + $url .= '?action=sitematrix&format=json'; + + //NOTE: the raiseException option needs change Iad3995a6 to be merged, otherwise it is ignored. + $json = Http::get( $url, 'default', array( 'raiseException' => true ) ); + + if ( !$json ) { + throw new MWException( "Got no data from $url" ); + } + + return $json; + } } $maintClass = 'PopulateSitesTable'; -require_once( RUN_MAINTENANCE_IF_MAIN ); +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/lib/tests/phpunit/sites/SiteMatrixParserTest.php b/lib/tests/phpunit/sites/SiteMatrixParserTest.php new file mode 100644 index 0000000..3483efa --- /dev/null +++ b/lib/tests/phpunit/sites/SiteMatrixParserTest.php @@ -0,0 +1,188 @@ +<?php + +/** + * @covers SiteMatrixParser + * + * @since 0.1 + * + * @group Wikibase + * + * @licence GNU GPL v2+ + * @author Katie Filbert < aude.w...@gmail.com > + */ +class SiteMatrixParserTest extends PHPUnit_Framework_TestCase { + + /** + * @dataProvider sitesFromJsonProvider + */ + public function testSitesFromJson( $json, $expected ) { + $siteMatrixParser = new SiteMatrixParser( '/w/$1', '/wiki/$1', false ); + $sites = $siteMatrixParser->sitesFromJson( $json ); + $this->assertEquals( ksort( $expected ), ksort( $sites ) ); + } + + public function sitesFromJsonProvider() { + $json = $this->getSiteMatrixJson(); + $sitesData = $this->getSitesData(); + $sites = $this->getSites( $sitesData ); + + return array( + array( $json, $sites ) + ); + } + + protected function getSiteMatrixJson() { + $sites = array( + array( + 'code' => 'en', + 'name' => 'English', + 'site' => array( + array( + 'url' => 'http://en.wikipedia.org', + 'dbname' => 'enwiki', + 'code' => 'wiki', + 'sitename' => 'Wikipedia' + ), + array( + 'url' => 'http://en.wikivoyage.org', + 'dbname' => 'enwikivoyage', + 'code' => 'wikivoyage', + 'sitename' => 'Wikipedia' + ), + array( + 'url' => 'http://en.wikiquote.org', + 'dbname' => 'enwikiquote', + 'code' => 'wikiquote', + 'sitename' => 'Wikipedia' + ) + ), + ), + array( + 'code' => 'fr', + 'name' => 'français', + 'site' => array( + array( + 'url' => 'http://fr.wikipedia.org', + 'dbname' => 'frwiki', + 'code' => 'wiki', + 'sitename' => 'Wikipedia' + ), + array( + 'url' => 'http://fr.wikivoyage.org', + 'dbname' => 'frwikivoyage', + 'code' => 'wikivoyage', + 'sitename' => 'Wikipedia' + ) + ) + ) + ); + + $specialSites = array(); + + $specialSites[] = array( + 'url' => 'http://commons.wikimedia.org', + 'dbname' => 'commonswiki', + 'code' => 'commons' + ); + + $specialSites[] = array( + 'url' => 'http://www.wikidata.org', + 'dbname' => 'wikidatawiki', + 'code' => 'wikidata' + ); + + $specials = array( + 'specials' => $specialSites + ); + + $siteMatrix = array_merge( + array( 'count' => 879 ), + $sites, + $specials + ); + + $data = array( + 'sitematrix' => $siteMatrix + ); + + return json_encode( $data ); + } + + protected function getSitesData() { + $sitesData = array( + array( + 'id' => 'enwiki', + 'group' => 'wikipedia', + 'lang' => 'en', + 'scriptpath' => 'http://en.wikipedia.org/w/$1', + 'articlepath' => 'http://en.wikipedia.org/wiki/$1' + ), + array( + 'id' => 'frwiki', + 'group' => 'wikipedia', + 'lang' => 'fr', + 'scriptpath' => 'http://fr.wikipedia.org/w/$1', + 'articlepath' => 'http://fr.wikipedia.org/wiki/$1' + ), + array( + 'id' => 'enwikivoyage', + 'group' => 'wikivoyage', + 'lang' => 'en', + 'scriptpath' => 'http://en.wikivoyage.org/w/$1', + 'articlepath' => 'http://en.wikivoyage.org/wiki/$1' + ), + array( + 'id' => 'frwikivoyage', + 'group' => 'wikivoyage', + 'lang' => 'fr', + 'scriptpath' => 'http://fr.wikivoyage.org/w/$1', + 'articlepath' => 'http://fr.wikivoyage.org/wiki/$1' + ), + array( + 'id' => 'enwikiquote', + 'group' => 'wikiquote', + 'lang' => 'en', + 'scriptpath' => 'http://en.wikiquote.org/w/$1', + 'articlepath' => 'http://en.wikiquote.org/wiki/$1' + ), + array( + 'id' => 'commonswiki', + 'group' => 'commons', + 'scriptpath' => 'http://www.commons.org/w/$1', + 'articlepath' => 'http://www.commons.org/wiki/$1' + ), + array( + 'id' => 'wikidatawiki', + 'group' => 'wikidata', + 'scriptpath' => 'http://www.wikidata.org/w/$1', + 'articlepath' => 'http://www.wikidata.org/wiki/$1' + ) + ); + + return $sitesData; + } + + protected function getSites( $sitesData ) { + $sites = array(); + + foreach( $sitesData as $siteData ) { + $siteId = $siteData['id']; + + $site = new MediaWikiSite(); + $site->setGlobalId( $siteId ); + $site->setGroup( $siteData['group'] ); + + if( array_key_exists( 'lang', $siteData ) ) { + $site->setLanguageCode( $siteData['lang'] ); + } + + $site->setFilePath( $siteData['scriptpath'] ); + $site->setPagePath( $siteData['articlepath'] ); + + $sites[$siteId] = $site; + } + + return $sites; + } + +} diff --git a/lib/tests/phpunit/sites/SitesBuilderTest.php b/lib/tests/phpunit/sites/SitesBuilderTest.php new file mode 100644 index 0000000..60b2fe8 --- /dev/null +++ b/lib/tests/phpunit/sites/SitesBuilderTest.php @@ -0,0 +1,134 @@ +<?php + +use Wikibase\Test\MockSiteStore; + +/** + * @covers SitesBuilder + * + * @since 0.1 + * + * @group Wikibase + * + * @licence GNU GPL v2+ + * @author Katie Filbert < aude.w...@gmail.com > + */ +class SitesBuilderTest extends PHPUnit_Framework_TestCase { + + /** + * @dataProvider sitesProvider + */ + public function testBuildSites( $sites, $group, $wikiId, $expected ) { + $store = new MockSiteStore(); + + $sitesBuilder = new SitesBuilder( $store ); + $sitesBuilder->buildStore( $sites, $group, $wikiId ); + + $expectedSiteList = new SiteList( $expected ); + + $this->assertEquals( $expectedSiteList, $store->getSites() ); + } + + /** + * @dataProvider sitesProvider + */ + public function testAddInterwikiIdsToGroup( $sites, $group, $wikiId, $expected ) { + $store = new MockSiteStore(); + + $sitesBuilder = new SitesBuilder( $store ); + $sites = $sitesBuilder->addInterwikiIdsToGroup( $sites, $group, $wikiId ); + + $this->assertEquals( $expected, $sites ); + } + + public function sitesProvider() { + $sitesData = $this->getSitesData(); + $sites = $this->getSites( $sitesData ); + + $groupData = array( + 'enwikivoyage' => 'en', + 'frwikivoyage' => 'fr' + ); + + $expectedSites = $sites; + + foreach( $expectedSites as $site ) { + $siteId = $site->getGlobalId(); + + if( array_key_exists( $siteId, $groupData ) ) { + $site->addInterwikiId( $groupData[$siteId] ); + $site->addNavigationId( $groupData[$siteId] ); + } + } + + return array( + array( $sites, 'wikivoyage', null, $expectedSites ), + array( $sites, 'wikidata', null, $sites ), + array( $sites, null, 'enwikivoyage', $expectedSites ) + ); + } + + protected function getSitesData() { + $sitesData = array( + array( + 'id' => 'enwiki', + 'group' => 'wikipedia', + 'lang' => 'en' + ), + array( + 'id' => 'dewiki', + 'group' => 'wikipedia', + 'lang' => 'de' + ), + array( + 'id' => 'enwikivoyage', + 'group' => 'wikivoyage', + 'lang' => 'en' + ), + array( + 'id' => 'frwikivoyage', + 'group' => 'wikivoyage', + 'lang' => 'fr' + ), + array( + 'id' => 'enwikiquote', + 'group' => 'wikiquote', + 'lang' => 'en' + ), + array( + 'id' => 'commonswiki', + 'group' => 'commons' + ), + array( + 'id' => 'wikidatawiki', + 'group' => 'wikidata' + ) + ); + + return $sitesData; + } + + protected function getSites( $sitesData ) { + $sites = array(); + + foreach( $sitesData as $siteData ) { + $siteId = $siteData['id']; + + $site = new MediaWikiSite(); + $site->setGlobalId( $siteId ); + $site->setGroup( $siteData['group'] ); + + if( array_key_exists( 'lang', $siteData ) ) { + $site->setLanguageCode( $siteData['lang'] ); + } + + if( array_key_exists( 'interwiki', $siteData ) ) { + $site->addInterwikiId( $siteData['interwiki'] ); + $site->addNavigationId( $siteData['interwiki'] ); + } + + $sites[$siteId] = $site; + } + + return $sites; + } +} -- To view, visit https://gerrit.wikimedia.org/r/84520 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ie55e2f126ba3c585a3f6719408460a7f3e2221f1 Gerrit-PatchSet: 11 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits