Aude has submitted this change and it was merged.
Change subject: (Bug 47288) Abstraction layer for usage tracking.
......................................................................
(Bug 47288) Abstraction layer for usage tracking.
This introduces the EntityUsageIndex for tracking which client
pages used which entity.
Change-Id: I50dbac5760ec5cb331d3372545f1eac4e53a19bd
---
M lib/WikibaseLib.classes.php
M lib/WikibaseLib.hooks.php
A lib/includes/store/EntityUsageIndex.php
A lib/tests/phpunit/store/EntityUsageIndexTest.php
4 files changed, 648 insertions(+), 0 deletions(-)
Approvals:
Aude: Checked; Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/WikibaseLib.classes.php b/lib/WikibaseLib.classes.php
index 9bd68de..6dd483d 100644
--- a/lib/WikibaseLib.classes.php
+++ b/lib/WikibaseLib.classes.php
@@ -116,6 +116,7 @@
'Wikibase\ChunkAccess' => 'includes/store/ChunkAccess.php',
'Wikibase\EntityLookup' => 'includes/store/EntityLookup.php',
'Wikibase\PropertyLabelResolver' =>
'includes/store/PropertyLabelResolver.php',
+ 'Wikibase\EntityUsageIndex' =>
'includes/store/EntityUsageIndex.php',
'Wikibase\SiteLinkCache' => 'includes/store/SiteLinkCache.php',
'Wikibase\SiteLinkLookup' =>
'includes/store/SiteLinkLookup.php',
'Wikibase\TermIndex' => 'includes/store/TermIndex.php',
diff --git a/lib/WikibaseLib.hooks.php b/lib/WikibaseLib.hooks.php
index 2bb2884..6df4241 100644
--- a/lib/WikibaseLib.hooks.php
+++ b/lib/WikibaseLib.hooks.php
@@ -63,6 +63,7 @@
'store/SiteLinkTable',
'store/WikiPageEntityLookup',
'store/CachingEntityLoader',
+ 'store/EntityUsageIndex',
'store/TermPropertyLabelResolver',
diff --git a/lib/includes/store/EntityUsageIndex.php
b/lib/includes/store/EntityUsageIndex.php
new file mode 100644
index 0000000..85ba5f4
--- /dev/null
+++ b/lib/includes/store/EntityUsageIndex.php
@@ -0,0 +1,183 @@
+<?php
+
+namespace Wikibase;
+
+use Site;
+
+/**
+ * Index for tracking the usage of entities on a specific client wiki.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ *
+ * @licence GNU GPL v2+
+ *
+ *
+ * @author Daniel Kinzler
+ */
+class EntityUsageIndex {
+
+ /**
+ * @param Site $clientSite
+ * @param SiteLinkLookup $siteLinks
+ */
+ public function __construct( Site $clientSite, SiteLinkLookup
$siteLinks ) {
+ $this->clientSite = $clientSite;
+ $this->siteLinks = $siteLinks;
+ }
+
+ /**
+ * Returns the Site of the client wiki this usage index is tracking.
+ *
+ * @since 0.4
+ *
+ * @return Site
+ */
+ public function getClientSite() {
+ return $this->clientSite;
+ }
+
+ /**
+ * Determines which pages use any of the given entities.
+ *
+ * @since 0.4
+ *
+ * @param EntityId[] $entities
+ *
+ * @return string[] list of pages using any of the given entities
+ */
+ public function getEntityUsage( array $entities ) {
+ if ( empty( $entities ) ) {
+ return array();
+ }
+
+ $ids = array_map(
+ function ( EntityId $id ) {
+ return $id->getNumericId();
+ },
+ $entities
+ );
+
+ $rows = $this->siteLinks->getLinks( $ids, array(
$this->clientSite->getGlobalId() ) ) ;
+
+ $pages = array_map(
+ function ( array $row ) {
+ return $row[1]; // page name
+ },
+ $rows
+ );
+
+ $pages = array_unique( $pages );
+ return $pages;
+ }
+
+ /**
+ * Checks which of the given entities are used on the target wiki,
+ * and removes all others.
+ *
+ * @since 0.4
+ *
+ * @param EntityId[] $entities The entities to check
+ * @param string|null $type The entity type to check. This is an
optional hint that may
+ * be used for optimization. If given, all
IDs in the $entities
+ * array must refer to entities of the
given type.
+ *
+ * @return EntityId[] the entities actually used on the target wiki
+ * @throws \MWException if $type is set and one of the ids in $entities
+ *         does not refer to an entity of that type
+ */
+ public function filterUnusedEntities( array $entities, $type = null ) {
+ if ( empty( $entities ) ) {
+ return array();
+ }
+
+ if ( $type !== null && $type !== Item::ENTITY_TYPE ) {
+ return array();
+ }
+
+ $ids = array_map(
+ function ( EntityId $id ) use ( $type ) {
+ if ( $type !== null && $id->getEntityType() !==
$type ) {
+ throw new \MWException( "Optimizing for
$type, encountered ID for " . $id->getEntityType() );
+ }
+
+ return $id->getNumericId();
+ },
+ $entities
+ );
+
+ //todo: pass the type hint to the SiteLinkLookup, to allow for
more efficient queries
+ $rows = $this->siteLinks->getLinks( $ids, array(
$this->clientSite->getGlobalId() ) ) ;
+
+ $used = array_map(
+ function ( array $row ) {
+ return intval($row[2]); // item id
+ },
+ $rows
+ );
+
+ $used = array_flip( $used );
+
+ $filtered = array_filter(
+ $entities,
+ function ( EntityId $id ) use ( $used ) {
+ return array_key_exists( $id->getNumericId(),
$used );
+ }
+ );
+
+ return $filtered;
+
+ }
+
+ /**
+ * Determines which entities are used by any of the given pages.
+ *
+ * The page titles must be strings in the canonical form, as returned
+ * by Title::getPrefixedText() on the target wiki. Note that it is not
+ * reliable to use Title objects locally to represent pages on another
wiki,
+ * since namespaces and normalization rules may differ.
+ *
+ * @since 0.4
+ *
+ * @param string[] $pages The titles of the pages to check.
+ *
+ * @return EntityId[] The entities used, keyed by numeric item ID.
+ */
+ public function getUsedEntities( array $pages ) {
+ if ( empty( $pages ) ) {
+ return array();
+ }
+
+ $entities = array();
+
+ //todo: implement batched lookup in SiteLinkLookup
+ foreach ( $pages as $page ) {
+ $id = $this->siteLinks->getItemIdForLink(
$this->clientSite->getGlobalId(), $page );
+
+ if ( $id !== false ) {
+ //Note: we are using the numeric ID as the key
here to make sure each item only
+ // shows up once. If we had other entity
types too, we'd need to use the
+ // prefixed ID.
+ $entities[$id] = new EntityId(
Item::ENTITY_TYPE, $id );
+ }
+ }
+
+ return $entities;
+ }
+}
diff --git a/lib/tests/phpunit/store/EntityUsageIndexTest.php
b/lib/tests/phpunit/store/EntityUsageIndexTest.php
new file mode 100644
index 0000000..daaa85b
--- /dev/null
+++ b/lib/tests/phpunit/store/EntityUsageIndexTest.php
@@ -0,0 +1,463 @@
+<?php
+
+namespace Wikibase\Test;
+use MediaWikiSite;
+use Site;
+use Wikibase\Item;
+use Wikibase\EntityUsageIndex;
+use Wikibase\EntityId;
+use Wikibase\Property;
+use Wikibase\SiteLink;
+
+/**
+ * Test class for EntityUsageIndex
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ * @ingroup Test
+ *
+ * @licence GNU GPL v2+
+ * @author Daniel Kinzler
+ */
+class EntityUsageIndexTest extends \MediaWikiTestCase {
+
+ /**
+ * @param int $id
+ * @param array $links associative array of site => page.
+ *
+ * @return Item
+ */
+ protected static function newItemFromLinks( $id, array $links ) {
+ $sites = self::getTestSites();
+
+ $item = Item::newEmpty();
+ $item->setId( $id );
+
+ foreach ( $links as $siteId => $page ) {
+ $site = $sites[$siteId];
+ $link = new SiteLink( $site, $page );
+ $item->addSiteLink( $link );
+ }
+
+ return $item;
+ }
+
+ /**
+ * @param Item[] $items
+ * @param Site $site
+ *
+ * @return EntityUsageIndex
+ */
+ protected function newEntityUsageIndex( array $items, Site $site ) {
+ $repo = new MockRepository();
+
+ foreach ( $items as $item ) {
+ $repo->putEntity( $item );
+ }
+
+ $index = new EntityUsageIndex( $site, $repo );
+ return $index;
+ }
+
+ protected static function getTestSites() {
+ static $sites = array();
+
+ if ( !empty( $sites ) ) {
+ return $sites;
+ }
+
+ $ids = array( "foo", "bar" );
+
+ foreach ( $ids as $id ) {
+ $site = new MediaWikiSite();
+ $site->setGlobalId( $id );
+ $sites[$id] = $site;
+ }
+
+ return $sites;
+ }
+
+ protected static function getTestItems() {
+ static $items = array();
+
+ if ( !empty( $items ) ) {
+ return $items;
+ }
+
+ $items[] = self::newItemFromLinks( 1,
+ array(
+ 'foo' => 'Foo',
+ 'bar' => 'Bar',
+ )
+ ) ;
+
+ $items[] = self::newItemFromLinks( 2,
+ array(
+ 'foo' => 'Too',
+ )
+ ) ;
+
+ return $items;
+ }
+
+ public static function provideGetEntityUsage() {
+ $sites = self::getTestSites();
+
+ $fooWiki = $sites['foo'];
+ $barWiki = $sites['bar'];
+
+ $items = self::getTestItems();
+
+ return array(
+ array( // #0
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ ),
+ array( // expectedUsage
+ 'Foo'
+ )
+ ),
+
+ array( // #1
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ ),
+ array( // expectedUsage
+ 'Bar'
+ )
+ ),
+
+ array( // #2
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 2 )
+ ),
+ array( // expectedUsage
+ 'Too'
+ )
+ ),
+
+ array( // #3
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 2 )
+ ),
+ array( // expectedUsage
+ )
+ ),
+
+ array( // #4
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 2 ),
+ ),
+ array( // expectedUsage
+ 'Foo', 'Too'
+ )
+ ),
+
+ array( // #5
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 2 ),
+ ),
+ array( // expectedUsage
+ 'Bar'
+ )
+ ),
+
+ array( // #6
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ ),
+ array( // expectedUsage
+ 'Foo'
+ )
+ ),
+
+ array( // #7
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ ),
+ array( // expectedUsage
+ )
+ ),
+ );
+ }
+
+ /**
+ * @dataProvider provideGetEntityUsage
+ *
+ */
+ public function testGetEntityUsage( array $repoItems,
+ Site $site, $wantedEntities, $expectedUsage ) {
+
+ $index = $this->newEntityUsageIndex( $repoItems, $site );
+ $usage = $index->getEntityUsage( $wantedEntities );
+
+ $this->assertArrayEquals( $expectedUsage, $usage );
+ }
+
+ public static function provideGetUsedEntities() {
+ $sites = self::getTestSites();
+ $fooWiki = $sites['foo'];
+ $barWiki = $sites['bar'];
+
+ $items = self::getTestItems();
+
+ return array(
+ array( // #0
+ $items,
+ $fooWiki,
+ array( // wantedPages
+ 'Foo'
+ ),
+ array( // expectedUsed
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ )
+ ),
+
+ array( // #1
+ $items,
+ $barWiki,
+ array( // wantedPages
+ 'Bar'
+ ),
+ array( // expectedUsed
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ )
+ ),
+
+ array( // #2
+ $items,
+ $fooWiki,
+ array( // wantedPages
+ 'Too'
+ ),
+ array( // expectedUsed
+ new EntityId( Item::ENTITY_TYPE, 2 )
+ )
+ ),
+
+ array( // #3
+ $items,
+ $barWiki,
+ array( // wantedPages
+ 'Xoo'
+ ),
+ array( // expectedUsed
+ )
+ ),
+
+ array( // #4
+ $items,
+ $fooWiki,
+ array( // wantedPages
+ 'Foo', 'Too'
+ ),
+ array( // expectedUsed
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 2 ),
+ )
+ ),
+
+ array( // #5
+ $items,
+ $barWiki,
+ array( // wantedPages
+ 'Bar', 'Tar'
+ ),
+ array( // expectedUsed
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ )
+ ),
+
+ array( // #6
+ $items,
+ $fooWiki,
+ array( // wantedPages
+ 'Foo', 'Foo'
+ ),
+ array( // expectedUsed
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ )
+ ),
+
+ array( // #7
+ $items,
+ $barWiki,
+ array( // wantedPages
+ ),
+ array( // expectedUsed
+ )
+ ),
+ );
+ }
+
+ /**
+ * @dataProvider provideGetUsedEntities
+ *
+ */
+ public function testGetUsedEntities( array $repoItems,
+ Site $site, $wantedPages, $expectedUsed ) {
+
+ $index = $this->newEntityUsageIndex( $repoItems, $site );
+ $used = $index->getUsedEntities( $wantedPages );
+
+ $this->assertArrayEquals( $expectedUsed, $used );
+ }
+
+
+ public static function provideFilterUnusedEntities() {
+ $sites = self::getTestSites();
+ $fooWiki = $sites['foo'];
+ $barWiki = $sites['bar'];
+
+ $items = self::getTestItems();
+
+ return array(
+ array( // #0
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ )
+ ),
+
+ array( // #1
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ new EntityId( Item::ENTITY_TYPE, 1 )
+ )
+ ),
+
+ array( // #2
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 2 )
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ new EntityId( Item::ENTITY_TYPE, 2 )
+ )
+ ),
+
+ array( // #3
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 2 )
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ )
+ ),
+
+ array( // #4
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 2 ),
+ new EntityId( Item::ENTITY_TYPE, 3 ),
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 2 ),
+ )
+ ),
+
+ array( // #5
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 2 ),
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ )
+ ),
+
+ array( // #6
+ $items,
+ $fooWiki,
+ array( // wantedEntities
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ new EntityId( Item::ENTITY_TYPE, 1 ),
+ new EntityId( Item::ENTITY_TYPE, 1 ),
//TODO: do we want to remove dupes here too?!
+ )
+ ),
+
+ array( // #7
+ $items,
+ $barWiki,
+ array( // wantedEntities
+ ),
+ Item::ENTITY_TYPE,
+ array( // expectedUsage
+ )
+ ),
+ );
+ }
+
+ /**
+ * @dataProvider provideFilterUnusedEntities
+ *
+ */
+ public function testFilterUnusedEntities( array $repoItems,
+ Site $site, $wantedEntities, $wantedType, $expectedUsed ) {
+
+ $index = $this->newEntityUsageIndex( $repoItems, $site );
+ $used = $index->filterUnusedEntities( $wantedEntities,
$wantedType );
+
+ $this->assertArrayEquals( $expectedUsed, $used );
+ }
+}
--
To view, visit https://gerrit.wikimedia.org/r/59412
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I50dbac5760ec5cb331d3372545f1eac4e53a19bd
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Jeroen De Dauw <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits