Aude has submitted this change and it was merged.

Change subject: (Bug 47288) Abstraction layer for usage tracking.
......................................................................


(Bug 47288) Abstraction layer for usage tracking.

This introduces the EntityUsageIndex for tracking which client
pages use which entity.

Change-Id: I50dbac5760ec5cb331d3372545f1eac4e53a19bd
---
M lib/WikibaseLib.classes.php
M lib/WikibaseLib.hooks.php
A lib/includes/store/EntityUsageIndex.php
A lib/tests/phpunit/store/EntityUsageIndexTest.php
4 files changed, 648 insertions(+), 0 deletions(-)

Approvals:
  Aude: Checked; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/WikibaseLib.classes.php b/lib/WikibaseLib.classes.php
index 9bd68de..6dd483d 100644
--- a/lib/WikibaseLib.classes.php
+++ b/lib/WikibaseLib.classes.php
@@ -116,6 +116,7 @@
                'Wikibase\ChunkAccess' => 'includes/store/ChunkAccess.php',
                'Wikibase\EntityLookup' => 'includes/store/EntityLookup.php',
                'Wikibase\PropertyLabelResolver' => 
'includes/store/PropertyLabelResolver.php',
+               'Wikibase\EntityUsageIndex' => 
'includes/store/EntityUsageIndex.php',
                'Wikibase\SiteLinkCache' => 'includes/store/SiteLinkCache.php',
                'Wikibase\SiteLinkLookup' => 
'includes/store/SiteLinkLookup.php',
                'Wikibase\TermIndex' => 'includes/store/TermIndex.php',
diff --git a/lib/WikibaseLib.hooks.php b/lib/WikibaseLib.hooks.php
index 2bb2884..6df4241 100644
--- a/lib/WikibaseLib.hooks.php
+++ b/lib/WikibaseLib.hooks.php
@@ -63,6 +63,7 @@
                        'store/SiteLinkTable',
                        'store/WikiPageEntityLookup',
                        'store/CachingEntityLoader',
+                       'store/EntityUsageIndex',
 
                        'store/TermPropertyLabelResolver',
 
diff --git a/lib/includes/store/EntityUsageIndex.php 
b/lib/includes/store/EntityUsageIndex.php
new file mode 100644
index 0000000..85ba5f4
--- /dev/null
+++ b/lib/includes/store/EntityUsageIndex.php
@@ -0,0 +1,183 @@
+<?php
+
+namespace Wikibase;
+
+use Site;
+
+/**
+ * Index for tracking the usage of entities on a specific client wiki.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ *
+ * @licence GNU GPL v2+
+ *
+ *
+ * @author Daniel Kinzler
+ */
+class EntityUsageIndex {
+
+       /**
+        * The client wiki whose entity usage this index reports on.
+        *
+        * @var Site
+        */
+       protected $clientSite;
+
+       /**
+        * Lookup service providing the links between items and client pages.
+        *
+        * @var SiteLinkLookup
+        */
+       protected $siteLinks;
+
+       /**
+        * @param Site           $clientSite The client wiki to track usage for
+        * @param SiteLinkLookup $siteLinks  The lookup used to resolve item/page links
+        */
+       public function __construct( Site $clientSite, SiteLinkLookup $siteLinks ) {
+               $this->clientSite = $clientSite;
+               $this->siteLinks = $siteLinks;
+       }
+
+       /**
+        * Returns the Site of the client wiki this usage index is tracking.
+        *
+        * @since    0.4
+        *
+        * @return Site
+        */
+       public function getClientSite() {
+               return $this->clientSite;
+       }
+
+       /**
+        * Determines which pages use any of the given entities.
+        *
+        * Usage is derived from the site link lookup, which is queried by numeric
+        * item ID. IDs of any other entity type are therefore ignored, so that
+        * they cannot be confused with an item that has the same numeric ID.
+        *
+        * @since    0.4
+        *
+        * @param EntityId[] $entities
+        *
+        * @return string[] list of pages using any of the given entities, without
+        *                  duplicates and with consecutive numeric keys
+        */
+       public function getEntityUsage( array $entities ) {
+               $ids = $this->getNumericItemIds( $entities );
+
+               // Never query with an empty ID list: that would not restrict the lookup by item.
+               if ( empty( $ids ) ) {
+                       return array();
+               }
+
+               $rows = $this->siteLinks->getLinks( $ids, array( $this->clientSite->getGlobalId() ) );
+
+               $pages = array_map(
+                       function ( array $row ) {
+                               return $row[1]; // page name
+                       },
+                       $rows
+               );
+
+               // array_unique() preserves keys; reindex so callers get a plain list.
+               return array_values( array_unique( $pages ) );
+       }
+
+       /**
+        * Checks which of the given entities are used on the target wiki,
+        * and removes all others.
+        *
+        * The keys of the $entities array are preserved in the result, and
+        * duplicate IDs in the input are not collapsed.
+        *
+        * @since    0.4
+        *
+        * @param EntityId[]  $entities The entities to check
+        * @param string|null $type     The entity type to check. This is an optional hint that may
+        *                              be used for optimization. If given, all IDs in the $entities
+        *                              array must refer to entities of the given type.
+        *
+        * @return EntityId[] the entities actually used on the target wiki
+        * @throws \MWException if $type is the item type and one of the ids in $entities
+        *                      refers to an entity of a different type. For any other
+        *                      $type an empty array is returned without checking the ids.
+        */
+       public function filterUnusedEntities( array $entities, $type = null ) {
+               if ( empty( $entities ) ) {
+                       return array();
+               }
+
+               // Usage is looked up via site links by item ID, so no other type can match.
+               if ( $type !== null && $type !== Item::ENTITY_TYPE ) {
+                       return array();
+               }
+
+               if ( $type !== null ) {
+                       foreach ( $entities as $id ) {
+                               if ( $id->getEntityType() !== $type ) {
+                                       throw new \MWException( "Optimizing for $type, encountered ID for " . $id->getEntityType() );
+                               }
+                       }
+               }
+
+               $ids = $this->getNumericItemIds( $entities );
+
+               // Without a type hint the input may contain no items at all.
+               if ( empty( $ids ) ) {
+                       return array();
+               }
+
+               //todo: pass the type hint to the SiteLinksLookup, to allow for more efficient queries
+               $rows = $this->siteLinks->getLinks( $ids, array( $this->clientSite->getGlobalId() ) );
+
+               $used = array_map(
+                       function ( array $row ) {
+                               return intval( $row[2] ); // item id
+                       },
+                       $rows
+               );
+
+               // Flip so that the numeric item IDs become keys for fast membership checks.
+               $used = array_flip( $used );
+
+               $filtered = array_filter(
+                       $entities,
+                       function ( EntityId $id ) use ( $used ) {
+                               // The numeric ID alone is ambiguous across entity types, so check the type too.
+                               return $id->getEntityType() === Item::ENTITY_TYPE
+                                       && array_key_exists( $id->getNumericId(), $used );
+                       }
+               );
+
+               return $filtered;
+       }
+
+       /**
+        * Determines which entities are used by any of the given pages.
+        *
+        * The page titles must be strings in the canonical form, as returned
+        * by Title::getPrefixedText() on the target wiki. Note that it is not
+        * reliable to use Title objects locally to represent pages on another wiki,
+        * since namespaces and normalization rules may differ.
+        *
+        * @since 0.4
+        *
+        * @param string[] $pages The titles of the pages to check.
+        *
+        * @return EntityId[] The entities used, keyed by their numeric ID.
+        */
+       public function getUsedEntities( array $pages ) {
+               if ( empty( $pages ) ) {
+                       return array();
+               }
+
+               $siteId = $this->clientSite->getGlobalId();
+               $entities = array();
+
+               //todo: implement batched lookup in SiteLinkLookup
+               foreach ( $pages as $page ) {
+                       $id = $this->siteLinks->getItemIdForLink( $siteId, $page );
+
+                       if ( $id !== false ) {
+                               //Note: we are using the numeric ID as the key here to make sure each item only
+                               //      shows up once. If we had other entity types too, we'd need to use the
+                               //      prefixed ID.
+                               $entities[$id] = new EntityId( Item::ENTITY_TYPE, $id );
+                       }
+               }
+
+               return $entities;
+       }
+
+       /**
+        * Extracts the numeric IDs of all item IDs in the given list, skipping
+        * IDs of any other entity type.
+        *
+        * @param EntityId[] $entities
+        *
+        * @return int[] numeric item IDs, with consecutive numeric keys
+        */
+       private function getNumericItemIds( array $entities ) {
+               $items = array_filter(
+                       $entities,
+                       function ( EntityId $id ) {
+                               return $id->getEntityType() === Item::ENTITY_TYPE;
+                       }
+               );
+
+               $ids = array_map(
+                       function ( EntityId $id ) {
+                               return $id->getNumericId();
+                       },
+                       $items
+               );
+
+               return array_values( $ids );
+       }
+}
diff --git a/lib/tests/phpunit/store/EntityUsageIndexTest.php 
b/lib/tests/phpunit/store/EntityUsageIndexTest.php
new file mode 100644
index 0000000..daaa85b
--- /dev/null
+++ b/lib/tests/phpunit/store/EntityUsageIndexTest.php
@@ -0,0 +1,463 @@
+<?php
+
+namespace Wikibase\Test;
+use MediaWikiSite;
+use Site;
+use Wikibase\Item;
+use Wikibase\EntityUsageIndex;
+use Wikibase\EntityId;
+use Wikibase\Property;
+use Wikibase\SiteLink;
+
+/**
+ * Test class for EntityUsageIndex
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ * @ingroup Test
+ *
+ * @licence GNU GPL v2+
+ * @author Daniel Kinzler
+ */
+class EntityUsageIndexTest extends \MediaWikiTestCase {
+
+       /**
+        * Shorthand for building the ID of the item with the given numeric ID.
+        *
+        * @param int $numericId
+        *
+        * @return EntityId
+        */
+       private static function itemId( $numericId ) {
+               return new EntityId( Item::ENTITY_TYPE, $numericId );
+       }
+
+       /**
+        * Builds an item with the given ID and one site link per entry in $links.
+        *
+        * @param int $id
+        * @param array $links associative array of site => page.
+        *
+        * @return Item
+        */
+       protected static function newItemFromLinks( $id, array $links ) {
+               $knownSites = self::getTestSites();
+
+               $item = Item::newEmpty();
+               $item->setId( $id );
+
+               foreach ( $links as $siteId => $page ) {
+                       $item->addSiteLink( new SiteLink( $knownSites[$siteId], $page ) );
+               }
+
+               return $item;
+       }
+
+       /**
+        * Creates an index for $site on top of a MockRepository holding $items.
+        *
+        * @param Item[]      $items
+        * @param Site        $site
+        *
+        * @return EntityUsageIndex
+        */
+       protected function newEntityUsageIndex( array $items, Site $site ) {
+               $repo = new MockRepository();
+
+               foreach ( $items as $item ) {
+                       $repo->putEntity( $item );
+               }
+
+               return new EntityUsageIndex( $site, $repo );
+       }
+
+       /**
+        * Returns the test sites "foo" and "bar", keyed by global ID.
+        * The sites are created on first use and reused afterwards.
+        *
+        * @return Site[]
+        */
+       protected static function getTestSites() {
+               static $sites = array();
+
+               if ( empty( $sites ) ) {
+                       foreach ( array( "foo", "bar" ) as $id ) {
+                               $site = new MediaWikiSite();
+                               $site->setGlobalId( $id );
+                               $sites[$id] = $site;
+                       }
+               }
+
+               return $sites;
+       }
+
+       /**
+        * Returns the test items: item 1 is linked from both test sites,
+        * item 2 only from "foo". The items are created on first use and
+        * reused afterwards.
+        *
+        * @return Item[]
+        */
+       protected static function getTestItems() {
+               static $items = array();
+
+               if ( empty( $items ) ) {
+                       $items[] = self::newItemFromLinks( 1, array( 'foo' => 'Foo', 'bar' => 'Bar' ) );
+                       $items[] = self::newItemFromLinks( 2, array( 'foo' => 'Too' ) );
+               }
+
+               return $items;
+       }
+
+       /**
+        * Cases are: repo items, client site, wanted entities, expected pages.
+        */
+       public static function provideGetEntityUsage() {
+               $sites = self::getTestSites();
+               $fooWiki = $sites['foo'];
+               $barWiki = $sites['bar'];
+
+               $items = self::getTestItems();
+
+               return array(
+                       // #0
+                       array( $items, $fooWiki, array( self::itemId( 1 ) ), array( 'Foo' ) ),
+                       // #1
+                       array( $items, $barWiki, array( self::itemId( 1 ) ), array( 'Bar' ) ),
+                       // #2
+                       array( $items, $fooWiki, array( self::itemId( 2 ) ), array( 'Too' ) ),
+                       // #3: item 2 has no link on the bar wiki
+                       array( $items, $barWiki, array( self::itemId( 2 ) ), array() ),
+                       // #4
+                       array( $items, $fooWiki, array( self::itemId( 1 ), self::itemId( 2 ) ), array( 'Foo', 'Too' ) ),
+                       // #5
+                       array( $items, $barWiki, array( self::itemId( 1 ), self::itemId( 2 ) ), array( 'Bar' ) ),
+                       // #6: duplicate IDs yield each page only once
+                       array( $items, $fooWiki, array( self::itemId( 1 ), self::itemId( 1 ) ), array( 'Foo' ) ),
+                       // #7: nothing wanted, nothing found
+                       array( $items, $barWiki, array(), array() ),
+               );
+       }
+
+       /**
+        * @dataProvider provideGetEntityUsage
+        *
+        */
+       public function testGetEntityUsage( array $repoItems,
+               Site $site, $wantedEntities, $expectedUsage ) {
+
+               $index = $this->newEntityUsageIndex( $repoItems, $site );
+
+               $this->assertArrayEquals( $expectedUsage, $index->getEntityUsage( $wantedEntities ) );
+       }
+
+       /**
+        * Cases are: repo items, client site, wanted pages, expected entities.
+        */
+       public static function provideGetUsedEntities() {
+               $sites = self::getTestSites();
+               $fooWiki = $sites['foo'];
+               $barWiki = $sites['bar'];
+
+               $items = self::getTestItems();
+
+               return array(
+                       // #0
+                       array( $items, $fooWiki, array( 'Foo' ), array( self::itemId( 1 ) ) ),
+                       // #1
+                       array( $items, $barWiki, array( 'Bar' ), array( self::itemId( 1 ) ) ),
+                       // #2
+                       array( $items, $fooWiki, array( 'Too' ), array( self::itemId( 2 ) ) ),
+                       // #3: unknown page
+                       array( $items, $barWiki, array( 'Xoo' ), array() ),
+                       // #4
+                       array( $items, $fooWiki, array( 'Foo', 'Too' ), array( self::itemId( 1 ), self::itemId( 2 ) ) ),
+                       // #5: one known and one unknown page
+                       array( $items, $barWiki, array( 'Bar', 'Tar' ), array( self::itemId( 1 ) ) ),
+                       // #6: duplicate pages yield each item only once
+                       array( $items, $fooWiki, array( 'Foo', 'Foo' ), array( self::itemId( 1 ) ) ),
+                       // #7: nothing wanted, nothing found
+                       array( $items, $barWiki, array(), array() ),
+               );
+       }
+
+       /**
+        * @dataProvider provideGetUsedEntities
+        *
+        */
+       public function testGetUsedEntities( array $repoItems,
+               Site $site, $wantedPages, $expectedUsed ) {
+
+               $index = $this->newEntityUsageIndex( $repoItems, $site );
+
+               $this->assertArrayEquals( $expectedUsed, $index->getUsedEntities( $wantedPages ) );
+       }
+
+       /**
+        * Cases are: repo items, client site, wanted entities, type hint, expected entities.
+        */
+       public static function provideFilterUnusedEntities() {
+               $sites = self::getTestSites();
+               $fooWiki = $sites['foo'];
+               $barWiki = $sites['bar'];
+
+               $items = self::getTestItems();
+               $type = Item::ENTITY_TYPE;
+
+               return array(
+                       // #0
+                       array( $items, $fooWiki, array( self::itemId( 1 ) ), $type, array( self::itemId( 1 ) ) ),
+                       // #1
+                       array( $items, $barWiki, array( self::itemId( 1 ) ), $type, array( self::itemId( 1 ) ) ),
+                       // #2
+                       array( $items, $fooWiki, array( self::itemId( 2 ) ), $type, array( self::itemId( 2 ) ) ),
+                       // #3: item 2 has no link on the bar wiki
+                       array( $items, $barWiki, array( self::itemId( 2 ) ), $type, array() ),
+                       // #4: item 3 does not exist
+                       array(
+                               $items,
+                               $fooWiki,
+                               array( self::itemId( 1 ), self::itemId( 2 ), self::itemId( 3 ) ),
+                               $type,
+                               array( self::itemId( 1 ), self::itemId( 2 ) )
+                       ),
+                       // #5
+                       array(
+                               $items,
+                               $barWiki,
+                               array( self::itemId( 1 ), self::itemId( 2 ) ),
+                               $type,
+                               array( self::itemId( 1 ) )
+                       ),
+                       // #6
+                       array(
+                               $items,
+                               $fooWiki,
+                               array( self::itemId( 1 ), self::itemId( 1 ) ),
+                               $type,
+                               //TODO: do we want to remove dupes here too?!
+                               array( self::itemId( 1 ), self::itemId( 1 ) )
+                       ),
+                       // #7: nothing wanted, nothing found
+                       array( $items, $barWiki, array(), $type, array() ),
+               );
+       }
+
+       /**
+        * @dataProvider provideFilterUnusedEntities
+        *
+        */
+       public function testFilterUnusedEntities( array $repoItems,
+               Site $site, $wantedEntities, $wantedType, $expectedUsed ) {
+
+               $index = $this->newEntityUsageIndex( $repoItems, $site );
+               $used = $index->filterUnusedEntities( $wantedEntities, $wantedType );
+
+               $this->assertArrayEquals( $expectedUsed, $used );
+       }
+
+       /**
+        * An ID that contradicts the item type hint must be rejected.
+        *
+        * @expectedException \MWException
+        */
+       public function testFilterUnusedEntitiesTypeMismatch() {
+               $sites = self::getTestSites();
+               $index = $this->newEntityUsageIndex( self::getTestItems(), $sites['foo'] );
+
+               $index->filterUnusedEntities(
+                       array( new EntityId( Property::ENTITY_TYPE, 1 ) ),
+                       Item::ENTITY_TYPE
+               );
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/59412
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I50dbac5760ec5cb331d3372545f1eac4e53a19bd
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Jeroen De Dauw <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to