jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/405013 )

Change subject: Deduplicate entity usages when returning from ParserOutputUsageAccumulator
......................................................................


Deduplicate entity usages when returning from ParserOutputUsageAccumulator

Bug: T178079
Change-Id: Ic101f58f6d6f700cd9da02844b518973d2a891d8
---
M client/autoload.php
M client/includes/Usage/ParserOutputUsageAccumulator.php
A client/includes/Usage/UsageDeduplicator.php
M client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
A client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
5 files changed, 156 insertions(+), 1 deletion(-)

Approvals:
  jenkins-bot: Verified
  Thiemo Kreuz (WMDE): Looks good to me, approved



diff --git a/client/autoload.php b/client/autoload.php
index 91bd32b..223e319 100644
--- a/client/autoload.php
+++ b/client/autoload.php
@@ -112,6 +112,7 @@
        'Wikibase\\Client\\Usage\\SubscriptionManager' => __DIR__ . 
'/includes/Usage/SubscriptionManager.php',
        'Wikibase\\Client\\Usage\\UsageAccumulator' => __DIR__ . 
'/includes/Usage/UsageAccumulator.php',
        'Wikibase\\Client\\Usage\\UsageAspectTransformer' => __DIR__ . 
'/includes/Usage/UsageAspectTransformer.php',
+       'Wikibase\\Client\\Usage\\UsageDeduplicator' => __DIR__ . 
'/includes/Usage/UsageDeduplicator.php',
        'Wikibase\\Client\\Usage\\UsageLookup' => __DIR__ . 
'/includes/Usage/UsageLookup.php',
        'Wikibase\\Client\\Usage\\UsageTracker' => __DIR__ . 
'/includes/Usage/UsageTracker.php',
        'Wikibase\\Client\\Usage\\UsageTrackerException' => __DIR__ . 
'/includes/Usage/UsageTrackerException.php',
diff --git a/client/includes/Usage/ParserOutputUsageAccumulator.php 
b/client/includes/Usage/ParserOutputUsageAccumulator.php
index 819ef59..9d5d579 100644
--- a/client/includes/Usage/ParserOutputUsageAccumulator.php
+++ b/client/includes/Usage/ParserOutputUsageAccumulator.php
@@ -42,7 +42,10 @@
         */
        public function getUsages() {
                $usages = $this->parserOutput->getExtensionData( 
'wikibase-entity-usage' );
-               return $usages ?: [];
+               if ( $usages ) {
+                       return ( new UsageDeduplicator() )->deduplicate( 
$usages );
+               }
+               return [];
        }
 
 }
diff --git a/client/includes/Usage/UsageDeduplicator.php 
b/client/includes/Usage/UsageDeduplicator.php
new file mode 100644
index 0000000..10c5feb
--- /dev/null
+++ b/client/includes/Usage/UsageDeduplicator.php
@@ -0,0 +1,95 @@
+<?php
+
+namespace Wikibase\Client\Usage;
+
+/**
+ * This class de-duplicates entity usages for performance and storage reasons
+ *
+ * @license GPL-2.0+
+ * @author Amir Sarabadani
+ */
+class UsageDeduplicator {
+
+       /**
+        * @param EntityUsage[] $usages
+        * @return EntityUsage[]
+        */
+       public function deduplicate( array $usages ) {
+               $structuredUsages = $this->structureUsages( $usages );
+
+               foreach ( $structuredUsages as $entityId => $usages ) {
+                       $structuredUsages[$entityId] = 
$this->deduplicateUsagesPerEntity( $usages );
+               }
+
+               // Flatten the structured array
+               $return = [];
+               array_walk_recursive(
+                       $structuredUsages,
+                       function( $a ) use ( &$return ) {
+                               /* @var EntityUsage $a */
+                               $return[$a->getIdentityString()] = $a;
+                       }
+               );
+               return $return;
+       }
+
+       /**
+        * @param EntityUsage[] $usages
+        * @return array[]
+        */
+       private function structureUsages( array $usages ) {
+               $structuredUsages = [];
+               foreach ( $usages as $usage ) {
+                       $entityId = $usage->getEntityId();
+                       $structuredUsages[$entityId->getSerialization()][] = 
$usage;
+               }
+
+               return array_map( [ $this, 'structureUsagesPerEntity' ], 
$structuredUsages );
+       }
+
+       /**
+        * @param EntityUsage[] $usages
+        * @return array[]
+        */
+       private function structureUsagesPerEntity( array $usages ) {
+               $structuredUsages = [
+                       EntityUsage::DESCRIPTION_USAGE => [],
+                       EntityUsage::LABEL_USAGE => [],
+               ];
+               foreach ( $usages as $usage ) {
+                       $aspect = $usage->getAspect();
+                       $structuredUsages[$aspect][] = $usage;
+               }
+
+               return $structuredUsages;
+       }
+
+       /**
+        * @param EntityUsage[] $usages
+        * @return EntityUsage[]
+        */
+       private function deduplicateUsagesPerEntity( array $usages ) {
+               $usages[EntityUsage::DESCRIPTION_USAGE] = 
$this->deduplicatePerType(
+                       $usages[EntityUsage::DESCRIPTION_USAGE]
+               );
+               $usages[EntityUsage::LABEL_USAGE] = $this->deduplicatePerType(
+                       $usages[EntityUsage::LABEL_USAGE]
+               );
+               return $usages;
+       }
+
+       /**
+        * @param EntityUsage[] $usages
+        * @return EntityUsage[]
+        */
+       private function deduplicatePerType( array $usages ) {
+               foreach ( $usages as $usage ) {
+                       if ( $usage->getModifier() === null ) {
+                               return [ $usage ];
+                       }
+               }
+
+               return $usages;
+       }
+
+}
diff --git 
a/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php 
b/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
index 6c40387..6283971 100644
--- a/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
+++ b/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
@@ -156,6 +156,8 @@
        private function assertSameUsages( array $expected, array $actual, 
$message = '' ) {
                $expected = $this->getIdentityStrings( $expected );
                $actual = $this->getIdentityStrings( $actual );
+               sort( $expected );
+               sort( $actual );
 
                Assert::assertEquals( $expected, $actual, $message );
        }
diff --git a/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php 
b/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
new file mode 100644
index 0000000..cf2041e
--- /dev/null
+++ b/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
@@ -0,0 +1,54 @@
+<?php
+
+namespace Wikibase\Client\Tests\Usage;
+
+use Wikibase\Client\Usage\EntityUsage;
+use Wikibase\Client\Usage\UsageDeduplicator;
+use Wikibase\DataModel\Entity\ItemId;
+
+/**
+ * @covers Wikibase\Client\Usage\UsageDeduplicator
+ *
+ * @group Wikibase
+ * @group WikibaseClient
+ * @group WikibaseUsageTracking
+ *
+ * @license GPL-2.0+
+ * @author Amir Sarabadani
+ */
+class UsageDeduplicatorTest extends \PHPUnit_Framework_TestCase {
+
+       public function provideDeduplicate() {
+               $q1 = new ItemId( 'Q1' );
+               $q1Label = new EntityUsage( $q1, EntityUsage::LABEL_USAGE );
+               $q1LabelEn = new EntityUsage( $q1, EntityUsage::LABEL_USAGE, 
'en' );
+               $q1All = new EntityUsage( $q1, EntityUsage::ALL_USAGE );
+               $q1Statement = new EntityUsage( $q1, 
EntityUsage::STATEMENT_USAGE, 'P15' );
+
+               $q2 = new ItemId( 'Q2' );
+               $q2Label = new EntityUsage( $q2, EntityUsage::LABEL_USAGE );
+               $q2Description = new EntityUsage( $q2, 
EntityUsage::DESCRIPTION_USAGE );
+               $q2DescriptionFa = new EntityUsage( $q2, 
EntityUsage::DESCRIPTION_USAGE, 'fa' );
+
+               return [
+                       [ [ $q1LabelEn, $q1Label ], [ $q1Label ] ],
+                       [ [ $q1LabelEn ], [ $q1LabelEn ] ],
+                       [ [ $q1LabelEn, $q1Label, $q2Description, $q1All ], [ 
$q1Label, $q1All, $q2Description ] ],
+                       [ [ $q1LabelEn, $q2Label, $q1Statement ], [ $q1LabelEn, 
$q1Statement, $q2Label ] ],
+                       [ [ $q2DescriptionFa, $q2Description, $q1All ], [ 
$q2Description, $q1All ] ],
+               ];
+       }
+
+       /**
+        * @covers \Wikibase\Client\Usage\UsageDeduplicator::deduplicate
+        * @dataProvider provideDeduplicate
+        */
+       public function testDeduplicate( $usages, $output ) {
+               $expected = [];
+               foreach ( $output as $usage ) {
+                       $expected[$usage->getIdentityString()] = $usage;
+               }
+               $this->assertEquals( $expected, ( new UsageDeduplicator() 
)->deduplicate( $usages ) );
+       }
+
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/405013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic101f58f6d6f700cd9da02844b518973d2a891d8
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <[email protected]>
Gerrit-Reviewer: Eranroz <[email protected]>
Gerrit-Reviewer: Hoo man <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Lucas Werkmeister (WMDE) <[email protected]>
Gerrit-Reviewer: Thiemo Kreuz (WMDE) <[email protected]>
Gerrit-Reviewer: WMDE-leszek <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to