Hoo man has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/370764 )

Change subject: Avoid fetching terms more than once in 
BufferingTermLookup::prefetchTerms
......................................................................

Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms

Currently we often call out to BufferingTermLookup::prefetchTerms
via DispatchingTermBuffer::getTermsOfType with the same arguments
during client page parses (of pages with a lot of Wikibase content
on). This causes a lot of SELECTs on the wb_terms table to happen
(set a break point in TermSqlIndex::fetchTerms to see all of these).

This should reduce the amount of terms table queries in production
by a fair amount (I don't have any numbers, though).

Change-Id: I71687d7f929d0dd99e76f4d667637c1234dec336
---
M lib/includes/Store/BufferingTermLookup.php
M lib/tests/phpunit/Store/BufferingTermLookupTest.php
2 files changed, 80 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/64/370764/1

diff --git a/lib/includes/Store/BufferingTermLookup.php 
b/lib/includes/Store/BufferingTermLookup.php
index ba16986..86ced04 100644
--- a/lib/includes/Store/BufferingTermLookup.php
+++ b/lib/includes/Store/BufferingTermLookup.php
@@ -114,12 +114,22 @@
                        return;
                }
 
-               // We could first check what's already in the buffer, but it's 
hard to determine which
-               // entities are actually "fully covered" by the cached terms. 
Also, our current use case
-               // (the ChangesListInitRows hook) would generally, trigger only 
one call to prefetchTerms,
-               // before any call to getTermsOfType().
+               // Try to detect whether the entities in question have already 
been prefetched, in case we
+               // know the needed term types and language codes.
+               // If they are not/ only partially prefetched or we don't know 
whether our prefetched data on
+               // them is complete, we just resort to fetching them (again).
+               $entityIdsToFetch = [];
+               if ( $termTypes !== null && $languageCodes !== null ) {
+                       $entityIdsToFetch = 
$this->getIncompletelyPrefetchedEntityIds( $entityIds, $termTypes, 
$languageCodes );
+               } else {
+                       $entityIdsToFetch = $entityIds;
+               }
 
-               $entityIdsByType = $this->groupEntityIds( $entityIds );
+               if ( empty( $entityIdsToFetch ) ) {
+                       return;
+               }
+
+               $entityIdsByType = $this->groupEntityIds( $entityIdsToFetch );
                $terms = [];
 
                foreach ( $entityIdsByType as $entityIdGroup ) {
@@ -131,11 +141,53 @@
                $bufferedKeys = $this->setBufferedTermObjects( $terms );
 
                if ( !empty( $languageCodes ) ) {
-                       $this->setUndefinedTerms( $entityIds, $termTypes, 
$languageCodes, $bufferedKeys );
+                       $this->setUndefinedTerms( $entityIdsToFetch, 
$termTypes, $languageCodes, $bufferedKeys );
                }
        }
 
        /**
+        * Get a list of EntityIds for which we don't have all the needed data 
prefetched for.
+        *
+        * @param EntityId[] $entityIds
+        * @param string[] $termTypes
+        * @param string[] $languageCodes
+        *
+        * @return EntityId[]
+        */
+       private function getIncompletelyPrefetchedEntityIds( array $entityIds, 
array $termTypes, array $languageCodes ) {
+               $entityIdsToFetch = [];
+
+               foreach ( $entityIds as $entityId ) {
+                       if ( $this->isIncompletelyPrefetched( $entityId, 
$termTypes, $languageCodes ) ) {
+                               $entityIdsToFetch[] = $entityId;
+                       }
+               }
+
+               return $entityIdsToFetch;
+       }
+
+       /**
+        * Has the term type and language code combination from the given 
entity already been prefeteched?
+        *
+        * @param EntityId $entityId
+        * @param string[] $termTypes
+        * @param string[] $languageCodes
+        *
+        * @return bool
+        */
+       private function isIncompletelyPrefetched( EntityId $entityId, array 
$termTypes, array $languageCodes ) {
+               foreach ( $termTypes as $termType ) {
+                       foreach ( $languageCodes as $lang ) {
+                               if ( $this->getPrefetchedTerm( $entityId, 
$termType, $lang ) === null ) {
+                                       return true;
+                               }
+                       }
+               }
+
+               return false;
+       }
+
+       /**
         * Returns a term that was previously loaded by prefetchTerms.
         *
         * @param EntityId $entityId
diff --git a/lib/tests/phpunit/Store/BufferingTermLookupTest.php 
b/lib/tests/phpunit/Store/BufferingTermLookupTest.php
index a973887..748a47e 100644
--- a/lib/tests/phpunit/Store/BufferingTermLookupTest.php
+++ b/lib/tests/phpunit/Store/BufferingTermLookupTest.php
@@ -110,20 +110,36 @@
                $lookup = new BufferingTermLookup( $termIndex, 10 );
 
                // This should trigger a call to getTermsOfEntities
-               $q116 = new ItemId( 'Q123' );
-               $lookup->prefetchTerms( [ $q116 ], [ 'label' ], [ 'en', 'de' ] 
);
+               $q123 = new ItemId( 'Q123' );
+               $lookup->prefetchTerms( [ $q123 ], [ 'label' ], [ 'en', 'de' ] 
);
 
-               // This should trigger no call to the TermIndex
+               // This shouldn't trigger a call to the TermIndex
                $expected = [ 'de' => 'Wien' ];
-               $this->assertEquals( $expected, $lookup->getLabels( $q116, [ 
'de' ] ) );
+               $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 
'de' ] ) );
 
                // This should trigger a call to getTermsOfEntity
                $expected = [ 'de' => 'Wien', 'en' => 'Vienna', 'fr' => 
'Vienne' ];
-               $this->assertEquals( $expected, $lookup->getLabels( $q116, [ 
'de', 'en', 'fr' ] ) );
+               $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 
'de', 'en', 'fr' ] ) );
 
                // This should trigger no more calls, since all languages are 
in the buffer now.
                $expected = [ 'de' => 'Wien', 'fr' => 'Vienne' ];
-               $this->assertEquals( $expected, $lookup->getLabels( $q116, [ 
'de', 'fr' ] ) );
+               $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 
'de', 'fr' ] ) );
+       }
+
+       public function testGetLabels_onlyPrefetchOnce() {
+               $termIndex = $this->getRestrictedTermIndex( 0, 1 );
+               $lookup = new BufferingTermLookup( $termIndex, 10 );
+
+               // This should trigger a call to getTermsOfEntities
+               $q123 = new ItemId( 'Q123' );
+               $lookup->prefetchTerms( [ $q123 ], [ 'label', 'description' ], 
[ 'en', 'de' ] );
+
+               // The following should not trigger a call to getTermsOfEntity/ 
getTermsOfEntities
+               $lookup->prefetchTerms( [ $q123 ], [ 'label' ], [ 'de' ] );
+               $lookup->prefetchTerms( [ $q123 ], [ 'description' ], [ 'en', 
'de' ] );
+
+               $expected = [ 'de' => 'Wien' ];
+               $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 
'de' ] ) );
        }
 
        public function testGetLabels_buffer() {

-- 
To view, visit https://gerrit.wikimedia.org/r/370764
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I71687d7f929d0dd99e76f4d667637c1234dec336
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Hoo man <h...@online.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to