jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/370764 )
Change subject: Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms ...................................................................... Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms Currently we often call out to BufferingTermLookup::prefetchTerms via DispatchingTermBuffer::getTermsOfType with the same arguments during client page parses (of pages with a lot of Wikibase content on). This causes a lot of SELECTs on the wb_terms table to happen (set a break point in TermSqlIndex::fetchTerms to see all of these). This should reduce the amount of terms table queries in production by a fair amount (I don't have any numbers, though). Change-Id: I71687d7f929d0dd99e76f4d667637c1234dec336 --- M lib/includes/Store/BufferingTermLookup.php M lib/tests/phpunit/Store/BufferingTermLookupTest.php 2 files changed, 80 insertions(+), 12 deletions(-) Approvals: Daniel Kinzler: Looks good to me, approved WMDE-leszek: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/lib/includes/Store/BufferingTermLookup.php b/lib/includes/Store/BufferingTermLookup.php index ba16986..86ced04 100644 --- a/lib/includes/Store/BufferingTermLookup.php +++ b/lib/includes/Store/BufferingTermLookup.php @@ -114,12 +114,22 @@ return; } - // We could first check what's already in the buffer, but it's hard to determine which - // entities are actually "fully covered" by the cached terms. Also, our current use case - // (the ChangesListInitRows hook) would generally, trigger only one call to prefetchTerms, - // before any call to getTermsOfType(). + // Try to detect whether the entities in question have already been prefetched, in case we + // know the needed term types and language codes. + // If they are not/ only partially prefetched or we don't know whether our prefetched data on + // them is complete, we just resort to fetching them (again). + $entityIdsToFetch = []; + if ( $termTypes !== null && $languageCodes !== null ) { + $entityIdsToFetch = $this->getIncompletelyPrefetchedEntityIds( $entityIds, $termTypes, $languageCodes ); + } else { + $entityIdsToFetch = $entityIds; + } - $entityIdsByType = $this->groupEntityIds( $entityIds ); + if ( empty( $entityIdsToFetch ) ) { + return; + } + + $entityIdsByType = $this->groupEntityIds( $entityIdsToFetch ); $terms = []; foreach ( $entityIdsByType as $entityIdGroup ) { @@ -131,11 +141,53 @@ $bufferedKeys = $this->setBufferedTermObjects( $terms ); if ( !empty( $languageCodes ) ) { - $this->setUndefinedTerms( $entityIds, $termTypes, $languageCodes, $bufferedKeys ); + $this->setUndefinedTerms( $entityIdsToFetch, $termTypes, $languageCodes, $bufferedKeys ); } } /** + * Get a list of EntityIds for which we don't have all the needed data prefetched for. + * + * @param EntityId[] $entityIds + * @param string[] $termTypes + * @param string[] $languageCodes + * + * @return EntityId[] + */ + private function getIncompletelyPrefetchedEntityIds( array $entityIds, array $termTypes, array $languageCodes ) { + $entityIdsToFetch = []; + + foreach ( $entityIds as $entityId ) { + if ( $this->isIncompletelyPrefetched( $entityId, $termTypes, $languageCodes ) ) { + $entityIdsToFetch[] = $entityId; + } + } + + return $entityIdsToFetch; + } + + /** + * Has the term type and language code combination from the given entity already been prefeteched? + * + * @param EntityId $entityId + * @param string[] $termTypes + * @param string[] $languageCodes + * + * @return bool + */ + private function isIncompletelyPrefetched( EntityId $entityId, array $termTypes, array $languageCodes ) { + foreach ( $termTypes as $termType ) { + foreach ( $languageCodes as $lang ) { + if ( $this->getPrefetchedTerm( $entityId, $termType, $lang ) === null ) { + return true; + } + } + } + + return false; + } + + /** * Returns a term that was previously loaded by prefetchTerms. * * @param EntityId $entityId diff --git a/lib/tests/phpunit/Store/BufferingTermLookupTest.php b/lib/tests/phpunit/Store/BufferingTermLookupTest.php index a973887..748a47e 100644 --- a/lib/tests/phpunit/Store/BufferingTermLookupTest.php +++ b/lib/tests/phpunit/Store/BufferingTermLookupTest.php @@ -110,20 +110,36 @@ $lookup = new BufferingTermLookup( $termIndex, 10 ); // This should trigger a call to getTermsOfEntities - $q116 = new ItemId( 'Q123' ); - $lookup->prefetchTerms( [ $q116 ], [ 'label' ], [ 'en', 'de' ] ); + $q123 = new ItemId( 'Q123' ); + $lookup->prefetchTerms( [ $q123 ], [ 'label' ], [ 'en', 'de' ] ); - // This should trigger no call to the TermIndex + // This shouldn't trigger a call to the TermIndex $expected = [ 'de' => 'Wien' ]; - $this->assertEquals( $expected, $lookup->getLabels( $q116, [ 'de' ] ) ); + $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 'de' ] ) ); // This should trigger a call to getTermsOfEntity $expected = [ 'de' => 'Wien', 'en' => 'Vienna', 'fr' => 'Vienne' ]; - $this->assertEquals( $expected, $lookup->getLabels( $q116, [ 'de', 'en', 'fr' ] ) ); + $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 'de', 'en', 'fr' ] ) ); // This should trigger no more calls, since all languages are in the buffer now. $expected = [ 'de' => 'Wien', 'fr' => 'Vienne' ]; - $this->assertEquals( $expected, $lookup->getLabels( $q116, [ 'de', 'fr' ] ) ); + $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 'de', 'fr' ] ) ); + } + + public function testGetLabels_onlyPrefetchOnce() { + $termIndex = $this->getRestrictedTermIndex( 0, 1 ); + $lookup = new BufferingTermLookup( $termIndex, 10 ); + + // This should trigger a call to getTermsOfEntities + $q123 = new ItemId( 'Q123' ); + $lookup->prefetchTerms( [ $q123 ], [ 'label', 'description' ], [ 'en', 'de' ] ); + + // The following should not trigger a call to getTermsOfEntity/ getTermsOfEntities + $lookup->prefetchTerms( [ $q123 ], [ 'label' ], [ 'de' ] ); + $lookup->prefetchTerms( [ $q123 ], [ 'description' ], [ 'en', 'de' ] ); + + $expected = [ 'de' => 'Wien' ]; + $this->assertEquals( $expected, $lookup->getLabels( $q123, [ 'de' ] ) ); } public function testGetLabels_buffer() { -- To view, visit https://gerrit.wikimedia.org/r/370764 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I71687d7f929d0dd99e76f4d667637c1234dec336 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Hoo man <h...@online.de> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com> Gerrit-Reviewer: WMDE-leszek <leszek.mani...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits