[MediaWiki-commits] [Gerrit] mediawiki...Wikibase[master]: Avoid fetching terms more than once in BufferingTermLookup::...
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/370764 ) Change subject: Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms .. Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms Currently we often call out to BufferingTermLookup::prefetchTerms via DispatchingTermBuffer::getTermsOfType with the same arguments during client page parses (of pages with a lot of Wikibase content on). This causes a lot of SELECTs on the wb_terms table to happen (set a break point in TermSqlIndex::fetchTerms to see all of these). This should reduce the amount of terms table queries in production by a fair amount (I don't have any numbers, though). Change-Id: I71687d7f929d0dd99e76f4d667637c1234dec336 --- M lib/includes/Store/BufferingTermLookup.php M lib/tests/phpunit/Store/BufferingTermLookupTest.php 2 files changed, 80 insertions(+), 12 deletions(-) Approvals: Daniel Kinzler: Looks good to me, approved WMDE-leszek: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/lib/includes/Store/BufferingTermLookup.php b/lib/includes/Store/BufferingTermLookup.php index ba16986..86ced04 100644 --- a/lib/includes/Store/BufferingTermLookup.php +++ b/lib/includes/Store/BufferingTermLookup.php @@ -114,12 +114,22 @@ return; } - // We could first check what's already in the buffer, but it's hard to determine which - // entities are actually "fully covered" by the cached terms. Also, our current use case - // (the ChangesListInitRows hook) would generally, trigger only one call to prefetchTerms, - // before any call to getTermsOfType(). + // Try to detect whether the entities in question have already been prefetched, in case we + // know the needed term types and language codes. + // If they are not/ only partially prefetched or we don't know whether our prefetched data on + // them is complete, we just resort to fetching them (again). + $entityIdsToFetch = []; + if ( $termTypes !== null && $languageCodes !== null ) { + $entityIdsToFetch = $this->getIncompletelyPrefetchedEntityIds( $entityIds, $termTypes, $languageCodes ); + } else { + $entityIdsToFetch = $entityIds; + } - $entityIdsByType = $this->groupEntityIds( $entityIds ); + if ( empty( $entityIdsToFetch ) ) { + return; + } + + $entityIdsByType = $this->groupEntityIds( $entityIdsToFetch ); $terms = []; foreach ( $entityIdsByType as $entityIdGroup ) { @@ -131,11 +141,53 @@ $bufferedKeys = $this->setBufferedTermObjects( $terms ); if ( !empty( $languageCodes ) ) { - $this->setUndefinedTerms( $entityIds, $termTypes, $languageCodes, $bufferedKeys ); + $this->setUndefinedTerms( $entityIdsToFetch, $termTypes, $languageCodes, $bufferedKeys ); } } /** +* Get a list of EntityIds for which we don't have all the needed data prefetched for. +* +* @param EntityId[] $entityIds +* @param string[] $termTypes +* @param string[] $languageCodes +* +* @return EntityId[] +*/ + private function getIncompletelyPrefetchedEntityIds( array $entityIds, array $termTypes, array $languageCodes ) { + $entityIdsToFetch = []; + + foreach ( $entityIds as $entityId ) { + if ( $this->isIncompletelyPrefetched( $entityId, $termTypes, $languageCodes ) ) { + $entityIdsToFetch[] = $entityId; + } + } + + return $entityIdsToFetch; + } + + /** +* Has the term type and language code combination from the given entity already been prefeteched? +* +* @param EntityId $entityId +* @param string[] $termTypes +* @param string[] $languageCodes +* +* @return bool +*/ + private function isIncompletelyPrefetched( EntityId $entityId, array $termTypes, array $languageCodes ) { + foreach ( $termTypes as $termType ) { + foreach ( $languageCodes as $lang ) { + if ( $this->getPrefetchedTerm( $entityId, $termType, $lang ) === null ) { + return true; + } + } + } + + return false; + } + + /** * Returns a term that was previously loaded by prefetchTerms. * * @param EntityId $
[MediaWiki-commits] [Gerrit] mediawiki...Wikibase[master]: Avoid fetching terms more than once in BufferingTermLookup::...
Hoo man has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/370764 ) Change subject: Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms .. Avoid fetching terms more than once in BufferingTermLookup::prefetchTerms Currently we often call out to BufferingTermLookup::prefetchTerms via DispatchingTermBuffer::getTermsOfType with the same arguments during client page parses (of pages with a lot of Wikibase content on). This causes a lot of SELECTs on the wb_terms table to happen (set a break point in TermSqlIndex::fetchTerms to see all of these). This should reduce the amount of terms table queries in production by a fair amount (I don't have any numbers, though). Change-Id: I71687d7f929d0dd99e76f4d667637c1234dec336 --- M lib/includes/Store/BufferingTermLookup.php M lib/tests/phpunit/Store/BufferingTermLookupTest.php 2 files changed, 80 insertions(+), 12 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/64/370764/1 diff --git a/lib/includes/Store/BufferingTermLookup.php b/lib/includes/Store/BufferingTermLookup.php index ba16986..86ced04 100644 --- a/lib/includes/Store/BufferingTermLookup.php +++ b/lib/includes/Store/BufferingTermLookup.php @@ -114,12 +114,22 @@ return; } - // We could first check what's already in the buffer, but it's hard to determine which - // entities are actually "fully covered" by the cached terms. Also, our current use case - // (the ChangesListInitRows hook) would generally, trigger only one call to prefetchTerms, - // before any call to getTermsOfType(). + // Try to detect whether the entities in question have already been prefetched, in case we + // know the needed term types and language codes. + // If they are not/ only partially prefetched or we don't know whether our prefetched data on + // them is complete, we just resort to fetching them (again). + $entityIdsToFetch = []; + if ( $termTypes !== null && $languageCodes !== null ) { + $entityIdsToFetch = $this->getIncompletelyPrefetchedEntityIds( $entityIds, $termTypes, $languageCodes ); + } else { + $entityIdsToFetch = $entityIds; + } - $entityIdsByType = $this->groupEntityIds( $entityIds ); + if ( empty( $entityIdsToFetch ) ) { + return; + } + + $entityIdsByType = $this->groupEntityIds( $entityIdsToFetch ); $terms = []; foreach ( $entityIdsByType as $entityIdGroup ) { @@ -131,11 +141,53 @@ $bufferedKeys = $this->setBufferedTermObjects( $terms ); if ( !empty( $languageCodes ) ) { - $this->setUndefinedTerms( $entityIds, $termTypes, $languageCodes, $bufferedKeys ); + $this->setUndefinedTerms( $entityIdsToFetch, $termTypes, $languageCodes, $bufferedKeys ); } } /** +* Get a list of EntityIds for which we don't have all the needed data prefetched for. +* +* @param EntityId[] $entityIds +* @param string[] $termTypes +* @param string[] $languageCodes +* +* @return EntityId[] +*/ + private function getIncompletelyPrefetchedEntityIds( array $entityIds, array $termTypes, array $languageCodes ) { + $entityIdsToFetch = []; + + foreach ( $entityIds as $entityId ) { + if ( $this->isIncompletelyPrefetched( $entityId, $termTypes, $languageCodes ) ) { + $entityIdsToFetch[] = $entityId; + } + } + + return $entityIdsToFetch; + } + + /** +* Has the term type and language code combination from the given entity already been prefeteched? +* +* @param EntityId $entityId +* @param string[] $termTypes +* @param string[] $languageCodes +* +* @return bool +*/ + private function isIncompletelyPrefetched( EntityId $entityId, array $termTypes, array $languageCodes ) { + foreach ( $termTypes as $termType ) { + foreach ( $languageCodes as $lang ) { + if ( $this->getPrefetchedTerm( $entityId, $termType, $lang ) === null ) { + return true; + } + } + } + + return false; + } + + /** * Returns a term that was previously loaded by prefetchTerms. * * @param EntityId $entityId diff --git a/lib/tests/phpunit/Store/BufferingT