jenkins-bot has submitted this change and it was merged.
Change subject: Batched term lookups for RC lists
......................................................................
Batched term lookups for RC lists
This hooks into RecentChanges list to pre-fetch the labels
of any entity pages that will be listed. This should make the
label lookup performed by the LinkBegin hook more efficient.
Bug: T74310
Change-Id: I1d28e4c4148e14e9e33ca6829362f79fdad33d03
---
M lib/includes/store/BufferingTermLookup.php
M lib/includes/store/LabelLookup.php
M repo/Wikibase.php
A repo/includes/Hook/LabelPrefetchHookHandlers.php
M repo/includes/Hook/LinkBeginHookHandler.php
M repo/includes/WikibaseRepo.php
A repo/tests/phpunit/includes/Hook/LabelPrefetchHookHandlersTest.php
M repo/tests/phpunit/includes/WikibaseRepoTest.php
8 files changed, 382 insertions(+), 7 deletions(-)
Approvals:
JanZerebecki: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/includes/store/BufferingTermLookup.php
b/lib/includes/store/BufferingTermLookup.php
index abd3160..d88165e 100644
--- a/lib/includes/store/BufferingTermLookup.php
+++ b/lib/includes/store/BufferingTermLookup.php
@@ -126,7 +126,15 @@
// (the ChangesListInitRows hook) would generally, trigger only
one call to prefetchTerms,
// before any call to getTermsOfType().
- $terms = $this->termIndex->getTermsOfEntities( $entityIds,
null, $termTypes, $languageCodes );
+ $entityIdsByType = $this->groupEntityIds( $entityIds );
+ $terms = array();
+
+ foreach ( $entityIdsByType as $entityType => $entityIdGroup ) {
+ $terms = array_merge(
+ $terms,
+ $this->termIndex->getTermsOfEntities(
$entityIdGroup, $entityType, $termTypes, $languageCodes )
+ );
+ }
$bufferedKeys = $this->setBufferedTermObjects( $terms );
if ( !empty( $languageCodes ) ) {
@@ -224,4 +232,21 @@
return array_filter( $terms, 'is_string' );
}
+ /**
+ * @param EntityId[] $entityIds
+ *
+ * @return EntityId[][]
+ */
+ private function groupEntityIds( $entityIds ) {
+ $entityIdsByType = array();
+
+ foreach ( $entityIds as $id ) {
+ $type = $id->getEntityType();
+ $key = $id->getSerialization();
+
+ $entityIdsByType[$type][$key] = $id;
+ }
+
+ return $entityIdsByType;
+ }
}
diff --git a/lib/includes/store/LabelLookup.php
b/lib/includes/store/LabelLookup.php
index 6ff88ce..1190b93 100644
--- a/lib/includes/store/LabelLookup.php
+++ b/lib/includes/store/LabelLookup.php
@@ -8,6 +8,9 @@
/**
* @since 0.5
*
+ * @todo: add a method for getting a description, or make this interface
neutral
+ * so we can use one instance for labels and one for descriptions.
+ *
* @licence GNU GPL v2+
* @author Katie Filbert < [email protected] >
*/
diff --git a/repo/Wikibase.php b/repo/Wikibase.php
index 9be6173..f9c7cbe 100644
--- a/repo/Wikibase.php
+++ b/repo/Wikibase.php
@@ -177,6 +177,7 @@
$wgHooks['ArticleUndelete'][]
= 'Wikibase\RepoHooks::onArticleUndelete';
$wgHooks['GetPreferences'][]
= 'Wikibase\RepoHooks::onGetPreferences';
$wgHooks['LinkBegin'][]
= 'Wikibase\Repo\Hook\LinkBeginHookHandler::onLinkBegin';
+ $wgHooks['ChangesListInitRows'][]
= 'Wikibase\Repo\Hook\LabelPrefetchHookHandlers::onChangesListInitRows';
$wgHooks['OutputPageBodyAttributes'][] =
'Wikibase\RepoHooks::onOutputPageBodyAttributes';
//FIXME: handle other types of entities with autocomments too!
$wgHooks['FormatAutocomments'][]
= array( 'Wikibase\RepoHooks::onFormat', array( CONTENT_MODEL_WIKIBASE_ITEM,
"wikibase-item" ) );
diff --git a/repo/includes/Hook/LabelPrefetchHookHandlers.php
b/repo/includes/Hook/LabelPrefetchHookHandlers.php
new file mode 100644
index 0000000..30b9a32
--- /dev/null
+++ b/repo/includes/Hook/LabelPrefetchHookHandlers.php
@@ -0,0 +1,152 @@
+<?php
+
+namespace Wikibase\Repo\Hook;
+
+use ChangesList;
+use RequestContext;
+use ResultWrapper;
+use Title;
+use Wikibase\Client\Store\TitleFactory;
+use Wikibase\Lib\Store\StorageException;
+use Wikibase\Store\TermBuffer;
+use Wikibase\Repo\WikibaseRepo;
+use Wikibase\Store\EntityIdLookup;
+use Wikibase\Term;
+
+
+/**
+ * Hook handlers for triggering prefetching of labels.
+ *
+ * Wikibase uses the LinkBegin hook handler
+ *
+ * @see LinkBeginHookHandler
+ *
+ * @since 0.5.
+ *
+ * @license GPL 2+
+ * @author Daniel Kinzler
+ */
+class LabelPrefetchHookHandlers {
+
+ /**
+ * @var TermBuffer
+ */
+ private $buffer;
+
+ /**
+ * @var EntityIdLookup
+ */
+ private $idLookup;
+
+ /**
+ * @var TitleFactory
+ */
+ private $titleFactory;
+
+ /**
+ * @var string[]
+ */
+ private $termTypes;
+
+ /**
+ * @var string[]
+ */
+ private $languageCodes;
+
+ /**
+ * @return null|LabelPrefetchHookHandlers
+ */
+ private static function newFromGlobalState() {
+ $termBuffer =
WikibaseRepo::getDefaultInstance()->getTermBuffer();
+
+ if ( $termBuffer === null ) {
+ return null;
+ }
+
+ $idLookup =
WikibaseRepo::getDefaultInstance()->getEntityIdLookup();
+ $titleFactory = new TitleFactory();
+ $termTypes = array( Term::TYPE_LABEL, Term::TYPE_DESCRIPTION );
+
+ // NOTE: keep in sync with fallback chain construction in
LinkBeginHookHandler::newFromGlobalState
+ $context = RequestContext::getMain();
+ $languageFallbackChainFactory =
WikibaseRepo::getDefaultInstance()->getLanguageFallbackChainFactory();
+ $languageFallbackChain =
$languageFallbackChainFactory->newFromContext( $context );
+
+ return new LabelPrefetchHookHandlers(
+ $termBuffer,
+ $idLookup,
+ $titleFactory,
+ $termTypes,
+ $languageFallbackChain->getFetchLanguageCodes()
+ );
+ }
+
+ /**
+ * Static handler for the ChangesListInitRows hook.
+ *
+ * @param ChangesList $list
+ * @param ResultWrapper|array $rows
+ *
+ * @return bool
+ */
+ public static function onChangesListInitRows(
+ ChangesList $list,
+ $rows
+ ) {
+ $handler = self::newFromGlobalState();
+
+ if ( !$handler ) {
+ return true;
+ }
+
+ return $handler->doChangesListInitRows( $list, $rows );
+ }
+
+ public function __construct(
+ TermBuffer $buffer,
+ EntityIdLookup $idLookup,
+ TitleFactory $titleFactory,
+ array $termTypes,
+ array $languageCodes
+ ) {
+
+ $this->buffer = $buffer;
+ $this->idLookup = $idLookup;
+ $this->titleFactory = $titleFactory;
+ $this->termTypes = $termTypes;
+ $this->languageCodes = $languageCodes;
+ }
+
+ /**
+ * @param ChangesList $list
+ * @param ResultWrapper|array $rows
+ *
+ * @return bool
+ */
+ public function doChangesListInitRows( ChangesList $list, $rows ) {
+ try {
+ $titles = $this->getChangedTitles( $rows );
+ $entityIds = $this->idLookup->getEntityIds( $titles );
+ $this->buffer->prefetchTerms( $entityIds,
$this->termTypes, $this->languageCodes );
+ } catch ( StorageException $ex ) {
+ wfLogWarning( __METHOD__ . ': ' . $ex->getMessage() );
+ }
+
+ return true;
+ }
+
+ /**
+ * @param ResultWrapper|array $rows
+ *
+ * @return Title[]
+ */
+ private function getChangedTitles( $rows ) {
+ $titles = array();
+
+ foreach ( $rows as $row ) {
+ $titles[] = $this->titleFactory->makeTitle(
$row->rc_namespace, $row->rc_title );
+ }
+
+ return $titles;
+ }
+}
diff --git a/repo/includes/Hook/LinkBeginHookHandler.php
b/repo/includes/Hook/LinkBeginHookHandler.php
index 29799f6..92eaa02 100644
--- a/repo/includes/Hook/LinkBeginHookHandler.php
+++ b/repo/includes/Hook/LinkBeginHookHandler.php
@@ -17,6 +17,15 @@
use Wikibase\Store\EntityIdLookup;
/**
+ * Handler for the LinkBegin hook, used to change the default link text of
links to wikibase Entity
+ * pages to the respective entity's label. This is used mainly for listings on
special pages, where
+ * it is useful to see pages listed by label rather than their entity ID.
+ *
+ * Label lookups are relatively expensive if done repeatedly for individual
labels. If possible,
+ * labels should be pre-loaded and buffered for later use via the LinkBegin
hook.
+ *
+ * @see LabelPrefetchHookHandlers
+ *
* @since 0.5
*
* @licence GNU GPL v2+
@@ -53,9 +62,9 @@
*/
private static function newFromGlobalState() {
$wikibaseRepo = WikibaseRepo::getDefaultInstance();
- $context = RequestContext::getMain();
-
$languageFallbackChainFactory =
$wikibaseRepo->getLanguageFallbackChainFactory();
+ // NOTE: keep in sync with fallback chain construction in
LabelPrefetchHookHandler::newFromGlobalState
+ $context = RequestContext::getMain();
$languageFallbackChain =
$languageFallbackChainFactory->newFromContext( $context );
return new self(
@@ -169,10 +178,16 @@
return;
}
+ // @todo: this re-implements the logic in
LanguageFallbackLabelLookup,
+ // just so it can be applied to descriptions as well as
labels. Either
+ // have two lookups with the same interface, or two
methods in the lookup
+ // interface.
+
+ // NOTE: keep in sync with with fallback languages in
LabelPrefetchHookHandler::newFromGlobalState
+
try {
- //@todo: only fetch the labels we need for the fallback
chain
- $labels = $this->termLookup->getLabels( $entityId );
- $descriptions = $this->termLookup->getDescriptions(
$entityId );
+ $labels = $this->termLookup->getLabels( $entityId,
$this->languageFallback->getFetchLanguageCodes() );
+ $descriptions = $this->termLookup->getDescriptions(
$entityId, $this->languageFallback->getFetchLanguageCodes() );
} catch ( StorageException $ex ) {
// This shouldn't happen if $target->exists() return
true!
wfProfileOut( __METHOD__ );
diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php
index d6dc125..36898f3 100644
--- a/repo/includes/WikibaseRepo.php
+++ b/repo/includes/WikibaseRepo.php
@@ -68,6 +68,7 @@
use Wikibase\Repo\Notifications\DatabaseChangeTransmitter;
use Wikibase\Repo\Notifications\DummyChangeTransmitter;
use Wikibase\Repo\Store\EntityPermissionChecker;
+use Wikibase\Store\BufferingTermLookup;
use Wikibase\Store\EntityIdLookup;
use Wikibase\Repo\View\EntityViewFactory;
use Wikibase\Settings;
@@ -75,6 +76,7 @@
use Wikibase\SnakFactory;
use Wikibase\SqlStore;
use Wikibase\Store;
+use Wikibase\Store\TermBuffer;
use Wikibase\StringNormalizer;
use Wikibase\SummaryFormatter;
use Wikibase\Template\TemplateFactory;
@@ -170,6 +172,11 @@
* @var EntityNamespaceLookup|null
*/
private $entityNamespaceLookup = null;
+
+ /**
+ * @var TermLookup
+ */
+ private $termLookup;
/**
* Returns the default instance constructed using newInstance().
@@ -498,10 +505,24 @@
}
/**
+ * @return TermBuffer
+ */
+ public function getTermBuffer() {
+ return $this->getTermLookup();
+ }
+
+ /**
* @return TermLookup
*/
public function getTermLookup() {
- return new EntityTermLookup( $this->getStore()->getTermIndex(),
$this->getEntityLookup() );
+ if ( !$this->termLookup ) {
+ $this->termLookup = new BufferingTermLookup(
+ $this->getStore()->getTermIndex(),
+ 1000 // @todo: configure buffer size
+ );
+ }
+
+ return $this->termLookup;
}
/**
diff --git a/repo/tests/phpunit/includes/Hook/LabelPrefetchHookHandlersTest.php
b/repo/tests/phpunit/includes/Hook/LabelPrefetchHookHandlersTest.php
new file mode 100644
index 0000000..e5d6782
--- /dev/null
+++ b/repo/tests/phpunit/includes/Hook/LabelPrefetchHookHandlersTest.php
@@ -0,0 +1,141 @@
+<?php
+
+namespace Wikibase\Test;
+
+use ChangesList;
+use FauxRequest;
+use PHPUnit_Framework_Assert;
+use RequestContext;
+use Title;
+use Wikibase\Client\Store\TitleFactory;
+use Wikibase\DataModel\Entity\BasicEntityIdParser;
+use Wikibase\DataModel\Entity\EntityId;
+use Wikibase\DataModel\Entity\EntityIdParsingException;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\Repo\Hook\LabelPrefetchHookHandlers;
+
+/**
+ * @covers Wikibase\Repo\Hook\LabelPrefetchHookHandlers
+ *
+ * @since 0.5
+ *
+ * @group WikibaseRepo
+ * @group Wikibase
+ * @group Database
+ * ^--- who knows what ChangesList may do internally...
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ * @author Daniel Kinzler
+ */
+class LabelPrefetchHookHandlersTest extends \PHPUnit_Framework_TestCase {
+
+ /**
+ * @param Title[] $titles
+ *
+ * @return EntityId[]
+ */
+ public function titlesToIds( array $titles ) {
+ $entityIds = array();
+ $idParser = new BasicEntityIdParser();
+
+ foreach ( $titles as $title ) {
+ try {
+ // Pretend the article ID is the numeric entity
ID.
+ $entityId = $idParser->parse( $title->getText()
);
+ $key = $entityId->getNumericId();
+
+ $entityIds[$key] = $entityId;
+ } catch ( EntityIdParsingException $ex ) {
+ // skip
+ }
+ }
+
+ return $entityIds;
+ }
+
+ /**
+ * @param callback $prefetchTerms
+ *
+ * @return LabelPrefetchHookHandlers
+ */
+ private function getLabelPrefetchHookHandlers( $prefetchTerms,
$termTypes, $languageCodes ) {
+
+ $termBuffer = $this->getMock( 'Wikibase\Store\TermBuffer' );
+ $termBuffer->expects( $this->atLeastOnce() )
+ ->method( 'prefetchTerms' )
+ ->will( $this->returnCallback( $prefetchTerms ) );
+
+ $idLookup = $this->getMock( 'Wikibase\Store\EntityIdLookup' );
+ $idLookup->expects( $this->atLeastOnce() )
+ ->method( 'getEntityIds' )
+ ->will( $this->returnCallback( array( $this,
'titlesToIds' ) ) );
+
+ $titleFactory = new TitleFactory();
+
+ return new LabelPrefetchHookHandlers(
+ $termBuffer,
+ $idLookup,
+ $titleFactory,
+ $termTypes,
+ $languageCodes
+ );
+
+ }
+
+ public function testDoChangesListInitRows() {
+
+ $rows = array(
+ (object)array( 'rc_namespace' => NS_MAIN, 'rc_title' =>
'XYZ' ),
+ (object)array( 'rc_namespace' => NS_MAIN, 'rc_title' =>
'Q23' ),
+ (object)array( 'rc_namespace' => NS_MAIN, 'rc_title' =>
'P55' ),
+ );
+
+ $expectedTermTypes = array( 'label', 'description' );
+ $expectedLanguageCodes = array( 'de', 'en', 'it' );
+
+ $expectedIds = array(
+ new ItemId( 'Q23' ),
+ new PropertyId( 'P55' ),
+ );
+
+ $prefetchTerms = function (
+ array $entityIds,
+ array $termTypes = null,
+ array $languageCodes = null
+ ) use (
+ $expectedIds,
+ $expectedTermTypes,
+ $expectedLanguageCodes
+ ) {
+ $expectedIdStrings = array_map( function ( $id ) {
return $id->getSerialization(); }, $expectedIds );
+ $entityIdStrings = array_map( function ( $id ) { return
$id->getSerialization(); }, $entityIds );
+
+ sort( $expectedIdStrings );
+ sort( $entityIdStrings );
+
+ PHPUnit_Framework_Assert::assertEquals(
$expectedIdStrings, $entityIdStrings );
+ PHPUnit_Framework_Assert::assertEquals(
$expectedTermTypes, $termTypes );
+ PHPUnit_Framework_Assert::assertEquals(
$expectedLanguageCodes, $languageCodes );
+ };
+
+ $linkBeginHookHandler = $this->getLabelPrefetchHookHandlers(
+ $prefetchTerms,
+ $expectedTermTypes,
+ $expectedLanguageCodes
+ );
+
+ $context = new RequestContext();
+ $context->setRequest( new FauxRequest() );
+ $context->setTitle( new Title( NS_SPECIAL, 'Watchlist' ) );
+
+ $changesList = new ChangesList( $context );
+
+ $linkBeginHookHandler->doChangesListInitRows(
+ $changesList,
+ $rows
+ );
+ }
+
+}
diff --git a/repo/tests/phpunit/includes/WikibaseRepoTest.php
b/repo/tests/phpunit/includes/WikibaseRepoTest.php
index da06422..27760c0 100644
--- a/repo/tests/phpunit/includes/WikibaseRepoTest.php
+++ b/repo/tests/phpunit/includes/WikibaseRepoTest.php
@@ -236,4 +236,21 @@
$this->assertInstanceOf( 'Wikibase\Api\ApiHelperFactory',
$factory );
}
+ public function testGetTermLookup() {
+ $service = $this->getWikibaseRepo()->getTermLookup();
+ $this->assertInstanceOf( 'Wikibase\Lib\Store\TermLookup',
$service );
+ }
+
+ public function testGetTermBuffer() {
+ $service = $this->getWikibaseRepo()->getTermBuffer();
+ $this->assertInstanceOf( 'Wikibase\Store\TermBuffer', $service
);
+ }
+
+ public function testGetTermBuffer_instance() {
+ $repo = $this->getWikibaseRepo();
+ $service = $repo->getTermBuffer();
+ $this->assertSame( $service, $repo->getTermBuffer(), 'Second
call should return same instance' );
+ $this->assertSame( $service, $repo->getTermLookup(),
'TermBuffer and TermLookup should be the same object' );
+ }
+
}
--
To view, visit https://gerrit.wikimedia.org/r/180140
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1d28e4c4148e14e9e33ca6829362f79fdad33d03
Gerrit-PatchSet: 11
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Adrian Lang <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Hoo man <[email protected]>
Gerrit-Reviewer: JanZerebecki <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits