Thiemo Mättig (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/244455
Change subject: Introduce ParserOutputDataUpdate infrastructure ...................................................................... Introduce ParserOutputDataUpdate infrastructure This is split from I7b03cc0. This patch adds the new classes, but does not use them. This patch also misses the PropertyDataTypeMatcher. This will be added in a later patch. WIP: TESTS MISSING! Bug: T114220 Change-Id: Iadc00e9e6df20caed9bf5a87348e5e9ae6852ad1 --- A repo/includes/DataUpdates/EntityParserOutputDataUpdater.php A repo/includes/DataUpdates/ExternalLinksDataUpdate.php A repo/includes/DataUpdates/ImageLinksDataUpdate.php A repo/includes/DataUpdates/ParserOutputDataUpdate.php A repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php A repo/includes/DataUpdates/SiteLinkDataUpdate.php A repo/includes/DataUpdates/StatementDataUpdate.php A repo/tests/phpunit/includes/DataUpdates/EntityParserOutputDataUpdaterTest.php A repo/tests/phpunit/includes/DataUpdates/ExternalLinksDataUpdateTest.php A repo/tests/phpunit/includes/DataUpdates/ImageLinksDataUpdateTest.php A repo/tests/phpunit/includes/DataUpdates/ReferencedEntitiesDataUpdateTest.php 11 files changed, 536 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/55/244455/1 diff --git a/repo/includes/DataUpdates/EntityParserOutputDataUpdater.php b/repo/includes/DataUpdates/EntityParserOutputDataUpdater.php new file mode 100644 index 0000000..a4f2483 --- /dev/null +++ b/repo/includes/DataUpdates/EntityParserOutputDataUpdater.php @@ -0,0 +1,117 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use ParserOutput; +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Entity\Item; +use Wikibase\DataModel\Statement\StatementListProvider; + +/** + * @since 0.5 + * + * @license GNU GPL v2+ + * @author Katie Filbert < [email protected] > + * @author Thiemo Mättig + */ +class EntityParserOutputDataUpdater { + + /** + * @var ParserOutputDataUpdate[] + */ + private $dataUpdates; + + /** + * @param ParserOutputDataUpdate[] $dataUpdates + */ + public function __construct( array $dataUpdates ) { + $this->dataUpdates = $dataUpdates; + } + + /** + * @param EntityDocument $entity + */ + public function processEntity( EntityDocument $entity ) { + if ( $entity instanceof StatementListProvider ) { + $this->processStatements( $entity ); + } + + if ( $entity instanceof Item ) { + $this->processSiteLinks( $entity ); + } + } + + /** + * @param StatementListProvider $entity + */ + private function processStatements( StatementListProvider $entity ) { + $dataUpdates = $this->getStatementDataUpdates(); + + if ( empty( $dataUpdates ) ) { + return; + } + + foreach ( $entity->getStatements() as $statement ) { + foreach ( $dataUpdates as $dataUpdate ) { + $dataUpdate->processStatement( $statement ); + } + } + } + + /** + * @param Item $item + */ + private function processSiteLinks( Item $item ) { + $dataUpdates = $this->getSiteLinkDataUpdates(); + + if ( empty( $dataUpdates ) ) { + return; + } + + foreach ( $item->getSiteLinkList() as $siteLink ) { + foreach ( $dataUpdates as $dataUpdate ) { + $dataUpdate->processSiteLink( $siteLink ); + } + } + } + + /** + * @return StatementDataUpdate[] + */ + private function getStatementDataUpdates() { + $statementDataUpdates = array(); + + foreach ( $this->dataUpdates as $dataUpdate ) { + if ( $dataUpdate instanceof StatementDataUpdate ) { + $statementDataUpdates[] = $dataUpdate; + } + } + + return $statementDataUpdates; + } + + /** + * @return SiteLinkDataUpdate[] + */ + private function getSiteLinkDataUpdates() { + $siteLinkDataUpdates = array(); + + foreach ( $this->dataUpdates as $dataUpdate ) { + if ( $dataUpdate instanceof SiteLinkDataUpdate ) { + $siteLinkDataUpdates[] = $dataUpdate; + } + } + + return $siteLinkDataUpdates; + } + + /** + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + foreach ( $this->dataUpdates as $dataUpdate ) { + $dataUpdate->updateParserOutput( $parserOutput ); + } + } + +} diff --git a/repo/includes/DataUpdates/ExternalLinksDataUpdate.php b/repo/includes/DataUpdates/ExternalLinksDataUpdate.php new file mode 100644 index 0000000..bd98fae --- /dev/null +++ b/repo/includes/DataUpdates/ExternalLinksDataUpdate.php @@ -0,0 +1,63 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use DataValues\StringValue; +use ParserOutput; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\Statement; + +/** + * Add url data values as external links in ParserOutput. + * + * @since 0.5 + * + * @license GNU GPL v2+ + * @author Bene* < [email protected] > + * @author Katie Filbert < [email protected] > + * @author Thiemo Mättig + */ +class ExternalLinksDataUpdate implements StatementDataUpdate { + + /** + * @var string[] + */ + private $urls = array(); + + /** + * @param Statement $statement + */ + public function processStatement( Statement $statement ) { + foreach ( $statement->getAllSnaks() as $snak ) { + $this->processSnak( $snak ); + } + } + + /** + * @param Snak $snak + */ + private function processSnak( Snak $snak ) { + if ( $snak instanceof PropertyValueSnak ) { + $value = $snak->getDataValue(); + + if ( $value instanceof StringValue ) { + $url = $value->getValue(); + + if ( $url !== '' ) { + $this->urls[$value->getValue()] = $value->getValue(); + } + } + } + } + + /** + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + foreach ( $this->urls as $url ) { + $parserOutput->addExternalLink( $url ); + } + } + +} diff --git a/repo/includes/DataUpdates/ImageLinksDataUpdate.php b/repo/includes/DataUpdates/ImageLinksDataUpdate.php new file mode 100644 index 0000000..91e7efe --- /dev/null +++ b/repo/includes/DataUpdates/ImageLinksDataUpdate.php @@ -0,0 +1,65 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use DataValues\StringValue; +use ParserOutput; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\Statement; + +/** + * Register commonsMedia values as used images in ParserOutput. + * + * @since 0.5 + * + * @license GNU GPL v2+ + * @author Bene* < [email protected] > + * @author Katie Filbert < [email protected] > + * @author Thiemo Mättig + */ +class ImageLinksDataUpdate implements StatementDataUpdate { + + /** + * @var string[] + */ + private $images = array(); + + /** + * @param Statement $statement + */ + public function processStatement( Statement $statement ) { + foreach ( $statement->getAllSnaks() as $snak ) { + $this->processSnak( $snak ); + } + } + + /** + * @param Snak $snak + */ + private function processSnak( Snak $snak ) { + if ( $snak instanceof PropertyValueSnak ) { + $value = $snak->getDataValue(); + + if ( $value instanceof StringValue ) { + $fileName = str_replace( ' ', '_', $value->getValue() ); + + if ( $fileName !== '' ) { + $this->images[$value->getValue()] = $value->getValue(); + } + } + } + } + + /** + * Treat CommonsMedia values as file transclusions + * + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + foreach ( $this->images as $image ) { + $parserOutput->addImage( $image ); + } + } + +} diff --git a/repo/includes/DataUpdates/ParserOutputDataUpdate.php b/repo/includes/DataUpdates/ParserOutputDataUpdate.php new file mode 100644 index 0000000..56946cd --- /dev/null +++ b/repo/includes/DataUpdates/ParserOutputDataUpdate.php @@ -0,0 +1,23 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use ParserOutput; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +interface ParserOutputDataUpdate { + + /** + * Update extension data, properties or other data in ParserOutput. + * These updates are invoked when EntityContent::getParserOutput is called. + * + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ); + +} diff --git a/repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php b/repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php new file mode 100644 index 0000000..2acaa1d --- /dev/null +++ b/repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php @@ -0,0 +1,146 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use DataValues\DataValue; +use DataValues\QuantityValue; +use LinkBatch; +use ParserOutput; +use Title; +use Wikibase\DataModel\Entity\EntityId; +use Wikibase\DataModel\Entity\EntityIdParser; +use Wikibase\DataModel\Entity\EntityIdParsingException; +use Wikibase\DataModel\Entity\EntityIdValue; +use Wikibase\DataModel\SiteLink; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\Statement; +use Wikibase\Lib\Store\EntityTitleLookup; + +/** + * Finds linked entities on an Entity and add the links to ParserOutput. + * + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + * @author Bene* < [email protected] > + */ +class ReferencedEntitiesDataUpdate implements SiteLinkDataUpdate, StatementDataUpdate { + + /** + * @var EntityTitleLookup + */ + private $entityTitleLookup; + + /** + * @var EntityIdParser + */ + private $externalEntityIdParser; + + /** + * @var EntityId[] + */ + private $entityIds = array(); + + /** + * @param EntityTitleLookup $entityTitleLookup + * @param EntityIdParser $externalEntityIdParser Parser for external entity IDs (usually URIs) + * into EntityIds. Such external entity IDs may be used for units in QuantityValues, for + * calendar models in TimeValue, and for the reference globe in GlobeCoordinateValues. + */ + public function __construct( + EntityTitleLookup $entityTitleLookup, + EntityIdParser $externalEntityIdParser + ) { + $this->entityTitleLookup = $entityTitleLookup; + $this->externalEntityIdParser = $externalEntityIdParser; + } + + /** + * @param Statement $statement + */ + public function processStatement( Statement $statement ) { + foreach ( $statement->getAllSnaks() as $snak ) { + $this->addEntityIdsFromSnak( $snak ); + } + } + + /** + * @param Snak $snak + */ + private function addEntityIdsFromSnak( Snak $snak ) { + $propertyId = $snak->getPropertyId(); + $this->entityIds[$propertyId->getSerialization()] = $propertyId; + + if ( $snak instanceof PropertyValueSnak ) { + $value = $snak->getDataValue(); + $this->addEntityIdsFromDataValue( $value ); + } + } + + /** + * @param DataValue $dataValue + */ + private function addEntityIdsFromDataValue( DataValue $dataValue ) { + if ( $dataValue instanceof EntityIdValue ) { + $entityId = $dataValue->getEntityId(); + $this->entityIds[$entityId->getSerialization()] = $entityId; + } elseif ( $dataValue instanceof QuantityValue ) { + $unitUri = $dataValue->getUnit(); + $this->addEntityIdFromUri( $unitUri ); + } + + // TODO: EntityIds from GlobeCoordinateValue's globe URI (Wikidata, not local item URI!) + // TODO: EntityIds from TimeValue's calendar URI (Wikidata, not local item URI!) + } + + /** + * @param string $uri + */ + private function addEntityIdFromUri( $uri ) { + try { + $entityId = $this->externalEntityIdParser->parse( $uri ); + $this->entityIds[$entityId->getSerialization()] = $entityId; + } catch ( EntityIdParsingException $ex ) { + // noop + } + } + + /** + * @param SiteLink $siteLink + */ + public function processSiteLink( SiteLink $siteLink ) { + $this->entityIds = array_merge( $this->entityIds, $siteLink->getBadges() ); + } + + /** + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + $this->addLinksToParserOutput( $parserOutput ); + } + + /** + * @param ParserOutput $parserOutput + */ + private function addLinksToParserOutput( ParserOutput $parserOutput ) { + $linkBatch = new LinkBatch(); + + foreach ( $this->entityIds as $entityId ) { + $linkBatch->addObj( $this->entityTitleLookup->getTitleForId( $entityId ) ); + } + + $pages = $linkBatch->doQuery(); + + if ( $pages === false ) { + return; + } + + foreach ( $pages as $page ) { + $title = Title::makeTitle( $page->page_namespace, $page->page_title ); + $parserOutput->addLink( $title, $page->page_id ); + } + } + +} diff --git a/repo/includes/DataUpdates/SiteLinkDataUpdate.php b/repo/includes/DataUpdates/SiteLinkDataUpdate.php new file mode 100644 index 0000000..116b3ae --- /dev/null +++ b/repo/includes/DataUpdates/SiteLinkDataUpdate.php @@ -0,0 +1,25 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use Wikibase\DataModel\SiteLink; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +interface SiteLinkDataUpdate extends ParserOutputDataUpdate { + + /** + * Extract some data or do processing on a SiteLink during parsing. + * + * This method is invoked when processing a SiteLinkList on an Item, + * or other entity type that has SiteLinks. + * + * @param SiteLink $siteLink + */ + public function processSiteLink( SiteLink $siteLink ); + +} diff --git a/repo/includes/DataUpdates/StatementDataUpdate.php b/repo/includes/DataUpdates/StatementDataUpdate.php new file mode 100644 index 0000000..c1e5ff5 --- /dev/null +++ b/repo/includes/DataUpdates/StatementDataUpdate.php @@ -0,0 +1,25 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use Wikibase\DataModel\Statement\Statement; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +interface StatementDataUpdate extends ParserOutputDataUpdate { + + /** + * Extract some data or do processing on a Statement during parsing. + * + * This method is normally invoked when processing a StatementList + * for all Statements on a StatementListProvider (e.g. an Item). + * + * @param Statement $statement + */ + public function processStatement( Statement $statement ); + +} diff --git a/repo/tests/phpunit/includes/DataUpdates/EntityParserOutputDataUpdaterTest.php b/repo/tests/phpunit/includes/DataUpdates/EntityParserOutputDataUpdaterTest.php new file mode 100644 index 0000000..cedb222 --- /dev/null +++ b/repo/tests/phpunit/includes/DataUpdates/EntityParserOutputDataUpdaterTest.php @@ -0,0 +1,18 @@ +<?php + +namespace Wikibase\Repo\Tests\DataUpdates; + +use Wikibase\Repo\DataUpdates\EntityParserOutputDataUpdater; + +/** + * @covers Wikibase\Repo\DataUpdates\EntityParserOutputDataUpdater + * + * @group Wikibase + * @group WikibaseRepo + * + * @licence GNU GPL v2+ + * @author Thiemo Mättig + */ +class EntityParserOutputDataUpdaterTest { + +} diff --git a/repo/tests/phpunit/includes/DataUpdates/ExternalLinksDataUpdateTest.php b/repo/tests/phpunit/includes/DataUpdates/ExternalLinksDataUpdateTest.php new file mode 100644 index 0000000..0f61fbd --- /dev/null +++ b/repo/tests/phpunit/includes/DataUpdates/ExternalLinksDataUpdateTest.php @@ -0,0 +1,18 @@ +<?php + +namespace Wikibase\Repo\Tests\DataUpdates; + +use Wikibase\Repo\DataUpdates\ExternalLinksDataUpdate; + +/** + * @covers Wikibase\Repo\DataUpdates\ExternalLinksDataUpdate + * + * @group Wikibase + * @group WikibaseRepo + * + * @licence GNU GPL v2+ + * @author Thiemo Mättig + */ +class ExternalLinksDataUpdateTest { + +} diff --git a/repo/tests/phpunit/includes/DataUpdates/ImageLinksDataUpdateTest.php b/repo/tests/phpunit/includes/DataUpdates/ImageLinksDataUpdateTest.php new file mode 100644 index 0000000..064319d --- /dev/null +++ b/repo/tests/phpunit/includes/DataUpdates/ImageLinksDataUpdateTest.php @@ -0,0 +1,18 @@ +<?php + +namespace Wikibase\Repo\Tests\DataUpdates; + +use Wikibase\Repo\DataUpdates\ImageLinksDataUpdate; + +/** + * @covers Wikibase\Repo\DataUpdates\ImageLinksDataUpdate + * + * @group Wikibase + * @group WikibaseRepo + * + * @licence GNU GPL v2+ + * @author Thiemo Mättig + */ +class ImageLinksDataUpdateTest { + +} diff --git a/repo/tests/phpunit/includes/DataUpdates/ReferencedEntitiesDataUpdateTest.php b/repo/tests/phpunit/includes/DataUpdates/ReferencedEntitiesDataUpdateTest.php new file mode 100644 index 0000000..2612c0e --- /dev/null +++ b/repo/tests/phpunit/includes/DataUpdates/ReferencedEntitiesDataUpdateTest.php @@ -0,0 +1,18 @@ +<?php + +namespace Wikibase\Repo\Tests\DataUpdates; + +use Wikibase\Repo\DataUpdates\ReferencedEntitiesDataUpdate; + +/** + * @covers Wikibase\Repo\DataUpdates\ReferencedEntitiesDataUpdate + * + * @group Wikibase + * @group WikibaseRepo + * + * @licence GNU GPL v2+ + * @author Thiemo Mättig + */ +class ReferencedEntitiesDataUpdateTest { + +} -- To view, visit https://gerrit.wikimedia.org/r/244455 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iadc00e9e6df20caed9bf5a87348e5e9ae6852ad1 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Thiemo Mättig (WMDE) <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
