Thiemo Mättig (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/244643
Change subject: Introduce ParserOutputDataUpdate infrastructure ...................................................................... Introduce ParserOutputDataUpdate infrastructure PS1 is an untouched copy from I7b03cc0 (PS14). Bug: T114220 Change-Id: I1618217b0926b0dd213f9a0d4b315514b66f2b54 --- A repo/includes/DataUpdates/EntityParserOutputDataUpdater.php A repo/includes/DataUpdates/ExternalLinksDataUpdate.php A repo/includes/DataUpdates/ImageLinksDataUpdate.php A repo/includes/DataUpdates/ParserOutputDataUpdate.php A repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php A repo/includes/DataUpdates/SiteLinkDataUpdate.php A repo/includes/DataUpdates/StatementDataUpdate.php 7 files changed, 504 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/43/244643/1 diff --git a/repo/includes/DataUpdates/EntityParserOutputDataUpdater.php b/repo/includes/DataUpdates/EntityParserOutputDataUpdater.php new file mode 100644 index 0000000..8e649f2 --- /dev/null +++ b/repo/includes/DataUpdates/EntityParserOutputDataUpdater.php @@ -0,0 +1,117 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use ParserOutput; +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Entity\Item; +use Wikibase\DataModel\SiteLinkList; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\StatementList; +use Wikibase\DataModel\Statement\StatementListProvider; + +/** + * @todo have ItemParserOutputDataUpdate, etc. instead. + * + * @since 0.5 + * + * @license GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +class EntityParserOutputDataUpdater { + + /** + * @param ParserOutputDataUpdate[] $dataUpdates + */ + public function __construct( array $dataUpdates ) { + $this->dataUpdates = $dataUpdates; + } + + /** + * @param EntityDocument $entity + */ + public function processEntity( EntityDocument $entity ) { + if ( $entity instanceof StatementListProvider ) { + $this->processStatements( $entity->getStatements() ); + } + + if ( $entity instanceof Item ) { + $this->processSiteLinks( $entity->getSiteLinkList() ); + } + } + + /** + * @param StatementList $statements + */ + private function processStatements( StatementList $statements ) { + $statementDataUpdates = $this->getStatementDataUpdates(); + + if ( empty( $statementDataUpdates ) ) { + return; + } + + foreach ( $statements as $statement ) { + foreach ( $statementDataUpdates as $statementDataUpdate ) { + $statementDataUpdate->processStatement( $statement ); + } + } + } + + /** + * @param SiteLinkList $siteLinks + */ + private function processSiteLinks( SiteLinkList $siteLinks ) { + $siteLinkDataUpdates = $this->getSiteLinkDataUpdates(); + + if ( empty( $siteLinkDataUpdates ) ) { + return; + } + + // process things like badges + foreach ( $siteLinks as $siteLink ) { + foreach ( $siteLinkDataUpdates as $siteLinkDataUpdate ) { + $siteLinkDataUpdate->processSiteLink( $siteLink ); + } + } + } + + /** + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + foreach ( $this->dataUpdates as $dataUpdate ) { + $dataUpdate->updateParserOutput( $parserOutput ); + } + } + + /** + * @return SiteLinkDataUpdate[] + */ + private function getSiteLinkDataUpdates() { + $siteLinkDataUpdates = array(); + + foreach ( $this->dataUpdates as $dataUpdate ) { + if ( $dataUpdate instanceof SiteLinkDataUpdate ) { + $siteLinkDataUpdates[] = $dataUpdate; + } + } + + return $siteLinkDataUpdates; + } + + /** + * @return StatementDataUpdate[] + */ + private function getStatementDataUpdates() { + $statementDataUpdates = array(); + + foreach ( $this->dataUpdates as $dataUpdate ) { + if ( $dataUpdate instanceof StatementDataUpdate ) { + $statementDataUpdates[] = $dataUpdate; + } + } + + return $statementDataUpdates; + } + +} diff --git a/repo/includes/DataUpdates/ExternalLinksDataUpdate.php b/repo/includes/DataUpdates/ExternalLinksDataUpdate.php new file mode 100644 index 0000000..c42e11f --- /dev/null +++ b/repo/includes/DataUpdates/ExternalLinksDataUpdate.php @@ -0,0 +1,78 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use DataValues\DataValue; +use ParserOutput; +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\Statement; +use Wikibase\Lib\Store\PropertyDataTypeMatcher; + +/** + * Add url data values as external links in ParserOutput. + * + * @since 0.5 + * + * @license GNU GPL v2+ + * @author Bene* < [email protected] > + * @author Katie Filbert < [email protected] > + */ +class ExternalLinksDataUpdate implements StatementDataUpdate { + + /** + * @var PropertyDataTypeMatcher + */ + private $propertyDataTypeMatcher; + + /** + * @var DataValue[] + */ + private $usedUrls = array(); + + /** + * @param PropertyDataTypeMatcher $propertyDataTypeMatcher + */ + public function __construct( PropertyDataTypeMatcher $propertyDataTypeMatcher ) { + $this->propertyDataTypeMatcher = $propertyDataTypeMatcher; + } + + /** + * Add DataValue to list of used urls, if Snak property has 'url' data type. + * + * @param Statement $statement + */ + public function processStatement( Statement $statement ) { + $snaks = $statement->getAllSnaks(); + $this->extractUsedUrls( $snaks ); + } + + /** + * @param Snak[] $snaks + */ + public function extractUsedUrls( array $snaks ) { + foreach ( $snaks as $snak ) { + if ( $snak instanceof PropertyValueSnak && + $this->propertyDataTypeMatcher->isMatchingDataType( $snak->getPropertyId(), 'url' ) + ) { + $dataValue = $snak->getDataValue(); + $this->usedUrls[$dataValue->getHash()] = $dataValue; + } + } + } + + /** + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + // treat URL values as external links ------ + foreach ( $this->usedUrls as $url ) { + $value = $url->getValue(); + if ( is_string( $value ) ) { + $parserOutput->addExternalLink( $value ); + } + } + } + +} diff --git a/repo/includes/DataUpdates/ImageLinksDataUpdate.php b/repo/includes/DataUpdates/ImageLinksDataUpdate.php new file mode 100644 index 0000000..77db754 --- /dev/null +++ b/repo/includes/DataUpdates/ImageLinksDataUpdate.php @@ -0,0 +1,82 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use DataValues\DataValue; +use ParserOutput; +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\Statement; +use Wikibase\Lib\Store\PropertyDataTypeMatcher; + +/** + * Register commonsMedia values as used images in ParserOutput. + * + * @since 0.5 + * + * @license GNU GPL v2+ + * @author Bene* < [email protected] > + * @author Katie Filbert < [email protected] > + */ +class ImageLinksDataUpdate implements StatementDataUpdate { + + /** + * @var PropertyDataTypeMatcher + */ + private $propertyDataTypeMatcher; + + /** + * @var DataValue[] + */ + private $usedImages = array(); + + /** + * @param PropertyDataTypeMatcher $propertyDataTypeMatcher + */ + public function __construct( PropertyDataTypeMatcher $propertyDataTypeMatcher ) { + $this->propertyDataTypeMatcher = $propertyDataTypeMatcher; + } + + /** + * Add DataValue to list of used images if Snak property data type is commonsMedia. + * + * @param Statement $statement + */ + public function processStatement( Statement $statement ) { + $snaks = $statement->getAllSnaks( $statement ); + $this->extractUsedImagesFromSnaks( $snaks ); + } + + /** + * @param Snak[] $snaks + */ + private function extractUsedImagesFromSnaks( array $snaks ) { + foreach ( $snaks as $snak ) { + if ( $snak instanceof PropertyValueSnak && + $this->propertyDataTypeMatcher->isMatchingDataType( + $snak->getPropertyId(), + 'commonsMedia' + ) + ) { + $dataValue = $snak->getDataValue(); + $this->usedImages[$dataValue->getHash()] = $dataValue; + } + } + } + + /** + * Treat CommonsMedia values as file transclusions + * + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + foreach ( $this->usedImages as $image ) { + $value = $image->getValue(); + if ( is_string( $value ) ) { + $parserOutput->addImage( str_replace( ' ', '_', $value ) ); + } + } + } + +} diff --git a/repo/includes/DataUpdates/ParserOutputDataUpdate.php b/repo/includes/DataUpdates/ParserOutputDataUpdate.php new file mode 100644 index 0000000..c2b835a --- /dev/null +++ b/repo/includes/DataUpdates/ParserOutputDataUpdate.php @@ -0,0 +1,23 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use ParserOutput; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +interface ParserOutputDataUpdate { + + /** + * Update extension data, properties or other data in ParserOutput. + * These updates are invoked when EntityContent::getParserOutput is called. + * + * @param ParserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ); + +} diff --git a/repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php b/repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php new file mode 100644 index 0000000..abe2966 --- /dev/null +++ b/repo/includes/DataUpdates/ReferencedEntitiesDataUpdate.php @@ -0,0 +1,154 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use DataValues\DataValue; +use DataValues\QuantityValue; +use LinkBatch; +use ParserOutput; +use Title; +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Entity\EntityId; +use Wikibase\DataModel\Entity\EntityIdValue; +use Wikibase\DataModel\Entity\EntityIdParser; +use Wikibase\DataModel\Entity\EntityIdParsingException; +use Wikibase\DataModel\Entity\Item; +use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\SiteLink; +use Wikibase\DataModel\Snak\Snak; +use Wikibase\DataModel\Statement\Statement; +use Wikibase\Lib\Store\EntityTitleLookup; + +/** + * Finds linked entities on an Entity and add the links to ParserOutput. + * + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + * @author Bene* < [email protected] > + */ +class ReferencedEntitiesDataUpdate implements SiteLinkDataUpdate, StatementDataUpdate { + + /** + * @var EntityTitleLookup + */ + private $entityTitleLookup; + + /** + * @var EntityIdParser + */ + private $externalEntityIdParser; + + /** + * @var array + */ + private $entityIds = array(); + + /** + * @param EntityTitleLookup $entityTitleLookup + * @param EntityIdParser $externalEntityIdParser Parser for external entity IDs (usually URIs) + * into EntityIds. Such external entity IDs may be used for units in QuantityValues, for + * calendar models in TimeValue, and for the reference globe in GlobeCoordinateValues. + */ + public function __construct( + EntityTitleLookup $entityTitleLookup, + EntityIdParser $externalEntityIdParser + ) { + $this->entityTitleLookup = $entityTitleLookup; + $this->externalEntityIdParser = $externalEntityIdParser; + } + + /** + * Finds linked entities in a Statement. + * + * @param Statement $statement + */ + public function processStatement( Statement $statement ) { + $snaks = $statement->getAllSnaks(); + $this->extractEntityIdsFromSnaks( $snaks ); + } + + /** + * @param Snak[] $snaks + */ + private function extractEntityIdsFromSnaks( array $snaks ) { + foreach ( $snaks as $snak ) { + $propertyId = $snak->getPropertyId(); + $this->entityIds[$propertyId->getSerialization()] = $propertyId; + + if ( $snak instanceof PropertyValueSnak ) { + $dataValue = $snak->getDataValue(); + $this->addEntityIdsFromValue( $dataValue ); + } + } + } + + /** + * @param SiteLink $siteLink + */ + public function processSiteLink( SiteLink $siteLink ) { + $this->entityIds = array_merge( $this->entityIds, $siteLink->getBadges() ); + } + + /** + * @param ParserOutput $parserOutput + */ + public function updateParserOutput( ParserOutput $parserOutput ) { + // needed and used in EntityParserOutputGenerator, for getEntityInfo, + // to allow this data to be accessed later in processing. + $parserOutput->setExtensionData( 'referenced-entities', $this->entityIds ); + $this->addEntityLinksToParserOutput( $parserOutput ); + } + + /** + * @param ParserOutput $parserOutput + */ + private function addEntityLinksToParserOutput( ParserOutput $parserOutput ) { + $linkBatch = new LinkBatch(); + + foreach ( $this->entityIds as $entityId ) { + $linkBatch->addObj( $this->entityTitleLookup->getTitleForId( $entityId ) ); + } + + $pages = $linkBatch->doQuery(); + + if ( $pages === false ) { + return; + } + + foreach ( $pages as $page ) { + $title = Title::makeTitle( $page->page_namespace, $page->page_title ); + $parserOutput->addLink( $title, $page->page_id ); + } + } + + /** + * @param DataValue $dataValue + */ + private function addEntityIdsFromValue( DataValue $dataValue ) { + if ( $dataValue instanceof EntityIdValue ) { + $entityId = $dataValue->getEntityId(); + $this->entityIds[$entityId->getSerialization()] = $entityId; + } elseif ( $dataValue instanceof QuantityValue ) { + $unitUri = $dataValue->getUnit(); + $this->addEntityIdsFromURI( $unitUri ); + } + + // TODO: EntityIds from GlobeCoordinateValue's globe URI (Wikidata, not local item URI!) + // TODO: EntityIds from TimeValue's calendar URI (Wikidata, not local item URI!) + } + + /** + * @param string $uri + */ + private function addEntityIdsFromURI( $uri ) { + try { + $entityId = $this->externalEntityIdParser->parse( $uri ); + $this->entityIds[$entityId->getSerialization()] = $entityId; + } catch ( EntityIdParsingException $ex ) { + // noop + } + } + +} diff --git a/repo/includes/DataUpdates/SiteLinkDataUpdate.php b/repo/includes/DataUpdates/SiteLinkDataUpdate.php new file mode 100644 index 0000000..116b3ae --- /dev/null +++ b/repo/includes/DataUpdates/SiteLinkDataUpdate.php @@ -0,0 +1,25 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use Wikibase\DataModel\SiteLink; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +interface SiteLinkDataUpdate extends ParserOutputDataUpdate { + + /** + * Extract some data or do processing on a SiteLink during parsing. + * + * This method is invoked when processing a SiteLinkList on an Item, + * or other entity type that has SiteLinks. + * + * @param SiteLink $siteLink + */ + public function processSiteLink( SiteLink $siteLink ); + +} diff --git a/repo/includes/DataUpdates/StatementDataUpdate.php b/repo/includes/DataUpdates/StatementDataUpdate.php new file mode 100644 index 0000000..424711a --- /dev/null +++ b/repo/includes/DataUpdates/StatementDataUpdate.php @@ -0,0 +1,25 @@ +<?php + +namespace Wikibase\Repo\DataUpdates; + +use Wikibase\DataModel\Statement\Statement; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +interface StatementDataUpdate extends ParserOutputDataUpdate { + + /** + * Extract some data or do processing on a Statement, during parsing. + * + * This method is normally invoked when processing a StatementList + * for all Statements on a StatementListProvider. (e.g. an Item) + * + * @param Statement $statement + */ + public function processStatement( Statement $statement ); + +} -- To view, visit https://gerrit.wikimedia.org/r/244643 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1618217b0926b0dd213f9a0d4b315514b66f2b54 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Thiemo Mättig (WMDE) <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
