jenkins-bot has submitted this change and it was merged.
Change subject: Add geodata to parser output
......................................................................
Add geodata to parser output
Bug: T75482
Change-Id: If4ec903b530568da4d644892678fdf87c0e92078
---
M docs/options.wiki
M repo/config/Wikibase.default.php
A repo/includes/DataUpdates/GeoDataDataUpdate.php
M repo/includes/EntityParserOutputGeneratorFactory.php
M repo/includes/WikibaseRepo.php
A repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
6 files changed, 505 insertions(+), 3 deletions(-)
Approvals:
Daniel Kinzler: Looks good to me, approved
JanZerebecki: Looks good to me, approved
jenkins-bot: Verified
diff --git a/docs/options.wiki b/docs/options.wiki
index 83e81da..77760d4 100644
--- a/docs/options.wiki
+++ b/docs/options.wiki
@@ -48,6 +48,7 @@
;badgeItems: Items allowed to be used as badges. This setting expects an array
of serialized item ids pointing to their CSS class names, like <code>array(
'Q101' => 'wb-badge-goodarticle' )</code>. With this class name it is possible
to change the icon of a specific badge.
;preferredPageImagesProperties: List of image property id strings, in order of
preference, that should be considered for the <code>page_image</code> page
property. Defaults to an empty array.
;conceptBaseUri: Base URI for building concept URIs (for example used in Rdf
output). This has to include the protocol and domain, only an entity identifier
will be appended.
+;preferredGeoDataProperties: List of properties (by id string), in order of
preference, that are considered when finding primary coordinates for the
GeoData extension on an entity. Defaults to an empty array.
=== Expert Settings ===
;dispatchBatchChunkFactor: Chunk factor used internally by the
<code>dispatchChanges.php</code> script. The default is 3. If most clients are
not interested in most changes, this factor can be raised to lower the number
of database queries needed to fetch a batch of changes.
diff --git a/repo/config/Wikibase.default.php b/repo/config/Wikibase.default.php
index 863bb9c..a1c361d 100644
--- a/repo/config/Wikibase.default.php
+++ b/repo/config/Wikibase.default.php
@@ -143,6 +143,12 @@
'de-formal' => 'de-x-formal',
'nl-informal' => 'nl-x-informal',
),
+
+ // List of globe-coordinate properties (listed by id string), in order of preference,
+ // to consider for primary coordinates when extracting coordinates from an Entity
+ // for the GeoData extension.
+ // e.g. array( 'P625', 'P1259' )
+ 'preferredGeoDataProperties' => array(),
);
return $defaults;
diff --git a/repo/includes/DataUpdates/GeoDataDataUpdate.php
b/repo/includes/DataUpdates/GeoDataDataUpdate.php
new file mode 100644
index 0000000..c224129
--- /dev/null
+++ b/repo/includes/DataUpdates/GeoDataDataUpdate.php
@@ -0,0 +1,223 @@
+<?php
+
+namespace Wikibase\Repo\DataUpdates;
+
+use Coord;
+use CoordinatesOutput;
+use DataValues\Geo\Values\GlobeCoordinateValue;
+use ParserOutput;
+use RuntimeException;
+use UnexpectedValueException;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+use Wikibase\DataModel\Snak\Snak;
+use Wikibase\DataModel\Statement\Statement;
+use Wikibase\DataModel\Statement\StatementList;
+use Wikibase\Lib\Store\PropertyDataTypeMatcher;
+
+/**
+ * Extracts and stashes coordinates from Statement main snaks and
+ * adds to ParserOutput for use by the GeoData extension.
+ *
+ * GeoData populates the geo_tags table, and if using
+ * the 'elastic' backend, also adds coordinates to CirrusSearch.
+ * GeoData then provides API modules to get coordinates for pages,
+ * and to find nearby pages to a requested location.
+ *
+ * This class uses the Coord and CoordinatesOutput classes from the
+ * GeoData extension.
+ *
+ * @license GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class GeoDataDataUpdate implements StatementDataUpdate {
+
+ /**
+ * @var PropertyDataTypeMatcher
+ */
+ private $propertyDataTypeMatcher;
+
+ /**
+ * @var string[]
+ */
+ private $preferredProperties;
+
+ /**
+ * @var StatementList[]
+ */
+ private $statementsByGeoProperty;
+
+	/**
+	 * Stores the data type matcher, the configured property preference order and,
+	 * optionally, statements that were already grouped by property.
+	 *
+	 * @param PropertyDataTypeMatcher $propertyDataTypeMatcher Used to detect
+	 *  globe-coordinate properties.
+	 * @param string[] $preferredProperties Property id strings, in order of preference,
+	 *  considered when choosing the primary coordinate.
+	 * @param StatementList[] $statementsByGeoProperty Statements by globe-coordinate
+	 *  property, keyed by property id serialization.
+	 * @throws RuntimeException if the GeoData class does not exist.
+	 */
+	public function __construct(
+		PropertyDataTypeMatcher $propertyDataTypeMatcher,
+		array $preferredProperties,
+		array $statementsByGeoProperty = array()
+	) {
+		if ( !class_exists( 'GeoData' ) ) {
+			// Keep the message on one logical line so it contains no embedded line break.
+			throw new RuntimeException(
+				'GeoDataDataUpdate requires the GeoData extension to be enabled'
+			);
+		}
+
+		$this->propertyDataTypeMatcher = $propertyDataTypeMatcher;
+		$this->preferredProperties = $preferredProperties;
+		$this->statementsByGeoProperty = $statementsByGeoProperty;
+	}
+
+	/**
+	 * Stashes the statement if its main snak's property has the globe-coordinate
+	 * data type, grouping stashed statements by property id serialization.
+	 * Statements of any other property are ignored.
+	 *
+	 * @param Statement $statement
+	 */
+	public function processStatement( Statement $statement ) {
+		$propertyId = $statement->getMainSnak()->getPropertyId();
+		$isGeoProperty = $this->propertyDataTypeMatcher->isMatchingDataType(
+			$propertyId,
+			'globe-coordinate'
+		);
+
+		if ( !$isGeoProperty ) {
+			return;
+		}
+
+		$key = $propertyId->getSerialization();
+
+		// First statement seen for this property: start a new list for it.
+		if ( !array_key_exists( $key, $this->statementsByGeoProperty ) ) {
+			$this->statementsByGeoProperty[$key] = new StatementList();
+		}
+
+		$this->statementsByGeoProperty[$key]->addStatement( $statement );
+	}
+
+	/**
+	 * Builds a CoordinatesOutput from the stashed statements and assigns it to the
+	 * geoData member of the given ParserOutput. Does nothing when no
+	 * globe-coordinate statements were stashed.
+	 *
+	 * @param ParserOutput $parserOutput
+	 */
+	public function updateParserOutput( ParserOutput $parserOutput ) {
+		if ( array() === $this->statementsByGeoProperty ) {
+			return;
+		}
+
+		$output = new CoordinatesOutput();
+
+		$others = $this->extractMainSnakCoords();
+		// $others is passed by reference: the coordinate chosen as primary is removed from it.
+		$primary = $this->findPrimaryCoordinate( $others );
+
+		if ( $primary !== null ) {
+			$primary->primary = true;
+			$output->addPrimary( $primary );
+		}
+
+		foreach ( $others as $secondary ) {
+			$output->addSecondary( $secondary );
+		}
+
+		$parserOutput->geoData = $output;
+	}
+
+	/**
+	 * Chooses the primary coordinate by walking the preferred properties in order.
+	 *
+	 * For each preferred property that has stashed statements, its best statements
+	 * are inspected. If a coordinate was found and another best statement follows,
+	 * none of that property's coordinates is treated as primary and the next
+	 * preferred property is tried.
+	 *
+	 * @param Coord[] &$secondaryCoordinates Coordinates keyed by statement GUID. The
+	 *  entry belonging to the statement chosen as primary gets removed.
+	 *
+	 * @return Coord|null
+	 */
+	private function findPrimaryCoordinate( array &$secondaryCoordinates ) {
+		foreach ( $this->preferredProperties as $propertyId ) {
+			if ( !array_key_exists( $propertyId, $this->statementsByGeoProperty ) ) {
+				continue;
+			}
+
+			$bestStatements = $this->statementsByGeoProperty[$propertyId]->getBestStatements();
+
+			// Maybe the only statements have deprecated rank.
+			if ( $bestStatements->isEmpty() ) {
+				continue;
+			}
+
+			// Both are reset per property so $guid always belongs to $primaryCoordinate
+			// and is never read uninitialised.
+			$primaryCoordinate = null;
+			$guid = null;
+
+			foreach ( $bestStatements as $bestStatement ) {
+				if ( $primaryCoordinate instanceof Coord ) {
+					// Already set and there are multiple best statements, so can't just
+					// (somewhat) arbitrarily pick one. Instead, don't mark any as primary
+					// and consider them all as secondary.
+					$primaryCoordinate = null;
+					break;
+				}
+
+				try {
+					$primaryCoordinate = $this->extractMainSnakCoord( $bestStatement );
+					$guid = $bestStatement->getGuid();
+				} catch ( UnexpectedValueException $ex ) {
+					// Could be a mismatching snak value, and then should just skip it.
+					continue;
+				}
+			}
+
+			if ( $primaryCoordinate !== null ) {
+				// Primary coordinate is only primary and not secondary.
+				unset( $secondaryCoordinates[$guid] );
+
+				return $primaryCoordinate;
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * Extracts a Coord from the main snak of every stashed statement.
+	 *
+	 * Statements whose main snak yields no coordinate, or whose value does not
+	 * match the expected type, are left out.
+	 *
+	 * @return Coord[] Coordinates keyed by statement GUID.
+	 */
+	private function extractMainSnakCoords() {
+		$coordinates = array();
+
+		foreach ( $this->statementsByGeoProperty as $statements ) {
+			foreach ( $statements as $statement ) {
+				try {
+					$coord = $this->extractMainSnakCoord( $statement );
+				} catch ( UnexpectedValueException $ex ) {
+					// Can happen if there is a mismatch between property and value type.
+					continue;
+				}
+
+				if ( $coord instanceof Coord ) {
+					$coordinates[$statement->getGuid()] = $coord;
+				}
+			}
+		}
+
+		return $coordinates;
+	}
+
+	/**
+	 * Extracts a Coord from the statement's main snak, if that snak carries a value.
+	 *
+	 * @param Statement $statement
+	 *
+	 * @return Coord|null Null when the main snak is not a PropertyValueSnak.
+	 * @throws UnexpectedValueException when the snak's value is not a globe coordinate.
+	 */
+	private function extractMainSnakCoord( Statement $statement ) {
+		$mainSnak = $statement->getMainSnak();
+
+		if ( $mainSnak instanceof PropertyValueSnak ) {
+			return $this->extractCoordFromSnak( $mainSnak );
+		}
+
+		return null;
+	}
+
+	/**
+	 * Converts the globe-coordinate value of a value snak into a GeoData Coord.
+	 *
+	 * @param PropertyValueSnak $snak Only value snaks carry a data value; the sole
+	 *  caller checks this before calling.
+	 *
+	 * @return Coord
+	 * @throws UnexpectedValueException when the data value is not a GlobeCoordinateValue.
+	 */
+	private function extractCoordFromSnak( PropertyValueSnak $snak ) {
+		$dataValue = $snak->getDataValue();
+
+		if ( !$dataValue instanceof GlobeCoordinateValue ) {
+			throw new UnexpectedValueException(
+				'$dataValue expected to be a GlobeCoordinateValue'
+			);
+		}
+
+		// NOTE(review): only latitude and longitude are handed to Coord; confirm that
+		// Coord's default globe is acceptable for values that are not on Earth.
+		return new Coord( $dataValue->getLatitude(), $dataValue->getLongitude() );
+	}
+
+}
diff --git a/repo/includes/EntityParserOutputGeneratorFactory.php
b/repo/includes/EntityParserOutputGeneratorFactory.php
index e14f880..0ddebb7 100644
--- a/repo/includes/EntityParserOutputGeneratorFactory.php
+++ b/repo/includes/EntityParserOutputGeneratorFactory.php
@@ -10,6 +10,7 @@
use Wikibase\Lib\Store\PropertyDataTypeMatcher;
use Wikibase\Repo\DataUpdates\EntityParserOutputDataUpdater;
use Wikibase\Repo\DataUpdates\ExternalLinksDataUpdate;
+use Wikibase\Repo\DataUpdates\GeoDataDataUpdate;
use Wikibase\Repo\DataUpdates\ImageLinksDataUpdate;
use Wikibase\Repo\DataUpdates\ReferencedEntitiesDataUpdate;
use Wikibase\Repo\LinkedData\EntityDataFormatProvider;
@@ -64,6 +65,11 @@
*/
private $externalEntityIdParser;
+ /**
+ * @var string[]
+ */
+ private $preferredGeoDataProperties;
+
public function __construct(
EntityViewFactory $entityViewFactory,
EntityInfoBuilderFactory $entityInfoBuilderFactory,
@@ -72,7 +78,8 @@
TemplateFactory $templateFactory,
EntityDataFormatProvider $entityDataFormatProvider,
PropertyDataTypeLookup $propertyDataTypeLookup,
- EntityIdParser $externalEntityIdParser
+ EntityIdParser $externalEntityIdParser,
+ array $preferredGeoDataProperties
) {
$this->entityViewFactory = $entityViewFactory;
$this->entityInfoBuilderFactory = $entityInfoBuilderFactory;
@@ -82,6 +89,7 @@
$this->entityDataFormatProvider = $entityDataFormatProvider;
$this->propertyDataTypeLookup = $propertyDataTypeLookup;
$this->externalEntityIdParser = $externalEntityIdParser;
+ $this->preferredGeoDataProperties = $preferredGeoDataProperties;
}
/**
@@ -133,7 +141,7 @@
private function getDataUpdates() {
$propertyDataTypeMatcher = new PropertyDataTypeMatcher(
$this->propertyDataTypeLookup );
- return array(
+ $dataUpdates = array(
new ReferencedEntitiesDataUpdate(
$this->entityTitleLookup,
$this->externalEntityIdParser
@@ -141,6 +149,15 @@
new ExternalLinksDataUpdate( $propertyDataTypeMatcher ),
new ImageLinksDataUpdate( $propertyDataTypeMatcher )
);
+
+ if ( class_exists( 'GeoData' ) ) {
+ $dataUpdates[] = new GeoDataDataUpdate(
+ $propertyDataTypeMatcher,
+ $this->preferredGeoDataProperties
+ );
+ }
+
+ return $dataUpdates;
}
}
diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php
index f84e15b..6ddb0cc 100644
--- a/repo/includes/WikibaseRepo.php
+++ b/repo/includes/WikibaseRepo.php
@@ -1343,7 +1343,8 @@
$templateFactory,
$entityDataFormatProvider,
$this->getPropertyDataTypeLookup(),
- $this->getLocalEntityUriParser()
+ $this->getLocalEntityUriParser(),
+ $this->settings->getSetting(
'preferredGeoDataProperties' )
);
}
diff --git a/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
b/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
new file mode 100644
index 0000000..89c1433
--- /dev/null
+++ b/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
@@ -0,0 +1,254 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Coord;
+use CoordinatesOutput;
+use DataValues\DataValue;
+use DataValues\Geo\Values\LatLongValue;
+use DataValues\Geo\Values\GlobeCoordinateValue;
+use DataValues\StringValue;
+use ParserOutput;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Services\Lookup\InMemoryDataTypeLookup;
+use Wikibase\DataModel\Services\Statement\GuidGenerator;
+use Wikibase\DataModel\Snak\PropertySomeValueSnak;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+use Wikibase\DataModel\Snak\SnakList;
+use Wikibase\DataModel\Statement\Statement;
+use Wikibase\DataModel\Statement\StatementList;
+use Wikibase\Lib\Store\PropertyDataTypeMatcher;
+use Wikibase\Repo\DataUpdates\GeoDataDataUpdate;
+
+/**
+ * @covers Wikibase\Repo\DataUpdates\GeoDataDataUpdate
+ *
+ * @group Wikibase
+ * @group WikibaseRepo
+ * @group Database
+ *
+ * @license GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class GeoDataDataUpdateTest extends \MediaWikiTestCase {
+
+	/**
+	 * Skips every test in this class when the GeoData class is not available,
+	 * then runs the parent set-up.
+	 */
+	protected function setUp() {
+		if ( !class_exists( 'GeoData' ) ) {
+			// Message kept on one line so it contains no embedded line break.
+			$this->markTestSkipped( 'GeoData extension is required.' );
+		}
+
+		parent::setUp();
+	}
+
+	/**
+	 * Feeds the given statements through processStatement() and checks how they
+	 * end up grouped in the private statementsByGeoProperty attribute.
+	 *
+	 * @dataProvider processStatementProvider
+	 */
+	public function testProcessStatement( array $expected, array $statements, $message ) {
+		$matcher = new PropertyDataTypeMatcher( $this->getPropertyDataTypeLookup() );
+		$dataUpdate = new GeoDataDataUpdate( $matcher, array( 'P625', 'P9000' ) );
+
+		array_walk( $statements, function ( Statement $statement ) use ( $dataUpdate ) {
+			$dataUpdate->processStatement( $statement );
+		} );
+
+		$this->assertAttributeEquals(
+			$expected,
+			'statementsByGeoProperty',
+			$dataUpdate,
+			$message
+		);
+	}
+
+	/**
+	 * Checks the CoordinatesOutput written to ParserOutput::geoData for a mix of
+	 * single, multiple, mismatching, some-value and deprecated statements.
+	 */
+	public function testUpdateParserOutput() {
+		$fixtures = $this->getStatements();
+
+		$grouped = array(
+			'P625' => new StatementList( array(
+				$fixtures['geo-property-P625']
+			) ),
+			'P10' => new StatementList( array(
+				$fixtures['geo-property-P10-A'],
+				$fixtures['geo-property-P10-B'],
+				$fixtures['mismatch-P10']
+			) ),
+			'P9000' => new StatementList( array(
+				$fixtures['geo-property-P9000']
+			) ),
+			'P20' => new StatementList( array(
+				$fixtures['some-value-P20']
+			) ),
+			'P17' => new StatementList( array(
+				$fixtures['deprecated-geo-P17']
+			) )
+		);
+
+		$matcher = new PropertyDataTypeMatcher( $this->getPropertyDataTypeLookup() );
+		$preferred = array( 'P17', 'P404', 'P10', 'P20', 'P9000', 'P625' );
+		$dataUpdate = new GeoDataDataUpdate( $matcher, $preferred, $grouped );
+
+		$parserOutput = new ParserOutput();
+		$dataUpdate->updateParserOutput( $parserOutput );
+
+		$expected = new CoordinatesOutput();
+
+		// P9000 statement
+		$primary = new Coord( 33.643664, 20.464222 );
+		$primary->primary = true;
+		$expected->addPrimary( $primary );
+
+		$secondaryLatLons = array(
+			array( 35.690278, 139.700556 ),
+			array( 40.748433, -73.985655 ),
+			array( 44.264464, 52.643666 ),
+			array( 10.0234, 11.52352 ),
+		);
+
+		foreach ( $secondaryLatLons as $latLon ) {
+			$expected->addSecondary( new Coord( $latLon[0], $latLon[1] ) );
+		}
+
+		$this->assertEquals( $expected, $parserOutput->geoData );
+	}
+
+	/**
+	 * Data sets for testProcessStatement().
+	 *
+	 * @return array[] Each entry is: expected statementsByGeoProperty value,
+	 *  statements to process, assertion message.
+	 */
+	public function processStatementProvider() {
+		$statements = $this->getStatements();
+
+		return array(
+			array(
+				array(),
+				array( $statements['string-property'] ),
+				'non-geo property'
+			),
+			array(
+				array(
+					'P625' => new StatementList(
+						array( $statements['geo-property-P625'] )
+					)
+				),
+				array( $statements['geo-property-P625'] ),
+				'geo property'
+			),
+			array(
+				array(
+					'P17' => new StatementList(
+						array( $statements['deprecated-geo-P17'] )
+					)
+				),
+				array( $statements['deprecated-geo-P17'] ),
+				'deprecated geo statement'
+			),
+			array(
+				array(
+					'P10' => new StatementList(
+						array(
+							$statements['geo-property-P10-A'],
+							$statements['geo-property-P10-B']
+						)
+					)
+				),
+				array( $statements['geo-property-P10-A'], $statements['geo-property-P10-B'] ),
+				'multiple geo statements'
+			),
+			array(
+				array(
+					'P20' => new StatementList(
+						array( $statements['some-value-P20'] )
+					)
+				),
+				array( $statements['some-value-P20'] ),
+				'some value snak, still added during initial processing'
+			),
+			array(
+				array(),
+				array( $statements['unknown-property'] ),
+				'statement with unknown property, not in PropertyDataTypeLookup'
+			)
+		);
+	}
+
+	/**
+	 * Builds the statement fixtures shared by the tests.
+	 *
+	 * @return Statement[] Fixtures keyed by a descriptive name.
+	 */
+	private function getStatements() {
+		// The deprecated fixture needs its rank changed after construction.
+		$deprecated = $this->newStatement(
+			new PropertyId( 'P17' ),
+			$this->newGlobeCoordinateValue( 10.0234, 11.52352 )
+		);
+		$deprecated->setRank( Statement::RANK_DEPRECATED );
+
+		return array(
+			'string-property' => $this->newStatement(
+				new PropertyId( 'P42' ),
+				new StringValue( 'kittens!' )
+			),
+			'geo-property-P625' => $this->newStatement(
+				new PropertyId( 'P625' ),
+				$this->newGlobeCoordinateValue( 35.690278, 139.700556 )
+			),
+			'geo-property-P10-A' => $this->newStatement(
+				new PropertyId( 'P10' ),
+				$this->newGlobeCoordinateValue( 40.748433, -73.985655 )
+			),
+			'geo-property-P10-B' => $this->newStatement(
+				new PropertyId( 'P10' ),
+				$this->newGlobeCoordinateValue( 44.264464, 52.643666 )
+			),
+			'geo-property-P9000' => $this->newStatement(
+				new PropertyId( 'P9000' ),
+				$this->newGlobeCoordinateValue( 33.643664, 20.464222 )
+			),
+			'deprecated-geo-P17' => $deprecated,
+			// No data value: newStatement() creates a some-value snak.
+			'some-value-P20' => $this->newStatement( new PropertyId( 'P20' ) ),
+			'mismatch-P10' => $this->newStatement(
+				new PropertyId( 'P10' ),
+				new StringValue( 'omg! wrong value type' )
+			),
+			'unknown-property' => $this->newStatement(
+				new PropertyId( 'P404' ),
+				$this->newGlobeCoordinateValue( 40.733643, -72.352153 )
+			),
+		);
+	}
+
+	/**
+	 * Creates a statement on Q64 with a freshly generated GUID.
+	 *
+	 * @param PropertyId $propertyId
+	 * @param DataValue|null $dataValue When null, a some-value snak is used as the
+	 *  main snak; otherwise a value snak wrapping the given value.
+	 *
+	 * @return Statement
+	 */
+	private function newStatement( PropertyId $propertyId, DataValue $dataValue = null ) {
+		$mainSnak = $dataValue === null
+			? new PropertySomeValueSnak( $propertyId )
+			: new PropertyValueSnak( $propertyId, $dataValue );
+
+		$generator = new GuidGenerator();
+		$guid = $generator->newGuid( new ItemId( 'Q64' ) );
+
+		return new Statement( $mainSnak, null, null, $guid );
+	}
+
+	/**
+	 * Creates a globe-coordinate value for the given position, using 0.001 as the
+	 * second constructor argument.
+	 *
+	 * @param float $lat
+	 * @param float $lon
+	 *
+	 * @return GlobeCoordinateValue
+	 */
+	private function newGlobeCoordinateValue( $lat, $lon ) {
+		return new GlobeCoordinateValue( new LatLongValue( $lat, $lon ), 0.001 );
+	}
+
+	/**
+	 * Builds an in-memory data type lookup covering the properties used by the
+	 * fixtures. P404 is deliberately not registered.
+	 *
+	 * @return InMemoryDataTypeLookup
+	 */
+	private function getPropertyDataTypeLookup() {
+		$dataTypesByProperty = array(
+			'P42' => 'string',
+			'P10' => 'globe-coordinate',
+			'P17' => 'globe-coordinate',
+			'P20' => 'globe-coordinate',
+			'P625' => 'globe-coordinate',
+			'P9000' => 'globe-coordinate',
+		);
+
+		$lookup = new InMemoryDataTypeLookup();
+
+		foreach ( $dataTypesByProperty as $idSerialization => $dataTypeId ) {
+			$lookup->setDataTypeForProperty( new PropertyId( $idSerialization ), $dataTypeId );
+		}
+
+		return $lookup;
+	}
+
+}
--
To view, visit https://gerrit.wikimedia.org/r/243625
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: If4ec903b530568da4d644892678fdf87c0e92078
Gerrit-PatchSet: 17
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Aude <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: JanZerebecki <[email protected]>
Gerrit-Reviewer: Jonas Kress (WMDE) <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits