jenkins-bot has submitted this change and it was merged.

Change subject: Add geodata to parser output
......................................................................


Add geodata to parser output

Bug: T75482
Change-Id: If4ec903b530568da4d644892678fdf87c0e92078
---
M docs/options.wiki
M repo/config/Wikibase.default.php
A repo/includes/DataUpdates/GeoDataDataUpdate.php
M repo/includes/EntityParserOutputGeneratorFactory.php
M repo/includes/WikibaseRepo.php
A repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
6 files changed, 505 insertions(+), 3 deletions(-)

Approvals:
  Daniel Kinzler: Looks good to me, approved
  JanZerebecki: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/docs/options.wiki b/docs/options.wiki
index 83e81da..77760d4 100644
--- a/docs/options.wiki
+++ b/docs/options.wiki
@@ -48,6 +48,7 @@
 ;badgeItems: Items allowed to be used as badges. This setting expects an array 
of serialized item ids pointing to their CSS class names, like <code>array( 
'Q101' => 'wb-badge-goodarticle' )</code>. With this class name it is possible 
to change the icon of a specific badge.
 ;preferredPageImagesProperties: List of image property id strings, in order of 
preference, that should be considered for the <code>page_image</code> page 
property. Defaults to an empty array.
 ;conceptBaseUri: Base URI for building concept URIs (for example used in Rdf 
output). This has to include the protocol and domain, only an entity identifier 
will be appended.
+;preferredGeoDataProperties: List of properties (by id string), in order of 
preference, that are considered when finding primary coordinates for the 
GeoData extension on an entity. Defaults to an empty array.
 
 === Expert Settings ===
 ;dispatchBatchChunkFactor: Chunk factor used internally by the 
<code>dispatchChanges.php</code> script. The default is 3. If most clients are 
not interested in most changes, this factor can be raised to lower the number 
of database queries needed to fetch a batch of changes.
diff --git a/repo/config/Wikibase.default.php b/repo/config/Wikibase.default.php
index 863bb9c..a1c361d 100644
--- a/repo/config/Wikibase.default.php
+++ b/repo/config/Wikibase.default.php
@@ -143,6 +143,12 @@
                                'de-formal'   => 'de-x-formal',
                                'nl-informal' => 'nl-x-informal',
                ),
+
+               // List of globe-coordinate properties (listed by id string), 
in order of preference,
+               // to consider for primary coordinates when extracting 
coordinates from an Entity
+               // for the GeoData extension.
+               // e.g. array( 'P625', 'P1259' )
+               'preferredGeoDataProperties' => array(),
        );
 
        return $defaults;
diff --git a/repo/includes/DataUpdates/GeoDataDataUpdate.php 
b/repo/includes/DataUpdates/GeoDataDataUpdate.php
new file mode 100644
index 0000000..c224129
--- /dev/null
+++ b/repo/includes/DataUpdates/GeoDataDataUpdate.php
@@ -0,0 +1,223 @@
+<?php
+
+namespace Wikibase\Repo\DataUpdates;
+
+use Coord;
+use CoordinatesOutput;
+use DataValues\Geo\Values\GlobeCoordinateValue;
+use ParserOutput;
+use RuntimeException;
+use UnexpectedValueException;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+use Wikibase\DataModel\Snak\Snak;
+use Wikibase\DataModel\Statement\Statement;
+use Wikibase\DataModel\Statement\StatementList;
+use Wikibase\Lib\Store\PropertyDataTypeMatcher;
+
+/**
+ * Extracts and stashes coordinates from Statement main snaks and
+ * adds to ParserOutput for use by the GeoData extension.
+ *
+ * GeoData populates the geo_tags table, and if using
+ * the 'elastic' backend, also adds coordinates to CirrusSearch.
+ * GeoData then provides API modules to get coordinates for pages,
+ * and to find nearby pages to a requested location.
+ *
+ * This class uses the Coord and CoordinatesOutput classes from the
+ * GeoData extension.
+ *
+ * @license GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class GeoDataDataUpdate implements StatementDataUpdate {
+
+       /**
+        * @var PropertyDataTypeMatcher
+        */
+       private $propertyDataTypeMatcher;
+
+       /**
+        * @var string[]
+        */
+       private $preferredProperties;
+
+       /**
+        * @var StatementList[]
+        */
+       private $statementsByGeoProperty;
+
+       /**
+        * Stores the collaborators and refuses to be constructed when the
+        * GeoData class is not available.
+        *
+        * @param PropertyDataTypeMatcher $propertyDataTypeMatcher Used to detect
+        *  properties of the globe-coordinate data type.
+        * @param string[] $preferredProperties Serialized property ids, in order of
+        *  preference, consulted when looking for the primary coordinate.
+        * @param StatementList[] $statementsByGeoProperty Statements by
+        *  globe-coordinate property, keyed by serialized property id.
+        *
+        * @throws RuntimeException If the GeoData class does not exist.
+        */
+       public function __construct(
+               PropertyDataTypeMatcher $propertyDataTypeMatcher,
+               array $preferredProperties,
+               array $statementsByGeoProperty = array()
+       ) {
+               // Coord and CoordinatesOutput, used below in this class, come from GeoData.
+               if ( !class_exists( 'GeoData' ) ) {
+                       throw new RuntimeException( 'GeoDataDataUpdate requires 
the GeoData extension '
+                               . 'to be enabled' );
+               }
+
+               $this->propertyDataTypeMatcher = $propertyDataTypeMatcher;
+               $this->preferredProperties = $preferredProperties;
+               $this->statementsByGeoProperty = $statementsByGeoProperty;
+       }
+
+       /**
+        * Remembers the statement when the property of its main snak has the
+        * globe-coordinate data type. Statements are grouped by serialized
+        * property id; statements of any other data type are ignored.
+        *
+        * @param Statement $statement
+        */
+       public function processStatement( Statement $statement ) {
+               $mainSnakPropertyId = $statement->getMainSnak()->getPropertyId();
+
+               $isGeoProperty = $this->propertyDataTypeMatcher->isMatchingDataType(
+                       $mainSnakPropertyId,
+                       'globe-coordinate'
+               );
+
+               if ( !$isGeoProperty ) {
+                       return;
+               }
+
+               $key = $mainSnakPropertyId->getSerialization();
+
+               // Lazily create the list for a property seen for the first time.
+               if ( !array_key_exists( $key, $this->statementsByGeoProperty ) ) {
+                       $this->statementsByGeoProperty[$key] = new StatementList();
+               }
+
+               $this->statementsByGeoProperty[$key]->addStatement( $statement );
+       }
+
+       /**
+        * Builds a CoordinatesOutput from the collected statements and stores it
+        * in the "geoData" property of the ParserOutput. Nothing is stored when
+        * no globe-coordinate statements were collected.
+        *
+        * @param ParserOutput $parserOutput
+        */
+       public function updateParserOutput( ParserOutput $parserOutput ) {
+               if ( $this->statementsByGeoProperty === array() ) {
+                       return;
+               }
+
+               $remainingCoords = $this->extractMainSnakCoords();
+
+               // Passed by reference: the coordinate picked as primary is removed,
+               // so everything left in $remainingCoords is secondary.
+               $primary = $this->findPrimaryCoordinate( $remainingCoords );
+
+               $output = new CoordinatesOutput();
+
+               if ( $primary !== null ) {
+                       $primary->primary = true;
+                       $output->addPrimary( $primary );
+               }
+
+               foreach ( $remainingCoords as $secondary ) {
+                       $output->addSecondary( $secondary );
+               }
+
+               $parserOutput->geoData = $output;
+       }
+
+       /**
+        * Picks the primary coordinate by walking the preferred properties in
+        * order of preference.
+        *
+        * A property provides the primary coordinate only if exactly one of its
+        * best statements yields a coordinate. Best statements without a usable
+        * globe-coordinate value (non-value snaks or mismatching value types) are
+        * ignored, so the outcome does not depend on the order of the statements.
+        * When several best statements of a property yield a coordinate, none is
+        * picked (they all stay secondary) and the next preferred property is tried.
+        *
+        * @param Coord[] &$secondaryCoordinates Coordinates keyed by statement GUID.
+        *  The entry belonging to the primary coordinate's statement gets removed.
+        *
+        * @return Coord|null
+        */
+       private function findPrimaryCoordinate( array &$secondaryCoordinates ) {
+               foreach ( $this->preferredProperties as $propertyId ) {
+                       if ( !array_key_exists( $propertyId, $this->statementsByGeoProperty ) ) {
+                               continue;
+                       }
+
+                       // Empty if the only statements have deprecated rank.
+                       $bestStatements = $this->statementsByGeoProperty[$propertyId]->getBestStatements();
+
+                       $primaryCoordinate = null;
+                       $primaryGuid = null;
+                       $isAmbiguous = false;
+
+                       foreach ( $bestStatements as $bestStatement ) {
+                               try {
+                                       $coordinate = $this->extractMainSnakCoord( $bestStatement );
+                               } catch ( UnexpectedValueException $ex ) {
+                                       // mismatching snak value, nothing to extract from this statement
+                                       continue;
+                               }
+
+                               if ( !( $coordinate instanceof Coord ) ) {
+                                       // non-value snak, does not compete for being primary
+                                       continue;
+                               }
+
+                               if ( $primaryCoordinate !== null ) {
+                                       // Several best statements carry a coordinate, so we can't just
+                                       // (somewhat) arbitrarily pick one.
+                                       $isAmbiguous = true;
+                                       break;
+                               }
+
+                               $primaryCoordinate = $coordinate;
+                               $primaryGuid = $bestStatement->getGuid();
+                       }
+
+                       if ( $primaryCoordinate !== null && !$isAmbiguous ) {
+                               // primary coordinate is only primary and not secondary
+                               unset( $secondaryCoordinates[$primaryGuid] );
+
+                               return $primaryCoordinate;
+                       }
+               }
+
+               return null;
+       }
+
+       /**
+        * Creates a Coord for every collected statement whose main snak holds a
+        * globe-coordinate value. Statements that yield no coordinate are left out.
+        *
+        * @return Coord[] Coordinates keyed by statement GUID
+        */
+       private function extractMainSnakCoords() {
+               $coordsByGuid = array();
+
+               foreach ( $this->statementsByGeoProperty as $statementList ) {
+                       foreach ( $statementList as $geoStatement ) {
+                               try {
+                                       $extracted = $this->extractMainSnakCoord( $geoStatement );
+                               } catch ( UnexpectedValueException $ex ) {
+                                       // can happen if there is a mismatch between property and value type.
+                                       continue;
+                               }
+
+                               if ( $extracted instanceof Coord ) {
+                                       $coordsByGuid[$geoStatement->getGuid()] = $extracted;
+                               }
+                       }
+               }
+
+               return $coordsByGuid;
+       }
+
+       /**
+        * Turns the main snak of a statement into a Coord.
+        *
+        * @param Statement $statement
+        *
+        * @return Coord|null Null if the main snak is not a PropertyValueSnak.
+        * @throws UnexpectedValueException If the snak's value is not a GlobeCoordinateValue.
+        */
+       private function extractMainSnakCoord( Statement $statement ) {
+               $mainSnak = $statement->getMainSnak();
+
+               return $mainSnak instanceof PropertyValueSnak
+                       ? $this->extractCoordFromSnak( $mainSnak )
+                       : null;
+       }
+
+       /**
+        * Creates a Coord from the latitude and longitude of the snak's data value.
+        *
+        * NOTE(review): the parameter is hinted as Snak, but getDataValue() is
+        * called on it; the only caller in this class passes a PropertyValueSnak.
+        *
+        * @param Snak $snak
+        *
+        * @return Coord
+        * @throws UnexpectedValueException If the data value is not a GlobeCoordinateValue.
+        */
+       private function extractCoordFromSnak( Snak $snak ) {
+               $dataValue = $snak->getDataValue();
+
+               // The property's data type and the actual value type can disagree.
+               if ( !$dataValue instanceof GlobeCoordinateValue ) {
+                       throw new UnexpectedValueException(
+                               '$dataValue expected to be a 
GlobeCoordinateValue'
+                       );
+               }
+
+               return new Coord( $dataValue->getLatitude(), 
$dataValue->getLongitude() );
+       }
+
+}
diff --git a/repo/includes/EntityParserOutputGeneratorFactory.php 
b/repo/includes/EntityParserOutputGeneratorFactory.php
index e14f880..0ddebb7 100644
--- a/repo/includes/EntityParserOutputGeneratorFactory.php
+++ b/repo/includes/EntityParserOutputGeneratorFactory.php
@@ -10,6 +10,7 @@
 use Wikibase\Lib\Store\PropertyDataTypeMatcher;
 use Wikibase\Repo\DataUpdates\EntityParserOutputDataUpdater;
 use Wikibase\Repo\DataUpdates\ExternalLinksDataUpdate;
+use Wikibase\Repo\DataUpdates\GeoDataDataUpdate;
 use Wikibase\Repo\DataUpdates\ImageLinksDataUpdate;
 use Wikibase\Repo\DataUpdates\ReferencedEntitiesDataUpdate;
 use Wikibase\Repo\LinkedData\EntityDataFormatProvider;
@@ -64,6 +65,11 @@
         */
        private $externalEntityIdParser;
 
+       /**
+        * @var string[]
+        */
+       private $preferredGeoDataProperties;
+
        public function __construct(
                EntityViewFactory $entityViewFactory,
                EntityInfoBuilderFactory $entityInfoBuilderFactory,
@@ -72,7 +78,8 @@
                TemplateFactory $templateFactory,
                EntityDataFormatProvider $entityDataFormatProvider,
                PropertyDataTypeLookup $propertyDataTypeLookup,
-               EntityIdParser $externalEntityIdParser
+               EntityIdParser $externalEntityIdParser,
+               array $preferredGeoDataProperties
        ) {
                $this->entityViewFactory = $entityViewFactory;
                $this->entityInfoBuilderFactory = $entityInfoBuilderFactory;
@@ -82,6 +89,7 @@
                $this->entityDataFormatProvider = $entityDataFormatProvider;
                $this->propertyDataTypeLookup = $propertyDataTypeLookup;
                $this->externalEntityIdParser = $externalEntityIdParser;
+               $this->preferredGeoDataProperties = $preferredGeoDataProperties;
        }
 
        /**
@@ -133,7 +141,7 @@
        private function getDataUpdates() {
                $propertyDataTypeMatcher = new PropertyDataTypeMatcher( 
$this->propertyDataTypeLookup );
 
-               return array(
+               $dataUpdates = array(
                        new ReferencedEntitiesDataUpdate(
                                $this->entityTitleLookup,
                                $this->externalEntityIdParser
@@ -141,6 +149,15 @@
                        new ExternalLinksDataUpdate( $propertyDataTypeMatcher ),
                        new ImageLinksDataUpdate( $propertyDataTypeMatcher )
                );
+
+               if ( class_exists( 'GeoData' ) ) {
+                       $dataUpdates[] = new GeoDataDataUpdate(
+                               $propertyDataTypeMatcher,
+                               $this->preferredGeoDataProperties
+                       );
+               }
+
+               return $dataUpdates;
        }
 
 }
diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php
index f84e15b..6ddb0cc 100644
--- a/repo/includes/WikibaseRepo.php
+++ b/repo/includes/WikibaseRepo.php
@@ -1343,7 +1343,8 @@
                        $templateFactory,
                        $entityDataFormatProvider,
                        $this->getPropertyDataTypeLookup(),
-                       $this->getLocalEntityUriParser()
+                       $this->getLocalEntityUriParser(),
+                       $this->settings->getSetting( 
'preferredGeoDataProperties' )
                );
        }
 
diff --git a/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php 
b/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
new file mode 100644
index 0000000..89c1433
--- /dev/null
+++ b/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
@@ -0,0 +1,254 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Coord;
+use CoordinatesOutput;
+use DataValues\DataValue;
+use DataValues\Geo\Values\LatLongValue;
+use DataValues\Geo\Values\GlobeCoordinateValue;
+use DataValues\StringValue;
+use ParserOutput;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Services\Lookup\InMemoryDataTypeLookup;
+use Wikibase\DataModel\Services\Statement\GuidGenerator;
+use Wikibase\DataModel\Snak\PropertySomeValueSnak;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+use Wikibase\DataModel\Snak\SnakList;
+use Wikibase\DataModel\Statement\Statement;
+use Wikibase\DataModel\Statement\StatementList;
+use Wikibase\Lib\Store\PropertyDataTypeMatcher;
+use Wikibase\Repo\DataUpdates\GeoDataDataUpdate;
+
+/**
+ * @covers Wikibase\Repo\DataUpdates\GeoDataDataUpdate
+ *
+ * @group Wikibase
+ * @group WikibaseRepo
+ * @group Database
+ *
+ * @license GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class GeoDataDataUpdateTest extends \MediaWikiTestCase {
+
+       /**
+        * Marks the test as skipped when the GeoData class does not exist,
+        * then runs the parent set-up.
+        */
+       protected function setUp() {
+               if ( !class_exists( 'GeoData' ) ) {
+                       $this->markTestSkipped( 'GeoData extension is 
required.' );
+               }
+
+               parent::setUp();
+       }
+
+       /**
+        * Feeds the given statements to processStatement() and checks how they
+        * end up grouped in the statementsByGeoProperty attribute.
+        *
+        * @dataProvider processStatementProvider
+        */
+       public function testProcessStatement( array $expected, array $statements, $message ) {
+               $matcher = new PropertyDataTypeMatcher( $this->getPropertyDataTypeLookup() );
+               $geoDataUpdate = new GeoDataDataUpdate( $matcher, array( 'P625', 'P9000' ) );
+
+               foreach ( $statements as $singleStatement ) {
+                       $geoDataUpdate->processStatement( $singleStatement );
+               }
+
+               $this->assertAttributeEquals(
+                       $expected,
+                       'statementsByGeoProperty',
+                       $geoDataUpdate,
+                       $message
+               );
+       }
+
+       /**
+        * Checks the CoordinatesOutput stored on the ParserOutput for a mix of
+        * single, multiple, mismatching, some-value and deprecated statements.
+        */
+       public function testUpdateParserOutput() {
+               $fixtures = $this->getStatements();
+
+               $groupedStatements = array(
+                       'P625' => new StatementList( array(
+                               $fixtures['geo-property-P625']
+                       ) ),
+                       'P10' => new StatementList( array(
+                               $fixtures['geo-property-P10-A'],
+                               $fixtures['geo-property-P10-B'],
+                               $fixtures['mismatch-P10']
+                       ) ),
+                       'P9000' => new StatementList( array(
+                               $fixtures['geo-property-P9000']
+                       ) ),
+                       'P20' => new StatementList( array(
+                               $fixtures['some-value-P20']
+                       ) ),
+                       'P17' => new StatementList( array(
+                               $fixtures['deprecated-geo-P17']
+                       ) )
+               );
+
+               $matcher = new PropertyDataTypeMatcher( $this->getPropertyDataTypeLookup() );
+               $geoDataUpdate = new GeoDataDataUpdate(
+                       $matcher,
+                       array( 'P17', 'P404', 'P10', 'P20', 'P9000', 'P625' ),
+                       $groupedStatements
+               );
+
+               $actualOutput = new ParserOutput();
+               $geoDataUpdate->updateParserOutput( $actualOutput );
+
+               // P9000 statement
+               $primary = new Coord( 33.643664, 20.464222 );
+               $primary->primary = true;
+
+               $expectedCoordinates = new CoordinatesOutput();
+               $expectedCoordinates->addPrimary( $primary );
+
+               $secondaryLatLons = array(
+                       array( 35.690278, 139.700556 ),
+                       array( 40.748433, -73.985655 ),
+                       array( 44.264464, 52.643666 ),
+                       array( 10.0234, 11.52352 )
+               );
+
+               foreach ( $secondaryLatLons as $latLon ) {
+                       $expectedCoordinates->addSecondary( new Coord( $latLon[0], $latLon[1] ) );
+               }
+
+               $this->assertEquals( $expectedCoordinates, $actualOutput->geoData );
+       }
+
+       /**
+        * Data provider for testProcessStatement().
+        *
+        * @return array[] Each case is: expected statementsByGeoProperty value,
+        *  statements to process, assertion message.
+        */
+       public function processStatementProvider() {
+               $statements = $this->getStatements();
+
+               return array(
+                       array(
+                               array(),
+                               array( $statements['string-property'] ),
+                               'non-geo property'
+                       ),
+                       array(
+                               array(
+                                       'P625' => new StatementList(
+                                               array( 
$statements['geo-property-P625'] )
+                                       )
+                               ),
+                               array( $statements['geo-property-P625'] ),
+                               'geo property'
+                       ),
+                       array(
+                               array(
+                                       'P17' => new StatementList(
+                                               array( 
$statements['deprecated-geo-P17'] )
+                                       )
+                               ),
+                               array( $statements['deprecated-geo-P17'] ),
+                               'deprecated geo statement'
+                       ),
+                       array(
+                               array(
+                                       'P10' => new StatementList(
+                                               array(
+                                                       
$statements['geo-property-P10-A'],
+                                                       
$statements['geo-property-P10-B']
+                                               )
+                                       )
+                               ),
+                               array( $statements['geo-property-P10-A'], 
$statements['geo-property-P10-B'] ),
+                               'multiple geo statements'
+                       ),
+                       array(
+                               array(
+                                       'P20' => new StatementList(
+                                               array( 
$statements['some-value-P20'] )
+                                       )
+                               ),
+                               array( $statements['some-value-P20'] ),
+                               'some value snak, still added during initial 
processing'
+                       ),
+                       array(
+                               array(),
+                               array( $statements['unknown-property'] ),
+                               'statement with unknown property, not in 
PropertyDataTypeLookup'
+                       )
+               );
+       }
+
+       /**
+        * Builds the statement fixtures shared by the tests.
+        *
+        * @return Statement[] Statements keyed by a descriptive name
+        */
+       private function getStatements() {
+               $deprecatedP17 = $this->newStatement(
+                       new PropertyId( 'P17' ),
+                       $this->newGlobeCoordinateValue( 10.0234, 11.52352 )
+               );
+               $deprecatedP17->setRank( Statement::RANK_DEPRECATED );
+
+               return array(
+                       'string-property' => $this->newStatement(
+                               new PropertyId( 'P42' ),
+                               new StringValue( 'kittens!' )
+                       ),
+                       'geo-property-P625' => $this->newStatement(
+                               new PropertyId( 'P625' ),
+                               $this->newGlobeCoordinateValue( 35.690278, 139.700556 )
+                       ),
+                       'geo-property-P10-A' => $this->newStatement(
+                               new PropertyId( 'P10' ),
+                               $this->newGlobeCoordinateValue( 40.748433, -73.985655 )
+                       ),
+                       'geo-property-P10-B' => $this->newStatement(
+                               new PropertyId( 'P10' ),
+                               $this->newGlobeCoordinateValue( 44.264464, 52.643666 )
+                       ),
+                       'geo-property-P9000' => $this->newStatement(
+                               new PropertyId( 'P9000' ),
+                               $this->newGlobeCoordinateValue( 33.643664, 20.464222 )
+                       ),
+                       'deprecated-geo-P17' => $deprecatedP17,
+                       // no data value given, so this becomes a some-value snak
+                       'some-value-P20' => $this->newStatement( new PropertyId( 'P20' ) ),
+                       'mismatch-P10' => $this->newStatement(
+                               new PropertyId( 'P10' ),
+                               new StringValue( 'omg! wrong value type' )
+                       ),
+                       'unknown-property' => $this->newStatement(
+                               new PropertyId( 'P404' ),
+                               $this->newGlobeCoordinateValue( 40.733643, -72.352153 )
+                       )
+               );
+       }
+
+       /**
+        * Creates a statement with a freshly generated GUID for item Q64.
+        *
+        * @param PropertyId $propertyId
+        * @param DataValue|null $dataValue Omit to get a some-value main snak.
+        *
+        * @return Statement
+        */
+       private function newStatement( PropertyId $propertyId, DataValue $dataValue = null ) {
+               $mainSnak = $dataValue === null
+                       ? new PropertySomeValueSnak( $propertyId )
+                       : new PropertyValueSnak( $propertyId, $dataValue );
+
+               $generator = new GuidGenerator();
+               $statementGuid = $generator->newGuid( new ItemId( 'Q64' ) );
+
+               return new Statement( $mainSnak, null, null, $statementGuid );
+       }
+
+       /**
+        * Creates a globe coordinate value with a precision of 0.001.
+        *
+        * @param float $lat
+        * @param float $lon
+        *
+        * @return GlobeCoordinateValue
+        */
+       private function newGlobeCoordinateValue( $lat, $lon ) {
+               return new GlobeCoordinateValue( new LatLongValue( $lat, $lon ), 0.001 );
+       }
+
+       /**
+        * Creates an in-memory lookup that knows the data types of the
+        * properties used by the fixtures. P404 is deliberately not registered.
+        *
+        * @return InMemoryDataTypeLookup
+        */
+       private function getPropertyDataTypeLookup() {
+               $dataTypesByProperty = array(
+                       'P42' => 'string',
+                       'P10' => 'globe-coordinate',
+                       'P17' => 'globe-coordinate',
+                       'P20' => 'globe-coordinate',
+                       'P625' => 'globe-coordinate',
+                       'P9000' => 'globe-coordinate'
+               );
+
+               $lookup = new InMemoryDataTypeLookup();
+
+               foreach ( $dataTypesByProperty as $idSerialization => $dataTypeId ) {
+                       $lookup->setDataTypeForProperty( new PropertyId( $idSerialization ), $dataTypeId );
+               }
+
+               return $lookup;
+       }
+
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/243625
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If4ec903b530568da4d644892678fdf87c0e92078
Gerrit-PatchSet: 17
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Aude <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: JanZerebecki <[email protected]>
Gerrit-Reviewer: Jonas Kress (WMDE) <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to