Aude has uploaded a new change for review.
https://gerrit.wikimedia.org/r/246280
Change subject: Simplify processing of coordinates in GeoDataDataUpdate
......................................................................
Simplify processing of coordinates in GeoDataDataUpdate
also:
* more tests added
* exclude deprecated statements
* don't override $parserOutput->geoData if already set
* get rid of $statementsByGeoProperty
Change-Id: I4dcdae4b821bbca3ab863aa069a9ac75de592b14
---
M repo/includes/DataUpdates/GeoDataDataUpdate.php
M repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
2 files changed, 245 insertions(+), 178 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase
refs/changes/80/246280/1
diff --git a/repo/includes/DataUpdates/GeoDataDataUpdate.php
b/repo/includes/DataUpdates/GeoDataDataUpdate.php
index c224129..059365d 100644
--- a/repo/includes/DataUpdates/GeoDataDataUpdate.php
+++ b/repo/includes/DataUpdates/GeoDataDataUpdate.php
@@ -42,20 +42,18 @@
private $preferredProperties;
/**
- * @var StatementList[]
+ * @var Coord[]
*/
- private $statementsByGeoProperty;
+ private $coordinates = array();
/**
* @param PropertyDataTypeMatcher $propertyDataTypeMatcher
* @param string[] $preferredProperties
- * @param StatementList[] $statementsByGeoProperty Statements by
globe-coordinate property
* @throws RuntimeException
*/
public function __construct(
PropertyDataTypeMatcher $propertyDataTypeMatcher,
- array $preferredProperties,
- array $statementsByGeoProperty = array()
+ array $preferredProperties
) {
if ( !class_exists( 'GeoData' ) ) {
throw new RuntimeException( 'GeoDataDataUpdate requires
the GeoData extension '
@@ -64,7 +62,6 @@
$this->propertyDataTypeMatcher = $propertyDataTypeMatcher;
$this->preferredProperties = $preferredProperties;
- $this->statementsByGeoProperty = $statementsByGeoProperty;
}
/**
@@ -79,13 +76,16 @@
$propertyId,
'globe-coordinate'
) ) {
- $serializedId = $propertyId->getSerialization();
+ $rank = $statement->getRank();
- if ( !array_key_exists( $serializedId,
$this->statementsByGeoProperty ) ) {
- $this->statementsByGeoProperty[$serializedId] =
new StatementList();
+ if ( $rank > Statement::RANK_DEPRECATED ) {
+ $coordinate = $this->extractMainSnakCoord(
$statement );
+
+ if ( $coordinate instanceof Coord ) {
+ $key = $this->makeCoordinateKey(
$propertyId->getSerialization(), $rank );
+ $this->coordinates[$key][] =
$coordinate;
+ }
}
-
-
$this->statementsByGeoProperty[$serializedId]->addStatement( $statement );
}
}
@@ -93,69 +93,58 @@
* @param ParserOutput $parserOutput
*/
public function updateParserOutput( ParserOutput $parserOutput ) {
- if ( $this->statementsByGeoProperty === array() ) {
- return;
+ $coordinatesOutput = isset( $parserOutput->geoData ) ?: new
CoordinatesOutput();
+
+ if ( $coordinatesOutput->getPrimary() === false ) {
+ $primaryCoordKey = $this->addPrimaryCoordinate(
$coordinatesOutput );
}
- $coordinatesOutput = new CoordinatesOutput();
-
- $secondaryCoordinates = $this->extractMainSnakCoords();
- $primaryCoordinate = $this->findPrimaryCoordinate(
$secondaryCoordinates );
-
- if ( $primaryCoordinate !== null ) {
- $primaryCoordinate->primary = true;
- $coordinatesOutput->addPrimary( $primaryCoordinate );
- }
-
- foreach ( $secondaryCoordinates as $coordinate ) {
- $coordinatesOutput->addSecondary( $coordinate );
+ foreach ( $this->coordinates as $key => $coordinates ) {
+ if ( $key !== $primaryCoordKey ) {
+ foreach ( $coordinates as $coordinate ) {
+ $coordinatesOutput->addSecondary(
$coordinate );
+ }
+ }
}
$parserOutput->geoData = $coordinatesOutput;
}
/**
- * @param Coord[] &$secondaryCoordinates Primary coordinate gets
removed.
+ * @param CoordinatesOutput $coordinatesOutput
*
- * @return Coord|null
+ * @return string|null Array key for Coord selected as primary.
*/
- private function findPrimaryCoordinate( array &$secondaryCoordinates ) {
+ private function addPrimaryCoordinate( CoordinatesOutput
$coordinatesOutput ) {
+ foreach ( $this->preferredProperties as $propertyIdString ) {
+ $key = $this->makeCoordinateKey( $propertyIdString,
Statement::RANK_PREFERRED );
- foreach ( $this->preferredProperties as $propertyId ) {
- $primaryCoordinate = null;
+ $preferred = isset( $this->coordinates[$key] ) ?
$this->coordinates[$key] : array();
+ $preferredCount = count( $preferred );
- if ( array_key_exists( $propertyId,
$this->statementsByGeoProperty ) ) {
- $bestStatements =
$this->statementsByGeoProperty[$propertyId]->getBestStatements();
+ if ( $preferredCount === 1 ) {
+ $primaryCoordinate = $preferred[0];
+ } elseif ( $preferredCount === 0 ) {
+ $key = $this->makeCoordinateKey(
$propertyIdString, Statement::RANK_NORMAL );
+ $normal = isset( $this->coordinates[$key] ) ?
$this->coordinates[$key] : array();
+ $normalCount = count( $normal );
- // maybe the only statements have deprecated
rank
- if ( $bestStatements->isEmpty() ) {
- continue;
+ if ( $normalCount === 1 ) {
+ $primaryCoordinate = $normal[0];
+ } elseif ( $normalCount > 1 ) {
+ // multiple normal coordinates
+ return null;
}
-
- foreach ( $bestStatements as $bestStatement ) {
- if ( $primaryCoordinate instanceof
Coord ) {
- // already set and there are
multiple best statements, so
- // can't just (somewhat)
arbitrarily pick one. Instead, don't
- // mark any as primary and
consider them all as secondary.
- $primaryCoordinate = null;
- break;
- }
-
- try {
- $primaryCoordinate =
$this->extractMainSnakCoord( $bestStatement );
- $guid =
$bestStatement->getGuid();
- } catch ( UnexpectedValueException $ex
) {
- // could be a mismatching snak
value, and then should just skip it.
- continue;
- }
- }
+ } else {
+ // multiple preferred coordinates
+ return null;
}
- if ( $primaryCoordinate !== null ) {
- // primary coordinate is only primary and not
secondary
- unset( $secondaryCoordinates[$guid] );
+ if ( isset( $primaryCoordinate ) ) {
+ $primaryCoordinate->primary = true;
+ $coordinatesOutput->addPrimary(
$primaryCoordinate );
- return $primaryCoordinate;
+ return $key;
}
}
@@ -163,28 +152,13 @@
}
/**
- * @return Coord[]
+ * @param string $propertyIdString
+ * @param int $rank
+ *
+ * @return string
*/
- private function extractMainSnakCoords() {
- $coordinates = array();
-
- foreach ( $this->statementsByGeoProperty as $propertyId =>
$statements ) {
- foreach ( $statements as $statement ) {
- try {
- $coord = $this->extractMainSnakCoord(
$statement );
-
- if ( $coord instanceof Coord ) {
- $guid = $statement->getGuid();
- $coordinates[$guid] = $coord;
- }
- } catch ( UnexpectedValueException $ex ) {
- // can happen if there is a mismatch
between property and value type.
- continue;
- }
- }
- }
-
- return $coordinates;
+ private function makeCoordinateKey( $propertyIdString, $rank ) {
+ return $propertyIdString . '-' . $rank;
}
/**
@@ -199,22 +173,11 @@
return null;
}
- return $this->extractCoordFromSnak( $snak );
- }
-
- /**
- * @param Snak $snak
- *
- * @return Coord
- * @throws UnexpectedValueException
- */
- private function extractCoordFromSnak( Snak $snak ) {
$dataValue = $snak->getDataValue();
if ( !$dataValue instanceof GlobeCoordinateValue ) {
- throw new UnexpectedValueException(
- '$dataValue expected to be a
GlobeCoordinateValue'
- );
+ // Property data type - value mismatch
+ return null;
}
return new Coord( $dataValue->getLatitude(),
$dataValue->getLongitude() );
diff --git a/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
b/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
index 94ae090..e20087d 100644
--- a/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
+++ b/repo/tests/phpunit/includes/DataUpdates/GeoDataDataUpdateTest.php
@@ -55,166 +55,258 @@
$this->assertAttributeEquals(
$expected,
- 'statementsByGeoProperty',
+ 'coordinates',
$dataUpdate,
$message
);
}
- public function testUpdateParserOutput() {
- $statements = $this->getStatements();
-
- $statementsByGeoProperty = array(
- 'P625' => new StatementList( array(
- $statements['geo-property-P625']
- ) ),
- 'P10' => new StatementList( array(
- $statements['geo-property-P10-A'],
- $statements['geo-property-P10-B'],
- $statements['mismatch-P10']
- ) ),
- 'P9000' => new StatementList( array(
- $statements['geo-property-P9000']
- ) ),
- 'P20' => new StatementList( array(
- $statements['some-value-P20']
- ) ),
- 'P17' => new StatementList( array(
- $statements['deprecated-geo-P17']
- ) )
- );
-
- $dataUpdate = new GeoDataDataUpdate(
- new PropertyDataTypeMatcher(
$this->getPropertyDataTypeLookup() ),
- array( 'P17', 'P404', 'P10', 'P20', 'P9000', 'P625' ),
- $statementsByGeoProperty
- );
-
- $parserOutput = new ParserOutput();
-
- $dataUpdate->updateParserOutput( $parserOutput );
-
- $expected = new CoordinatesOutput();
-
- // P9000 statement
- $coord = new Coord( 33.643664, 20.464222 );
- $coord->primary = true;
-
- $expected->addPrimary( $coord );
- $expected->addSecondary( new Coord( 35.690278, 139.700556 ) );
- $expected->addSecondary( new Coord( 40.748433, -73.985655 ) );
- $expected->addSecondary( new Coord( 44.264464, 52.643666 ) );
- $expected->addSecondary( new Coord( 10.0234, 11.52352 ) );
-
- $this->assertEquals( $expected, $parserOutput->geoData );
- }
-
public function processStatementProvider() {
$statements = $this->getStatements();
+ $coords = $this->getCoords();
return array(
array(
array(),
- array( $statements['string-property'] ),
- 'non-geo property'
+ array( $statements['P42-string'] ),
+ 'non-geo statement'
),
array(
array(
- 'P625' => new StatementList(
- array(
$statements['geo-property-P625'] )
- )
+ 'P625-1' => array( $coords['P625-geo'] )
),
- array( $statements['geo-property-P625'] ),
- 'geo property'
+ array( $statements['P625-geo'] ),
+ 'one normal geo statement'
),
array(
- array(
- 'P17' => new StatementList(
- array(
$statements['deprecated-geo-P17'] )
- )
- ),
- array( $statements['deprecated-geo-P17'] ),
+ array(),
+ array( $statements['P17-geo-deprecated'] ),
'deprecated geo statement'
),
array(
array(
- 'P10' => new StatementList(
- array(
-
$statements['geo-property-P10-A'],
-
$statements['geo-property-P10-B']
- )
+ 'P10-1' => array(
+ $coords['P10-geo-A'],
+ $coords['P10-geo-B']
)
),
- array( $statements['geo-property-P10-A'],
$statements['geo-property-P10-B'] ),
- 'multiple geo statements'
+ array(
+ $statements['P10-geo-A'],
+ $statements['P10-geo-B']
+ ),
+ 'multiple normal statements'
),
array(
array(
- 'P20' => new StatementList(
- array(
$statements['some-value-P20'] )
+ 'P10-1' => array(
+ $coords['P10-geo-A']
+ ),
+ 'P10-2' => array(
+ $coords['P10-geo-preferred-A'],
+ $coords['P10-geo-preferred-B']
)
),
- array( $statements['some-value-P20'] ),
- 'some value snak, still added during initial
processing'
+ array(
+ $statements['P10-geo-A'],
+ $statements['P10-geo-preferred-A'],
+ $statements['P10-geo-preferred-B']
+ ),
+ 'multiple preferred, one normal'
+ ),
+ array(
+ array(
+ 'P10-1' => array(
+ $coords['P10-geo-A'],
+ $coords['P10-geo-B']
+ ),
+ 'P10-2' => array(
+ $coords['P10-geo-preferred-A']
+ )
+ ),
+ array(
+ $statements['P10-geo-A'],
+ $statements['P10-geo-B'],
+ $statements['P10-geo-preferred-A']
+ ),
+ 'multiple normal, one preferred'
),
array(
array(),
- array( $statements['unknown-property'] ),
+ array( $statements['P20-some-value'] ),
+ 'geo property with some value snak'
+ ),
+ array(
+ array(),
+ array( $statements['P404-unknown-property'] ),
'statement with unknown property, not in
PropertyDataTypeLookup'
)
);
}
+ public function
testUpdateParserOutput_withPrimaryCoordPreferredStatement() {
+ $dataUpdate = new GeoDataDataUpdate(
+ new PropertyDataTypeMatcher(
$this->getPropertyDataTypeLookup() ),
+ array( 'P9000', 'P625' )
+ );
+
+ foreach ( $this->getStatements() as $statement ) {
+ $dataUpdate->processStatement( $statement );
+ }
+
+ $coords = $this->getCoords();
+
+ $expected = new CoordinatesOutput();
+
+ $primaryCoordinate = $coords['P9000-geo-preferred'];
+ $primaryCoordinate->primary = true;
+
+ $expected->addPrimary( $primaryCoordinate );
+ unset( $coords['P9000-geo-preferred'] );
+
+ foreach ( $coords as $coord ) {
+ $expected->addSecondary( $coord );
+ }
+
+ $parserOutput = new ParserOutput();
+ $dataUpdate->updateParserOutput( $parserOutput );
+
+ $this->assertEquals( $expected, $parserOutput->geoData );
+ }
+
+ public function
testUpdateParserOutput_withPrimaryCoordNormalStatement() {
+ $dataUpdate = new GeoDataDataUpdate(
+ new PropertyDataTypeMatcher(
$this->getPropertyDataTypeLookup() ),
+ array( 'P625', 'P10' )
+ );
+
+ foreach ( $this->getStatements() as $statement ) {
+ $dataUpdate->processStatement( $statement );
+ }
+
+ $expected = new CoordinatesOutput();
+ $coords = $this->getCoords();
+
+ $primaryCoordinate = $coords['P625-geo'];
+ $primaryCoordinate->primary = true;
+
+ $expected->addPrimary( $primaryCoordinate );
+ unset( $coords['P625-geo'] );
+
+ foreach ( $coords as $coord ) {
+ $expected->addSecondary( $coord );
+ }
+
+ $parserOutput = new ParserOutput();
+ $dataUpdate->updateParserOutput( $parserOutput );
+
+ $this->assertEquals( $expected, $parserOutput->geoData );
+ }
+
+ public function testUpdateParserOutput_noPrimaryCoord() {
+ $dataUpdate = new GeoDataDataUpdate(
+ new PropertyDataTypeMatcher(
$this->getPropertyDataTypeLookup() ),
+ array( 'P17', 'P404', 'P10', 'P20', 'P9000', 'P9001',
'P625' )
+ );
+
+ foreach ( $this->getStatements() as $statement ) {
+ $dataUpdate->processStatement( $statement );
+ }
+
+ $expected = new CoordinatesOutput();
+
+ foreach ( $this->getCoords() as $coord ) {
+ $expected->addSecondary( $coord );
+ }
+
+ $parserOutput = new ParserOutput();
+ $dataUpdate->updateParserOutput( $parserOutput );
+
+ $this->assertEquals( $expected, $parserOutput->geoData );
+ }
+
private function getStatements() {
$statements = array();
- $statements['string-property'] = $this->newStatement(
+ $statements['P42-string'] = $this->newStatement(
new PropertyId( 'P42' ),
new StringValue( 'kittens!' )
);
- $statements['geo-property-P625'] = $this->newStatement(
+ $statements['P625-geo'] = $this->newStatement(
new PropertyId( 'P625' ),
$this->newGlobeCoordinateValue( 35.690278, 139.700556 )
);
- $statements['geo-property-P10-A'] = $this->newStatement(
+ $statements['P10-geo-A'] = $this->newStatement(
new PropertyId( 'P10' ),
$this->newGlobeCoordinateValue( 40.748433, -73.985655 )
);
- $statements['geo-property-P10-B'] = $this->newStatement(
+ $statements['P10-geo-B'] = $this->newStatement(
new PropertyId( 'P10' ),
$this->newGlobeCoordinateValue( 44.264464, 52.643666 )
);
- $statements['geo-property-P9000'] = $this->newStatement(
+ $statements['P10-geo-preferred-A'] =
$this->newStatementWithRank(
+ new PropertyId( 'P10' ),
+ $this->newGlobeCoordinateValue( 50.02440, 41.50202 ),
+ Statement::RANK_PREFERRED
+ );
+
+ $statements['P10-geo-preferred-B'] =
$this->newStatementWithRank(
+ new PropertyId( 'P10' ),
+ $this->newGlobeCoordinateValue( 70.0144, 30.0015 ),
+ Statement::RANK_PREFERRED
+ );
+
+ $statements['P9000-geo-A'] = $this->newStatement(
new PropertyId( 'P9000' ),
$this->newGlobeCoordinateValue( 33.643664, 20.464222 )
);
- $deprecatedGeoValueStatement = $this->newStatement(
- new PropertyId( 'P17' ),
- $this->newGlobeCoordinateValue( 10.0234, 11.52352 )
+ $statements['P9000-geo-B'] = $this->newStatement(
+ new PropertyId( 'P9000' ),
+ $this->newGlobeCoordinateValue( 11.1234, 12.5678 )
);
- $deprecatedGeoValueStatement->setRank(
Statement::RANK_DEPRECATED );
+ $statements['P9000-geo-preferred'] =
$this->newStatementWithRank(
+ new PropertyId( 'P9000' ),
+ $this->newGlobeCoordinateValue( 77.7777, 33.3333 ),
+ Statement::RANK_PREFERRED
+ );
- $statements['deprecated-geo-P17'] =
$deprecatedGeoValueStatement;
+ $statements['P17-geo-deprecated'] = $this->newStatementWithRank(
+ new PropertyId( 'P17' ),
+ $this->newGlobeCoordinateValue( 10.0234, 11.52352 ),
+ Statement::RANK_DEPRECATED
+ );
- $statements['some-value-P20'] = $this->newStatement( new
PropertyId( 'P20' ) );
+ $statements['P20-some-value'] = $this->newStatement( new
PropertyId( 'P20' ) );
- $statements['mismatch-P10'] = $this->newStatement(
+ $statements['P10-mismatch'] = $this->newStatement(
new PropertyId( 'P10' ),
new StringValue( 'omg! wrong value type' )
);
- $statements['unknown-property'] = $this->newStatement(
+ $statements['P404-unknown-property'] = $this->newStatement(
new PropertyId( 'P404' ),
$this->newGlobeCoordinateValue( 40.733643, -72.352153 )
);
return $statements;
+ }
+
+ private function getCoords() {
+ return array(
+ 'P625-geo' => new Coord( 35.690278, 139.700556 ),
+ 'P10-geo-A' => new Coord( 40.748433, -73.985655 ),
+ 'P10-geo-B' => new Coord( 44.264464, 52.643666 ),
+ 'P10-geo-preferred-A' => new Coord( 50.02440, 41.50202
),
+ 'P10-geo-preferred-B' => new Coord( 70.0144, 30.0015 ),
+ 'P9000-geo-A' => new Coord( 33.643664, 20.464222 ),
+ 'P9000-geo-B' => new Coord( 11.1234, 12.5678 ),
+ 'P9000-geo-preferred' => new Coord( 77.7777, 33.3333 )
+ );
}
private function newStatement( PropertyId $propertyId, DataValue
$dataValue = null ) {
@@ -229,6 +321,17 @@
$guid = $guidGenerator->newGuid( new ItemId( 'Q64' ) );
return new Statement( $snak, null, null, $guid );
+ }
+
+ private function newStatementWithRank(
+ PropertyId $propertyId,
+ DataValue $dataValue,
+ $rank
+ ) {
+ $rankedStatement = $this->newStatement( $propertyId, $dataValue
);
+ $rankedStatement->setRank( $rank );
+
+ return $rankedStatement;
}
private function newGlobeCoordinateValue( $lat, $lon ) {
@@ -246,6 +349,7 @@
$dataTypeLookup->setDataTypeForProperty( new PropertyId( 'P20'
), 'globe-coordinate' );
$dataTypeLookup->setDataTypeForProperty( new PropertyId( 'P625'
), 'globe-coordinate' );
$dataTypeLookup->setDataTypeForProperty( new PropertyId(
'P9000' ), 'globe-coordinate' );
+ $dataTypeLookup->setDataTypeForProperty( new PropertyId(
'P9001' ), 'globe-coordinate' );
return $dataTypeLookup;
}
--
To view, visit https://gerrit.wikimedia.org/r/246280
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I4dcdae4b821bbca3ab863aa069a9ac75de592b14
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Aude <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits