Tamslo has uploaded a new change for review.
https://gerrit.wikimedia.org/r/189462
Change subject: Added constraint checks, DataValueComparer and adapted locales.
......................................................................
Added constraint checks, DataValueComparer and adapted locales.
Change-Id: I8cda3c5e8b463f0e822337526004fa0dadea4eda
---
M constraint-report/specials/SpecialWikidataConstraintReport.php
M external-validation/specials/SpecialCrossCheck.php
M external-validation/src/CrossCheck/Comparer/DataValueComparer.php
M external-validation/src/CrossCheck/Comparer/EntityIdValueComparer.php
A external-validation/src/CrossCheck/Comparer/MonolingualTextValueComparer.php
A external-validation/src/CrossCheck/Comparer/MultilingualTextValueComparer.php
A external-validation/src/CrossCheck/Comparer/QuantityValueComparer.php
A external-validation/src/CrossCheck/Comparer/StringValueComparer.php
A external-validation/src/CrossCheck/Comparer/TimeValueComparer.php
M external-validation/src/CrossCheck/CrossChecker.php
A external-validation/src/CrossCheck/DumpMetaInformation.php
M external-validation/src/CrossCheck/Result/CompareResult.php
M external-validation/src/CrossCheck/mapping.inc.php
M external-validation/src/UpdateTable/Importer/GndImporter.php
M external-validation/src/UpdateTable/Importer/Importer.php
M i18n/en.json
M i18n/qqq.json
17 files changed, 1,063 insertions(+), 306 deletions(-)
git pull
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikidataQuality
refs/changes/62/189462/1
diff --git a/constraint-report/specials/SpecialWikidataConstraintReport.php
b/constraint-report/specials/SpecialWikidataConstraintReport.php
index 88d3123..93ab220 100644
--- a/constraint-report/specials/SpecialWikidataConstraintReport.php
+++ b/constraint-report/specials/SpecialWikidataConstraintReport.php
@@ -10,182 +10,438 @@
use Wikibase\DataModel\Statement;
use Wikibase\DataModel\Snak;
+//TODO (prio high): define tests for the checks against constraints (test
items with statements)
+//TODO (prio high): add support for remaining constraints (some might use a
common set of methods):
+/* [todo] Commons link
+ * [todo] Conflicts with - similar to Target required claim (target is
self)
+ * [DONE] Diff within range
+ * [todo] Format
+ * [DONE] Inverse - special case of Target required claim
+ * [todo] Item
+ * [DONE] Multi value - similar to Single value
+ * [DONE] One of
+ * [DONE] Qualifier
+ * [todo] Qualifiers
+ * [DONE] Range
+ * [DONE] Single value - similar to Multi value
+ * [DONE] Symmetric - special case of Inverse, which is a special case of
Target required claim
+ * [DONE] Target required claim
+ * [todo] Type - similar to Value type
+ * [todo] Unique value
+ * [todo] Value type - similar to Type
+ */
+//TODO (prio normal): add templates for items, properties, constraints to our
instance and write them like {{Q|1234}} or [[Property:P567]] or
{{tl|Constraint:Range}} or ... in this code
+//TODO (prio normal): check for exceptions and mark a statement as such
+//TODO (prio normal): handle qualifiers, e.g. on a property violating the
single value constraint, although every value was only valid at a certain point
in time
+//TODO (prio normal): handle constraint parameter 'now' when dealing with time
values
+//TODO (prio low): handle output for the edge case, where there are no
constraints defined on an entity's statements (as is the case for many
properties)
+//TODO (prio low): find visualizations other than a table
+//TODO (prio low): add auto-completion/suggestions while typing to the input
form
+//TODO (prio low): go through the warnings and refactor this code accordingly
+
+
class SpecialWikidataConstraintReport extends SpecialPage {
- function __construct() {
- parent::__construct( 'ConstraintReport' );
- }
+ function __construct() {
+ parent::__construct( 'ConstraintReport' );
+ }
- /**
- * @see SpecialPage::getGroupName
- *
- * @return string
- */
- function getGroupName() {
- return "wikidataquality";
- }
+ /**
+ * @see SpecialPage::getGroupName
+ *
+ * @return string
+ */
+ function getGroupName() {
+ return "wikidataquality";
+ }
- /**
- * @see SpecialPage::getDescription
- *
- * @return string
- */
- public function getDescription() {
- return $this->msg( 'special-constraintreport' )->text();
- }
+ /**
+ * @see SpecialPage::getDescription
+ *
+ * @return string
+ */
+ public function getDescription() {
+ return $this->msg( 'wikidataquality-constraintreport' )->text();
+ }
+ private $output = '';
- /**
- * @see SpecialPage::execute
- *
- * @param string|null $par
- */
- function execute( $par ) {
- $this->setHeaders();
- $out = $this->getContext()->getOutput();
+ /**
+ * @see SpecialPage::execute
+ *
+ * @param string|null $par
+ */
+ function execute( $par ) {
+ $this->setHeaders();
+ $out = $this->getContext()->getOutput();
- // Show form
- $out->addHTML( '<p>Enter an Item or a Property ID to check the
corresponding Entity\'s statements against Constraints.<br />'
- . 'Try for example <i>Q46</i> (Europe)<sup>Range</sup>, <i>Q60</i>
(New York City)<sup>Range, One of</sup>, <i>Q80</i> (Tim Berners-Lee)<sup>2x
One of</sup> or some <i>Pxx</i> (XYZ)</p>'
- );
+ // Show form
+ $out->addHTML( "<p>Enter an Item or a Property ID to check the
corresponding Entity's statements against Constraints.</p>" );
$out->addHTML( "<form name='EntityIdForm' action='" .
$_SERVER['PHP_SELF'] . "' method='post'>" );
$out->addHTML( "<input placeholder='Qxx/Pxx' name='entityID'
id='entity-input'>" );
- $out->addHTML( "<input type='submit' value='Check' />" );
- $out->addHTML( "</form><br /><br />" );
+ $out->addHTML( "<input type='submit' value='Check' />" );
+ $out->addHTML( "</form><br /><br />" );
- if (!isset($_POST['entityID'])) {
- return;
- }
+ if( !isset($_POST['entityID']) || strlen($_POST['entityID']) == 0 ) {
+ return;
+ }
- $entity = $this->entityFromPar($_POST['entityID']);
- if ($entity == null) {
- $out->addWikiText("No valid entityID given or entity
does not exist: " . $_POST['entityID'] . "\n");
- return;
- }
+ $entity = $this->entityFromParameter( $_POST['entityID'] );
+ if( $entity == null ) {
+ $out->addWikiText( "No valid entityID given or entity does not
exist: " . $_POST['entityID'] . "\n" );
+ return;
+ }
- $out->addHTML( '<h2>Constraint report for ' .
$entity->getType() . ' ' . $entity->getId() . ' (' . $entity->getLabel('en') .
'):</h2><br />');
+ $out->addHTML( '<h2>Constraint report for ' . $entity->getType() . ' '
. $entity->getId() . ' (' . $entity->getLabel('en') . '):</h2><br />' );
- $entityStatements = $entity->getStatements();
+ $entityStatements = $entity->getStatements();
+ $entityStatementsArray = $entityStatements->toArray();
+ $propertyCount = array();
+ foreach( $entityStatementsArray as $entityStatement ) {
+ if(
array_key_exists($entityStatement->getPropertyId()->getNumericId(),
$propertyCount) ) {
+
$propertyCount[$entityStatement->getPropertyId()->getNumericId()]++;
+ } else {
+
$propertyCount[$entityStatement->getPropertyId()->getNumericId()] = 0;
+ }
+ }
- $dbr = wfGetDB( DB_SLAVE );
+ $dbr = wfGetDB( DB_SLAVE );
- foreach( $entityStatements as $statement ) {
+ $this->output .=
+ "{| class=\"wikitable sortable\"\n"
+ . "! Property !! class=\"unsortable\" | Value !! Constraint !!
class=\"unsortable\" | Parameters !! Status\n";
- $claim = $statement->getClaim();
+ foreach( $entityStatements as $statement ) {
- $propertyId = $claim->getPropertyId();
- $numericPropertyId = $propertyId->getNumericId();
+ $claim = $statement->getClaim();
- $dataValue = $claim->getMainSnak()->getDataValue();
+ $propertyId = $claim->getPropertyId();
+ $numericPropertyId = $propertyId->getNumericId();
- $res = $dbr->select(
- 'wbq_constraints_from_templates',
// $table
- array('pid', 'constraint_name', 'min', 'max',
'values_'), // $vars (columns of the table)
- ("pid = $numericPropertyId"),
// $conds
- __METHOD__,
//
$fname = 'Database::select',
- array('')
//
$options = array()
- );
+ $mainSnak = $claim->getMainSnak();
+ if( $mainSnak->getType() == 'value' ) {
+ $dataValueString = $this->dataValueToString(
$mainSnak->getDataValue() );
+ } else {
+ $dataValueString = '\'\'(' . $mainSnak->getType() . '\'\')';
+ }
- foreach ($res as $row) {
+ $res = $dbr->select(
+ 'wbq_constraints_from_templates',
// $table
+ array('pid', 'constraint_name', 'base_property', 'exceptions',
'item', 'items', 'max', 'min', 'property', 'values_' ), // $vars
(columns of the table)
+ ("pid = $numericPropertyId"),
// $conds
+ __METHOD__,
//
$fname = 'Database::select',
+ array('')
//
$options = array()
+ );
- switch ($row->constraint_name) {
- case 'One of':
-
$this->checkOneOfConstraint($propertyId, $dataValue, $row->values_);
+ foreach( $res as $row ) {
+
+ switch( $row->constraint_name ) {
+ case 'Diff within range':
+ $this->checkDiffWithinRangeConstraint( $propertyId,
$dataValueString, $row->base_property, $row->min, $row->max, $entityStatements
);
+ break;
+ case 'Inverse':
+ $this->checkInverseConstraint( $propertyId,
$dataValueString, $row->property);
+ break;
+ case 'Multi value':
+ $this->checkMultiValueConstraint( $propertyId,
$dataValueString, $propertyCount );
+ break;
+ case 'One of':
+ $this->checkOneOfConstraint( $propertyId,
$dataValueString, $row->values_ );
+ break;
+ case 'Qualifier':
+ $this->checkQualifierConstraint( $propertyId,
$dataValueString );
+ break;
+ case 'Range':
+ $this->checkRangeConstraint( $propertyId,
$dataValueString, $row->min, $row->max );
+ break;
+ case 'Single value':
+
$this->checkSingleValueConstraint( $propertyId, $dataValueString,
$propertyCount );
break;
- case 'Range':
-
$this->checkRangeConstraint($propertyId, $dataValue, $row->min, $row->max);
- break;
- default:
- //not yet implemented cases,
also error case
- $out->addWikiText("Property " .
$propertyId . " has a " . $row->constraint_name . " Constraint, but there is no
check implemented yet. :(\n");
- break;
- }
+ case 'Symmetric':
+ $this->checkSymmetricConstraint( $propertyId,
$dataValueString);
+ break;
+ case 'Target required claim':
+ $this->checkTargetRequiredClaimConstraint(
$propertyId, $dataValueString, $row->property, $row->item, $row->items);
+ break;
+ default:
+ //not yet implemented cases, also error case
+ $this->addOutputRow( $propertyId, $dataValueString,
$row->constraint_name, '', 'todo' );
+ break;
+ }
- }
+ }
- }
-
- }
-
- function entityFromPar($parameter) {
- $lookup =
WikibaseRepo::getDefaultInstance()->getStore()->getEntityLookup();
+ }
- switch(strtoupper($parameter[0])) {
- case 'Q':
- return $lookup->getEntity(new ItemId($parameter));
- case 'P':
- return $lookup->getEntity(new PropertyId($parameter));
- default:
- return null;
- }
- }
+ $this->output .= "|-\n|}";
+ $out->addWikiText($this->output);
+ }
- function checkOneOfConstraint( $propertyId ,$dataValue, $values ) {
- $output = '';
+ function getItem( $itemID ) {
+ $lookup =
WikibaseRepo::getDefaultInstance()->getStore()->getEntityLookup();
+ return $lookup->getEntity(new ItemId($itemID));
+ }
- $dataValueType = $dataValue->getValue()->getType();
- switch( $dataValueType ) {
- case 'wikibase-entityid':
- $value = $dataValue->getValue();
- break;
- case 'quantity':
- $value = $dataValue->getAmount()->getValue();
- break;
- default:
- //error case
- }
+ function entityFromParameter( $parameter ) {
+ $lookup =
WikibaseRepo::getDefaultInstance()->getStore()->getEntityLookup();
- $allowedValues = explode(", ", $values);
- $toReplace = array("{", "}", "|", "[", "]");
+ switch(strtoupper($parameter[0])) {
+ case 'Q':
+ return $lookup->getEntity(new ItemId($parameter));
+ case 'P':
+ return $lookup->getEntity(new PropertyId($parameter));
+ default:
+ return null;
+ }
+ }
- $lookup =
WikibaseRepo::getDefaultInstance()->getStore()->getEntityLookup();
+ function hasProperty( $itemStatementsArray, $propertyId ) {
+ foreach( $itemStatementsArray as $itemStatement ) {
+ if ($itemStatement->getPropertyId() == $propertyId){
+ return true;
+ }
+ }
+ return false;
+ }
- $valueFound = false;
- foreach ($allowedValues as $value) {
- $allowedValues[$value] =
str_replace($toReplace,"",$value);
+ function hasClaim( $itemStatementsArray, $propertyId, $claimItemIdOrArray
) {
+ foreach( $itemStatementsArray as $itemStatement ) {
+ if ($itemStatement->getPropertyId() == $propertyId){
+ if (getType($claimItemIdOrArray) == "string" ) {
+ if ($this->singleHasClaim( $itemStatement,
$claimItemIdOrArray)){
+ return true;
+ }
+ } else {
+ if ($this->arrayHasClaim( $itemStatement,
$claimItemIdOrArray)){
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
- if( in_array($value,$allowedValues) ) {
- $output .= "''The Claim [Property " .
$propertyId . " (" . $lookup->getEntity($propertyId)->getLabel('en') . "): " .
$value . "] complies with the One of Constraint [values " . $values . "].''\n";
- $valueFound = true;
- break;
- }
+ function singleHasClaim( $itemStatement, $claimItemId) {
+ if (
$itemStatement->getClaim()->getMainSnak()->getDataValue()->getEntityId()->getSerialization()
== $claimItemId) {
+ return true;
+ }
+ return false;
+ }
- if ( !$valueFound ) {
- $output .= "'''VIOLATION:''' ''The Claim
[Property " . $propertyId . " (" .
$lookup->getEntity($propertyId)->getLabel('en') . "): " . $value . "] violates
the One of Constraint [values " . $values . "].''\n";
- }
+ function arrayHasClaim( $itemStatement, $claimItemIdArray) {
+ foreach( $claimItemIdArray as $claimItemId) {
+ if (
$itemStatement->getClaim()->getMainSnak()->getDataValue()->getEntityId()->getSerialization()
== $claimItemId) {
+ return true;
+ }
+ }
+ return false;
+ }
- }
+ function convertStringFromTemplatesToArray( $string ) {
+ $toReplace = array("{", "}", "|", "[", "]", " ");
+ return explode(",", str_replace($toReplace, "", $string));
+ }
- $out = $this->getContext()->getOutput();
- $out->addWikiText($output);
- }
+ function checkDiffWithinRangeConstraint( $propertyId, $dataValueString,
$basePropertyId, $min, $max, $entityStatements ) {
+ $parameterString = 'base Property: ' . $basePropertyId . ', min: ' .
$min . ', max: ' . $max;
- function checkRangeConstraint( $propertyId ,$dataValue, $min, $max ) {
- $output = '';
+ foreach( $entityStatements as $statement ) {
+ if( $basePropertyId == $statement->getClaim()->getPropertyId() ) {
+ $mainSnak = $statement->getClaim()->getMainSnak();
- $dataValueType = $dataValue->getValue()->getType();
- switch( $dataValueType ) {
- case 'decimal':
- case 'number':
- $value = $dataValue->getValue();
- break;
- case 'quantity':
- $value = $dataValue->getAmount()->getValue();
- break;
- default:
- //error case
- }
+ if( $mainSnak->getType() == 'value' ) {
+ $basePropertyDataValueString = $this->dataValueToString(
$mainSnak->getDataValue() );
- $lookup =
WikibaseRepo::getDefaultInstance()->getStore()->getEntityLookup();
+ $diff = abs( $dataValueString-$basePropertyDataValueString
);
- if( $value < $min || $value > $max ) {
- $output .= "'''VIOLATION:''' ''The Claim [Property " .
$propertyId . " (" . $lookup->getEntity($propertyId)->getLabel('en') . "): " .
$value . "] violates the Range Constraint [min " . $min . ", max " . $max .
"].''\n";
- } else {
- $output .= "''The Claim [Property " . $propertyId . "
(" . $lookup->getEntity($propertyId)->getLabel('en') . "): " . $value . "]
complies with the Range Constraint [min " . $min . ", max " . $max . "].''\n";
- }
+ if( $diff < $min || $diff > $max ) {
+ $status = 'violation';
+ } else {
+ $status = 'compliance';
+ }
+ } else {
+ $status = 'violation';
+ }
- $out = $this->getContext()->getOutput();
- $out->addWikiText($output);
- }
+ $this->addOutputRow( $propertyId, $dataValueString, 'Diff
within range', $parameterString, $status );
+ }
+ }
+ }
+
+ function checkInverseConstraint( $propertyId, $dataValueString, $property)
{
+ $targetItem = $this->entityFromParameter(
$dataValueString->getSerialization() );
+ $parameterString = 'Property: ' . $property;
+ if ($targetItem == null) {
+ $this->addOutputRow( $propertyId, $dataValueString, 'Inverse',
$parameterString, 'fail' );
+ return;
+ }
+ $targetItemStatements = $targetItem->getStatements();
+ $targetItemStatementsArray = $targetItemStatements->toArray();
+
+ $targetHasProperty = $this->hasProperty( $targetItemStatementsArray,
$property );
+ $status = $targetHasProperty ? 'compliance' : 'violation';
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'Inverse',
$parameterString, $status );
+ }
+
+ function checkMultiValueConstraint( $propertyId, $dataValueString,
$propertyCount ) {
+ if( $propertyCount[$propertyId->getNumericId()] <= 1 ) {
+ $status = 'violation';
+ } else {
+ $status = 'compliance';
+ }
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'Multi value',
'\'\'(none)\'\'', $status );
+ }
+
+ function checkOneOfConstraint( $propertyId, $dataValueString, $values ) {
+ $allowedValues = $this->convertStringFromTemplatesToArray( $values );
+
+ if( !in_array($dataValueString, $allowedValues) ) {
+ $status = 'violation';
+ } else {
+ $status = 'compliance';
+ }
+
+ $showMax = 5;
+ if( sizeof($allowedValues) <= $showMax ) {
+ $parameterString = 'values: ' . implode(", ", $allowedValues);
+ } else {
+ $parameterString = 'values: ' . implode(", ",
array_slice($allowedValues, 0, $showMax)) . ' \'\'(and ' .
(sizeof($allowedValues)-$showMax) . ' more)\'\'';
+ }
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'One of',
$parameterString, $status );
+ }
+
+ function checkQualifierConstraint( $propertyId, $dataValueString ) {
+ $this->addOutputRow( $propertyId, $dataValueString, 'Qualifier',
'\'\'(none)\'\'', 'violation' );
+ }
+
+ function checkRangeConstraint( $propertyId, $dataValueString, $min, $max )
{
+ if( $dataValueString < $min || $dataValueString > $max ) {
+ $status = 'violation';
+ } else {
+ $status = 'compliance';
+ }
+
+ $parameterString = 'min: ' . $min . ', max: ' . $max;
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'Range',
$parameterString, $status );
+ }
+
+ function checkSingleValueConstraint( $propertyId, $dataValueString,
$propertyCount ) {
+ if( $propertyCount[$propertyId->getNumericId()] > 1 ) {
+ $status = 'violation';
+ } else {
+ $status = 'compliance';
+ }
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'Single value',
'\'\'(none)\'\'', $status );
+ }
+
+ function checkSymmetricConstraint( $propertyId, $dataValueString ) {
+ $targetItem = $this->entityFromParameter(
$dataValueString->getSerialization() );
+ if ($targetItem == null) {
+ $this->addOutputRow( $propertyId, $dataValueString, 'Symmetric',
'', 'fail' );
+ return;
+ }
+
+ $targetItemStatements = $targetItem->getStatements();
+ $targetItemStatementsArray = $targetItemStatements->toArray();
+
+ $targetHasProperty = $this->hasProperty( $targetItemStatementsArray,
$propertyId );
+ $status = $targetHasProperty ? 'compliance' : 'violation';
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'Symmetric', '',
$status );
+ }
+
+ function checkTargetRequiredClaimConstraint( $propertyId,
$dataValueString, $property, $item, $items) {
+ $targetItem = $this->entityFromParameter(
$dataValueString->getSerialization() );
+ $parameterString = 'property: ' . $property;
+ if ($targetItem == null) {
+ $this->addOutputRow( $propertyId, $dataValueString, 'Target
required claim', $parameterString, 'fail' );
+ return;
+ }
+
+ $targetItemStatements = $targetItem->getStatements();
+ $targetItemStatementsArray = $targetItemStatements->toArray();
+
+ // 3 possibilities: only property is set, property and item are set or
property and items are set
+ if ($item == null && $items == null) {
+ $targetHasProperty = $this->hasProperty(
$targetItemStatementsArray, $property );
+ $status = $targetHasProperty ? 'compliance' : 'violation';
+ } else if ($items == null) {
+ $parameterString .= ' item: ' . $item;
+ // also check, if value of this statement = $item
+ $status = $this->hasClaim( $targetItemStatementsArray, $property,
$item ) ? 'compliance' : 'violation';
+ } else {
+ $items = $this->convertStringFromTemplatesToArray( $items );
+ $parameterString .= ' items: ' . implode(', ', $items);
+ $status = $this->hasClaim( $targetItemStatementsArray, $property,
$items ) ? 'compliance' : 'violation';
+ }
+
+ $this->addOutputRow( $propertyId, $dataValueString, 'Target required
claim', $parameterString, $status );
+ }
+
+ function dataValueToString( $dataValue ) {
+ $dataValueType = $dataValue->getType();
+ switch( $dataValueType ) {
+ case 'string':
+ case 'decimal':
+ case 'number':
+ case 'boolean':
+ case 'unknown':
+ return $dataValue->getValue();
+ case 'quantity':
+ return $dataValue->getAmount()->getValue();
+ case 'time':
+ return $dataValue->getTime();
+ case 'globecoordinate':
+ case 'geocoordinate':
+ return 'Latitude: ' . $dataValue->getLatitude() . ',
Longitude: ' . $dataValue->getLongitude();
+ case 'monolingualtext':
+ return $dataValue->getText();
+ case 'multilingualtext':
+ if( array_key_exists('en', $dataValue) ) {
+ return $dataValue->getTexts()['en'];
+ } else {
+ return array_shift($dataValue->getTexts());
+ };
+ case 'wikibase-entityid':
+ return $dataValue->getEntityId();
+ case 'bad':
+ default:
+ //error case
+ }
+ }
+
+ function addOutputRow( $propertyId, $dataValueString, $constraintName,
$parameterString, $status ) {
+ $lookup =
WikibaseRepo::getDefaultInstance()->getStore()->getEntityLookup();
+
+ $this->output .=
+ "|-\n"
+ . "| " . $propertyId . " (" .
$lookup->getEntity($propertyId)->getLabel('en') . ") "
+ . "|| " . $dataValueString . " "
+ . "|| " . $constraintName . " "
+ . "|| " . $parameterString . " ";
+ switch( $status ) {
+ case 'compliance':
+ $this->output .= "|| <div style=\"color:#088A08\">compliance
<b>(+)</b></div>\n";
+ break;
+ case 'violation':
+ $this->output .= "|| <div style=\"color:#8A0808\">violation
<b>(-)</b></div>\n";
+ break;
+ case 'exception':
+ $this->output .= "|| <div style=\"color:#D2D20C\">exception
<b>(+)</b></div>\n";
+ break;
+ case 'todo':
+ $this->output .= "|| <div style=\"color:#808080\">not yet
implemented <b>:(</b></div>\n";
+ break;
+ case 'fail':
+ default:
+ $this->output .= "|| <div style=\"color:#808080\">check failed
<b>:(</b></div>\n";
+ //error case
+ }
+ }
}
\ No newline at end of file
diff --git a/external-validation/specials/SpecialCrossCheck.php
b/external-validation/specials/SpecialCrossCheck.php
index 0a7a032..8ce06a8 100644
--- a/external-validation/specials/SpecialCrossCheck.php
+++ b/external-validation/specials/SpecialCrossCheck.php
@@ -31,7 +31,7 @@
*/
public function getDescription()
{
- return $this->msg( 'special-crosscheck' )->text();
+ return $this->msg( 'wikidataquality-crosscheck' )->text();
}
/**
@@ -54,7 +54,7 @@
Html::openElement( 'p' )
. $this->msg( 'special-crosscheck-instructions' )->text()
. Html::element( 'br' )
- . $this->msg( 'special-crosscheck-instructions-example' )->text()
+ . $this->msg( 'wikidataquality-crosscheck-instructions-example'
)->text()
. Html::closeElement( 'p' )
. Html::openElement(
'form',
@@ -69,12 +69,12 @@
'text',
array(
'id' => 'wdq-crosscheck-itemid',
- 'placeholder' => $this->msg(
'special-crosscheck-form-itemid-placeholder' )->text()
+ 'placeholder' => $this->msg(
'wikidataquality-crosscheck-form-itemid-placeholder' )->text()
)
)
. Html::input(
'submit',
- $this->msg( 'special-crosscheck-form-submit-label' )->text(),
+ $this->msg( 'wikidataquality-crosscheck-form-submit-label'
)->text(),
'submit',
array(
'id' => 'wbq-crosscheck-submit'
@@ -92,63 +92,47 @@
// Print results
$out->addHTML(
Html::openElement( 'h3' )
- . $this->msg( 'speical-crosscheck-result-headline' )->text()
+ . $this->msg( 'wikidataquality-crosscheck-result-headline'
)->text() . $_POST[ 'itemId' ]
. Html::closeElement( 'h3' )
);
if ( $results ) {
- $out->addHTML( Html::openElement( 'ul' ) );
+ // Head of table
+ $tableOutput =
+ "{| class=\"wikitable sortable\"\n"
+ . "! ". $this->msg( 'datatypes-type-wikibase-property'
)->text() ." !! class=\"unsortable\" | ". $this->msg( 'wikidataquality-value'
)->text() ." !! class=\"unsortable\" | ". $this->msg(
'wikidataquality-crosscheck-comparative-value' )->text() ." !! ". $this->msg(
'wikidataquality-crosscheck-external-source' )->text() ." !! ". $this->msg(
'wikidataquality-status' )->text() ."\n";
+
foreach ( $results as $result ) {
// Parse value arrays to concatenated strings
$localValues = $this->parseMultipleValues(
$result->getLocalValues(),
- $this->msg( 'special-crosscheck-result-no-wd-entity'
)->text()
+ $this->msg(
'wikidataquality-crosscheck-result-no-wd-entity' )->text()
);
$externalValues = $this->parseMultipleValues(
$result->getExternalValues(),
- $this->msg( 'special-crosscheck-result-no-ext-entity'
)->text()
+ $this->msg(
'wikidataquality-crosscheck-result-no-ext-entity' )->text()
);
- // Print list item
if ( $result->hasDataMismatchOccurred() ) {
- $out->addHTML(
- Html::openElement(
- 'li',
- array(
- 'class' => 'wdq-crosscheck-mismatch'
- )
- )
- . $result->getPropertyId()
- . $this->msg( 'special-crosscheck-result-mismatch'
)->text()
- . Html::element( 'br' )
- . $localValues
- . ' ↔ '
- . $externalValues
- . Html::closeElement( 'li' )
- );
+ $status = "| <span class=\"wdq-crosscheck-error\"> ".
$this->msg( 'wikidataquality-crosscheck-result-mismatch' )->text() ."
<b>(-)</b></span>\n";
} else {
- $out->addHTML(
- Html::openElement(
- 'li',
- array(
- 'class' => 'wdq-crosscheck-success'
- )
- )
- . $result->getPropertyId()
- . $this->msg( 'special-crosscheck-result-success'
)->text()
- . Html::element( 'br' )
- . $localValues
- . ' ↔ '
- . $externalValues
- . Html::closeElement( 'li' )
- );
+ $status = "| <span class=\"wdq-crosscheck-success\">".
$this->msg( 'wikidataquality-crosscheck-result-success' )->text() ."
<b>(+)</b></span>\n";
}
+
+ // Body of table
+ $tableOutput .=
+ "|-\n"
+ . "| " . $result->getPropertyId() . "\n"
+ . "| " . $localValues . "\n"
+ . "| " . $externalValues . "\n"
+ . "| " . $result->getDataSourceName() . "\n"
+ . $status;
}
- $out->addHTML(
- Html::closeElement( 'ul' )
- );
- }
- else {
+
+ // End of table
+ $tableOutput .= "|-\n|}";
+ $out->addWikiText( $tableOutput );
+ } else {
$out->addHTML(
Html::openElement(
'p',
@@ -156,12 +140,13 @@
'class' => 'wdq-crosscheck-error'
)
)
- . $this->msg(
'special-crosscheck-result-item-not-existent' )->text()
- . Html::closeElement( 'p ')
+ . $this->msg(
'wikidataquality-crosscheck-result-item-not-existent' )->text()
+ . Html::closeElement( 'p ' )
);
}
}
}
+
/**
* Parse array of values to human-readable string
@@ -169,11 +154,11 @@
* @param $errorMessage
* @return string
*/
- private function parseMultipleValues( $values, $errorMessage ) {
- if( $values ) {
+ private function parseMultipleValues( $values, $errorMessage )
+ {
+ if ( $values ) {
return implode( ', ', $values );
- }
- else {
+ } else {
return $errorMessage;
}
}
diff --git a/external-validation/src/CrossCheck/Comparer/DataValueComparer.php
b/external-validation/src/CrossCheck/Comparer/DataValueComparer.php
index 970233f..df4f53c 100644
--- a/external-validation/src/CrossCheck/Comparer/DataValueComparer.php
+++ b/external-validation/src/CrossCheck/Comparer/DataValueComparer.php
@@ -5,7 +5,6 @@
use ReflectionClass;
use DataValues\DataValue;
-use WikidataQuality\ExternalValidation\CrossCheck\Result\CompareResult;
/**
@@ -14,14 +13,26 @@
* @author BP2014N1
* @license GNU GPL v2+
*/
-abstract class DataValueComparer {
+abstract class DataValueComparer
+{
/**
* Array of registered comparers
* @var array
*/
private static $comparers = array(
-
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\EntityIdValueComparer'
+
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\EntityIdValueComparer',
+
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\MonolingualTextValueComparer',
+
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\MultilingualTextValueComparer',
+
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\StringValueComparer',
+
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\TimeValueComparer',
+
'WikidataQuality\ExternalValidation\CrossCheck\Comparer\QuantityValueComparer'
);
+
+ /**
+ * Meta information of the current dump.
+ * @var DumpMetaInformation
+ */
+ protected $dumpMetaInformation;
/**
* Wikibase data value.
@@ -46,7 +57,9 @@
* @param \DataValue $dataValue - wikibase DataValue
* @param array $externalValues - external database values
*/
- public function __construct( DataValue $dataValue, $externalValues,
$localValues = null ) {
+ public function __construct( $dumpMetaInformation, DataValue $dataValue,
$externalValues, $localValues = null )
+ {
+ $this->dumpMetaInformation = $dumpMetaInformation;
$this->dataValue = $dataValue;
$this->externalValues = $externalValues;
$this->localValues = $localValues;
@@ -55,7 +68,7 @@
/**
* Starts the comparison of given DataValue and values of external
database.
- * @return \CompareResult - result of the comparison.
+ * @return bool - result of the comparison.
*/
public abstract function execute();
@@ -66,13 +79,14 @@
* @param array $externalValues - external database values
* @return DataValueComparer
*/
- public static function getComparer( DataValue $dataValue, $externalValues
) {
- foreach( self::$comparers as $comparer ) {
+ public static function getComparer( $dumpMetaInformation, DataValue
$dataValue, $externalValues )
+ {
+ foreach ( self::$comparers as $comparer ) {
$reflector = new ReflectionClass( $comparer );
$acceptedDataValues = $reflector->getStaticPropertyValue(
"acceptedDataValues" );
$dataValueClass = get_class( $dataValue );
- if( in_array( $dataValueClass, $acceptedDataValues ) ) {
- return new $comparer( $dataValue, $externalValues );
+ if ( in_array( $dataValueClass, $acceptedDataValues ) ) {
+ return new $comparer( $dumpMetaInformation, $dataValue,
$externalValues );
}
}
}
diff --git
a/external-validation/src/CrossCheck/Comparer/EntityIdValueComparer.php
b/external-validation/src/CrossCheck/Comparer/EntityIdValueComparer.php
index 2588d80..756fd82 100644
--- a/external-validation/src/CrossCheck/Comparer/EntityIdValueComparer.php
+++ b/external-validation/src/CrossCheck/Comparer/EntityIdValueComparer.php
@@ -4,8 +4,6 @@
use Wikibase\Repo\WikibaseRepo;
-use Wikibase\DataModel\Entity\EntityIdValue;
-use WikidataQuality\ExternalValidation\CrossCheck\Result\CompareResult;
/**
@@ -24,30 +22,19 @@
/**
- * @param EntityIdValue $dataValue
- * @param array $externalValues
- */
- public function __construct( EntityIdValue $dataValue, $externalValues )
- {
- parent::__construct( $dataValue, $externalValues );
- }
-
-
- /**
* Starts the comparison of given EntityIdValue and values of external
database.
- * @return \CompareResult - result of the comparison.
+ * @return bool - result of the comparison.
*/
public function execute()
{
// Get terms of the referenced entity
$entityId = $this->dataValue->getEntityId();
- $this->localValues = $this->getTerms( $entityId, "de" ); //TODO: get
from database
+ $this->localValues = $this->getTerms( $entityId,
$this->dumpMetaInformation->getLanguage() );
// Compare value
- if ( $this->localValues && count( array_intersect($this->localValues,
$this->externalValues) ) > 0 ) {
+ if ( $this->localValues && count( array_intersect( $this->localValues,
$this->externalValues ) ) > 0 ) {
return true;
- }
- else {
+ } else {
return false;
}
}
diff --git
a/external-validation/src/CrossCheck/Comparer/MonolingualTextValueComparer.php
b/external-validation/src/CrossCheck/Comparer/MonolingualTextValueComparer.php
new file mode 100644
index 0000000..43c46d2
--- /dev/null
+++
b/external-validation/src/CrossCheck/Comparer/MonolingualTextValueComparer.php
@@ -0,0 +1,40 @@
+<?php
+
+namespace WikidataQuality\ExternalValidation\CrossCheck\Comparer;
+
+
+/**
+ * Comparer for MonolingualTextValue data values.
+ *
+ * The text of the local value is matched against the external values;
+ * the language code of the local value is not consulted here.
+ *
+ * @package WikidataQuality\ExternalValidation\CrossCheck\Comparer
+ * @author BP2014N1
+ * @license GNU GPL v2+
+ */
+class MonolingualTextValueComparer extends DataValueComparer
+{
+    /**
+     * DataValue classes this comparer can handle.
+     * @var array
+     */
+    public static $acceptedDataValues = array( 'DataValues\MonolingualTextValue' );
+
+
+    /**
+     * Compares the text of the MonolingualTextValue with the values of the external database.
+     * @return bool - true if the local text occurs among the external values, false otherwise.
+     */
+    public function execute()
+    {
+        // The local side consists of exactly one value: the text itself
+        $text = $this->dataValue->getText();
+        $this->localValues = array( $text );
+
+        // A match exists as soon as the text shows up in the external values
+        $matches = array_intersect( $this->localValues, $this->externalValues );
+
+        return count( $matches ) > 0;
+    }
+}
\ No newline at end of file
diff --git
a/external-validation/src/CrossCheck/Comparer/MultilingualTextValueComparer.php
b/external-validation/src/CrossCheck/Comparer/MultilingualTextValueComparer.php
new file mode 100644
index 0000000..674c447
--- /dev/null
+++
b/external-validation/src/CrossCheck/Comparer/MultilingualTextValueComparer.php
@@ -0,0 +1,63 @@
+<?php
+
+namespace WikidataQuality\ExternalValidation\CrossCheck\Comparer;
+
+
+use DataValues\MultilingualTextValue;
+
+
+/**
+ * Class MultilingualTextValueComparer
+ *
+ * Selects the text whose language code equals the language of the dump and
+ * hands it to the monolingual comparison of the parent class.
+ *
+ * @package WikidataQuality\ExternalValidation\CrossCheck\Comparer
+ * @author BP2014N1
+ * @license GNU GPL v2+
+ */
+class MultilingualTextValueComparer extends MonolingualTextValueComparer
+{
+    /**
+     * Array of DataValue classes that are supported by the current comparer.
+     * @var array
+     */
+    public static $acceptedDataValues = array( 'DataValues\MultilingualTextValue' );
+
+
+    /**
+     * @param \DumpMetaInformation $dumpMetaInformation - its language selects the text to compare
+     * @param MultilingualTextValue $dataValue
+     * @param array $externalValues
+     * @param array $localValues - not read by this constructor
+     */
+    public function __construct( $dumpMetaInformation, MultilingualTextValue $dataValue, $externalValues, $localValues = null )
+    {
+        // Take the language from the argument. $this->dumpMetaInformation is
+        // presumably assigned by the parent constructor (not visible here),
+        // which has not run yet at this point.
+        $language = $dumpMetaInformation->getLanguage();
+
+        foreach ( $dataValue->getTexts() as $text ) {
+            if ( $text->getLanguageCode() == $language ) {
+                parent::__construct( $dumpMetaInformation, $text, $externalValues );
+            }
+        }
+    }
+
+
+    /**
+     * Starts the comparison of the selected text and values of external database.
+     * @return bool|null - result of the comparison, null if no text in the dump language exists.
+     */
+    public function execute()
+    {
+        // No matching text means the parent constructor was never called,
+        // so there is nothing to compare.
+        if ( !isset( $this->dataValue ) ) {
+            return null;
+        }
+
+        return parent::execute();
+    }
+}
\ No newline at end of file
diff --git
a/external-validation/src/CrossCheck/Comparer/QuantityValueComparer.php
b/external-validation/src/CrossCheck/Comparer/QuantityValueComparer.php
new file mode 100644
index 0000000..34c1668
--- /dev/null
+++ b/external-validation/src/CrossCheck/Comparer/QuantityValueComparer.php
@@ -0,0 +1,51 @@
+<?php
+
+namespace WikidataQuality\ExternalValidation\CrossCheck\Comparer;
+
+/**
+ * Comparer for QuantityValue data values.
+ *
+ * An external value matches when it lies within the bounds of the local quantity.
+ *
+ * @package WikidataQuality\ExternalValidation\CrossCheck\Comparer
+ * @author BP2014N1
+ * @license GNU GPL v2+
+ */
+class QuantityValueComparer extends DataValueComparer
+{
+    /**
+     * DataValue classes this comparer can handle.
+     * @var array
+     */
+    public static $acceptedDataValues = array( 'DataValues\QuantityValue' );
+
+
+    /**
+     * Checks whether one of the external values lies within the bounds of the QuantityValue.
+     * @return bool - true if an external value lies between lower and upper bound (inclusive), false otherwise.
+     */
+    public function execute()
+    {
+        $quantity = $this->dataValue;
+
+        // Interval an external value has to fall into
+        $minimum = $quantity->getLowerBound()->getValueFloat();
+        $maximum = $quantity->getUpperBound()->getValueFloat();
+
+        // Local value is presented as amount together with half of the uncertainty
+        $amount = $quantity->getAmount()->getValueFloat();
+        $halfUncertainty = $quantity->getUncertainty() / 2;
+        $this->localValues = array( $amount . ' ±' . $halfUncertainty );
+
+        foreach ( $this->externalValues as $candidate ) {
+            // External values arrive as strings
+            $number = floatval( $candidate );
+
+            if ( $minimum <= $number && $number <= $maximum ) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+}
\ No newline at end of file
diff --git
a/external-validation/src/CrossCheck/Comparer/StringValueComparer.php
b/external-validation/src/CrossCheck/Comparer/StringValueComparer.php
new file mode 100644
index 0000000..3eaa4b9
--- /dev/null
+++ b/external-validation/src/CrossCheck/Comparer/StringValueComparer.php
@@ -0,0 +1,37 @@
+<?php
+
+namespace WikidataQuality\ExternalValidation\CrossCheck\Comparer;
+
+
+/**
+ * Comparer for StringValue data values.
+ *
+ * @package WikidataQuality\ExternalValidation\CrossCheck\Comparer
+ * @author BP2014N1
+ * @license GNU GPL v2+
+ */
+class StringValueComparer extends DataValueComparer
+{
+    /**
+     * DataValue classes this comparer can handle.
+     * @var array
+     */
+    public static $acceptedDataValues = array( 'DataValues\StringValue' );
+
+
+    /**
+     * Compares the string of the StringValue with the values of the external database.
+     * @return bool - true if the local string occurs among the external values, false otherwise.
+     */
+    public function execute()
+    {
+        // The local side consists of exactly one value: the string itself
+        $string = $this->dataValue->getValue();
+        $this->localValues = array( $string );
+
+        // A match exists as soon as the string shows up in the external values
+        $matches = array_intersect( $this->localValues, $this->externalValues );
+
+        return count( $matches ) > 0;
+    }
+}
\ No newline at end of file
diff --git a/external-validation/src/CrossCheck/Comparer/TimeValueComparer.php
b/external-validation/src/CrossCheck/Comparer/TimeValueComparer.php
new file mode 100644
index 0000000..f2f828b
--- /dev/null
+++ b/external-validation/src/CrossCheck/Comparer/TimeValueComparer.php
@@ -0,0 +1,143 @@
+<?php
+
+namespace WikidataQuality\ExternalValidation\CrossCheck\Comparer;
+
+
+use DataValues\TimeValue;
+use DateTime;
+
+
+/**
+ * Class TimeValueComparer
+ * @package WikidataQuality\ExternalValidation\CrossCheck\Comparer
+ * @author BP2014N1
+ * @license GNU GPL v2+
+ */
+class TimeValueComparer extends DataValueComparer
+{
+    /**
+     * Array of DataValue classes that are supported by the current comparer.
+     * @var array
+     */
+    public static $acceptedDataValues = array( 'DataValues\TimeValue' );
+
+
+    /**
+     * Starts the comparison of given TimeValue and values of external database.
+     * Only the first external value is compared.
+     * @return bool|null - result of the comparison, null if one of the dates could not be parsed.
+     */
+    public function execute()
+    {
+        // Parse local datetime; the first 8 characters of the time string are skipped
+        // (presumably sign and leading zeros of the year - TODO confirm)
+        $value = substr( $this->dataValue->getTime(), 8 );
+        $localDateTime = DateTime::createFromFormat( 'Y-m-d\TH:i:s\Z', $value );
+
+        // Parse external datetime
+        $externalDateTime = DateTime::createFromFormat( $this->dumpMetaInformation->getDateFormat(), $this->externalValues[ 0 ] );
+
+        // createFromFormat returns false on failure. Formatting or diffing such a value
+        // would raise a fatal error, so the value is reported as not comparable instead.
+        if ( $localDateTime === false || $externalDateTime === false ) {
+            return null;
+        }
+
+        // Format output values
+        $this->formatValues( $localDateTime, $externalDateTime );
+
+        // Compare value. There is no break between the cases, so every precision
+        // also applies the checks of all coarser precisions listed below it.
+        $result = true;
+        $diff = date_diff( $localDateTime, $externalDateTime );
+        switch ( $this->dataValue->getPrecision() ) {
+            case TimeValue::PRECISION_SECOND:
+                $result = $diff->s == 0;
+
+            case TimeValue::PRECISION_MINUTE:
+                $result = $result && $diff->i == 0;
+
+            case TimeValue::PRECISION_HOUR:
+                $result = $result && $diff->h == 0;
+
+            case TimeValue::PRECISION_DAY:
+                $result = $result && $diff->d == 0;
+
+            case TimeValue::PRECISION_MONTH:
+                $result = $result && $diff->m == 0;
+
+            case TimeValue::PRECISION_YEAR:
+                $result = $result && $diff->y == 0;
+
+            case TimeValue::PRECISION_10a:
+                $result = $result && $diff->y < 10;
+
+            case TimeValue::PRECISION_100a:
+                $result = $result && $diff->y < 100;
+
+            case TimeValue::PRECISION_ka:
+                $result = $result && $diff->y < 1000;
+
+            case TimeValue::PRECISION_10ka:
+                $result = $result && $diff->y < 10000;
+
+            case TimeValue::PRECISION_100ka:
+                $result = $result && $diff->y < 100000;
+
+            case TimeValue::PRECISION_Ma:
+                $result = $result && $diff->y < 1000000;
+
+            case TimeValue::PRECISION_10Ma:
+                $result = $result && $diff->y < 10000000;
+
+            case TimeValue::PRECISION_100Ma:
+                $result = $result && $diff->y < 100000000;
+
+            case TimeValue::PRECISION_Ga:
+                $result = $result && $diff->y < 1000000000;
+                break;
+
+            default:
+                $result = false;
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Sets local and external values to formatted dates depending on precision.
+     * Precisions that are not listed are formatted down to the second.
+     * @param DateTime $local
+     * @param DateTime $external
+     */
+    private function formatValues( $local, $external )
+    {
+        // Output format per precision; the ± suffixes equal the limits applied in execute()
+        $formats = array(
+            TimeValue::PRECISION_SECOND => "Y-m-d H:i:s",
+            TimeValue::PRECISION_MINUTE => "Y-m-d H:i",
+            // NOTE(review): "H:0" prints a single literal 0 after the hour - "H:00" may have been intended
+            TimeValue::PRECISION_HOUR => "Y-m-d H:0",
+            TimeValue::PRECISION_DAY => "Y-m-d",
+            TimeValue::PRECISION_MONTH => "Y-m",
+            TimeValue::PRECISION_YEAR => "Y",
+            TimeValue::PRECISION_10a => "Y ±10",
+            TimeValue::PRECISION_100a => "Y ±100",
+            TimeValue::PRECISION_ka => "Y ±1000",
+            TimeValue::PRECISION_10ka => "Y ±10000",
+            TimeValue::PRECISION_100ka => "Y ±100000",
+            TimeValue::PRECISION_Ma => "Y ±1000000",
+            TimeValue::PRECISION_10Ma => "Y ±10000000",
+            TimeValue::PRECISION_100Ma => "Y ±100000000",
+            TimeValue::PRECISION_Ga => "Y ±1000000000",
+        );
+
+        $precision = $this->dataValue->getPrecision();
+        $format = isset( $formats[ $precision ] ) ? $formats[ $precision ] : "Y-m-d H:i:s";
+
+        // Set properties to formatted dates
+        $this->localValues = array( $local->format( $format ) );
+        $this->externalValues = array( $external->format( $format ) );
+    }
+}
\ No newline at end of file
diff --git a/external-validation/src/CrossCheck/CrossChecker.php
b/external-validation/src/CrossCheck/CrossChecker.php
index 6df1dae..7f9c6b8 100644
--- a/external-validation/src/CrossCheck/CrossChecker.php
+++ b/external-validation/src/CrossCheck/CrossChecker.php
@@ -7,6 +7,7 @@
use Wikibase\DataModel\Entity\ItemId;
use Wikibase\DataModel\Statement\StatementList;
use Wikibase\DataModel\Snak\PropertyValueSnak;
+use WikidataQuality\ExternalValidation\CrossCheck\DumpMetaInformation;
use
WikidataQuality\ExternalValidation\CrossCheck\MappingEvaluator\MappingEvaluator;
use WikidataQuality\ExternalValidation\CrossCheck\Comparer\DataValueComparer;
use WikidataQuality\ExternalValidation\CrossCheck\Result\CompareResult;
@@ -39,6 +40,12 @@
*/
private $mapping;
+ /**
+ * Metadata for dump belonging to external entity.
+ * @var array
+ */
+ private $dumpMetaInformation;
+
public function __construct()
{
@@ -58,6 +65,7 @@
* Starts the whole cross-check process.
* Statements of the item will be checked against each external database,
that is supported and linked by the item.
* @param \ItemId $itemId - Id of the item, that should be cross-cheked
+ * @return \CompareResultList
*/
public function execute( $itemId )
{
@@ -84,6 +92,7 @@
* Checks given statements against one single database identified by given
property id.
* @param \StatementList $statements - list of statements, that should be
cross-checked
* @param \PropertyId $identifierPropertyId - id of the identifier
property, that represents the external database
+ * @return \CompareResultList
*/
private function crossCheckStatements( $statements, $identifierPropertyId )
{
@@ -113,23 +122,24 @@
foreach ( $externalIds as $externalId ) {
// Get external entity
$externalEntity = $this->getExternalEntity( $identifierPropertyId,
$externalId );
+ if ( $externalEntity ) {
+ // Compare each validatable statement
+ foreach ( $validateableStatements as $validateableStatement ) {
+ // Get claim and ids
+ $claim = $validateableStatement->getClaim();
+ $claimGuid = $claim->getGuid();
- // Compare each validatable statement
- foreach ( $validateableStatements as $validateableStatement ) {
- // Get claim and ids
- $claim = $validateableStatement->getClaim();
- $claimGuid = $claim->getGuid();
+ // Get main snak
+ $mainSnak = $claim->getMainSnak();
+ if ( $mainSnak instanceof PropertyValueSnak ) {
+ $dataValue = $mainSnak->getDataValue();
+ $propertyId = $mainSnak->getPropertyId();
+ $propertyMapping = $currentMapping[
$propertyId->getNumericId() ];
- // Get main snak
- $mainSnak = $claim->getMainSnak();
- if ( $mainSnak instanceof PropertyValueSnak ) {
- $dataValue = $mainSnak->getDataValue();
- $propertyId = $mainSnak->getPropertyId();
- $propertyMapping = $currentMapping[
$propertyId->getNumericId() ];
-
- $result = $this->compareDataValue( $propertyId,
$claimGuid, $dataValue, $externalEntity, $propertyMapping );
- if ( $result ) {
- $results->add( $result );
+ $result = $this->compareDataValue( $propertyId,
$claimGuid, $dataValue, $externalEntity, $propertyMapping );
+ if ( $result ) {
+ $results->add( $result );
+ }
}
}
}
@@ -150,9 +160,29 @@
// Run query
$numericPropertyId = $identifierPropertyId->getNumericId();
- $result = $db->selectRow( DUMP_DATA_TABLE, "external_data", array(
"pid=$numericPropertyId", "external_id=$externalId" ) );
+ $result = $db->selectRow( DUMP_DATA_TABLE, array( "dump_id",
"external_data" ), array( "pid=$numericPropertyId",
"external_id=\"$externalId\"" ) );
if ( $result !== false ) {
+ $this->dumpMetaInformation = $this->getMetaInformation( $db,
$result->dump_id );
return $result->external_data;
+ }
+ }
+
+ /**
+ * Retrieves meta information by dump id from database.
+ * @param $db - loadBalancer connection
+ * @param int $dumpId - id of the dump
+ * @return \DumpMetaInformation
+ */
+ private function getMetaInformation( $db, $dumpId )
+ {
+ // Run query
+ $result = $db->selectRow( DUMP_META_TABLE, array( "format",
"language", "date_format", "name" ), array( "row_id=$dumpId" ) );
+ if ( $result !== false ) {
+ $format = $result->format;
+ $language = $result->language;
+ $dateFormat = $result->date_format;
+ $dataSourceName = $result->name;
+ return new DumpMetaInformation( $format, $language, $dateFormat,
$dataSourceName );
}
}
@@ -166,7 +196,7 @@
private function compareDataValue( $propertyId, $claimGuid, $dataValue,
$externalEntity, $propertyMapping )
{
// Get external values by evaluating mapping
- $mapingEvaluator = MappingEvaluator::getEvaluator( "xml",
$externalEntity ); //TODO: get from database
+ $mapingEvaluator = MappingEvaluator::getEvaluator(
$this->dumpMetaInformation->getFormat(), $externalEntity );
if ( $mapingEvaluator ) {
$nodeSelector = $propertyMapping[ "nodeSelector" ];
$valueFormatter = array_key_exists( "valueFormatter",
$propertyMapping ) ? $propertyMapping[ "valueFormatter" ] : null;
@@ -174,12 +204,12 @@
// Start comparer if external value could be evaluated
if ( count( $externalValues ) > 0 ) {
- $comparer = DataValueComparer::getComparer( $dataValue,
$externalValues );
+ $comparer = DataValueComparer::getComparer(
$this->dumpMetaInformation, $dataValue, $externalValues );
if ( $comparer ) {
$result = $comparer->execute();
if ( isset( $result ) ) {
- return new CompareResult( $propertyId, $claimGuid,
$comparer->localValues, $comparer->externalValues, !$result, null );
+ return new CompareResult( $propertyId, $claimGuid,
$comparer->localValues, $comparer->externalValues, !$result, null,
$this->dumpMetaInformation->getDataSourceName() );
}
}
}
diff --git a/external-validation/src/CrossCheck/DumpMetaInformation.php
b/external-validation/src/CrossCheck/DumpMetaInformation.php
new file mode 100644
index 0000000..d8ebab7
--- /dev/null
+++ b/external-validation/src/CrossCheck/DumpMetaInformation.php
@@ -0,0 +1,92 @@
+<?php
+
+namespace WikidataQuality\ExternalValidation\CrossCheck;
+
+
+/**
+ * Container for the meta information of a dump.
+ *
+ * The values are handed to the constructor and exposed through getters only.
+ *
+ * @package WikidataQuality\ExternalValidation\CrossCheck
+ * @author BP2014N1
+ * @license GNU GPL v2+
+ */
+class DumpMetaInformation
+{
+    /**
+     * Format the data of the dump is stored in.
+     * @var string
+     */
+    private $format;
+
+    /**
+     * Language the data of the dump is written in.
+     * @var string
+     */
+    private $language;
+
+    /**
+     * Format of the dates that occur in the dump.
+     * @var string
+     */
+    private $dateFormat;
+
+    /**
+     * Name of the data source the dump comes from.
+     * @var string
+     */
+    private $dataSourceName;
+
+
+    /**
+     * @param string $format - data format of the dump
+     * @param string $language - language of the data in the dump
+     * @param string $dateFormat - date format used in the dump
+     * @param string $dataSourceName - name of the data source of the dump
+     */
+    public function __construct( $format, $language, $dateFormat, $dataSourceName )
+    {
+        $this->format = $format;
+        $this->language = $language;
+        $this->dateFormat = $dateFormat;
+        $this->dataSourceName = $dataSourceName;
+    }
+
+
+    /**
+     * Gets the data format of the dump.
+     * @return string
+     */
+    public function getFormat()
+    {
+        return $this->format;
+    }
+
+    /**
+     * Gets the language of the data in the dump.
+     * @return string
+     */
+    public function getLanguage()
+    {
+        return $this->language;
+    }
+
+    /**
+     * Gets the date format used in the dump.
+     * @return string
+     */
+    public function getDateFormat()
+    {
+        return $this->dateFormat;
+    }
+
+    /**
+     * Gets the name of the data source of the dump.
+     * @return string
+     */
+    public function getDataSourceName()
+    {
+        return $this->dataSourceName;
+    }
+}
\ No newline at end of file
diff --git a/external-validation/src/CrossCheck/Result/CompareResult.php
b/external-validation/src/CrossCheck/Result/CompareResult.php
index 7eb41d1..b64a04e 100644
--- a/external-validation/src/CrossCheck/Result/CompareResult.php
+++ b/external-validation/src/CrossCheck/Result/CompareResult.php
@@ -52,20 +52,28 @@
*/
private $referencesMissing;
+ /**
+ * Name of data source.
+ * @var string
+ */
+ private $dataSourceName;
+
/**
* @param array $localValues
* @param array $externalValues
* @param bool $dataMismatch
* @param bool $referencesMissing
+ * @param string $dataSourceName
*/
- public function __construct( $propertyId, $claimGuid, $localValues,
$externalValues, $dataMismatch, $referencesMissing ) {
+ public function __construct( $propertyId, $claimGuid, $localValues,
$externalValues, $dataMismatch, $referencesMissing, $dataSourceName ) {
$this->propertyId = $propertyId;
$this->claimGuid = $claimGuid;
$this->localValues = $localValues;
$this->externalValues = $externalValues;
$this->dataMismatch = $dataMismatch;
$this->referencesMissing = $referencesMissing;
+ $this->dataSourceName = $dataSourceName;
}
public function getPropertyId() {
@@ -91,4 +99,8 @@
public function areReferencesMissing() {
return $this->referencesMissing;
}
+
+ public function getDataSourceName() {
+ return $this->dataSourceName;
+ }
}
\ No newline at end of file
diff --git a/external-validation/src/CrossCheck/mapping.inc.php
b/external-validation/src/CrossCheck/mapping.inc.php
index 73eff7c..7a9053e 100644
--- a/external-validation/src/CrossCheck/mapping.inc.php
+++ b/external-validation/src/CrossCheck/mapping.inc.php
@@ -12,7 +12,7 @@
'valueFormatter' => 'concat(substring-after(./text(), ", "), " ",
substring-before(./text(), ", "))'
),
25 => array(
- 'nodeSelector' => '/record/datafield[@tag="500" and
subfield[@code="i"]="v:Mutter"]/subfield[@code="a"]',
+ 'nodeSelector' => '/record/datafield[@tag="500" and
subfield[@code="9"]="v:Mutter"]/subfield[@code="a"]',
'valueFormatter' => 'concat(substring-after(./text(), ", "), " ",
substring-before(./text(), ", "))'
),
26 => array(
diff --git a/external-validation/src/UpdateTable/Importer/GndImporter.php
b/external-validation/src/UpdateTable/Importer/GndImporter.php
index 2b00b06..26b170f 100644
--- a/external-validation/src/UpdateTable/Importer/GndImporter.php
+++ b/external-validation/src/UpdateTable/Importer/GndImporter.php
@@ -35,6 +35,18 @@
*/
private $numberOfImportedEntites = 0;
+ /**
+ * Current database connection
+ * @var \DatabaseBase
+ */
+ private $db;
+
+ /**
+ * Id of the current dump that is imported
+ * @var int
+ */
+ private $dumpId;
+
/**
* @param \ImportContext $importContext
@@ -66,53 +78,75 @@
{
// Download dump
$dumpUrl = $this->buildDumpUrl();
- $this->downloadDump( $dumpUrl );
+ if ( !$this->downloadDump( $dumpUrl ) ) {
+ // If download fails, try previous dump
+ if ( !$this->importContext->isQuiet() ) {
+ print "Download of latest dump failed. Try to download
previous one...\n";
+ }
+ $dumpUrl = $this->buildDumpUrl( true );
+ $this->downloadDump( $dumpUrl );
+ }
- // Connect to database and delete old entried
- $db = $this->establishDbConnection();
+ // Connect to database and delete old entries
+ $this->db = $this->establishDbConnection();
// Insert meta information
$dumpSize = filesize( $this->dumpFile );
- $this->insertMetaInformation( $db, self::DATABASE_NAME,
self::DUMP_DATA_FORMAT, self::DUMP_LANGUAGE, $dumpUrl, $dumpSize,
self::DUMP_LICENSE);
- $dumpId = $this->getDumpId( $db, self::DATABASE_NAME );
+ $this->insertMetaInformation( $this->db, self::DATABASE_NAME,
self::DUMP_DATA_FORMAT, self::DUMP_LANGUAGE, $dumpUrl, $dumpSize,
self::DUMP_LICENSE );
+ $this->dumpId = $this->getDumpId( $this->db, self::DATABASE_NAME );
// Delete old entries
- $this->deleteOldDatabaseEntries( $db, self::WD_PROPERTY_ID );
+ $this->deleteOldDatabaseEntries( $this->db, self::WD_PROPERTY_ID );
// Parse dump and insert entities
xml_set_element_handler(
$this->parser,
"startElement",
- function ( $parser, $name ) use ( $db, $dumpId ) {
- $this->endElement( $db, $dumpId, $name );
- }
+ "endElement"
);
xml_set_character_data_handler( $this->parser, "characterData" );
- $this->parseDump( $db );
+ $this->parseDump( $this->db );
// Release connection
- $this->reuseDbConnection( $db );
+ $this->reuseDbConnection( $this->db );
+ $this->db = null;
}
/**
* Builds url of the latest dump
- * @return string - url of the latest dump
+ * @param $previous - If true, url of the previous dump will be returned
+ * @return string - url of the dump
*/
- private function buildDumpUrl()
+ private function buildDumpUrl( $previous = false )
{
$now = new DateTime();
$year = intval( $now->format( "y" ) );
$month = intval( $now->format( "m" ) );
- if ( $month == 1 ) {
- $month = 10;
- $year--;
- } else if ( $month < 6 ) {
- $month = 2;
- } else if ( $month < 10 ) {
- $month = 6;
+
+ if ( $previous ) {
+ if ( $month == 1 ) {
+ $month = 6;
+ $year--;
+ } else if ( $month < 6 ) {
+ $month = 10;
+ $year--;
+ } else if ( $month < 10 ) {
+ $month = 2;
+ } else {
+ $month = 6;
+ }
} else {
- $month = 10;
+ if ( $month == 1 ) {
+ $month = 10;
+ $year--;
+ } else if ( $month < 6 ) {
+ $month = 2;
+ } else if ( $month < 10 ) {
+ $month = 6;
+ } else {
+ $month = 10;
+ }
}
$url = sprintf( self::DUMP_URL_FORMAT, $year, $month );
@@ -155,14 +189,14 @@
/**
* SAX callback function for end-element event
- * @param \DatabaseBase $db - database connection, that should be used to
insert element
+ * @param Xml parser $parser - current xml parser
* @param string $name - name of the ending element
*/
- private function endElement( $db, $dumpId, $name )
+ private function endElement( $parser, $name )
{
$this->tempRecord .= "</$name>";
if ( $name == "RECORD" ) {
- $this->insertEntity( $db, $dumpId, self::WD_PROPERTY_ID,
$this->getEntityId( $this->tempRecord ), $this->tempRecord );
+ $this->insertEntity( $this->db, $this->dumpId,
self::WD_PROPERTY_ID, $this->getEntityId( $this->tempRecord ),
$this->tempRecord );
$this->numberOfImportedEntites++;
if ( !$this->importContext->isQuiet() ) {
print "\r\033[K";
diff --git a/external-validation/src/UpdateTable/Importer/Importer.php
b/external-validation/src/UpdateTable/Importer/Importer.php
index 9465933..e217b2e 100644
--- a/external-validation/src/UpdateTable/Importer/Importer.php
+++ b/external-validation/src/UpdateTable/Importer/Importer.php
@@ -45,7 +45,7 @@
*/
function __construct( $dumpFileName, $dumpDataFormat, $dumpLanguage,
$importContext )
{
- $this->dumpFile = join(DIRECTORY_SEPARATOR, array( __DIR__, "..",
"..", "..", "dumps", $dumpFileName ) );
+ $this->dumpFile = join( DIRECTORY_SEPARATOR, array( __DIR__, "..",
"..", "..", "dumps", $dumpFileName ) );
$this->dumpDataFormat = $dumpDataFormat;
$this->dumpLanguage = $dumpLanguage;
$this->importContext = $importContext;
@@ -87,9 +87,19 @@
*/
protected function downloadDump( $dumpUrl )
{
+ // Create directory, if needed
+ $dirName = dirname( $this->dumpFile );
+ if ( !is_dir( $dirName ) ) {
+ mkdir( $dirName );
+ }
+
+ // Create file
$targetFile = fopen( $this->dumpFile, "wb" );
+ // Start curl for downloading
$curlSession = curl_init( $dumpUrl );
+ curl_setopt( $curlSession, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0
);
+ curl_setopt( $curlSession, CURLOPT_TIMEOUT, 0 );
curl_setopt( $curlSession, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $curlSession, CURLOPT_FILE, $targetFile );
if ( !$this->importContext->isQuiet() ) {
@@ -97,24 +107,25 @@
curl_setopt( $curlSession, CURLOPT_PROGRESSFUNCTION, array( $this,
"downloadProgressCallback" ) );
}
curl_exec( $curlSession );
- /*$sourceFile = fopen( $dumpUrl, "rb" );
- $buffer = 1024 * 8;
- while( !feof( $sourceFile ) ) {
- fwrite( $targetFile, fread( $sourceFile, $buffer ), $buffer );
- if( !$this->importContext->isQuiet() ) {
- $this->downloadProgressCallback( null, null, ftell(
$sourceFile ), null, null );
- }
- }
- if( !$this->importContext->isQuiet() ) {
- print "\n";
- }
- fclose( $sourceFile );*/
- if( !$this->importContext->isQuiet() ) {
+ //Check for errors
+ $statusCode = curl_getinfo( $curlSession, CURLINFO_HTTP_CODE );
+ if ( $statusCode != 200 || curl_errno( $curlSession ) ) {
+ $error = curl_error( $curlSession );
+ if ( !$this->importContext->isQuiet() ) {
+ print "$error\n";
+ }
+
+ return false;
+ }
+
+ if ( !$this->importContext->isQuiet() ) {
print "\n";
}
fclose( $targetFile );
+
+ return true;
}
/**
@@ -188,7 +199,8 @@
* @param int $size
* @param string $license
*/
- protected function insertMetaInformation( $db, $name, $format, $language,
$source, $size, $license ) {
+ protected function insertMetaInformation( $db, $name, $format, $language,
$source, $size, $license )
+ {
$accumulator = array(
"name" => $name,
"date" => null,
@@ -205,8 +217,7 @@
$rowCount = $db->selectRowCount(
$this->importContext->getMetaTableName(), "*", "name=\"$name\"" );
if ( $rowCount == 0 ) {
$db->insert( $this->importContext->getMetaTableName(),
$accumulator );
- }
- else {
+ } else {
$db->update( $this->importContext->getMetaTableName(),
$accumulator, array( "name=\"$name\"" ) );
}
}
@@ -217,12 +228,12 @@
* @param string $name
* @return bool
*/
- protected function getDumpId( $db, $name ) {
+ protected function getDumpId( $db, $name )
+ {
$result = $db->selectRow( $this->importContext->getMetaTableName(),
"row_id", "name=\"$name\"" );
if ( $result == false ) {
return false;
- }
- else {
+ } else {
return $result->row_id;
}
}
diff --git a/i18n/en.json b/i18n/en.json
index 2ddddec..81cccfe 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -2,21 +2,27 @@
"@metadata": {
"authors": "BP2014N1"
},
- "wikidataquality-desc": "Cross check quality of Wikidata with external
databases",
- "specialpages-group-wikidataquality": "WikidataQuality",
+ "wikidataquality-desc": "Migrate and improve the usage of constraints by
visualizing constraint reports and cross-check data with external databases to
ensure the quality of Wikidata.",
+ "wikidataquality-specialpages-group": "Wikidata Quality",
- "special-constraintreport": "Constraint Report",
+ "wikidataquality-value": "Value",
+ "wikidataquality-status": "Status",
- "special-crosscheck": "Cross-Check with external databases",
- "special-crosscheck-instructions": "Just enter an entity and let it
cross-check against GND.",
- "special-crosscheck-instructions-example": "Try for example Q76 (Barack
Obama) and look at the results.",
- "special-crosscheck-form-itemid-placeholder": "Qxx",
- "special-crosscheck-form-submit-label": "Cross-Check",
- "speical-crosscheck-result-headline": "Result",
- "special-crosscheck-result-item-not-existent": "Item does not exist!",
- "special-crosscheck-result-no-wd-entity": "Could not find referenced
entity",
- "special-crosscheck-result-no-ext-entity": "Could not find external
entity",
- "special-crosscheck-result-mismatch": ": Data mismatch!",
- "special-crosscheck-result-success": ": Verification successful!"
+ "wikidataquality-constraintreport": "Constraint Report",
+
+ "wikidataquality-crosscheck": "Cross-Check with external databases",
+ "wikidataquality-crosscheck-instructions": "Just enter an entity and let
it cross-check against GND.",
+ "wikidataquality-crosscheck-instructions-example": "Try for example Q506
(Nina Hagen) and look at the results.",
+ "wikidataquality-crosscheck-form-itemid-placeholder": "Qxx",
+ "wikidataquality-crosscheck-form-submit-label": "Cross-Check",
+ "wikidataquality-crosscheck-result-headline": "Result for ",
+ "wikidataquality-crosscheck-result-item-not-existent": "Item does not
exist!",
+ "wikidataquality-crosscheck-result-no-wd-entity": "Could not find
referenced entity",
+ "wikidataquality-crosscheck-result-no-ext-entity": "Could not find
external entity",
+ "wikidataquality-crosscheck-result-mismatch": "Mismatch",
+ "wikidataquality-crosscheck-result-success": "Match",
+ "wikidataquality-crosscheck-comparative-value": "Comparative Value",
+ "wikidataquality-crosscheck-external-source": "External Source"
+
}
\ No newline at end of file
diff --git a/i18n/qqq.json b/i18n/qqq.json
index ad76acb..ecd85c6 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -2,20 +2,26 @@
"@metadata": {
"authors": "BP2014N1"
},
+
"wikidataquality-desc": "{{desc|name=Wikidata
Quality|url=https://www.mediawiki.org/wiki/Extension:WikidataQuality}}",
- "specialpages-group-wikidataquality": "{{doc-special-group|that=are
related to the WikidataQuality extension}}",
+ "wikidataquality-specialpages-group": "{{doc-special-group|that=are
related to the WikidataQuality extension}}",
- "special-constraintreport": "{{doc-special|ConstraintReport}}\nTitle of
the special page.",
+ "wikidataquality-value": "Name for value of the current statement",
+ "wikidataquality-status": "Name for status of the cross-check",
- "special-crosscheck": "{{doc-special|CrossCheck}}\nTitle of the special
page.",
- "special-crosscheck-instructions": "Instructions for starting a
cross-check against GND.",
- "special-crosscheck-instructions-example": "Example item for
cross-checking. Q76 is suitable.",
- "special-crosscheck-form-itemid-placeholder": "Item id placeholder for the
input field.",
- "special-crosscheck-form-submit-label": "Label of the button, that starts
the cross-check.",
- "speical-crosscheck-result-headline": "Headline that appears above the
cross-check results",
- "special-crosscheck-result-item-not-existent": "Error message that appears
when the entered item does not exist.",
- "special-crosscheck-result-no-wd-entity": "Message that appears when the
referenced wikidata entity does not exist.",
- "special-crosscheck-result-no-ext-entity": "Message that appears when the
external entity does not exist.",
- "special-crosscheck-result-mismatch": "Message that appears for
statements, which could not be verified. ",
- "special-crosscheck-result-success": ": Message that appears for
statements, which could be verified. "
+ "wikidataquality-constraintreport":
"{{doc-special|ConstraintReport}}\nTitle of the special page.",
+
+ "wikidataquality-crosscheck": "{{doc-special|CrossCheck}}\nTitle of the
special page.",
+ "wikidataquality-crosscheck-instructions": "Instructions for starting a
cross-check against GND.",
+ "wikidataquality-crosscheck-instructions-example": "Example item for
cross-checking. Q506 is suitable.",
+ "wikidataquality-crosscheck-form-itemid-placeholder": "Item id placeholder
for the input field.",
+ "wikidataquality-crosscheck-form-submit-label": "Label of the button that
starts the cross-check.",
+ "wikidataquality-crosscheck-result-headline": "Headline that appears above
the cross-check results",
+ "wikidataquality-crosscheck-result-item-not-existent": "Error message that
appears when the entered item does not exist.",
+ "wikidataquality-crosscheck-result-no-wd-entity": "Message that appears
when the referenced Wikidata entity does not exist.",
+ "wikidataquality-crosscheck-result-no-ext-entity": "Message that appears
when the external entity does not exist.",
+ "wikidataquality-crosscheck-result-mismatch": "Message that appears for
statements that could not be verified.",
+ "wikidataquality-crosscheck-result-success": "Message that appears for
statements that could be verified.",
+ "wikidataquality-crosscheck-comparative-value": "Name for value from the
external source for the cross-check",
+ "wikidataquality-crosscheck-external-source": "Name for the external source
that the comparative data is from"
}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/189462
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I8cda3c5e8b463f0e822337526004fa0dadea4eda
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikidataQuality
Gerrit-Branch: master
Gerrit-Owner: Tamslo <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits