Gergő Tisza has uploaded a new change for review.
https://gerrit.wikimedia.org/r/94088
Change subject: Handle multiple license templates
......................................................................
Handle multiple license templates
Allows CMD properties to be defined as multivalued, in which case
values will be collected in an array instead of overwriting each
other. On output it will still throw away all but the last value,
but until then internal functions can access all of them.
The license matcher uses this feature to look through multiple
short names so multi-licensed images will be handled correctly if
any one of the licenses can be recognized.
Bug: 56687
Change-Id: If2732dd2eb7150c9ccfc4e82f4dcbf9a479a0fa9
---
M CommonsMetadata_body.php
1 file changed, 44 insertions(+), 19 deletions(-)
git pull
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CommonsMetadata
refs/changes/88/94088/1
diff --git a/CommonsMetadata_body.php b/CommonsMetadata_body.php
index 8caa875..c43d36f 100755
--- a/CommonsMetadata_body.php
+++ b/CommonsMetadata_body.php
@@ -80,7 +80,7 @@
}
if ( isset( $data['LicenseShortName'] ) ) {
- $license = self::getLicenseFromShortname(
$data['LicenseShortName'] );
+ $license = self::filterShortnamesAndGetLicense(
$data['LicenseShortName'] );
if ( $license ) {
$combinedMeta['License'] = array(
'value' => $license,
@@ -97,6 +97,9 @@
);
foreach( $data as $name => $value ) {
+ if ( in_array( $name,
CommonsMetadata_InformationParser::$multivaluedProperties ) ) {
+ $value = end( $value );
+ }
$combinedMeta[ $name ] = array(
'value' => $value,
'source' => 'commons-desc-page'
@@ -170,14 +173,18 @@
/**
* Tries to identify the license based on its short name.
- * @param string $shortName
+ * Might also remove some of the names so the best one is preferred for
display.
+ * @param array $shortNames
* @return string|null one of the values from self::$licenses, or null
if not recognized
* @see
https://commons.wikimedia.org/wiki/Commons:Machine-readable_data#Machine_readable_data_set_by_license_templates
*/
- protected static function getLicenseFromShortname( $shortName ) {
- $shortName = strtolower( trim( $shortName ) );
- if ( isset( self::$licenses[$shortName] ) ) {
- return self::$licenses[$shortName];
+ protected static function filterShortnamesAndGetLicense( &$shortNames )
{
+ foreach ( $shortNames as $name ) {
+ $name = strtolower( trim( $name ) );
+ if ( isset( self::$licenses[$name] ) ) {
+ $shortNames = array( $name );
+ return self::$licenses[$name];
+ }
}
return null;
}
@@ -204,6 +211,9 @@
* being "public" in order that the XMLParser can call them
*/
class CommonsMetadata_InformationParser {
+ public static $multivaluedProperties = array(
+ 'LicenseShortName',
+ );
private $xmlParser;
private $state = self::STATE_INITIAL;
@@ -467,22 +477,19 @@
$this->spanDepth--;
if ( $this->spanDepth <= 0 ) {
$this->state =
self::STATE_LICENSE;
-
$this->finalProps[$this->propName] = $this->text;
+ $this->addPropertyValue(
$this->finalProps, $this->propName, $this->text );
if ( $this->propName ===
'UsageTerms' ) {
- if ( $this->text ===
'Public domain' ) {
-
$this->finalProps['Copyrighted'] = 'False';
- } else {
-
$this->finalProps['Copyrighted'] = 'True';
- }
+ $copyrighted = (
$this->text === 'Public domain' ) ? 'False' : 'True';
+
$this->addPropertyValue( $this->finalProps, 'Copyrighted', $copyrighted );
} elseif ( $this->propName ===
'GPS' ) {
$coord = explode( ';',
$this->text );
if ( count( $coord )
=== 2 &&
is_numeric(
$coord[0] ) &&
is_numeric(
$coord[1] )
) {
-
$this->finalProps['GPSLatitude'] = $coord[0];
-
$this->finalProps['GPSLongitude'] = $coord[1];
-
$this->finalProps['GPSMapDatum'] = 'WGS-84';
+
$this->addPropertyValue( $this->finalProps, 'GPSLatitude', $coord[0] );
+
$this->addPropertyValue( $this->finalProps, 'GPSLongitude', $coord[1] );
+
$this->addPropertyValue( $this->finalProps, 'GPSMapDatum', 'WGS-84' );
unset(
$this->finalProps['GPS'] );
}
$this->state =
self::STATE_INITIAL;
@@ -500,18 +507,19 @@
}
if ( $this->tdDepth <= 0 ) {
$this->state = self::STATE_INITIAL;
+
if ( $this->langText !== '' ) {
if ( $this->targetLang ) {
- $this->finalProps[
$this->propName ] = Html::rawElement(
+ $propValue =
Html::rawElement(
'span',
// FIXME dir
too?
array( 'lang'
=> $this->extractionLang ),
$this->langText
);
} else {
-
$this->finalProps[$this->propName]['_type'] = 'lang';
+ $propValue = array(
'_type' => 'lang' );
foreach (
$this->allLangTexts as $lang => $text ) {
-
$this->finalProps[$this->propName][$lang] = Html::rawElement(
+ $propValue
[$lang] = Html::rawElement(
'span',
//
FIXME dir too?
array(
'lang' => $lang ),
@@ -520,8 +528,10 @@
}
}
} else {
- $this->finalProps[
$this->propName ] = $this->text;
+ $propValue = $this->text;
}
+ $this->addPropertyValue(
$this->finalProps, $this->propName, $propValue );
+
$this->langText = '';
$this->extractionLang = '';
$this->text = '';
@@ -612,4 +622,19 @@
public function getError() {
return xml_get_current_byte_index( $this->xmlParser ) . ': ' .
xml_error_string( xml_get_error_code( $this->xmlParser ));
}
+
+ /**
+ * Checks configuration and decides whether to store a single value and
override old ones or store an array of
+ * values.
+ * @param array $properties array used to collect the property values
+ * @param string $name
+ * @param string $value
+ */
+ private function addPropertyValue( &$properties, $name, $value ) {
+ if ( in_array( $name, self::$multivaluedProperties ) ) {
+ $properties[$name][] = $value;
+ } else {
+ $properties[$name] = $value;
+ }
+ }
}
--
To view, visit https://gerrit.wikimedia.org/r/94088
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If2732dd2eb7150c9ccfc4e82f4dcbf9a479a0fa9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits