Gergő Tisza has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/94088


Change subject: Handle multiple license templates
......................................................................

Handle multiple license templates

Allows CMD properties to be defined as multivalued, in which case
values will be collected in an array instead of overwriting each
other. On output it will still throw away all but the last value,
but until then internal functions can access all of them.

The license matcher uses this feature to look through multiple
short names so multi-licensed images will be handled correctly if
any one of the licenses can be recognized.

Bug: 56687
Change-Id: If2732dd2eb7150c9ccfc4e82f4dcbf9a479a0fa9
---
M CommonsMetadata_body.php
1 file changed, 44 insertions(+), 19 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CommonsMetadata 
refs/changes/88/94088/1

diff --git a/CommonsMetadata_body.php b/CommonsMetadata_body.php
index 8caa875..c43d36f 100755
--- a/CommonsMetadata_body.php
+++ b/CommonsMetadata_body.php
@@ -80,7 +80,7 @@
                }
 
                if ( isset( $data['LicenseShortName'] ) ) {
-                       $license = self::getLicenseFromShortname( 
$data['LicenseShortName'] );
+                       $license = self::filterShortnamesAndGetLicense( 
$data['LicenseShortName'] );
                        if ( $license ) {
                                $combinedMeta['License'] = array(
                                        'value' => $license,
@@ -97,6 +97,9 @@
                );
 
                foreach( $data as $name => $value ) {
+                       if ( in_array( $name, 
CommonsMetadata_InformationParser::$multivaluedProperties ) ) {
+                               $value = end( $value );
+                       }
                        $combinedMeta[ $name ] = array(
                                'value' => $value,
                                'source' => 'commons-desc-page'
@@ -170,14 +173,18 @@
 
        /**
         * Tries to identify the license based on its short name.
-        * @param string $shortName
+        * Might also remove some of the names so the best one is preferred for 
display.
+        * @param array $shortNames
         * @return string|null one of the values from self::$licenses, or null 
if not recognized
         * @see 
https://commons.wikimedia.org/wiki/Commons:Machine-readable_data#Machine_readable_data_set_by_license_templates
         */
-       protected static function getLicenseFromShortname( $shortName ) {
-               $shortName = strtolower( trim( $shortName ) );
-               if ( isset( self::$licenses[$shortName] ) ) {
-                       return self::$licenses[$shortName];
+       protected static function filterShortnamesAndGetLicense( &$shortNames ) 
{
+               foreach ( $shortNames as $name ) {
+                       $name = strtolower( trim( $name ) );
+                       if ( isset( self::$licenses[$name] ) ) {
+                               $shortNames = array( $name );
+                               return self::$licenses[$name];
+                       }
                }
                return null;
        }
@@ -204,6 +211,9 @@
  * being "public" in order that the XMLParser can call them
  */
 class CommonsMetadata_InformationParser {
+       public static $multivaluedProperties = array(
+               'LicenseShortName',
+       );
 
        private $xmlParser;
        private $state = self::STATE_INITIAL;
@@ -467,22 +477,19 @@
                                        $this->spanDepth--;
                                        if ( $this->spanDepth <= 0 ) {
                                                $this->state = 
self::STATE_LICENSE;
-                                               
$this->finalProps[$this->propName] = $this->text;
+                                               $this->addPropertyValue( 
$this->finalProps, $this->propName, $this->text );
                                                if ( $this->propName === 
'UsageTerms' ) {
-                                                       if ( $this->text === 
'Public domain' ) {
-                                                               
$this->finalProps['Copyrighted'] = 'False';
-                                                       } else {
-                                                               
$this->finalProps['Copyrighted'] = 'True';
-                                                       }
+                                                       $copyrighted = ( 
$this->text === 'Public domain' ) ? 'False' : 'True';
+                                                       
$this->addPropertyValue( $this->finalProps, 'Copyrighted', $copyrighted );
                                                } elseif ( $this->propName === 
'GPS' ) {
                                                        $coord = explode( ';', 
$this->text );
                                                        if ( count( $coord ) 
=== 2 &&
                                                                is_numeric( 
$coord[0] ) &&
                                                                is_numeric( 
$coord[1] )
                                                        ) {
-                                                               
$this->finalProps['GPSLatitude'] = $coord[0];
-                                                               
$this->finalProps['GPSLongitude'] = $coord[1];
-                                                               
$this->finalProps['GPSMapDatum'] = 'WGS-84';
+                                                               
$this->addPropertyValue( $this->finalProps, 'GPSLatitude', $coord[0] );
+                                                               
$this->addPropertyValue( $this->finalProps, 'GPSLongitude', $coord[1] );
+                                                               
$this->addPropertyValue( $this->finalProps, 'GPSMapDatum', 'WGS-84' );
                                                                unset( 
$this->finalProps['GPS'] );
                                                        }
                                                        $this->state = 
self::STATE_INITIAL;
@@ -500,18 +507,19 @@
                                }
                                if ( $this->tdDepth <= 0 ) {
                                        $this->state = self::STATE_INITIAL;
+
                                        if ( $this->langText !== '' ) {
                                                if ( $this->targetLang ) {
-                                                       $this->finalProps[ 
$this->propName ] = Html::rawElement(
+                                                       $propValue = 
Html::rawElement(
                                                                'span',
                                                                // FIXME dir 
too?
                                                                array( 'lang' 
=> $this->extractionLang ),
                                                                $this->langText
                                                        );
                                                } else {
-                                                       
$this->finalProps[$this->propName]['_type'] = 'lang';
+                                                       $propValue = array( 
'_type' => 'lang' );
                                                        foreach ( 
$this->allLangTexts as $lang => $text ) {
-                                                               
$this->finalProps[$this->propName][$lang] = Html::rawElement(
+                                                               $propValue 
[$lang] = Html::rawElement(
                                                                        'span',
                                                                        // 
FIXME dir too?
                                                                        array( 
'lang' => $lang ),
@@ -520,8 +528,10 @@
                                                        }
                                                }
                                        } else {
-                                               $this->finalProps[ 
$this->propName ] = $this->text;
+                                               $propValue = $this->text;
                                        }
+                                       $this->addPropertyValue( 
$this->finalProps, $this->propName, $propValue );
+
                                        $this->langText = '';
                                        $this->extractionLang = '';
                                        $this->text = '';
@@ -612,4 +622,19 @@
        public function getError() {
                return xml_get_current_byte_index( $this->xmlParser ) . ': ' . 
xml_error_string( xml_get_error_code( $this->xmlParser ));
        }
+
+       /**
+        * Checks configuration and decides whether to store a single value and 
override old ones or store an array of
+        * values.
+        * @param array $properties array used to collect the property values
+        * @param string $name
+        * @param string $value
+        */
+       private function addPropertyValue( &$properties, $name, $value ) {
+               if ( in_array( $name, self::$multivaluedProperties ) ) {
+                       $properties[$name][] = $value;
+               } else {
+                       $properties[$name] = $value;
+               }
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/94088
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If2732dd2eb7150c9ccfc4e82f4dcbf9a479a0fa9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to