Gergő Tisza has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/93889


Change subject: Use description HTML structure instead of categories to identify license
......................................................................

Use description HTML structure instead of categories to identify license

Categories do not always work due to bug 56598.

Bug: 56143
Change-Id: Ibaf09360041408e9bf9692056572f5b23b7f7c08
---
M CommonsMetadata_body.php
1 file changed, 28 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CommonsMetadata refs/changes/89/93889/1

diff --git a/CommonsMetadata_body.php b/CommonsMetadata_body.php
index 241526c..e50a845 100755
--- a/CommonsMetadata_body.php
+++ b/CommonsMetadata_body.php
@@ -79,17 +79,18 @@
                        $data = self::getMetadata( $descriptionText );
                }
 
-               // For now only get the immediate categories
-               $categories = self::getCategories( $file );
-
-               $licenses = self::getLicensesAndRemoveFromCategories( 
$categories );
-               if ( $licenses ) {
-                       $combinedMeta['License'] = array(
-                               'value' => $licenses[0],
-                               'source' => 'commons-categories',
-                       );
+               if ( isset( $data['LicenseShortName'] ) ) {
+                       $license = self::getLicenseFromShortname( 
$data['LicenseShortName'] );
+                       if ( $license ) {
+                               $combinedMeta['License'] = array(
+                                       'value' => $license,
+                                       'source' => 'commons-templates',
+                               );
+                       }
                }
 
+               // For now only get the immediate categories
+               $categories = self::getCategories( $file );
                $combinedMeta['Categories'] = array(
                        'value' => implode( '|', $categories ),
                        'source' => 'commons-categories',
@@ -154,6 +155,7 @@
         * and returns the corresponding licenses.
         * @param array $categories a list of human-readable category names.
         * @return array
+        * FIXME categories do not work with Commons-hosted images due to bug 
56598
         */
        protected static function getLicensesAndRemoveFromCategories( 
&$categories ) {
                $licenses = array();
@@ -164,6 +166,22 @@
                        }
                }
                return array_merge( $licenses ); // renumber to avoid holes in 
array
+       }
+
+       /**
+        * Tries to identify the license based on its short name.
+        * @param string $shortName
+        * @return string|null one of the values from self::$licenses, or null 
if not recognized
+        * @see 
https://commons.wikimedia.org/wiki/Commons:Machine-readable_data#Machine_readable_data_set_by_license_templates
+        */
+       protected static function getLicenseFromShortname( $shortName ) {
+               $shortName = strtolower( $shortName );
+               foreach ( self::$licenses as $license ) {
+                       if ( strpos( $shortName, $license ) === 0 ) {
+                               return $license;
+                       }
+               }
+               return null;
        }
 
        /**
@@ -410,6 +428,7 @@
                $mapping = array(
                        'licensetpl_link' => 'LicenseUrl',
                        'licensetpl_long' => 'UsageTerms',
+                       'licensetpl_short' => 'LicenseShortName',
                        'geo' => 'GPS', // Not final property name
                );
 

-- 
To view, visit https://gerrit.wikimedia.org/r/93889
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibaf09360041408e9bf9692056572f5b23b7f7c08
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to