Gergő Tisza has uploaded a new change for review.
https://gerrit.wikimedia.org/r/93889
Change subject: Use description HTML structure instead of categories to identify license
......................................................................
Use description HTML structure instead of categories to identify license
Categories do not always work due to bug 56598.
Bug: 56143
Change-Id: Ibaf09360041408e9bf9692056572f5b23b7f7c08
---
M CommonsMetadata_body.php
1 file changed, 28 insertions(+), 9 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CommonsMetadata refs/changes/89/93889/1
diff --git a/CommonsMetadata_body.php b/CommonsMetadata_body.php
index 241526c..e50a845 100755
--- a/CommonsMetadata_body.php
+++ b/CommonsMetadata_body.php
@@ -79,17 +79,18 @@
$data = self::getMetadata( $descriptionText );
}
- // For now only get the immediate categories
- $categories = self::getCategories( $file );
-
- $licenses = self::getLicensesAndRemoveFromCategories(
$categories );
- if ( $licenses ) {
- $combinedMeta['License'] = array(
- 'value' => $licenses[0],
- 'source' => 'commons-categories',
- );
+ if ( isset( $data['LicenseShortName'] ) ) {
+ $license = self::getLicenseFromShortname(
$data['LicenseShortName'] );
+ if ( $license ) {
+ $combinedMeta['License'] = array(
+ 'value' => $license,
+ 'source' => 'commons-templates',
+ );
+ }
}
+ // For now only get the immediate categories
+ $categories = self::getCategories( $file );
$combinedMeta['Categories'] = array(
'value' => implode( '|', $categories ),
'source' => 'commons-categories',
@@ -154,6 +155,7 @@
* and returns the corresponding licenses.
* @param array $categories a list of human-readable category names.
* @return array
+ * FIXME categories do not work with Commons-hosted images due to bug 56598
*/
protected static function getLicensesAndRemoveFromCategories(
&$categories ) {
$licenses = array();
@@ -164,6 +166,22 @@
}
}
return array_merge( $licenses ); // renumber to avoid holes in
array
+ }
+
+ /**
+ * Tries to identify the license based on its short name.
+ * @param string $shortName
+ * @return string|null one of the values from self::$licenses, or null if not recognized
+ * @see https://commons.wikimedia.org/wiki/Commons:Machine-readable_data#Machine_readable_data_set_by_license_templates
+ */
+ protected static function getLicenseFromShortname( $shortName ) {
+ $shortName = strtolower( $shortName );
+ foreach ( self::$licenses as $license ) {
+ if ( strpos( $shortName, $license ) === 0 ) {
+ return $license;
+ }
+ }
+ return null;
}
/**
@@ -410,6 +428,7 @@
$mapping = array(
'licensetpl_link' => 'LicenseUrl',
'licensetpl_long' => 'UsageTerms',
+ 'licensetpl_short' => 'LicenseShortName',
'geo' => 'GPS', // Not final property name
);
--
To view, visit https://gerrit.wikimedia.org/r/93889
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibaf09360041408e9bf9692056572f5b23b7f7c08
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits