Brian Wolff has uploaded a new change for review.
https://gerrit.wikimedia.org/r/78926
Change subject: Add "extended" file metadata to API to gather data from
multiple sources
......................................................................
Add "extended" file metadata to API to gather data from multiple sources
Part of the point of this is to add a hook to allow extensions to
add their own metadata (so somebody could create an extension to
parse Commons description pages).
It's hoped that this would provide a simple system to get file
metadata, and would be able to return information on any wiki
(even without any extensions installed to provide additional
information). So it could fall back to Exif data if there's
no better source of information for the file available.
I'll probably send a post to wikitech-l at some point to
ask for feedback on this proposed system.
This also makes a small change to how language
fallbacks are used when formatting multilingual metadata
fields (it now takes them into account instead of
just using the content language).
One possible issue with this is that it sometimes outputs
local URLs (so consider this semi-WIP on account of that).
Change-Id: I77303d8e535fc1c42e14cfb853814e5c434a81ec
---
M docs/hooks.txt
M includes/api/ApiQueryImageInfo.php
M includes/media/FormatMetadata.php
M includes/media/MediaHandler.php
4 files changed, 248 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/26/78926/1
diff --git a/docs/hooks.txt b/docs/hooks.txt
index 23ed032..492c240 100644
--- a/docs/hooks.txt
+++ b/docs/hooks.txt
@@ -1144,6 +1144,13 @@
underscore) magic words. Called by MagicWord.
&$doubleUnderscoreIDs: array of strings
+'GetExtendedMetadata': Get extended file metadata for the API
+&$combinedMeta: Array of the form: 'MetadataPropName' => array(
+'value' => prop value, 'source' => 'name of hook' ).
+$file: File object of file in question
+$context: RequestContext (including language to use)
+$single: Only extract the current language
+
'GetFullURL': Modify fully-qualified URLs used in redirects/export/offsite
data.
$title: Title object of page
$url: string value as output (out parameter, can modify)
diff --git a/includes/api/ApiQueryImageInfo.php
b/includes/api/ApiQueryImageInfo.php
index 3a7b331..53a4f73 100644
--- a/includes/api/ApiQueryImageInfo.php
+++ b/includes/api/ApiQueryImageInfo.php
@@ -49,6 +49,12 @@
$scale = $this->getScale( $params );
+ $metadataOpts = array(
+ 'version' => $params['metadataversion'],
+ 'language' => $params['extmetadatalanguage'],
+ 'multilang' => $params['extmetadatamultilang'],
+ );
+
$pageIds = $this->getPageSet()->getAllTitlesByNamespace();
if ( !empty( $pageIds[NS_FILE] ) ) {
$titles = array_keys( $pageIds[NS_FILE] );
@@ -141,7 +147,8 @@
$fit = $this->addPageSubItem( $pageId,
self::getInfo( $img, $prop,
$result,
- $finalThumbParams,
$params['metadataversion'] ) );
+ $finalThumbParams,
$metadataOpts )
+ );
if ( !$fit ) {
if ( count( $pageIds[NS_FILE] )
== 1 ) {
// See the 'the user is
screwed' comment above
@@ -173,7 +180,7 @@
$fit = self::getTransformCount() <
self::TRANSFORM_LIMIT &&
$this->addPageSubItem( $pageId,
self::getInfo( $oldie,
$prop, $result,
-
$finalThumbParams, $params['metadataversion']
+
$finalThumbParams, $metadataOpts
)
);
if ( !$fit ) {
@@ -291,10 +298,18 @@
* @param array $prop of properties to get (in the keys)
* @param $result ApiResult object
* @param array $thumbParams containing 'width' and 'height' items, or
null
- * @param string $version Version of image metadata (for things like
jpeg which have different versions).
+ * @param array $metadataOpts Options for metadata fetching (keys: 'version', 'language', 'multilang').
* @return Array: result array
*/
- static function getInfo( $file, $prop, $result, $thumbParams = null,
$version = 'latest' ) {
+ static function getInfo( $file, $prop, $result, $thumbParams = null,
$metadataOpts = false ) {
+ if ( !$metadataOpts || is_string( $metadataOpts ) ) {
+ $metadataOpts = array(
+ 'version' => 'latest',
+ 'language' => $wgContLang,
+ 'multilang' => false,
+ );
+ }
+ $version = $metadataOpts['version'];
$vals = array();
// Timestamp is shown even if the file is revdelete'd in
interface
// so do same here.
@@ -355,6 +370,7 @@
$sha1 = isset( $prop['sha1'] );
$meta = isset( $prop['metadata'] );
$commonmeta = isset( $prop['commonmetadata'] );
+ $extmeta = isset( $prop['extmetadata'] );
$mime = isset( $prop['mime'] );
$mediatype = isset( $prop['mediatype'] );
$archive = isset( $prop['archivename'] );
@@ -416,6 +432,19 @@
$vals['commonmetadata'] = $metaArray ?
self::processMetaData( $metaArray, $result ) : array();
}
+ if ( $extmeta ) {
+ $metaArray = FormatMetadata::getExtendedMeta(
+ $file,
+ $metadataOpts['language'],
+ !$metadataOpts['multilang']
+ );
+ foreach( $metaArray as $key => &$value ) {
+ ApiResult::setContent( $value, $value['value']
);
+ unset( $value['value'] );
+ }
+ $vals['extendedmetadata'] = $metaArray;
+ }
+
if ( $mime ) {
$vals['mime'] = $file->getMimeType();
}
@@ -470,6 +499,11 @@
}
public function getCacheMode( $params ) {
+ $prop = array_flip( $params['prop'] );
+ if ( isset( $prop['extmetadata'] ) ) {
+ // extmetadata can vary depending on user
+ return 'private';
+ }
return 'public';
}
@@ -486,6 +520,7 @@
}
public function getAllowedParams() {
+ global $wgContLang;
return array(
'prop' => array(
ApiBase::PARAM_ISMULTI => true,
@@ -516,6 +551,14 @@
'metadataversion' => array(
ApiBase::PARAM_TYPE => 'string',
ApiBase::PARAM_DFLT => '1',
+ ),
+ 'extmetadatalanguage' => array(
+ ApiBase::PARAM_TYPE => 'string',
+ ApiBase::PARAM_DFLT => $wgContLang->getCode(),
+ ),
+ 'extmetadatamultilang' => array(
+ ApiBase::PARAM_TYPE => 'boolean',
+ ApiBase::PARAM_DFLT => false,
),
'urlparam' => array(
ApiBase::PARAM_DFLT => '',
@@ -560,6 +603,7 @@
'mediatype' => ' mediatype - Adds the media
type of the image',
'metadata' => ' metadata - Lists file
metadata (like Exif) for the version of the image',
'commonmetadata' => ' commonmetadata - Lists file
format generic metadata for the version of the image',
+ 'extmetadata' => ' extmetadata - Lists formatted
metadata combined from multiple sources. Results are HTML formatted.',
'archivename' => ' archivename - Adds the file
name of the archive version for non-latest versions',
'bitdepth' => ' bitdepth - Adds the bit
depth of the version',
);
@@ -598,6 +642,10 @@
'end' => 'Timestamp to stop listing at',
'metadataversion' => array( "Version of metadata to
use. if 'latest' is specified, use latest version.",
"Defaults to '1' for backwards
compatibility" ),
+ 'extmetadatalanguage' => array( 'What language to fetch
extmetadata in. This affects both which',
+ 'translation to fetch, if
multiple are available, as well as how things',
+ 'like numbers and various
values are formatted.' ),
+ 'extmetadatamultilang' => 'If translations for
extmetadata property are available, fetch all of them.',
'continue' => 'If the query response includes a
continue value, use it here to get another page of results',
'localonly' => 'Look only for files in the local
repository',
);
diff --git a/includes/media/FormatMetadata.php
b/includes/media/FormatMetadata.php
index 505c2c9..d4382d6 100644
--- a/includes/media/FormatMetadata.php
+++ b/includes/media/FormatMetadata.php
@@ -46,6 +46,18 @@
*/
class FormatMetadata extends ContextSource {
+ /** @var boolean Only output a single language for multi-language
fields */
+ protected $singleLang = false;
+
+ /**
+ * Trigger only outputting single language for multilanguage fields
+ *
+ * @param Boolean $val
+ */
+ public function setSingleLanguage( $val ) {
+ $this->singleLang = $val;
+ }
+
/**
* Numbers given by Exif user agents are often magical, that is they
* should be replaced by a detailed explanation depending on their
@@ -891,6 +903,12 @@
$content = '';
+ if ( !$useContentLang ) {
+ $priorityLanguages =
Language::getFallbacksIncludingSiteLanguage( $this->getLanguage()->getCode() );
+ $priorityLanguages = array_merge(
(array) $this->getLanguage()->getCode(), $priorityLanguages[0],
$priorityLanguages[1] );
+ } else {
+ $priorityLanguages =
$wgContLang->getFallbackLanguages();
+ }
$cLang = $wgContLang->getCode();
$defaultItem = false;
$defaultLang = false;
@@ -907,17 +925,24 @@
unset( $vals['x-default'] );
}
// Do contentLanguage.
- if ( isset( $vals[$cLang] ) ) {
- $isDefault = false;
- if ( $vals[$cLang] === $defaultItem ) {
- $defaultItem = false;
- $isDefault = true;
- }
- $content .= self::langItem(
- $vals[$cLang], $cLang,
- $isDefault, $noHtml,
$useContentLang );
+ foreach( $priorityLanguages as $pLang ) {
+ if ( isset( $vals[$pLang] ) ) {
+ $isDefault = false;
+ if ( $vals[$pLang] ===
$defaultItem ) {
+ $defaultItem = false;
+ $isDefault = true;
+ }
+ $content .= self::langItem(
+ $vals[$pLang], $pLang,
+ $isDefault, $noHtml,
$useContentLang );
- unset( $vals[$cLang] );
+ unset( $vals[$pLang] );
+
+ if ( $this->singleLang ) {
+ return
Html::rawElement( 'span',
+ array( 'lang'
=> $pLang ), $vals[$pLang] );
+ }
+ }
}
// Now do the rest.
@@ -928,11 +953,18 @@
}
$content .= self::langItem( $item,
$lang, false, $noHtml,
$useContentLang );
+ if ( $this->singleLang ) {
+ return Html::rawElement( 'span',
+ array( 'lang' => $lang
), $item );
+ }
}
if ( $defaultItem !== false ) {
$content = self::langItem( $defaultItem,
$defaultLang, true, $noHtml,
$useContentLang ) .
$content;
+ if ( $this->singleLang ) {
+ return $defaultItem;
+ }
}
if ( $noHtml ) {
return $content;
@@ -1367,6 +1399,152 @@
$tel )->text();
}
}
+
+ /**
+ * Get a list of fields that are visible by default.
+ *
+ * @return array
+ */
+ public static function getVisibleFields() {
+ $fields = array();
+ $lines = explode( "\n", wfMessage( 'metadata-fields'
)->inContentLanguage()->text() );
+ foreach ( $lines as $line ) {
+ $matches = array();
+ if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches
) ) {
+ $fields[] = $matches[1];
+ }
+ }
+ $fields = array_map( 'strtolower', $fields );
+ return $fields;
+ }
+
+ /**
+ * Get extended metadata. Like normal metadata, but includes
+ * additional information, and only one language is shown.
+ *
+ * @note This method cannot be called while $wgParser is parsing
something.
+ *
+ * @todo This should possibly be cached.
+ *
+ * @param File $file File to use
+ * @param String|Language $lang Language to use (defaults to content
language)
+ * @param IContextSource $context Request context (optional)
+ * @return Array (Empty array if no metadata).
+ */
+ public static function getExtendedMeta( File $file, $lang = false,
$single = true, $context = false ) {
+ global $wgContLang;
+ if ( !$lang ) {
+ $lang = $wgContLang;
+ }
+
+ $format = new FormatMetadata;
+ $format->setSingleLanguage( $single );
+ if ( $context ) {
+ $format->setContext( $context );
+ }
+
+ $derivContext = new DerivativeContext( $format->getContext() );
+ $derivContext->setLanguage( $lang );
+ $format->setContext( $derivContext );
+
+ return $format->makeExtendedMeta( $file );
+ }
+
+ /**
+ * Make extended metadata.
+ *
+ * Usually outside callers use getExtendedMeta.
+ *
+ * @note This method cannot be called while $wgParser is parsing
something.
+ *
+ * @param File $file File to use
+ * @return Array
+ */
+ public function makeExtendedMeta( File $file ) {
+ global $wgParser;
+ wfProfileIn( __METHOD__ );
+
+ // If revision deleted, exit immediately
+ if ( $file->isDeleted( File::DELETED_FILE ) ) {
+ return array();
+ }
+
+ $combinedMeta = array(
+ // This is modification time, which is close to
"upload" time.
+ 'DateTime' => array(
+ 'value' => htmlspecialchars(
$this->getLanguage()->timeanddate( $file->getTimestamp() ) ),
+ 'source' => 'mediawiki-metadata',
+ ),
+ );
+ if ( !$file->isDeleted( File::DELETED_USER ) ) {
+ $combinedMeta['Artist'] = array(
+ 'value' => Linker::userLink( $file->getUser(
'id' ), $file->getUser() ),
+ 'source' => 'mediawiki-metadata',
+ );
+ }
+ if ( $file->getTitle() ) {
+ $pos = strrpos( $file->getTitle()->getText(), '.' );
+ if ( $pos ) {
+ $name = substr( $file->getTitle()->getText(),
0, $pos );
+ } else {
+ $name = $file->getTitle()->getText();
+ }
+ $combinedMeta[ 'ObjectName' ] = array(
+ 'value' => htmlspecialchars( $name ),
+ 'source' => 'mediawiki-metadata',
+ );
+ }
+ $common = $file->getCommonMetaArray();
+ $commonFormatted = $this->makeFormattedData( $common );
+ wfProfileIn( __METHOD__ . '-common' );
+
+ $title = $file->getTitle();
+ if ( !$title ) {
+ $title = Title::newFromText( 'No_title', NS_FILE );
+ }
+ $poptions = ParserOptions::newFromContext( $this->getContext()
);
+ $poptions->setEditSection( false );
+ $poptions->setTargetLanguage( $this->getLanguage() );
+
+ foreach ( $commonFormatted as $name => $value ) {
+ $pout = $wgParser->parse( $value, $title, $poptions );
+ $string = $pout->getText();
+ if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $string,
$m ) ) {
+ $string = $m[1];
+ }
+ $combinedMeta[ $name ] = array(
+ 'value' => $string,
+ 'source' => 'file-metadata'
+ );
+ }
+
+ wfProfileOut( __METHOD__ . '-common' );
+
+ wfRunHooks( 'GetExtendedMetadata', array( &$combinedMeta,
$file, $this->getContext(), $this->singleLang ) );
+
+ $visible = array_flip( self::getVisibleFields() );
+ foreach ( $combinedMeta as $key => $value ) {
+ if ( !isset( $value['source'] ) ) {
+ $combinedMeta[$key]['source'] = 'hook';
+ }
+ if ( !isset( $value['translatedName'] ) ) {
+ $msg = $this->msg( 'exif-' . strtolower( $key )
);
+ if ( $msg->exists() ) {
+ $translated = $msg->parse();
+ } else {
+ $translated = htmlspecialchars(
strtolower( $key ) );
+ }
+
+ $combinedMeta[$key]['translatedName'] =
$translated;
+ }
+ if ( !isset( $visible[ strtolower( $key ) ] ) ) {
+ $combinedMeta[$key]['hidden'] = '';
+ }
+ }
+
+ wfProfileOut( __METHOD__ );
+ return $combinedMeta;
+ }
}
/** For compatability with old FormatExif class
diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 36f76bb..66c700d 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -456,16 +456,7 @@
* @access protected
*/
function visibleMetadataFields() {
- $fields = array();
- $lines = explode( "\n", wfMessage( 'metadata-fields'
)->inContentLanguage()->text() );
- foreach ( $lines as $line ) {
- $matches = array();
- if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches
) ) {
- $fields[] = $matches[1];
- }
- }
- $fields = array_map( 'strtolower', $fields );
- return $fields;
+ return FormatMetadata::getVisibleFields();
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/78926
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I77303d8e535fc1c42e14cfb853814e5c434a81ec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Brian Wolff <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits