http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95155
Revision: 95155
Author: bawolff
Date: 2011-08-21 17:16:57 +0000 (Sun, 21 Aug 2011)
Log Message:
-----------
follow-up r86169 - 2 minor issues found while writing unit tests
# Some really obscure Exif properties did not have the Exif byte order taken
into account
and were being extracted with the bytes reversed (for example user comment
when encoded as utf-16).
Not a major issue as these properties are very rare in practice, but
certainly not a good thing.
( One would think php's exif support would take care of that, but no it does
not...)
# Change the fallback encoding for Gif comments to be windows-1252 instead of
iso 8859-1. More
to be consistent with jpg and iptc than anything else.
Modified Paths:
--------------
trunk/phase3/includes/media/BitmapMetadataHandler.php
trunk/phase3/includes/media/Exif.php
trunk/phase3/includes/media/GIFMetadataExtractor.php
trunk/phase3/includes/media/JpegMetadataExtractor.php
trunk/phase3/includes/media/Tiff.php
Modified: trunk/phase3/includes/media/BitmapMetadataHandler.php
===================================================================
--- trunk/phase3/includes/media/BitmapMetadataHandler.php 2011-08-21
17:14:02 UTC (rev 95154)
+++ trunk/phase3/includes/media/BitmapMetadataHandler.php 2011-08-21
17:16:57 UTC (rev 95155)
@@ -40,16 +40,16 @@
/**
- *
- * get exif info using exif class.
+ * Get exif info using exif class.
* Basically what used to be in BitmapHandler::getMetadata().
* Just calls stuff in the Exif class.
*
* @param $filename string
*/
- function getExif ( $filename ) {
- if ( file_exists( $filename ) ) {
- $exif = new Exif( $filename );
+ function getExif ( $filename, $byteOrder ) {
+ global $wgShowEXIF;
+ if ( file_exists( $filename ) && $wgShowEXIF ) {
+ $exif = new Exif( $filename, $byteOrder );
$data = $exif->getFilteredData();
if ( $data ) {
$this->addMetadata( $data, 'exif' );
@@ -117,7 +117,6 @@
static function Jpeg ( $filename ) {
$showXMP = function_exists( 'xml_parser_create_ns' );
$meta = new self();
- $meta->getExif( $filename );
$seg = JpegMetadataExtractor::segmentSplitter( $filename );
if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) {
@@ -141,6 +140,9 @@
$meta->addMetadata( $array, $type );
}
}
+ if ( isset( $seg['byteOrder'] ) ) {
+ $meta->getExif( $filename, $seg['byteOrder'] );
+ }
return $meta->getMetadataArray();
}
@@ -208,4 +210,60 @@
return $baseArray;
}
+ /**
+ * This doesn't do much yet, but eventually I plan to add
+ * XMP support for Tiff. (PHP's exif support already extracts
+ * but needs some further processing because PHP's exif support
+ * is stupid...)
+ *
+ * @todo Add XMP support, so this function actually makes
+ * sense to put here.
+ *
+ * The various exceptions this throws are caught later.
+ * @param $filename String
+ * @return Array The metadata.
+ */
+ static public function Tiff ( $filename ) {
+ if ( file_exists( $filename ) ) {
+ $byteOrder = self::getTiffByteOrder( $filename );
+ if ( !$byteOrder ) {
+ throw new MWException( "Error determining byte
order of $filename" );
+ }
+ $exif = new Exif( $filename, $byteOrder );
+ $data = $exif->getFilteredData();
+ if ( $data ) {
+ $data['MEDIAWIKI_EXIF_VERSION'] =
Exif::version();
+ return $data;
+ } else {
+ throw new MWException( "Could not extract data
from tiff file $filename" );
+ }
+ } else {
+ throw new MWException( "File doesn't exist - $filename"
);
+ }
+ }
+ /**
+ * Read the first 2 bytes of a tiff file to figure out
+ * Little Endian or Big Endian. Needed for exif stuff.
+ *
+ * @param $filename String The filename
+ * @return String 'BE' or 'LE' or false
+ */
+ static function getTiffByteOrder( $filename ) {
+ $fh = fopen( $filename, 'rb' );
+ if ( !$fh ) return false;
+ $head = fread( $fh, 2 );
+ fclose( $fh );
+
+ switch( $head ) {
+ case 'II':
+ return 'LE'; // II for intel.
+ case 'MM':
+ return 'BE'; // MM for motorla.
+ default:
+ return false; // Something went wrong.
+
+ }
+ }
+
+
}
Modified: trunk/phase3/includes/media/Exif.php
===================================================================
--- trunk/phase3/includes/media/Exif.php 2011-08-21 17:14:02 UTC (rev
95154)
+++ trunk/phase3/includes/media/Exif.php 2011-08-21 17:16:57 UTC (rev
95155)
@@ -90,6 +90,11 @@
*/
var $log = false;
+ /**
+ * The byte order of the file. Needed because php's
+ * extension doesn't fully process some obscure props.
+ */
+ private $byteOrder;
//@}
/**
@@ -102,7 +107,7 @@
* DigitalZoomRatio = 0/0 is rejected. need to determine if that's
valid.
* possibly should treat 0/0 = 0. need to read exif spec on that.
*/
- function __construct( $file ) {
+ function __construct( $file, $byteOrder = '' ) {
/**
* Page numbers here refer to pages in the EXIF 2.2 standard
*
@@ -275,6 +280,16 @@
$this->file = $file;
$this->basename = wfBaseName( $this->file );
+ if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) {
+ $this->byteOrder = $byteOrder;
+ } else {
+ // Only give a warning for b/c, since originally we
didn't
+ // require this. The number of things affected by this
is
+ // rather small.
+ wfWarn( 'Exif class did not have byte order specified. '
+ . 'Some properties may be decoded incorrectly.' );
+ $this->byteOrder = 'BE'; // BE seems about twice as
popular as LE in jpg's.
+ }
$this->debugFile( $this->basename, __FUNCTION__, true );
if( function_exists( 'exif_read_data' ) ) {
@@ -394,7 +409,16 @@
}
$newVal .= ord( substr($val, $i, 1) );
}
- $this->mFilteredExifData['GPSVersionID'] = $newVal;
+ if ( $this->byteOrder === 'LE' ) {
+ // Need to reverse the string
+ $newVal2 = '';
+ for ( $i = strlen( $newVal ) - 1; $i >= 0; $i--
) {
+ $newVal2 .= substr( $newVal, $i, 1 );
+ }
+ $this->mFilteredExifData['GPSVersionID'] =
$newVal2;
+ } else {
+ $this->mFilteredExifData['GPSVersionID'] =
$newVal;
+ }
unset( $this->mFilteredExifData['GPSVersion'] );
}
@@ -415,7 +439,6 @@
unset($this->mFilteredExifData[$prop]);
return;
}
-
$charCode = substr( $this->mFilteredExifData[$prop], 0,
8);
$val = substr( $this->mFilteredExifData[$prop], 8);
@@ -426,7 +449,7 @@
$charset = "Shift-JIS";
break;
case "UNICODE\x00":
- $charset = "UTF-16";
+ $charset = "UTF-16" . $this->byteOrder;
break;
default: //ascii or undefined.
$charset = "";
Modified: trunk/phase3/includes/media/GIFMetadataExtractor.php
===================================================================
--- trunk/phase3/includes/media/GIFMetadataExtractor.php 2011-08-21
17:14:02 UTC (rev 95154)
+++ trunk/phase3/includes/media/GIFMetadataExtractor.php 2011-08-21
17:16:57 UTC (rev 95155)
@@ -126,14 +126,14 @@
// The standard says this should be
ASCII, however its unclear if
// thats true in practise. Check to see
if its valid utf-8, if so
- // assume its that, otherwise assume
its iso-8859-1
+ // assume its that, otherwise assume
its windows-1252 (iso-8859-1)
$dataCopy = $data;
// quickIsNFCVerify has the side effect
of replacing any invalid characters
UtfNormal::quickIsNFCVerify( $dataCopy
);
if ( $dataCopy !== $data ) {
wfSuppressWarnings();
- $data = iconv( 'ISO-8859-1',
'UTF-8', $data );
+ $data = iconv( 'windows-1252',
'UTF-8', $data );
wfRestoreWarnings();
}
Modified: trunk/phase3/includes/media/JpegMetadataExtractor.php
===================================================================
--- trunk/phase3/includes/media/JpegMetadataExtractor.php 2011-08-21
17:14:02 UTC (rev 95154)
+++ trunk/phase3/includes/media/JpegMetadataExtractor.php 2011-08-21
17:16:57 UTC (rev 95155)
@@ -28,7 +28,10 @@
$segmentCount = 0;
- $segments = array( 'XMP_ext' => array(), 'COM' => array() );
+ $segments = array(
+ 'XMP_ext' => array(),
+ 'COM' => array(),
+ );
if ( !$filename ) {
throw new MWException( "No filename specified for " .
__METHOD__ );
@@ -82,23 +85,34 @@
wfDebug( __METHOD__ . ' Ignoring JPEG
comment as is garbage.' );
}
- } elseif ( $buffer === "\xE1" && $showXMP ) {
+ } elseif ( $buffer === "\xE1" ) {
// APP1 section (Exif, XMP, and XMP extended)
// only extract if XMP is enabled.
$temp = self::jpegExtractMarker( $fh );
-
// check what type of app segment this is.
- if ( substr( $temp, 0, 29 ) ===
"http://ns.adobe.com/xap/1.0/\x00" ) {
+ if ( substr( $temp, 0, 29 ) ===
"http://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
$segments["XMP"] = substr( $temp, 29 );
- } elseif ( substr( $temp, 0, 35 ) ===
"http://ns.adobe.com/xmp/extension/\x00" ) {
+ } elseif ( substr( $temp, 0, 35 ) ===
"http://ns.adobe.com/xmp/extension/\x00" && $showXMP ) {
$segments["XMP_ext"][] = substr( $temp,
35 );
- } elseif ( substr( $temp, 0, 29 ) ===
"XMP\x00://ns.adobe.com/xap/1.0/\x00" ) {
+ } elseif ( substr( $temp, 0, 29 ) ===
"XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
// Some images (especially flickr
images) seem to have this.
// I really have no idea what the deal
is with them, but
// whatever...
$segments["XMP"] = substr( $temp, 29 );
wfDebug( __METHOD__ . ' Found XMP
section with wrong app identifier '
. "Using anyways.\n" );
+ } elseif ( substr( $temp, 0, 6 ) === "Exif\0\0"
) {
+ // Just need to find out what the byte
order is.
+ // because php's exif plugin sucks...
+ // This is a II for little Endian, MM
for big. Not a unicode BOM.
+ $byteOrderMarker = substr( $temp, 6, 2
);
+ if ( $byteOrderMarker === 'MM' ) {
+ $segments['byteOrder'] = 'BE';
+ } elseif ( $byteOrderMarker === 'II' ) {
+ $segments['byteOrder'] = 'LE';
+ } else {
+ wfDebug( __METHOD__ . ' Invalid
byte ordering?!' );
+ }
}
} elseif ( $buffer === "\xED" ) {
// APP13 - PSIR. IPTC and some photoshop stuff
Modified: trunk/phase3/includes/media/Tiff.php
===================================================================
--- trunk/phase3/includes/media/Tiff.php 2011-08-21 17:14:02 UTC (rev
95154)
+++ trunk/phase3/includes/media/Tiff.php 2011-08-21 17:16:57 UTC (rev
95155)
@@ -56,13 +56,20 @@
*/
function getMetadata( $image, $filename ) {
global $wgShowEXIF;
- if ( $wgShowEXIF && file_exists( $filename ) ) {
- $exif = new Exif( $filename );
- $data = $exif->getFilteredData();
- if ( $data ) {
- $data['MEDIAWIKI_EXIF_VERSION'] =
Exif::version();
- return serialize( $data );
- } else {
+ if ( $wgShowEXIF ) {
+ try {
+ $meta = BitmapMetadataHandler::Tiff( $filename
);
+ if ( !is_array( $meta ) ) {
+ // This should never happen, but
doesn't hurt to be paranoid.
+ throw new MWException('Metadata array
is not an array');
+ }
+ $meta['MEDIAWIKI_EXIF_VERSION'] =
Exif::version();
+ return serialize( $meta );
+ }
+ catch ( MWException $e ) {
+ // BitmapMetadataHandler throws an exception in
certain exceptional
+ // cases like if file does not exist.
+ wfDebug( __METHOD__ . ': ' . $e->getMessage() .
"\n" );
return ExifBitmapHandler::BROKEN_FILE;
}
} else {
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs