http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95155

Revision: 95155
Author:   bawolff
Date:     2011-08-21 17:16:57 +0000 (Sun, 21 Aug 2011)
Log Message:
-----------
follow-up r86169 - 2 minor issues found while writing unit tests

# Some really obscure Exif properties did not have the Exif byte order taken 
into account
  and were being extracted with the bytes reversed (for example user comment 
when encoded as utf-16).
  Not a major issue as these properties are very rare in practise, but 
certainly not a good thing.
  ( One would think php's exif support would take care of that, but no it does 
not...)
# Change the fallback encoding for Gif comments to be windows-1252 instead of 
iso 8859-1. More
  to be consitent with jpg and iptc then anything else.

Modified Paths:
--------------
    trunk/phase3/includes/media/BitmapMetadataHandler.php
    trunk/phase3/includes/media/Exif.php
    trunk/phase3/includes/media/GIFMetadataExtractor.php
    trunk/phase3/includes/media/JpegMetadataExtractor.php
    trunk/phase3/includes/media/Tiff.php

Modified: trunk/phase3/includes/media/BitmapMetadataHandler.php
===================================================================
--- trunk/phase3/includes/media/BitmapMetadataHandler.php       2011-08-21 
17:14:02 UTC (rev 95154)
+++ trunk/phase3/includes/media/BitmapMetadataHandler.php       2011-08-21 
17:16:57 UTC (rev 95155)
@@ -40,16 +40,16 @@
 
 
        /**
-        *
-        * get exif info using exif class.
+        * Get exif info using exif class.
         * Basically what used to be in BitmapHandler::getMetadata().
         * Just calls stuff in the Exif class.
         *
         * @param $filename string
         */
-       function getExif ( $filename ) {
-               if ( file_exists( $filename ) ) {
-                       $exif = new Exif( $filename );
+       function getExif ( $filename, $byteOrder ) {
+               global $wgShowEXIF;
+               if ( file_exists( $filename ) && $wgShowEXIF ) {
+                       $exif = new Exif( $filename, $byteOrder );
                        $data = $exif->getFilteredData();
                        if ( $data ) {
                                $this->addMetadata( $data, 'exif' );
@@ -117,7 +117,6 @@
        static function Jpeg ( $filename ) {
                $showXMP = function_exists( 'xml_parser_create_ns' );
                $meta = new self();
-               $meta->getExif( $filename );
 
                $seg = JpegMetadataExtractor::segmentSplitter( $filename );
                if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) {
@@ -141,6 +140,9 @@
                                $meta->addMetadata( $array, $type );
                        }
                }
+               if ( isset( $seg['byteOrder'] ) ) {
+                       $meta->getExif( $filename, $seg['byteOrder'] );
+               }
                return $meta->getMetadataArray();
        }
 
@@ -208,4 +210,60 @@
                return $baseArray;
        }
 
+       /**
+        * This doesn't do much yet, but eventually I plan to add
+        * XMP support for Tiff. (PHP's exif support already extracts
+        * but needs some further processing because PHP's exif support
+        * is stupid...)
+        *
+        * @todo Add XMP support, so this function actually makes
+        * sense to put here.
+        *
+        * The various exceptions this throws are caught later.
+        * @param $filename String
+        * @return Array The metadata.
+        */
+       static public function Tiff ( $filename ) {
+               if ( file_exists( $filename ) ) {
+                       $byteOrder = self::getTiffByteOrder( $filename );
+                       if ( !$byteOrder ) {
+                               throw new MWException( "Error determining byte 
order of $filename" );
+                       }
+                       $exif = new Exif( $filename, $byteOrder );
+                       $data = $exif->getFilteredData();
+                       if ( $data ) {
+                               $data['MEDIAWIKI_EXIF_VERSION'] = 
Exif::version();
+                               return $data;
+                       } else {
+                               throw new MWException( "Could not extract data 
from tiff file $filename" );
+                       }
+               } else {
+                       throw new MWException( "File doesn't exist - $filename" 
);
+               }
+       }
+       /**
+        * Read the first 2 bytes of a tiff file to figure out
+        * Little Endian or Big Endian. Needed for exif stuff.
+        *
+        * @param $filename String The filename
+        * @return String 'BE' or 'LE' or false
+        */
+       static function getTiffByteOrder( $filename ) {
+               $fh = fopen( $filename, 'rb' );
+               if ( !$fh ) return false;
+               $head = fread( $fh, 2 );
+               fclose( $fh );
+
+               switch( $head ) {
+                       case 'II':
+                               return 'LE'; // II for intel.
+                       case 'MM':
+                               return 'BE'; // MM for motorla.
+                       default:
+                               return false; // Something went wrong.
+
+               }
+       }
+
+
 }

Modified: trunk/phase3/includes/media/Exif.php
===================================================================
--- trunk/phase3/includes/media/Exif.php        2011-08-21 17:14:02 UTC (rev 
95154)
+++ trunk/phase3/includes/media/Exif.php        2011-08-21 17:16:57 UTC (rev 
95155)
@@ -90,6 +90,11 @@
         */
        var $log = false;
 
+       /**
+        * The byte order of the file. Needed because php's
+        * extension doesn't fully process some obscure props.
+        */
+       private $byteOrder;
        //@}
 
        /**
@@ -102,7 +107,7 @@
         * DigitalZoomRatio = 0/0 is rejected. need to determine if that's 
valid.
         * possibly should treat 0/0 = 0. need to read exif spec on that.
         */
-       function __construct( $file ) {
+       function __construct( $file, $byteOrder = '' ) {
                /**
                 * Page numbers here refer to pages in the EXIF 2.2 standard
                 *
@@ -275,6 +280,16 @@
 
                $this->file = $file;
                $this->basename = wfBaseName( $this->file );
+               if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) {
+                       $this->byteOrder = $byteOrder;
+               } else {
+                       // Only give a warning for b/c, since originally we 
didn't
+                       // require this. The number of things affected by this 
is
+                       // rather small.
+                       wfWarn( 'Exif class did not have byte order specified. '
+                        . 'Some properties may be decoded incorrectly.' );
+                       $this->byteOrder = 'BE'; // BE seems about twice as 
popular as LE in jpg's.
+               }
 
                $this->debugFile( $this->basename, __FUNCTION__, true );
                if( function_exists( 'exif_read_data' ) ) {
@@ -394,7 +409,16 @@
                                }
                                $newVal .= ord( substr($val, $i, 1) );
                        }
-                       $this->mFilteredExifData['GPSVersionID'] = $newVal;
+                       if ( $this->byteOrder === 'LE' ) {
+                               // Need to reverse the string
+                               $newVal2 = '';
+                               for ( $i = strlen( $newVal ) - 1; $i >= 0; $i-- 
) {
+                                       $newVal2 .= substr( $newVal, $i, 1 );
+                               }
+                               $this->mFilteredExifData['GPSVersionID'] = 
$newVal2;
+                       } else {
+                               $this->mFilteredExifData['GPSVersionID'] = 
$newVal;
+                       }
                        unset( $this->mFilteredExifData['GPSVersion'] );
                }
 
@@ -415,7 +439,6 @@
                                unset($this->mFilteredExifData[$prop]);
                                return;
                        }
-
                        $charCode = substr( $this->mFilteredExifData[$prop], 0, 
8);
                        $val = substr( $this->mFilteredExifData[$prop], 8);
                        
@@ -426,7 +449,7 @@
                                        $charset = "Shift-JIS";
                                        break;
                                case "UNICODE\x00":
-                                       $charset = "UTF-16";
+                                       $charset = "UTF-16" . $this->byteOrder;
                                        break;
                                default: //ascii or undefined.
                                        $charset = "";

Modified: trunk/phase3/includes/media/GIFMetadataExtractor.php
===================================================================
--- trunk/phase3/includes/media/GIFMetadataExtractor.php        2011-08-21 
17:14:02 UTC (rev 95154)
+++ trunk/phase3/includes/media/GIFMetadataExtractor.php        2011-08-21 
17:16:57 UTC (rev 95155)
@@ -126,14 +126,14 @@
 
                                        // The standard says this should be 
ASCII, however its unclear if
                                        // thats true in practise. Check to see 
if its valid utf-8, if so
-                                       // assume its that, otherwise assume 
its iso-8859-1
+                                       // assume its that, otherwise assume 
its windows-1252 (iso-8859-1)
                                        $dataCopy = $data;
                                        // quickIsNFCVerify has the side effect 
of replacing any invalid characters
                                        UtfNormal::quickIsNFCVerify( $dataCopy 
);
 
                                        if ( $dataCopy !== $data ) {
                                                wfSuppressWarnings();
-                                               $data = iconv( 'ISO-8859-1', 
'UTF-8', $data );
+                                               $data = iconv( 'windows-1252', 
'UTF-8', $data );
                                                wfRestoreWarnings();
                                        }
 

Modified: trunk/phase3/includes/media/JpegMetadataExtractor.php
===================================================================
--- trunk/phase3/includes/media/JpegMetadataExtractor.php       2011-08-21 
17:14:02 UTC (rev 95154)
+++ trunk/phase3/includes/media/JpegMetadataExtractor.php       2011-08-21 
17:16:57 UTC (rev 95155)
@@ -28,7 +28,10 @@
 
                $segmentCount = 0;
 
-               $segments = array( 'XMP_ext' => array(), 'COM' => array() );
+               $segments = array(
+                       'XMP_ext' => array(),
+                       'COM' => array(),
+               );
 
                if ( !$filename ) {
                        throw new MWException( "No filename specified for " . 
__METHOD__ );
@@ -82,23 +85,34 @@
                                        wfDebug( __METHOD__ . ' Ignoring JPEG 
comment as is garbage.' );
                                }
 
-                       } elseif ( $buffer === "\xE1" && $showXMP ) {
+                       } elseif ( $buffer === "\xE1" ) {
                                // APP1 section (Exif, XMP, and XMP extended)
                                // only extract if XMP is enabled.
                                $temp = self::jpegExtractMarker( $fh );
-
                                // check what type of app segment this is.
-                               if ( substr( $temp, 0, 29 ) === 
"http://ns.adobe.com/xap/1.0/\x00"; ) {
+                               if ( substr( $temp, 0, 29 ) === 
"http://ns.adobe.com/xap/1.0/\x00"; && $showXMP ) {
                                        $segments["XMP"] = substr( $temp, 29 );
-                               } elseif ( substr( $temp, 0, 35 ) === 
"http://ns.adobe.com/xmp/extension/\x00"; ) {
+                               } elseif ( substr( $temp, 0, 35 ) === 
"http://ns.adobe.com/xmp/extension/\x00"; && $showXMP ) {
                                        $segments["XMP_ext"][] = substr( $temp, 
35 );
-                               } elseif ( substr( $temp, 0, 29 ) === 
"XMP\x00://ns.adobe.com/xap/1.0/\x00" ) {
+                               } elseif ( substr( $temp, 0, 29 ) === 
"XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
                                        // Some images (especially flickr 
images) seem to have this.
                                        // I really have no idea what the deal 
is with them, but
                                        // whatever...
                                        $segments["XMP"] = substr( $temp, 29 );
                                        wfDebug( __METHOD__ . ' Found XMP 
section with wrong app identifier '
                                                . "Using anyways.\n" ); 
+                               } elseif ( substr( $temp, 0, 6 ) === "Exif\0\0" 
) {
+                                       // Just need to find out what the byte 
order is.
+                                       // because php's exif plugin sucks...
+                                       // This is a II for little Endian, MM 
for big. Not a unicode BOM.
+                                       $byteOrderMarker = substr( $temp, 6, 2 
);
+                                       if ( $byteOrderMarker === 'MM' ) {
+                                               $segments['byteOrder'] = 'BE';
+                                       } elseif ( $byteOrderMarker === 'II' ) {
+                                               $segments['byteOrder'] = 'LE';
+                                       } else {
+                                               wfDebug( __METHOD__ . ' Invalid 
byte ordering?!' );
+                                       }
                                }
                        } elseif ( $buffer === "\xED" ) {
                                // APP13 - PSIR. IPTC and some photoshop stuff

Modified: trunk/phase3/includes/media/Tiff.php
===================================================================
--- trunk/phase3/includes/media/Tiff.php        2011-08-21 17:14:02 UTC (rev 
95154)
+++ trunk/phase3/includes/media/Tiff.php        2011-08-21 17:16:57 UTC (rev 
95155)
@@ -56,13 +56,20 @@
         */
        function getMetadata( $image, $filename ) {
                global $wgShowEXIF;
-               if ( $wgShowEXIF && file_exists( $filename ) ) {
-                       $exif = new Exif( $filename );
-                       $data = $exif->getFilteredData();
-                       if ( $data ) {
-                               $data['MEDIAWIKI_EXIF_VERSION'] = 
Exif::version();
-                               return serialize( $data );
-                       } else {
+               if ( $wgShowEXIF ) {
+                       try {
+                               $meta = BitmapMetadataHandler::Tiff( $filename 
);
+                               if ( !is_array( $meta ) ) {
+                                       // This should never happen, but 
doesn't hurt to be paranoid.
+                                       throw new MWException('Metadata array 
is not an array');
+                               }
+                               $meta['MEDIAWIKI_EXIF_VERSION'] = 
Exif::version();
+                               return serialize( $meta );
+                       }
+                       catch ( MWException $e ) {
+                               // BitmapMetadataHandler throws an exception in 
certain exceptional
+                               // cases like if file does not exist.
+                               wfDebug( __METHOD__ . ': ' . $e->getMessage() . 
"\n" );
                                return ExifBitmapHandler::BROKEN_FILE;
                        }
                } else {


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to