Gilles has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/350405 )

Change subject: Store original media dimensions as additional header
......................................................................

Store original media dimensions as additional header

Bug: T150741
For storage repos that support headers (such as Swift), this will store the
original media dimensions as an extra custom header, X-Content-Dimensions.
The header is formatted to minimize its length when dealing with multipage
documents, by expressing the information as page ranges keyed by dimensions.

Example for a multipage document with some pages of different sizes:
X-Content-Dimensions: 1903x899:1-9,11/1903x873:10

Example for a single page document:
X-Content-Dimensions: 800x600:1

Change-Id: Ic4c6a86557b3705cf75d074753e9ce2ee070a6df
---
M includes/filerepo/file/File.php
M includes/filerepo/file/LocalFile.php
M includes/libs/filebackend/FileBackendStore.php
M includes/media/Exif.php
M includes/media/ExifBitmap.php
M includes/media/GIFMetadataExtractor.php
M includes/media/MediaHandler.php
M includes/media/PNGMetadataExtractor.php
M includes/media/XCF.php
M maintenance/importImages.php
M maintenance/refreshFileHeaders.php
M tests/phpunit/includes/media/MediaHandlerTest.php
12 files changed, 139 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/05/350405/1

diff --git a/includes/filerepo/file/File.php b/includes/filerepo/file/File.php
index e367812..1585602 100644
--- a/includes/filerepo/file/File.php
+++ b/includes/filerepo/file/File.php
@@ -2151,10 +2151,10 @@
        /**
         * @return array HTTP header name/value map to use for HEAD/GET request 
responses
         */
-       function getStreamHeaders() {
+       function getContentHeaders() {
                $handler = $this->getHandler();
                if ( $handler ) {
-                       return $handler->getStreamHeaders( $this->getMetadata() 
);
+                       return $handler->getContentHeaders( 
$this->getMetadata() );
                } else {
                        return [];
                }
diff --git a/includes/filerepo/file/LocalFile.php 
b/includes/filerepo/file/LocalFile.php
index 292fc80..b9ae962 100644
--- a/includes/filerepo/file/LocalFile.php
+++ b/includes/filerepo/file/LocalFile.php
@@ -1200,7 +1200,7 @@
                $options = [];
                $handler = MediaHandler::getHandler( $props['mime'] );
                if ( $handler ) {
-                       $options['headers'] = $handler->getStreamHeaders( 
$props['metadata'] );
+                       $options['headers'] = $handler->getContentHeaders( 
$props['metadata'] );
                } else {
                        $options['headers'] = [];
                }
diff --git a/includes/libs/filebackend/FileBackendStore.php 
b/includes/libs/filebackend/FileBackendStore.php
index 039bd42..e2f7886 100644
--- a/includes/libs/filebackend/FileBackendStore.php
+++ b/includes/libs/filebackend/FileBackendStore.php
@@ -1250,7 +1250,7 @@
         * @return array
         */
        protected function sanitizeOpHeaders( array $op ) {
-               static $longs = [ 'content-disposition' ];
+               static $longs = [ 'content-disposition', 'x-content-dimensions' 
];
 
                if ( isset( $op['headers'] ) ) { // op sets HTTP headers
                        $newHeaders = [];
diff --git a/includes/media/Exif.php b/includes/media/Exif.php
index 95fa859..621a4aa 100644
--- a/includes/media/Exif.php
+++ b/includes/media/Exif.php
@@ -117,6 +117,11 @@
                 * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
                 */
                $this->mExifTags = [
+                       'COMPUTED' => [
+                               'Width' => Exif::SHORT_OR_LONG, # Image width
+                               'Height' => Exif::SHORT_OR_LONG, # Image height
+                       ],
+
                        # TIFF Rev. 6.0 Attribute Information (p22)
                        'IFD0' => [
                                # Tags relating to image structure
diff --git a/includes/media/ExifBitmap.php b/includes/media/ExifBitmap.php
index 7aeefa0..2f81045 100644
--- a/includes/media/ExifBitmap.php
+++ b/includes/media/ExifBitmap.php
@@ -314,4 +314,25 @@
 
                return true;
        }
+
+       /**
+       * Get useful response headers for GET/HEAD requests for a file with the 
given metadata
+       * @param $metadata mixed Result of this handler's getMetadata() for a file
+       * @return Array
+       */
+       public function getContentHeaders( $metadata ) {
+               wfSuppressWarnings();
+               $metadata = unserialize( $metadata );
+               wfRestoreWarnings();
+
+               if ( !isset( $metadata['Width'] ) || !isset( 
$metadata['Height'] ) ) {
+                       return [];
+               }
+
+               $dimensionsMetadata = [];
+               $dimensionsMetadata['width'] = $metadata['Width'];
+               $dimensionsMetadata['height'] = $metadata['Height'];
+
+               return parent::getContentHeaders( serialize( 
$dimensionsMetadata ) );
+       }
 }
diff --git a/includes/media/GIFMetadataExtractor.php 
b/includes/media/GIFMetadataExtractor.php
index de409e7..13dba57 100644
--- a/includes/media/GIFMetadataExtractor.php
+++ b/includes/media/GIFMetadataExtractor.php
@@ -41,7 +41,7 @@
        /** @var string */
        private static $gifTerm;
 
-       const VERSION = 1;
+       const VERSION = 2;
 
        // Each sub-block is less than or equal to 255 bytes.
        // Most of the time its 255 bytes, except for in XMP
@@ -54,9 +54,9 @@
         * @return array
         */
        static function getMetadata( $filename ) {
-               self::$gifFrameSep = pack( "C", ord( "," ) );
-               self::$gifExtensionSep = pack( "C", ord( "!" ) );
-               self::$gifTerm = pack( "C", ord( ";" ) );
+               self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
+               self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
+               self::$gifTerm = pack( "C", ord( ";" ) ); // 3B
 
                $frameCount = 0;
                $duration = 0.0;
@@ -82,8 +82,11 @@
                        throw new Exception( "Not a valid GIF file; header: 
$buf" );
                }
 
-               // Skip over width and height.
-               fread( $fh, 4 );
+               // Read width and height.
+               $buf = fread( $fh, 2 );
+               $width = unpack( 'v', $buf )[1];
+               $buf = fread( $fh, 2 );
+               $height = unpack( 'v', $buf )[1];
 
                // Read BPP
                $buf = fread( $fh, 1 );
@@ -251,6 +254,8 @@
                        'duration' => $duration,
                        'xmp' => $xmp,
                        'comment' => $comment,
+                       'width' => $width,
+                       'height' => $height,
                ];
        }
 
diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 6a23bd6..d6ec7c1 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -305,16 +305,6 @@
        }
 
        /**
-        * Get useful response headers for GET/HEAD requests for a file with 
the given metadata
-        *
-        * @param mixed $metadata Result of the getMetadata() function of this 
handler for a file
-        * @return array
-        */
-       public function getStreamHeaders( $metadata ) {
-               return [];
-       }
-
-       /**
         * True if the handled types can be transformed
         *
         * @param File $file
@@ -868,4 +858,70 @@
        public function getWarningConfig( $file ) {
                return null;
        }
+
+       /**
+        * Converts a dimensions array about a potentially multipage document 
from an
+        * exhaustive list of ordered page numbers to a list of page ranges
+        * @param Array $pagesByDimensions
+        * @return String
+       */
+       public static function getPageRangesByDimensions( $pagesByDimensions ) {
+               $pageRangesByDimensions = [];
+
+               foreach ( $pagesByDimensions as $dimensions => $pageList ) {
+                       $ranges = [];
+                       $firstPage = $pageList[0];
+                       $lastPage = $firstPage - 1;
+
+                       foreach ( $pageList as $page ) {
+                               if ( $page > $lastPage + 1 ) {
+                                       if ( $firstPage != $lastPage ) {
+                                               $ranges []= 
"$firstPage-$lastPage";
+                                       } else {
+                                               $ranges []= "$firstPage";
+                                       }
+
+                                       $firstPage = $page;
+                               }
+
+                               $lastPage = $page;
+                       }
+
+                       if ( $firstPage != $lastPage ) {
+                               $ranges []= "$firstPage-$lastPage";
+                       } else{
+                               $ranges []= "$firstPage";
+                       }
+
+                       $pageRangesByDimensions[ $dimensions ] = $ranges;
+               }
+
+               $dimensionsString = [];
+               foreach ( $pageRangesByDimensions as $dimensions => $pageRanges 
) {
+                       $dimensionsString []= "$dimensions:" . implode( ',', 
$pageRanges );
+               }
+
+               return implode( '/', $dimensionsString );
+       }
+
+       /**
+       * Get useful response headers for GET/HEAD requests for a file with the 
given metadata
+       * @param $metadata mixed Result of this handler's getMetadata() for a file
+       * @return Array
+       */
+       public function getContentHeaders( $metadata ) {
+               wfSuppressWarnings();
+               $metadata = unserialize( $metadata );
+               wfRestoreWarnings();
+
+               if ( !isset( $metadata['width'] ) || !isset( 
$metadata['height'] ) ) {
+                       return [];
+               }
+
+               $dimensionString = $metadata['width'] . 'x' . 
$metadata['height'];
+               $pagesByDimensions = [ $dimensionString => [ 1 ] ];
+               $pageRangesByDimensions = 
MediaHandler::getPageRangesByDimensions( $pagesByDimensions );
+
+               return [ 'X-Content-Dimensions' => $pageRangesByDimensions ];
+       }
 }
diff --git a/includes/media/PNGMetadataExtractor.php 
b/includes/media/PNGMetadataExtractor.php
index d0517d7..75a91f5 100644
--- a/includes/media/PNGMetadataExtractor.php
+++ b/includes/media/PNGMetadataExtractor.php
@@ -40,7 +40,7 @@
        /** @var array */
        private static $textChunks;
 
-       const VERSION = 1;
+       const VERSION = 2;
        const MAX_CHUNK_SIZE = 3145728; // 3 megabytes
 
        static function getMetadata( $filename ) {
@@ -121,6 +121,8 @@
                                if ( !$buf || strlen( $buf ) < $chunk_size ) {
                                        throw new Exception( __METHOD__ . ": 
Read error" );
                                }
+                               $width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
+                               $height = unpack( 'N', substr( $buf, 4, 4 ) 
)[1];
                                $bitDepth = ord( substr( $buf, 8, 1 ) );
                                // Detect the color type in British English as 
per the spec
                                // https://www.w3.org/TR/PNG/#11IHDR
@@ -404,6 +406,8 @@
                        'text' => $text,
                        'bitDepth' => $bitDepth,
                        'colorType' => $colorType,
+                       'width' => $width,
+                       'height' => $height,
                ];
        }
 
diff --git a/includes/media/XCF.php b/includes/media/XCF.php
index c419524..bc1e2fb 100644
--- a/includes/media/XCF.php
+++ b/includes/media/XCF.php
@@ -175,6 +175,9 @@
                                $metadata['colorType'] = 'unknown';
 
                        }
+
+                       $metadata['width'] = $header['width'];
+                       $metadata['height'] = $header['height'];
                } else {
                        // Marker to prevent repeated attempted extraction
                        $metadata['error'] = true;
diff --git a/maintenance/importImages.php b/maintenance/importImages.php
index 23bdb3f..ac07106 100644
--- a/maintenance/importImages.php
+++ b/maintenance/importImages.php
@@ -307,7 +307,7 @@
                                        $publishOptions = [];
                                        $handler = MediaHandler::getHandler( 
$props['mime'] );
                                        if ( $handler ) {
-                                               $publishOptions['headers'] = 
$handler->getStreamHeaders( $props['metadata'] );
+                                               $publishOptions['headers'] = 
$handler->getContentHeaders( $props['metadata'] );
                                        } else {
                                                $publishOptions['headers'] = [];
                                        }
diff --git a/maintenance/refreshFileHeaders.php 
b/maintenance/refreshFileHeaders.php
index e075501..f922055 100644
--- a/maintenance/refreshFileHeaders.php
+++ b/maintenance/refreshFileHeaders.php
@@ -57,13 +57,13 @@
                                __METHOD__, [ 'LIMIT' => $this->mBatchSize, 
'ORDER BY' => 'img_name ASC' ] );
                        foreach ( $res as $row ) {
                                $file = $repo->newFileFromRow( $row );
-                               $headers = $file->getStreamHeaders();
+                               $headers = $file->getContentHeaders();
                                if ( count( $headers ) ) {
                                        $this->updateFileHeaders( $file, 
$headers );
                                }
                                // Do all of the older file versions...
                                foreach ( $file->getHistory() as $oldFile ) {
-                                       $headers = $oldFile->getStreamHeaders();
+                                       $headers = 
$oldFile->getContentHeaders();
                                        if ( count( $headers ) ) {
                                                $this->updateFileHeaders( 
$oldFile, $headers );
                                        }
diff --git a/tests/phpunit/includes/media/MediaHandlerTest.php 
b/tests/phpunit/includes/media/MediaHandlerTest.php
index 7a052f6..4589fa5 100644
--- a/tests/phpunit/includes/media/MediaHandlerTest.php
+++ b/tests/phpunit/includes/media/MediaHandlerTest.php
@@ -65,4 +65,25 @@
                }
                return $result;
        }
+
+       /**
+        * @covers MediaHandler::getPageRangesByDimensions
+        *
+        * @dataProvider provideTestGetPageRangesByDimensions
+        */
+       public function testGetPageRangesByDimensions( $pagesByDimensions, 
$expected ) {
+               $this->assertEquals( $expected, 
MediaHandler::getPageRangesByDimensions( $pagesByDimensions ) );
+       }
+
+       public static function provideTestGetPageRangesByDimensions() {
+               return [
+                       [ [ '123x456' => [ 1 ] ], '123x456:1' ],
+                       [ [ '123x456' => [ 1, 2 ] ], '123x456:1-2' ],
+                       [ [ '123x456' => [ 1, 2, 3 ] ], '123x456:1-3' ],
+                       [ [ '123x456' => [ 1, 2, 3, 5 ] ], '123x456:1-3,5' ],
+                       [ [ '123x456' => [ 1, 3 ] ], '123x456:1,3' ],
+                       [ [ '123x456' => [ 1, 2, 3, 5, 6, 7 ] ], 
'123x456:1-3,5-7' ],
+                       [ [ '123x456' => [ 1, 2, 3, 5, 6, 7 ], '789x789' => [ 
4, 8, 9 ] ], '123x456:1-3,5-7/789x789:4,8-9' ],
+               ];
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/350405
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic4c6a86557b3705cf75d074753e9ce2ee070a6df
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Gilles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to