Aaron Schulz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/173241

Change subject: Improved MIME detection in FileBackend
......................................................................

Improved MIME detection in FileBackend

* The content type detector will now inspect the file contents
  to better handle extensionless files.
* Normalize all headers to lower case at the start of the
  FileBackend operation methods. This makes it easy for subclasses
  to check for certain headers.
* Removed some old Content-Disposition backward-compatibility (b/c) code.
* The Swift backend class now respects Content-Type headers
  for create()/store().

Change-Id: Iad59bf6c6a416b706f976a4c425763fd30e2debb
---
M includes/filebackend/FileBackend.php
M includes/filebackend/FileBackendStore.php
M includes/filebackend/SwiftFileBackend.php
3 files changed, 32 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/41/173241/1

diff --git a/includes/filebackend/FileBackend.php 
b/includes/filebackend/FileBackend.php
index 8c0a61a..6b2d880 100644
--- a/includes/filebackend/FileBackend.php
+++ b/includes/filebackend/FileBackend.php
@@ -375,11 +375,6 @@
                if ( empty( $opts['force'] ) ) { // sanity
                        unset( $opts['nonLocking'] );
                }
-               foreach ( $ops as &$op ) {
-                       if ( isset( $op['disposition'] ) ) { // b/c (MW 1.20)
-                               $op['headers']['Content-Disposition'] = 
$op['disposition'];
-                       }
-               }
                $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore 
client aborts
                return $this->doOperationsInternal( $ops, $opts );
        }
@@ -608,9 +603,6 @@
                }
                foreach ( $ops as &$op ) {
                        $op['overwrite'] = true; // avoids RTTs in key/value 
stores
-                       if ( isset( $op['disposition'] ) ) { // b/c (MW 1.20)
-                               $op['headers']['Content-Disposition'] = 
$op['disposition'];
-                       }
                }
                $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore 
client aborts
                return $this->doQuickOperationsInternal( $ops );
diff --git a/includes/filebackend/FileBackendStore.php 
b/includes/filebackend/FileBackendStore.php
index 495ac3c..93ed324 100644
--- a/includes/filebackend/FileBackendStore.php
+++ b/includes/filebackend/FileBackendStore.php
@@ -69,8 +69,19 @@
                $this->mimeCallback = isset( $config['mimeCallback'] )
                        ? $config['mimeCallback']
                        : function ( $storagePath, $content, $fsPath ) {
-                               // @todo handle the case of extension-less 
files using the contents
-                               return StreamFile::contentTypeFromPath( 
$storagePath ) ?: 'unknown/unknown';
+                               $magic = MimeMagic::singleton();
+                               // Trust the extension of the storage path 
(caller must validate)
+                               $ext = FileBackend::extensionFromPath( 
$storagePath );
+                               $type = $magic->guessTypesForExtension( $ext );
+                               // For files without a valid extension (or one 
at all), inspect the contents
+                               if ( !$type && $fsPath ) {
+                                       $type = $magic->guessMimeType( $fsPath, 
false );
+                               } elseif ( !$type && strlen( $content ) ) {
+                                       $tmpFile = TempFSFile::factory( 'mime_' 
);
+                                       file_put_contents( $tmpFile->getPath(), 
$content );
+                                       $type = $magic->guessMimeType( 
$tmpFile->getPath(), false );
+                               }
+                               return $type ?: 'unknown/unknown';
                        };
                $this->memCache = new EmptyBagOStuff(); // disabled by default
                $this->cheapCache = new ProcessCacheLRU( self::CACHE_CHEAP_SIZE 
);
@@ -1075,7 +1086,7 @@
                $status = Status::newGood();
 
                // Fix up custom header name/value pairs...
-               $ops = array_map( array( $this, 'stripInvalidHeadersFromOp' ), 
$ops );
+               $ops = array_map( array( $this, 'sanitizeOpHeaders' ), $ops );
 
                // Build up a list of FileOps...
                $performOps = $this->getOperationsInternal( $ops );
@@ -1141,7 +1152,7 @@
                $status = Status::newGood();
 
                // Fix up custom header name/value pairs...
-               $ops = array_map( array( $this, 'stripInvalidHeadersFromOp' ), 
$ops );
+               $ops = array_map( array( $this, 'sanitizeOpHeaders' ), $ops );
 
                // Clear any file cache entries
                $this->clearCache();
@@ -1236,7 +1247,9 @@
        }
 
        /**
-        * Strip long HTTP headers from a file operation.
+        * Normalize and filter HTTP headers from a file operation
+        *
+        * This normalizes and strips long HTTP headers from a file operation.
         * Most headers are just numbers, but some are allowed to be long.
         * This function is useful for cleaning up headers and avoiding backend
         * specific errors, especially in the middle of batch file operations.
@@ -1244,18 +1257,21 @@
         * @param array $op Same format as doOperation()
         * @return array
         */
-       protected function stripInvalidHeadersFromOp( array $op ) {
-               static $longs = array( 'Content-Disposition' );
+       protected function sanitizeOpHeaders( array $op ) {
+               static $longs = array( 'content-disposition' );
+
                if ( isset( $op['headers'] ) ) { // op sets HTTP headers
+                       $newHeaders = array();
                        foreach ( $op['headers'] as $name => $value ) {
+                               $name = strtolower( $name );
                                $maxHVLen = in_array( $name, $longs ) ? INF : 
255;
                                if ( strlen( $name ) > 255 || strlen( $value ) 
> $maxHVLen ) {
                                        trigger_error( "Header '$name: $value' 
is too long." );
-                                       unset( $op['headers'][$name] );
-                               } elseif ( !strlen( $value ) ) {
-                                       $op['headers'][$name] = ''; // 
null/false => ""
+                               } else {
+                                       $newHeaders[$name] = strlen( $value ) ? 
$value : ''; // null/false => ""
                                }
                        }
+                       $op['headers'] = $newHeaders;
                }
 
                return $op;
diff --git a/includes/filebackend/SwiftFileBackend.php 
b/includes/filebackend/SwiftFileBackend.php
index 625b9b4..06a1b1b 100644
--- a/includes/filebackend/SwiftFileBackend.php
+++ b/includes/filebackend/SwiftFileBackend.php
@@ -221,7 +221,9 @@
                }
 
                $sha1Hash = wfBaseConvert( sha1( $params['content'] ), 16, 36, 
31 );
-               $contentType = $this->getContentType( $params['dst'], 
$params['content'], null );
+               $contentType = isset( $params['headers']['content-type'] )
+                       ? $params['headers']['content-type']
+                       : $this->getContentType( $params['dst'], 
$params['content'], null );
 
                $reqs = array( array(
                        'method' => 'PUT',
@@ -277,7 +279,9 @@
                        return $status;
                }
                $sha1Hash = wfBaseConvert( $sha1Hash, 16, 36, 31 );
-               $contentType = $this->getContentType( $params['dst'], null, 
$params['src'] );
+               $contentType = isset( $params['headers']['content-type'] )
+                       ? $params['headers']['content-type']
+                       : $this->getContentType( $params['dst'], null, 
$params['src'] );
 
                $handle = fopen( $params['src'], 'rb' );
                if ( $handle === false ) { // source doesn't exist?

-- 
To view, visit https://gerrit.wikimedia.org/r/173241
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iad59bf6c6a416b706f976a4c425763fd30e2debb
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <asch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to