CSteipp has submitted this change and it was merged.

Change subject: SECURITY: Check SVG xml encoding against whitelist
......................................................................


SECURITY: Check SVG xml encoding against whitelist

Some browsers will interpret obscure xml encodings as UTF-8, while
PHP/expat will interpret the given encoding in the xml declaration.


bug: 47304
Change-Id: I3b311a7078d977ae89c51e95e625d79fba183cfc
---
M includes/upload/UploadBase.php
1 file changed, 67 insertions(+), 0 deletions(-)

Approvals:
  CSteipp: Verified; Looks good to me, approved



diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php
index d40b53d..3a5733c 100644
--- a/includes/upload/UploadBase.php
+++ b/includes/upload/UploadBase.php
@@ -46,6 +46,8 @@
        protected $mBlackListedExtensions;
        protected $mJavaDetected;
 
+       protected static $safeXmlEncodings = array( 'UTF-8', 'ISO-8859-1', 
'ISO-8859-2', 'UTF-16', 'UTF-32' );
+
        const SUCCESS = 0;
        const OK = 0;
        const EMPTY_FILE = 3;
@@ -966,6 +968,15 @@
                        return true;
                }
 
+               // Some browsers will interpret obscure xml encodings as UTF-8, 
while
+               // PHP/expat will interpret the given encoding in the xml 
declaration (bug 47304)
+               if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 
) {
+                       if ( self::checkXMLEncodingMissmatch( $file ) ) {
+                               wfProfileOut( __METHOD__ );
+                               return true;
+                       }
+               }
+
                /**
                 * Internet Explorer for Windows performs some really stupid 
file type
                 * autodetection which can cause it to interpret valid image 
files as HTML
@@ -1037,6 +1048,62 @@
                return false;
        }
 
+
+       /**
+        * Check a whitelist of xml encodings that are known not to be 
interpreted differently
+        * by the server's xml parser (expat) and some common browsers.
+        *
+        * @param string $file pathname to the temporary upload file
+        * @return Boolean: true if the file contains an encoding that could be 
misinterpreted
+        */
+       public static function checkXMLEncodingMissmatch( $file ) {
+               global $wgSVGMetadataCutoff;
+               $contents = file_get_contents( $file, false, null, -1, 
$wgSVGMetadataCutoff );
+               $encodingRegex = '!encoding[ \t\n\r]*=[ 
\t\n\r]*[\'"](.*?)[\'"]!si';
+
+               if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) 
) {
+                       if ( preg_match( $encodingRegex, $matches[1], $encMatch 
)
+                               && !in_array( strtoupper( $encMatch[1] ), 
self::$safeXmlEncodings )
+                       ) {
+                               wfDebug( __METHOD__ . ": Found unsafe XML 
encoding '{$encMatch[1]}'\n" );
+                               return true;
+                       }
+               } elseif ( preg_match( "!<\?xml\b!si", $contents ) ) {
+                       // Start of XML declaration without an end in the first 
$wgSVGMetadataCutoff
+                       // bytes. There shouldn't be a legitimate reason for 
this to happen.
+                       wfDebug( __METHOD__ . ": Unmatched XML declaration 
start\n" );
+                       return true;
+               } elseif ( substr( $contents, 0, 4) == "\x4C\x6F\xA7\x94" ) {
+                       // EBCDIC encoded XML
+                       wfDebug( __METHOD__ . ": EBCDIC Encoded XML\n" );
+                       return true;
+               }
+
+               // It's possible the file is encoded with multi-byte encoding, 
so re-encode attempt to
+               // detect the encoding in case is specifies an encoding not 
whitelisted in self::$safeXmlEncodings
+               $attemptEncodings = array( 'UTF-16', 'UTF-16BE', 'UTF-32', 
'UTF-32BE' );
+               foreach ( $attemptEncodings as $encoding ) {
+                       wfSuppressWarnings();
+                       $str = iconv( $encoding, 'UTF-8', $contents );
+                       wfRestoreWarnings();
+                       if ( $str != '' && preg_match( "!<\?xml\b(.*?)\?>!si", 
$str, $matches ) ) {
+                               if ( preg_match( $encodingRegex, $matches[1], 
$encMatch )
+                                       && !in_array( strtoupper( $encMatch[1] 
), self::$safeXmlEncodings )
+                               ) {
+                                       wfDebug( __METHOD__ . ": Found unsafe 
XML encoding '{$encMatch[1]}'\n" );
+                                       return true;
+                               }
+                       } elseif ( $str != '' && preg_match( "!<\?xml\b!si", 
$str ) ) {
+                               // Start of XML declaration without an end in 
the first $wgSVGMetadataCutoff
+                               // bytes. There shouldn't be a legitimate 
reason for this to happen.
+                               wfDebug( __METHOD__ . ": Unmatched XML 
declaration start\n" );
+                               return true;
+                       }
+               }
+
+               return false;
+       }
+
        /**
         * @param $filename string
         * @return bool

-- 
To view, visit https://gerrit.wikimedia.org/r/61643
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I3b311a7078d977ae89c51e95e625d79fba183cfc
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/core
Gerrit-Branch: REL1_20
Gerrit-Owner: CSteipp <cste...@wikimedia.org>
Gerrit-Reviewer: CSteipp <cste...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to