http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89627

Revision: 89627
Author:   tstarling
Date:     2011-06-07 05:57:23 +0000 (Tue, 07 Jun 2011)
Log Message:
-----------
MFT r89397, r89558, etc.: bug 28840 IE URL extension

Modified Paths:
--------------
    branches/REL1_18/phase3/api.php
    branches/REL1_18/phase3/img_auth.php
    branches/REL1_18/phase3/includes/AutoLoader.php
    branches/REL1_18/phase3/includes/RawPage.php
    branches/REL1_18/phase3/includes/WebRequest.php
    branches/REL1_18/phase3/load.php

Added Paths:
-----------
    branches/REL1_18/phase3/includes/libs/IEUrlExtension.php

Property Changed:
----------------
    branches/REL1_18/phase3/api.php
    branches/REL1_18/phase3/img_auth.php
    branches/REL1_18/phase3/includes/AutoLoader.php
    branches/REL1_18/phase3/includes/RawPage.php
    branches/REL1_18/phase3/includes/WebRequest.php
    branches/REL1_18/phase3/load.php

Modified: branches/REL1_18/phase3/api.php
===================================================================
--- branches/REL1_18/phase3/api.php     2011-06-07 05:39:25 UTC (rev 89626)
+++ branches/REL1_18/phase3/api.php     2011-06-07 05:57:23 UTC (rev 89627)
@@ -57,18 +57,7 @@
 $starttime = microtime( true );
 
 // URL safety checks
-//
-// See RawPage.php for details; summary is that MSIE can override the
-// Content-Type if it sees a recognized extension on the URL, such as
-// might be appended via PATH_INFO after 'api.php'.
-//
-// Some data formats can end up containing unfiltered user-provided data
-// which will end up triggering HTML detection and execution, hence
-// XSS injection and all that entails.
-//
-if ( $wgRequest->isPathInfoBad() ) {
-       wfHttpError( 403, 'Forbidden',
-               'Invalid file extension found in PATH_INFO or QUERY_STRING.' );
+if ( !$wgRequest->checkUrlExtension() ) {
        return;
 }
 


Property changes on: branches/REL1_18/phase3/api.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/api.php:51646
/branches/REL1_17/phase3/api.php:81445,81448
/branches/new-installer/phase3/api.php:43664-66004
/branches/sqlite/api.php:58211-58321
/trunk/phase3/api.php:87632,87636,87640,87644,89397

Modified: branches/REL1_18/phase3/img_auth.php
===================================================================
--- branches/REL1_18/phase3/img_auth.php        2011-06-07 05:39:25 UTC (rev 
89626)
+++ branches/REL1_18/phase3/img_auth.php        2011-06-07 05:57:23 UTC (rev 
89627)
@@ -38,15 +38,20 @@
        wfForbidden('img-auth-accessdenied','img-auth-public');
 }
 
+$matches = WebRequest::getPathInfo();
+$path = $matches['title'];
+
 // Check for bug 28235: QUERY_STRING overriding the correct extension
-if ( isset( $_SERVER['QUERY_STRING'] )
-       && preg_match( '/\.[^\\/:*?"<>|%]+(#|\?|$)/i', $_SERVER['QUERY_STRING'] 
) )
+$dotPos = strpos( $path, '.' );
+$whitelist = array();
+if ( $dotPos !== false ) {
+       $whitelist[] = substr( $path, $dotPos + 1 );
+}
+if ( !$wgRequest->checkUrlExtension( $whitelist ) )
 {
-       wfForbidden( 'img-auth-accessdenied', 'img-auth-bad-query-string' );
-}      
+       return;
+}
 
-$matches = WebRequest::getPathInfo();
-$path = $matches['title'];
 $filename = realpath( $wgUploadDirectory . $path );
 $realUpload = realpath( $wgUploadDirectory );
 


Property changes on: branches/REL1_18/phase3/img_auth.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/img_auth.php:51646
/branches/REL1_17/phase3/img_auth.php:81445,81448
/branches/new-installer/phase3/img_auth.php:43664-66004
/branches/sqlite/img_auth.php:58211-58321
/trunk/phase3/img_auth.php:87632,87636,87640,87644,89558

Modified: branches/REL1_18/phase3/includes/AutoLoader.php
===================================================================
--- branches/REL1_18/phase3/includes/AutoLoader.php     2011-06-07 05:39:25 UTC 
(rev 89626)
+++ branches/REL1_18/phase3/includes/AutoLoader.php     2011-06-07 05:57:23 UTC 
(rev 89627)
@@ -502,6 +502,7 @@
        'CSSJanus' => 'includes/libs/CSSJanus.php',
        'CSSMin' => 'includes/libs/CSSMin.php',
        'IEContentAnalyzer' => 'includes/libs/IEContentAnalyzer.php',
+       'IEUrlExtension' => 'includes/libs/IEUrlExtension.php',
        'JavaScriptMinifier' => 'includes/libs/JavaScriptMinifier.php',
        'Spyc' => 'includes/libs/spyc.php',
 


Property changes on: branches/REL1_18/phase3/includes/AutoLoader.php
___________________________________________________________________
Modified: svn:mergeinfo
   - /branches/REL1_15/phase3/includes/AutoLoader.php:51646
/branches/REL1_17/phase3/includes/AutoLoader.php:81448
/branches/new-installer/phase3/includes/AutoLoader.php:43664-66004
/branches/sqlite/includes/AutoLoader.php:58211-58321
/branches/uploadwizard/phase3/includes/AutoLoader.php:73550-75905
/branches/wmf-deployment/includes/AutoLoader.php:53381
   + /branches/REL1_15/phase3/includes/AutoLoader.php:51646
/branches/REL1_17/phase3/includes/AutoLoader.php:81448
/branches/new-installer/phase3/includes/AutoLoader.php:43664-66004
/branches/sqlite/includes/AutoLoader.php:58211-58321
/branches/uploadwizard/phase3/includes/AutoLoader.php:73550-75905
/branches/wmf-deployment/includes/AutoLoader.php:53381
/trunk/phase3/includes/AutoLoader.php:89558

Modified: branches/REL1_18/phase3/includes/RawPage.php
===================================================================
--- branches/REL1_18/phase3/includes/RawPage.php        2011-06-07 05:39:25 UTC 
(rev 89626)
+++ branches/REL1_18/phase3/includes/RawPage.php        2011-06-07 05:57:23 UTC 
(rev 89627)
@@ -118,22 +118,8 @@
        function view() {
                global $wgOut, $wgRequest;
 
-               if( $wgRequest->isPathInfoBad() ) {
-                       # Internet Explorer will ignore the Content-Type header 
if it
-                       # thinks it sees a file extension it recognizes. Make 
sure that
-                       # all raw requests are done through the script node, 
which will
-                       # have eg '.php' and should remain safe.
-                       #
-                       # We used to redirect to a canonical-form URL as a 
general
-                       # backwards-compatibility / good-citizen nice thing. 
However
-                       # a lot of servers are set up in buggy ways, resulting 
in
-                       # redirect loops which hang the browser until the CSS 
load
-                       # times out.
-                       #
-                       # Just return a 403 Forbidden and get it over with.
-                       wfHttpError( 403, 'Forbidden',
-                               'Invalid file extension found in PATH_INFO or 
QUERY_STRING. ' .
-                               'Raw pages must be accessed through the primary 
script entry point.' );
+               if( !$wgRequest->checkUrlExtension() ) {
+                       $wgOut->disable();
                        return;
                }
 


Property changes on: branches/REL1_18/phase3/includes/RawPage.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/includes/RawPage.php:51646
/branches/new-installer/phase3/includes/RawPage.php:43664-66004
/branches/sqlite/includes/RawPage.php:58211-58321
/branches/wmf-deployment/includes/RawPage.php:53381
/trunk/phase3/includes/RawPage.php:89397,89558

Modified: branches/REL1_18/phase3/includes/WebRequest.php
===================================================================
--- branches/REL1_18/phase3/includes/WebRequest.php     2011-06-07 05:39:25 UTC 
(rev 89626)
+++ branches/REL1_18/phase3/includes/WebRequest.php     2011-06-07 05:57:23 UTC 
(rev 89627)
@@ -767,6 +767,62 @@
        }
 
        /**
+        * Check if Internet Explorer will detect an incorrect cache extension 
in 
+        * PATH_INFO or QUERY_STRING. If the request can't be allowed, show an 
error
+        * message or redirect to a safer URL. Returns true if the URL is OK, 
and
+        * false if an error message has been shown and the request should be 
aborted.
+        */
+       public function checkUrlExtension( $extWhitelist = array() ) {
+               global $wgScriptExtension;
+               $extWhitelist[] = ltrim( $wgScriptExtension, '.' );
+               if ( IEUrlExtension::areServerVarsBad( $_SERVER, $extWhitelist 
) ) {
+                       if ( !$this->wasPosted() ) {
+                               $newUrl = IEUrlExtension::fixUrlForIE6(
+                                       $this->getFullRequestURL(), 
$extWhitelist );
+                               if ( $newUrl !== false ) {
+                                       $this->doSecurityRedirect( $newUrl );
+                                       return false;
+                               }
+                       }
+                       wfHttpError( 403, 'Forbidden',
+                               'Invalid file extension found in the path info 
or query string.' );
+                       
+                       return false;
+               }
+               return true;
+       }
+
+       /**
+        * Attempt to redirect to a URL with a QUERY_STRING that's not 
dangerous in 
+        * IE 6. Returns true if it was successful, false otherwise.
+        */
+       protected function doSecurityRedirect( $url ) {
+               header( 'Location: ' . $url );
+               header( 'Content-Type: text/html' );
+               $encUrl = htmlspecialchars( $url );
+               echo <<<HTML
+<html>
+<head>
+<title>Security redirect</title>
+</head>
+<body>
+<h1>Security redirect</h1>
+<p>
+We can't serve non-HTML content from the URL you have requested, because 
+Internet Explorer would interpret it as an incorrect and potentially dangerous
+content type.</p>
+<p>Instead, please use <a href="$encUrl">this URL</a>, which is the same as 
the URL you have requested, except that 
+"&amp;*" is appended. This prevents Internet Explorer from seeing a bogus file 
+extension.
+</p>
+</body>
+</html>
+HTML;
+               echo "\n";
+               return true;
+       }
+
+       /**
         * Returns true if the PATH_INFO ends with an extension other than a 
script
         * extension. This could confuse IE for scripts that send arbitrary 
data which
         * is not HTML but may be detected as such.
@@ -784,30 +840,8 @@
         */
        public function isPathInfoBad() {
                global $wgScriptExtension;
-
-               if ( isset( $_SERVER['QUERY_STRING'] ) 
-                       && preg_match( '/\.[^\\/:*?"<>|%]+(#|\?|$)/i', 
$_SERVER['QUERY_STRING'] ) )
-               {
-                       // Bug 28235
-                       // Block only Internet Explorer, and requests with 
missing UA 
-                       // headers that could be IE users behind a privacy 
proxy.
-                       if ( !isset( $_SERVER['HTTP_USER_AGENT'] ) 
-                               || preg_match( '/; *MSIE/', 
$_SERVER['HTTP_USER_AGENT'] ) )
-                       {
-                               return true;
-                       }
-               }
-
-               if ( !isset( $_SERVER['PATH_INFO'] ) ) {
-                       return false;
-               }
-               $pi = $_SERVER['PATH_INFO'];
-               $dotPos = strrpos( $pi, '.' );
-               if ( $dotPos === false ) {
-                       return false;
-               }
-               $ext = substr( $pi, $dotPos );
-               return !in_array( $ext, array( $wgScriptExtension, '.php', 
'.php5' ) );
+               $extWhitelist[] = ltrim( $wgScriptExtension, '.' );
+               return IEUrlExtension::areServerVarsBad( $_SERVER, 
$extWhitelist );
        }
 
        /**
@@ -1045,8 +1079,12 @@
        public function getSessionArray() {
                return $this->session;
        }
-
-       public function isPathInfoBad() {
+       
+       public function isPathInfoBad( $extWhitelist = array() ) {
                return false;
        }
+
+       public function checkUrlExtension( $extWhitelist = array() ) {
+               return true;
+       }
 }


Property changes on: branches/REL1_18/phase3/includes/WebRequest.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/includes/WebRequest.php:51646
/branches/new-installer/phase3/includes/WebRequest.php:43664-66004
/branches/sqlite/includes/WebRequest.php:58211-58321
/branches/wmf-deployment/includes/WebRequest.php:53381
/trunk/phase3/includes/WebRequest.php:89558

Copied: branches/REL1_18/phase3/includes/libs/IEUrlExtension.php (from rev 
89621, trunk/phase3/includes/libs/IEUrlExtension.php)
===================================================================
--- branches/REL1_18/phase3/includes/libs/IEUrlExtension.php                    
        (rev 0)
+++ branches/REL1_18/phase3/includes/libs/IEUrlExtension.php    2011-06-07 
05:57:23 UTC (rev 89627)
@@ -0,0 +1,247 @@
+<?php
+
+/**
+ * Internet Explorer derives a cache filename from a URL, and then in certain 
+ * circumstances, uses the extension of the resulting file to determine the 
+ * content type of the data, ignoring the Content-Type header. 
+ *
+ * This can be a problem, especially when non-HTML content is sent by 
MediaWiki,
+ * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
+ *
+ * Usually the script filename (e.g. api.php) is present in the URL, and this 
+ * makes Internet Explorer think the extension is a harmless script extension.
+ * But Internet Explorer 6 and earlier allows the script extension to be 
+ * obscured by encoding the dot as "%2E". 
+ *
+ * This class contains functions which help in detecting and dealing with this 
+ * situation.
+ *
+ * Checking the URL for a bad extension is somewhat complicated due to the 
fact 
+ * that CGI doesn't provide a standard method to determine the URL. Instead it
+ * is necessary to pass a subset of $_SERVER variables, which we then attempt 
+ * to use to guess parts of the URL.
+ */
+class IEUrlExtension {
+       /**
+        * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
+        * to see if it indicates that the request was sent with a bad file 
+        * extension. Returns true if the request should be denied or modified, 
+        * false otherwise. The relevant $_SERVER elements are:
+        *
+        *   - SERVER_SOFTWARE
+        *   - REQUEST_URI
+        *   - QUERY_STRING
+        *   - PATH_INFO
+        *
+        * If the a variable is unset in $_SERVER, it should be unset in $vars.
+        *
+        * @param $vars A subset of $_SERVER.
+        * @param $extWhitelist Extensions which are allowed, assumed harmless.
+        */
+       public static function areServerVarsBad( $vars, $extWhitelist = array() 
) {
+               // Check QUERY_STRING or REQUEST_URI
+               if ( isset( $vars['SERVER_SOFTWARE'] )
+                       && isset( $vars['REQUEST_URI'] )
+                       && self::haveUndecodedRequestUri( 
$vars['SERVER_SOFTWARE'] ) )
+               {
+                       $urlPart = $vars['REQUEST_URI'];
+               } elseif ( isset( $vars['QUERY_STRING'] ) ) {
+                       $urlPart = $vars['QUERY_STRING'];
+               } else {
+                       $urlPart = '';
+               }
+
+               if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
+                       return true;
+               }
+
+               // Some servers have PATH_INFO but not REQUEST_URI, so we check 
both 
+               // to be on the safe side.
+               if ( isset( $vars['PATH_INFO'] )
+                       && self::isUrlExtensionBad( $vars['PATH_INFO'], 
$extWhitelist ) )
+               {
+                       return true;
+               }
+
+               // All checks passed
+               return false;
+       }
+
+       /**
+        * Given a right-hand portion of a URL, determine whether IE would 
detect
+        * a potentially harmful file extension.
+        *
+        * @param $urlPart The right-hand portion of a URL
+        * @param $extWhitelist An array of file extensions which may occur in 
this
+        *    URL, and which should be allowed.
+        * @return bool
+        */
+       public static function isUrlExtensionBad( $urlPart, $extWhitelist = 
array() ) {
+               if ( strval( $urlPart ) === '' ) {
+                       return false;
+               }
+
+               $extension = self::findIE6Extension( $urlPart );
+               if ( strval( $extension ) === '' ) {
+                       // No extension or empty extension
+                       return false;
+               }
+
+               if ( in_array( $extension, array( 'php', 'php5' ) ) ) {
+                       // Script extension, OK
+                       return false;
+               }
+               if ( in_array( $extension, $extWhitelist ) ) {
+                       // Whitelisted extension
+                       return false;
+               }
+
+               if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
+                       // Non-alphanumeric extension, unlikely to be 
registered. 
+                       //
+                       // The regex above is known to match all registered 
file extensions
+                       // in a default Windows XP installation. It's important 
to allow 
+                       // extensions with ampersands and percent signs, since 
that reduces
+                       // the number of false positives substantially.
+                       return false;
+               }
+
+               // Possibly bad extension
+               return true;
+       }
+
+       /**
+        * Returns a variant of $url which will pass isUrlExtensionBad() but 
has the 
+        * same GET parameters, or false if it can't figure one out.
+        */
+       public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
+               $questionPos = strpos( $url, '?' );
+               if ( $questionPos === false ) {
+                       $beforeQuery = $url . '?';
+                       $query = '';
+               } elseif ( $questionPos === strlen( $url ) - 1 ) {
+                       $beforeQuery = $url;
+                       $query = '';
+               } else {
+                       $beforeQuery = substr( $url, 0, $questionPos + 1 );
+                       $query = substr( $url, $questionPos + 1 );
+               }
+
+               // Multiple question marks cause problems. Encode the second 
and 
+               // subsequent question mark.
+               $query = str_replace( '?', '%3E', $query );
+               // Append an invalid path character so that IE6 won't see the 
end of the
+               // query string as an extension
+               $query .= '&*';
+               // Put the URL back together
+               $url = $beforeQuery . $query;
+               if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
+                       // Avoid a redirect loop
+                       return false;
+               }
+               return $url;
+       }
+
+       /**
+        * Determine what extension IE6 will infer from a certain query string.
+        * If the URL has an extension before the question mark, IE6 will use
+        * that and ignore the query string, but per the comment at
+        * isPathInfoBad() we don't have a reliable way to determine the URL,
+        * so isPathInfoBad() just passes in the query string for $url.
+        * All entry points have safe extensions (php, php5) anyway, so
+        * checking the query string is possibly overly paranoid but never
+        * insecure.
+        *
+        * The criteria for finding an extension are as follows:
+        * - a possible extension is a dot followed by one or more characters 
not 
+        *   in <>\"/:|?.#
+        * - if we find a possible extension followed by the end of the string 
or 
+        *   a #, that's our extension
+        * - if we find a possible extension followed by a ?, that's our 
extension
+        *    - UNLESS it's exe, dll or cgi, in which case we ignore it and 
continue 
+        *      searching for another possible extension
+        * - if we find a possible extension followed by a dot or another 
illegal 
+        *   character, we ignore it and continue searching
+        * 
+        * @param $url string URL
+        * @return mixed Detected extension (string), or false if none found
+        */
+       public static function findIE6Extension( $url ) {
+               $pos = 0;
+               $hashPos = strpos( $url, '#' );
+               if ( $hashPos !== false ) {
+                       $urlLength = $hashPos;
+               } else {
+                       $urlLength = strlen( $url );
+               }
+               $remainingLength = $urlLength;
+               while ( $remainingLength > 0 ) {
+                       // Skip ahead to the next dot
+                       $pos += strcspn( $url, '.', $pos, $remainingLength );
+                       if ( $pos >= $urlLength ) {
+                               // End of string, we're done
+                               return false;
+                       }
+                       
+                       // We found a dot. Skip past it
+                       $pos++;
+                       $remainingLength = $urlLength - $pos;
+
+                       // Check for illegal characters in our prospective 
extension,
+                       // or for another dot
+                       $nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, 
$remainingLength );
+                       if ( $nextPos >= $urlLength ) {
+                               // No illegal character or next dot
+                               // We have our extension
+                               return substr( $url, $pos, $urlLength - $pos );
+                       }
+                       if ( $url[$nextPos] === '?' ) {
+                               // We've found a legal extension followed by a 
question mark
+                               // If the extension is NOT exe, dll or cgi, 
return it
+                               $extension = substr( $url, $pos, $nextPos - 
$pos );
+                               if ( strcasecmp( $extension, 'exe' ) && 
strcasecmp( $extension, 'dll' ) &&
+                                       strcasecmp( $extension, 'cgi' ) )
+                               {
+                                       return $extension;
+                               }
+                               // Else continue looking
+                       }
+                       // We found an illegal character or another dot
+                       // Skip to that character and continue the loop
+                       $pos = $nextPos + 1;
+                       $remainingLength = $urlLength - $pos;
+               }
+               return false;
+       }
+
+       /**
+        * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
+        * returns true if that server is known to have a REQUEST_URI variable
+        * with %2E not decoded to ".". On such a server, it is possible to 
detect
+        * whether the script filename has been obscured.
+        *
+        * The function returns false if the server is not known to have this 
+        * behaviour. Microsoft IIS in particular is known to decode escaped 
script
+        * filenames.
+        *
+        * SERVER_SOFTWARE typically contains either a plain string such as 
"Zeus",
+        * or a specification in the style of a User-Agent header, such as 
+        * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
+        *
+        * @param $serverSoftware
+        * @return bool
+        *
+        */
+       public static function haveUndecodedRequestUri( $serverSoftware ) {
+               static $whitelist = array(
+                       'Apache', 
+                       'Zeus', 
+                       'LiteSpeed' );
+               if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
+                       return in_array( $m[1], $whitelist );
+               } else {
+                       return false;
+               }
+       }
+
+}


Property changes on: branches/REL1_18/phase3/includes/libs/IEUrlExtension.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/includes/libs/IEUrlExtension.php:51646
/branches/new-installer/phase3/includes/libs/IEUrlExtension.php:43664-66004
/branches/sqlite/includes/libs/IEUrlExtension.php:58211-58321
/branches/wmf-deployment/includes/libs/IEUrlExtension.php:53381
Added: svn:eol-style
   + native

Modified: branches/REL1_18/phase3/load.php
===================================================================
--- branches/REL1_18/phase3/load.php    2011-06-07 05:39:25 UTC (rev 89626)
+++ branches/REL1_18/phase3/load.php    2011-06-07 05:57:23 UTC (rev 89627)
@@ -40,17 +40,7 @@
 wfProfileIn( 'load.php' );
 
 // URL safety checks
-//
-// See RawPage.php for details; summary is that MSIE can override the
-// Content-Type if it sees a recognized extension on the URL, such as
-// might be appended via PATH_INFO after 'load.php'.
-//
-// Some resources can contain HTML-like strings (e.g. in messages)
-// which will end up triggering HTML detection and execution.
-//
-if ( $wgRequest->isPathInfoBad() ) {
-       wfHttpError( 403, 'Forbidden',
-               'Invalid file extension found in PATH_INFO or QUERY_STRING.' );
+if ( !$wgRequest->checkUrlExtension() ) {
        return;
 }
 


Property changes on: branches/REL1_18/phase3/load.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/load.php:51646
/branches/REL1_17/phase3/load.php:81445,81448
/branches/new-installer/phase3/load.php:43664-66004
/branches/sqlite/load.php:58211-58321
/trunk/phase3/load.php:87632,87636,87640,87644,89397


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to