TheDJ has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/288173

Change subject: Strip the BOM character when we concatenate files
......................................................................

Strip the BOM character when we concatenate files

We are reading files and concatenating their contents, but some files
start with a BOM character and these BOM characters are only allowed at the
beginning of files, not halfway through.
Stripping them should be safe, since we already assume that everything
is UTF-8.

Change-Id: I14ad698a684e78976e873e9ae2c367475550a063
---
M includes/resourceloader/ResourceLoaderFileModule.php
1 file changed, 19 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/73/288173/1

diff --git a/includes/resourceloader/ResourceLoaderFileModule.php 
b/includes/resourceloader/ResourceLoaderFileModule.php
index 1e7329a..66c0f73 100644
--- a/includes/resourceloader/ResourceLoaderFileModule.php
+++ b/includes/resourceloader/ResourceLoaderFileModule.php
@@ -468,7 +468,7 @@
                if ( !file_exists( $localPath ) ) {
                        throw new MWException( __METHOD__ . ": skip function 
file not found: \"$localPath\"" );
                }
-               $contents = file_get_contents( $localPath );
+               $contents = $this->stripBom( file_get_contents( $localPath ) );
                if ( $this->getConfig()->get( 'ResourceLoaderValidateStaticJS' 
) ) {
                        $contents = $this->validateScriptFile( $localPath, 
$contents );
                }
@@ -810,7 +810,7 @@
                        if ( !file_exists( $localPath ) ) {
                                throw new MWException( __METHOD__ . ": script 
file not found: \"$localPath\"" );
                        }
-                       $contents = file_get_contents( $localPath );
+                       $contents = $this->stripBom( file_get_contents( 
$localPath ) );
                        if ( $this->getConfig()->get( 
'ResourceLoaderValidateStaticJS' ) ) {
                                // Static files don't really need to be checked 
as often; unlike
                                // on-wiki module they shouldn't change 
unexpectedly without
@@ -882,7 +882,7 @@
                        $style = $this->compileLessFile( $localPath, $context );
                        $this->hasGeneratedStyles = true;
                } else {
-                       $style = file_get_contents( $localPath );
+                       $style = $this->stripBom( file_get_contents( $localPath 
) );
                }
 
                if ( $flip ) {
@@ -990,7 +990,7 @@
                        $localPath = $this->getLocalPath( $templatePath );
                        if ( file_exists( $localPath ) ) {
                                $content = file_get_contents( $localPath );
-                               $templates[$alias] = $content;
+                               $templates[$alias] = $this->stripBom( $content 
);
                        } else {
                                $msg = __METHOD__ . ": template file not found: 
\"$localPath\"";
                                wfDebugLog( 'resourceloader', $msg );
@@ -999,4 +999,19 @@
                }
                return $templates;
        }
+
+       /**
+        * Takes an input string and removes the UTF-8 BOM character if present
+        *
+        * We need to remove these after reading a file, because we concatenate 
our files and
+        * the BOM character is not valid in the middle of a string.
+        * We already assume UTF-8 everywhere, so this should be safe.
+        *
+        * @return string input minus the initial BOM char
+        */
+       protected function stripBom( $input ) {
+               $bom = pack( 'H*', 'EFBBBF' );
+               $output = preg_replace( "/^$bom/", '', $input );
+               return $output;
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/288173
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I14ad698a684e78976e873e9ae2c367475550a063
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: TheDJ <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to