Anomie has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/346603 )

Change subject: Parser\Encoder: Trim whitespace from encodings per spec
......................................................................

Parser\Encoder: Trim whitespace from encodings per spec

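The spec requires that leading and trailing ASCII whitespace (tab, LF,
FF, CR, and space) be trimmed from encoding labels before they are
looked up. Do so for the transport, @charset, and environment encodings.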

Change-Id: Id2db17aa9e52c7b0ed67026bc8f9f39a49f8ecbd
---
M src/Parser/Encoder.php
M tests/Parser/EncoderTest.php
2 files changed, 22 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/css-sanitizer 
refs/changes/03/346603/1

diff --git a/src/Parser/Encoder.php b/src/Parser/Encoder.php
index 48634ad..5691d98 100644
--- a/src/Parser/Encoder.php
+++ b/src/Parser/Encoder.php
@@ -260,14 +260,16 @@
                }
 
                // 1. Transport encoding
-               $encoding = isset( $encodings['transport'] ) ? strtolower( 
$encodings['transport'] ) : null;
+               $encoding = isset( $encodings['transport'] )
+                       ? trim( strtolower( $encodings['transport'] ), 
"\t\n\f\r " )
+                       : null;
                if ( $encoding !== null && isset( self::$encodings[$encoding] ) 
) {
                        return self::doConvert( self::$encodings[$encoding], 
$text );
                }
 
                // 2. @charset rule
                if ( preg_match( '/^@charset 
"([\x00-\x21\x23-\x7f]{0,1012})";/', $text, $m ) ) {
-                       $encoding = strtolower( $m[1] );
+                       $encoding = trim( strtolower( $m[1] ), "\t\n\f\r " );
                        if ( $encoding === 'utf-16be' || $encoding === 
'utf-16le' ) {
                                // It's obviously lying.
                                $encoding = 'utf-8';
@@ -278,7 +280,9 @@
                }
 
                // 3. Environment encoding
-               $encoding = isset( $encodings['environment'] ) ? strtolower( 
$encodings['environment'] ) : null;
+               $encoding = isset( $encodings['environment'] )
+                       ? trim( strtolower( $encodings['environment'] ), 
"\t\n\f\r " )
+                       : null;
                if ( $encoding !== null && isset( self::$encodings[$encoding] ) 
) {
                        return self::doConvert( self::$encodings[$encoding], 
$text );
                }
diff --git a/tests/Parser/EncoderTest.php b/tests/Parser/EncoderTest.php
index db57ccb..b7bf693 100644
--- a/tests/Parser/EncoderTest.php
+++ b/tests/Parser/EncoderTest.php
@@ -67,6 +67,21 @@
                                '@charset "piglatin1"; fóo',
                                [ 'transport' => 'bogus', 'environment' => 
'latin1' ],
                        ],
+                       'Transport encoding with whitespace' => [
+                               '@charset "iso-2022-cn";',
+                               '@charset "iso-2022-cn";',
+                               [ 'transport' => "\rutf-8\n", 'environment' => 
'iso-2022-cn' ],
+                       ],
+                       '@charset with whitespace' => [
+                               "@charset \"\funicode-1-1-utf-8\n\";",
+                               "@charset \"\funicode-1-1-utf-8\n\";",
+                               [ 'transport' => 'bogus', 'environment' => 
'iso-2022-cn' ],
+                       ],
+                       'environment with whitespace' => [
+                               "@charset \"piglatin1\"; f\xf3o",
+                               '@charset "piglatin1"; fóo',
+                               [ 'transport' => 'bogus', 'environment' => 
"\rlatin1\n" ],
+                       ],
                        'fallback to UTF-8' => [
                                "@charset \"piglatin1\"; f\xf3o",
                                '@charset "piglatin1"; f�o',

-- 
To view, visit https://gerrit.wikimedia.org/r/346603
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2db17aa9e52c7b0ed67026bc8f9f39a49f8ecbd
Gerrit-PatchSet: 1
Gerrit-Project: css-sanitizer
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to