Anomie has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/346603 )
Change subject: Parser\Encoder: Trim whitespace from encodings per spec
......................................................................
Parser\Encoder: Trim whitespace from encodings per spec
The spec requires that leading and trailing ASCII whitespace (tab, LF,
FF, CR, and space) be trimmed from encoding labels before they are
looked up.
Change-Id: Id2db17aa9e52c7b0ed67026bc8f9f39a49f8ecbd
---
M src/Parser/Encoder.php
M tests/Parser/EncoderTest.php
2 files changed, 22 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/css-sanitizer
refs/changes/03/346603/1
diff --git a/src/Parser/Encoder.php b/src/Parser/Encoder.php
index 48634ad..5691d98 100644
--- a/src/Parser/Encoder.php
+++ b/src/Parser/Encoder.php
@@ -260,14 +260,16 @@
}
// 1. Transport encoding
- $encoding = isset( $encodings['transport'] ) ? strtolower(
$encodings['transport'] ) : null;
+ $encoding = isset( $encodings['transport'] )
+ ? trim( strtolower( $encodings['transport'] ),
"\t\n\f\r " )
+ : null;
if ( $encoding !== null && isset( self::$encodings[$encoding] )
) {
return self::doConvert( self::$encodings[$encoding],
$text );
}
// 2. @charset rule
if ( preg_match( '/^@charset
"([\x00-\x21\x23-\x7f]{0,1012})";/', $text, $m ) ) {
- $encoding = strtolower( $m[1] );
+ $encoding = trim( strtolower( $m[1] ), "\t\n\f\r " );
if ( $encoding === 'utf-16be' || $encoding ===
'utf-16le' ) {
// It's obviously lying.
$encoding = 'utf-8';
@@ -278,7 +280,9 @@
}
// 3. Environment encoding
- $encoding = isset( $encodings['environment'] ) ? strtolower(
$encodings['environment'] ) : null;
+ $encoding = isset( $encodings['environment'] )
+ ? trim( strtolower( $encodings['environment'] ),
"\t\n\f\r " )
+ : null;
if ( $encoding !== null && isset( self::$encodings[$encoding] )
) {
return self::doConvert( self::$encodings[$encoding],
$text );
}
diff --git a/tests/Parser/EncoderTest.php b/tests/Parser/EncoderTest.php
index db57ccb..b7bf693 100644
--- a/tests/Parser/EncoderTest.php
+++ b/tests/Parser/EncoderTest.php
@@ -67,6 +67,21 @@
'@charset "piglatin1"; fóo',
[ 'transport' => 'bogus', 'environment' =>
'latin1' ],
],
+ 'Transport encoding with whitespace' => [
+ '@charset "iso-2022-cn";',
+ '@charset "iso-2022-cn";',
+ [ 'transport' => "\rutf-8\n", 'environment' =>
'iso-2022-cn' ],
+ ],
+ '@charset with whitespace' => [
+ "@charset \"\funicode-1-1-utf-8\n\";",
+ "@charset \"\funicode-1-1-utf-8\n\";",
+ [ 'transport' => 'bogus', 'environment' =>
'iso-2022-cn' ],
+ ],
+ 'environment with whitespace' => [
+ "@charset \"piglatin1\"; f\xf3o",
+ '@charset "piglatin1"; fóo',
+ [ 'transport' => 'bogus', 'environment' =>
"\rlatin1\n" ],
+ ],
'fallback to UTF-8' => [
"@charset \"piglatin1\"; f\xf3o",
'@charset "piglatin1"; f�o',
--
To view, visit https://gerrit.wikimedia.org/r/346603
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2db17aa9e52c7b0ed67026bc8f9f39a49f8ecbd
Gerrit-PatchSet: 1
Gerrit-Project: css-sanitizer
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits