JanZerebecki has uploaded a new change for review.
https://gerrit.wikimedia.org/r/65300
Change subject: Adapt test of StringUtils::isUtf8 to work with PHP 5.4
......................................................................
Adapt test of StringUtils::isUtf8 to work with PHP 5.4
The mbstring extension changed in PHP 5.4, so some strings that were
previously considered valid UTF-8 are now rejected.
Make items in the data provider easier to identify by adding names.
Bug: 43679
Change-Id: I026eff69236187ce3d2ff2fc261a5e1d9cd88b24
---
M tests/phpunit/includes/StringUtilsTest.php
1 file changed, 72 insertions(+), 66 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/00/65300/1
diff --git a/tests/phpunit/includes/StringUtilsTest.php
b/tests/phpunit/includes/StringUtilsTest.php
index 842e2fc..a69ac4c 100644
--- a/tests/phpunit/includes/StringUtilsTest.php
+++ b/tests/phpunit/includes/StringUtilsTest.php
@@ -3,11 +3,11 @@
class StringUtilsTest extends MediaWikiTestCase {
/**
- * This test StringUtils::isUtf8 whenever we have mbstring extension
+ * This tests StringUtils::isUtf8 whenever we have mbstring extension
* loaded.
*
* @covers StringUtils::isUtf8
- * @dataProvider provideStringsForIsUtf8Check
+ * @dataProvider provideStringsForIsUtf8CheckWithMbstring
*/
function testIsUtf8WithMbstring( $expected, $string ) {
if ( !function_exists( 'mb_check_encoding' ) ) {
@@ -20,12 +20,12 @@
}
/**
- * This test StringUtils::isUtf8 making sure we use the pure PHP
+ * This tests StringUtils::isUtf8 making sure we use the pure PHP
* implementation used as a fallback when mb_check_encoding() is
* not available.
*
* @covers StringUtils::isUtf8
- * @dataProvider provideStringsForIsUtf8Check
+ * @dataProvider provideStringsForIsUtf8CheckWithPhpFallback
*/
function testIsUtf8WithPhpFallbackImplementation( $expected, $string ) {
$this->assertEquals( $expected,
@@ -53,92 +53,98 @@
return $escaped;
}
+ public static function provideStringsForIsUtf8CheckWithMbstring() {
+ return self::provideStringsForIsUtf8Check(TRUE);
+ }
+
+ public static function provideStringsForIsUtf8CheckWithPhpFallback() {
+ return self::provideStringsForIsUtf8Check(FALSE);
+ }
+
/**
* See also "UTF-8 decoder capability and stress test" by
* Markus Kuhn:
* http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
*/
- public static function provideStringsForIsUtf8Check() {
+ public static function provideStringsForIsUtf8Check($mbstring) {
// Expected return values for StringUtils::isUtf8()
$PASS = true;
$FAIL = false;
+ /* since PHP 5.4 the mbstring extension rejects some strings that
+ * it considered UTF-8 before
+ */
+ $passBeforePhp54 = version_compare(PHP_VERSION, '5.4', '<');
+ if (FALSE === $mbstring) {
+ $passBeforePhp54 = TRUE;
+ }
return array(
- array( $PASS, 'Some ASCII' ),
- array( $PASS, "Euro sign €" ),
+ 'some ASCII' => array( $PASS, 'Some ASCII' ),
+ 'euro sign' => array( $PASS, "Euro sign €" ),
- # First possible sequences
- array( $PASS, "\x00" ),
- array( $PASS, "\xc2\x80" ),
- array( $PASS, "\xe0\xa0\x80" ),
- array( $PASS, "\xf0\x90\x80\x80" ),
- array( $PASS, "\xf8\x88\x80\x80\x80" ),
- array( $PASS, "\xfc\x84\x80\x80\x80\x80" ),
+ 'first possible sequence 1 byte' => array( $PASS,
"\x00" ),
+ 'first possible sequence 2 bytes' => array( $PASS,
"\xc2\x80" ),
+ 'first possible sequence 3 bytes' => array( $PASS,
"\xe0\xa0\x80" ),
+ 'first possible sequence 4 bytes' => array( $PASS,
"\xf0\x90\x80\x80" ),
+ 'first possible sequence 5 bytes' => array(
$passBeforePhp54, "\xf8\x88\x80\x80\x80" ),
+ 'first possible sequence 6 bytes' => array(
$passBeforePhp54, "\xfc\x84\x80\x80\x80\x80" ),
- # Last possible sequence
- array( $PASS, "\x7f" ),
- array( $PASS, "\xdf\xbf" ),
- array( $PASS, "\xef\xbf\xbf" ),
- array( $PASS, "\xf7\xbf\xbf\xbf" ),
- array( $PASS, "\xfb\xbf\xbf\xbf\xbf" ),
- array( $FAIL, "\xfd\xbf\xbf\xbf\xbf\xbf" ),
+ 'last possible sequence 1 byte' => array( $PASS, "\x7f"
),
+ 'last possible sequence 2 bytes' => array( $PASS,
"\xdf\xbf" ),
+ 'last possible sequence 3 bytes' => array( $PASS,
"\xef\xbf\xbf" ),
+ 'last possible sequence 4 bytes' => array(
$passBeforePhp54, "\xf7\xbf\xbf\xbf" ),
+ 'last possible sequence 5 bytes' => array(
$passBeforePhp54, "\xfb\xbf\xbf\xbf\xbf" ),
+ 'last possible sequence 6 bytes' => array( $FAIL,
"\xfd\xbf\xbf\xbf\xbf\xbf" ),
- # boundaries:
- array( $PASS, "\xed\x9f\xbf" ),
- array( $PASS, "\xee\x80\x80" ),
- array( $PASS, "\xef\xbf\xbd" ),
- array( $PASS, "\xf4\x8f\xbf\xbf" ),
- array( $PASS, "\xf4\x90\x80\x80" ),
+ 'boundary 1' => array( $PASS, "\xed\x9f\xbf" ),
+ 'boundary 2' => array( $PASS, "\xee\x80\x80" ),
+ 'boundary 3' => array( $PASS, "\xef\xbf\xbd" ),
+ 'boundary 4' => array( $PASS, "\xf4\x8f\xbf\xbf" ),
+ 'boundary 5' => array( $passBeforePhp54,
"\xf4\x90\x80\x80" ),
- # Malformed
- array( $FAIL, "\x80" ),
- array( $FAIL, "\xBF" ),
- array( $FAIL, "\x80\xbf" ),
- array( $FAIL, "\x80\xbf\x80" ),
- array( $FAIL, "\x80\xbf\x80\xbf" ),
- array( $FAIL, "\x80\xbf\x80\xbf\x80" ),
- array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf" ),
- array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf\x80" ),
+ 'malformed 1' => array( $FAIL, "\x80" ),
+ 'malformed 2' => array( $FAIL, "\xBF" ),
+ 'malformed 3' => array( $FAIL, "\x80\xbf" ),
+ 'malformed 4' => array( $FAIL, "\x80\xbf\x80" ),
+ 'malformed 5' => array( $FAIL, "\x80\xbf\x80\xbf" ),
+ 'malformed 6' => array( $FAIL, "\x80\xbf\x80\xbf\x80" ),
+ 'malformed 7' => array( $FAIL,
"\x80\xbf\x80\xbf\x80\xbf" ),
+ 'malformed 8' => array( $FAIL,
"\x80\xbf\x80\xbf\x80\xbf\x80" ),
- # last byte missing
- array( $FAIL, "\xc0" ),
- array( $FAIL, "\xe0\x80" ),
- array( $FAIL, "\xf0\x80\x80" ),
- array( $FAIL, "\xf8\x80\x80\x80" ),
- array( $FAIL, "\xfc\x80\x80\x80\x80" ),
- array( $FAIL, "\xdf" ),
- array( $FAIL, "\xef\xbf" ),
- array( $FAIL, "\xf7\xbf\xbf" ),
- array( $FAIL, "\xfb\xbf\xbf\xbf" ),
- array( $FAIL, "\xfd\xbf\xbf\xbf\xbf" ),
+ 'last byte missing 1' => array( $FAIL, "\xc0" ),
+ 'last byte missing 2' => array( $FAIL, "\xe0\x80" ),
+ 'last byte missing 3' => array( $FAIL, "\xf0\x80\x80" ),
+ 'last byte missing 4' => array( $FAIL,
"\xf8\x80\x80\x80" ),
+ 'last byte missing 5' => array( $FAIL,
"\xfc\x80\x80\x80\x80" ),
+ 'last byte missing 6' => array( $FAIL, "\xdf" ),
+ 'last byte missing 7' => array( $FAIL, "\xef\xbf" ),
+ 'last byte missing 8' => array( $FAIL, "\xf7\xbf\xbf" ),
+ 'last byte missing 9' => array( $FAIL,
"\xfb\xbf\xbf\xbf" ),
+ 'last byte missing 10' => array( $FAIL,
"\xfd\xbf\xbf\xbf\xbf" ),
- # impossible bytes
- array( $FAIL, "\xfe" ),
- array( $FAIL, "\xff" ),
- array( $FAIL, "\xfe\xfe\xff\xff" ),
+ 'impossible bytes 1' => array( $FAIL, "\xfe" ),
+ 'impossible bytes 2' => array( $FAIL, "\xff" ),
+ 'impossible bytes 3' => array( $FAIL,
"\xfe\xfe\xff\xff" ),
/*
# The PHP implementation does not handle characters
# being represented in a form which is too long :(
- # overlong sequences
- array( $FAIL, "\xc0\xaf" ),
- array( $FAIL, "\xe0\x80\xaf" ),
- array( $FAIL, "\xf0\x80\x80\xaf" ),
- array( $FAIL, "\xf8\x80\x80\x80\xaf" ),
- array( $FAIL, "\xfc\x80\x80\x80\x80\xaf" ),
+ 'overlong sequence 1' => array( $FAIL, "\xc0\xaf" ),
+ 'overlong sequence 2' => array( $FAIL, "\xe0\x80\xaf" ),
+ 'overlong sequence 3' => array( $FAIL,
"\xf0\x80\x80\xaf" ),
+ 'overlong sequence 4' => array( $FAIL,
"\xf8\x80\x80\x80\xaf" ),
+ 'overlong sequence 5' => array( $FAIL,
"\xfc\x80\x80\x80\x80\xaf" ),
- # Maximum overlong sequences
- array( $FAIL, "\xc1\xbf" ),
- array( $FAIL, "\xe0\x9f\xbf" ),
- array( $FAIL, "\xf0\x8F\xbf\xbf" ),
- array( $FAIL, "\xf8\x87\xbf\xbf" ),
- array( $FAIL, "\xfc\x83\xbf\xbf\xbf\xbf" ),
+ 'maximum overlong sequence 1' => array( $FAIL,
"\xc1\xbf" ),
+ 'maximum overlong sequence 2' => array( $FAIL,
"\xe0\x9f\xbf" ),
+ 'maximum overlong sequence 3' => array( $FAIL,
"\xf0\x8F\xbf\xbf" ),
+ 'maximum overlong sequence 4' => array( $FAIL,
"\xf8\x87\xbf\xbf" ),
+ 'maximum overlong sequence 5' => array( $FAIL,
"\xfc\x83\xbf\xbf\xbf\xbf" ),
*/
- # non characters
- array( $PASS, "\xef\xbf\xbe" ),
- array( $PASS, "\xef\xbf\xbf" ),
+ 'non characters 1' => array( $PASS, "\xef\xbf\xbe" ),
+ 'non characters 2' => array( $PASS, "\xef\xbf\xbf" ),
);
}
}
--
To view, visit https://gerrit.wikimedia.org/r/65300
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I026eff69236187ce3d2ff2fc261a5e1d9cd88b24
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: JanZerebecki <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits