JanZerebecki has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/65300


Change subject: Adapt test of StringUtils::isUtf8 to work with PHP 5.4
......................................................................

Adapt test of StringUtils::isUtf8 to work with PHP 5.4

The mbstring extension changed in PHP 5.4, so some strings that were
previously considered valid UTF-8 are now rejected.

Make items in the data provider easier to identify by adding names.

Bug: 43679
Change-Id: I026eff69236187ce3d2ff2fc261a5e1d9cd88b24
---
M tests/phpunit/includes/StringUtilsTest.php
1 file changed, 72 insertions(+), 66 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/00/65300/1

diff --git a/tests/phpunit/includes/StringUtilsTest.php 
b/tests/phpunit/includes/StringUtilsTest.php
index 842e2fc..a69ac4c 100644
--- a/tests/phpunit/includes/StringUtilsTest.php
+++ b/tests/phpunit/includes/StringUtilsTest.php
@@ -3,11 +3,11 @@
 class StringUtilsTest extends MediaWikiTestCase {
 
        /**
-        * This test StringUtils::isUtf8 whenever we have mbstring extension
+        * This tests StringUtils::isUtf8 whenever we have mbstring extension
         * loaded.
         *
         * @covers StringUtils::isUtf8
-        * @dataProvider provideStringsForIsUtf8Check
+        * @dataProvider provideStringsForIsUtf8CheckWithMbstring
         */
        function testIsUtf8WithMbstring( $expected, $string ) {
                if ( !function_exists( 'mb_check_encoding' ) ) {
@@ -20,12 +20,12 @@
        }
 
        /**
-        * This test StringUtils::isUtf8 making sure we use the pure PHP
+        * This tests StringUtils::isUtf8 making sure we use the pure PHP
         * implementation used as a fallback when mb_check_encoding() is
         * not available.
         *
         * @covers StringUtils::isUtf8
-        * @dataProvider provideStringsForIsUtf8Check
+        * @dataProvider provideStringsForIsUtf8CheckWithPhpFallback
         */
        function testIsUtf8WithPhpFallbackImplementation( $expected, $string ) {
                $this->assertEquals( $expected,
@@ -53,92 +53,98 @@
                return $escaped;
        }
 
+       public static function provideStringsForIsUtf8CheckWithMbstring() {
+               return self::provideStringsForIsUtf8Check(TRUE);
+       }
+
+       public static function provideStringsForIsUtf8CheckWithPhpFallback() {
+               return self::provideStringsForIsUtf8Check(FALSE);
+       }
+
        /**
         * See also "UTF-8 decoder capability and stress test" by
         * Markus Kuhn:
         * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
         */
-       public static function provideStringsForIsUtf8Check() {
+       public static function provideStringsForIsUtf8Check($mbstring) {
                // Expected return values for StringUtils::isUtf8()
                $PASS = true;
                $FAIL = false;
+               /* since PHP 5.4 the mbstring extension rejects some strings 
that
+                * it considered UTF-8 before
+                */
+               $passBeforePhp54 = version_compare(PHP_VERSION, '5.4', '<');
+               if (FALSE === $mbstring) {
+                       $passBeforePhp54 = TRUE;
+               }
 
                return array(
-                       array( $PASS, 'Some ASCII' ),
-                       array( $PASS, "Euro sign €" ),
+                       'some ASCII' => array( $PASS, 'Some ASCII' ),
+                       'euro sign' => array( $PASS, "Euro sign €" ),
 
-                       # First possible sequences
-                       array( $PASS, "\x00" ),
-                       array( $PASS, "\xc2\x80" ),
-                       array( $PASS, "\xe0\xa0\x80" ),
-                       array( $PASS, "\xf0\x90\x80\x80" ),
-                       array( $PASS, "\xf8\x88\x80\x80\x80" ),
-                       array( $PASS, "\xfc\x84\x80\x80\x80\x80" ),
+                       'first possible sequence 1 byte' => array( $PASS, 
"\x00" ),
+                       'first possible sequence 2 bytes' => array( $PASS, 
"\xc2\x80" ),
+                       'first possible sequence 3 bytes' => array( $PASS, 
"\xe0\xa0\x80" ),
+                       'first possible sequence 4 bytes' => array( $PASS, 
"\xf0\x90\x80\x80" ),
+                       'first possible sequence 5 bytes' => array( 
$passBeforePhp54, "\xf8\x88\x80\x80\x80" ),
+                       'first possible sequence 6 bytes' => array( 
$passBeforePhp54, "\xfc\x84\x80\x80\x80\x80" ),
 
-                       # Last possible sequence
-                       array( $PASS, "\x7f" ),
-                       array( $PASS, "\xdf\xbf" ),
-                       array( $PASS, "\xef\xbf\xbf" ),
-                       array( $PASS, "\xf7\xbf\xbf\xbf" ),
-                       array( $PASS, "\xfb\xbf\xbf\xbf\xbf" ),
-                       array( $FAIL, "\xfd\xbf\xbf\xbf\xbf\xbf" ),
+                       'last possible sequence 1 byte' => array( $PASS, "\x7f" 
),
+                       'last possible sequence 2 bytes' => array( $PASS, 
"\xdf\xbf" ),
+                       'last possible sequence 3 bytes' => array( $PASS, 
"\xef\xbf\xbf" ),
+                       'last possible sequence 4 bytes' => array( 
$passBeforePhp54, "\xf7\xbf\xbf\xbf" ),
+                       'last possible sequence 5 bytes' => array( 
$passBeforePhp54, "\xfb\xbf\xbf\xbf\xbf" ),
+                       'last possible sequence 6 bytes' => array( $FAIL, 
"\xfd\xbf\xbf\xbf\xbf\xbf" ),
 
-                       # boundaries:
-                       array( $PASS, "\xed\x9f\xbf" ),
-                       array( $PASS, "\xee\x80\x80" ),
-                       array( $PASS, "\xef\xbf\xbd" ),
-                       array( $PASS, "\xf4\x8f\xbf\xbf" ),
-                       array( $PASS, "\xf4\x90\x80\x80" ),
+                       'boundary 1' => array( $PASS, "\xed\x9f\xbf" ),
+                       'boundary 2' => array( $PASS, "\xee\x80\x80" ),
+                       'boundary 3' => array( $PASS, "\xef\xbf\xbd" ),
+                       'boundary 4' => array( $PASS, "\xf4\x8f\xbf\xbf" ),
+                       'boundary 5' => array( $passBeforePhp54, 
"\xf4\x90\x80\x80" ),
 
-                       # Malformed
-                       array( $FAIL, "\x80" ),
-                       array( $FAIL, "\xBF" ),
-                       array( $FAIL, "\x80\xbf" ),
-                       array( $FAIL, "\x80\xbf\x80" ),
-                       array( $FAIL, "\x80\xbf\x80\xbf" ),
-                       array( $FAIL, "\x80\xbf\x80\xbf\x80" ),
-                       array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf" ),
-                       array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf\x80" ),
+                       'malformed 1' => array( $FAIL, "\x80" ),
+                       'malformed 2' => array( $FAIL, "\xBF" ),
+                       'malformed 3' => array( $FAIL, "\x80\xbf" ),
+                       'malformed 4' => array( $FAIL, "\x80\xbf\x80" ),
+                       'malformed 5' => array( $FAIL, "\x80\xbf\x80\xbf" ),
+                       'malformed 6' => array( $FAIL, "\x80\xbf\x80\xbf\x80" ),
+                       'malformed 7' => array( $FAIL, 
"\x80\xbf\x80\xbf\x80\xbf" ),
+                       'malformed 8' => array( $FAIL, 
"\x80\xbf\x80\xbf\x80\xbf\x80" ),
 
-                       # last byte missing
-                       array( $FAIL, "\xc0" ),
-                       array( $FAIL, "\xe0\x80" ),
-                       array( $FAIL, "\xf0\x80\x80" ),
-                       array( $FAIL, "\xf8\x80\x80\x80" ),
-                       array( $FAIL, "\xfc\x80\x80\x80\x80" ),
-                       array( $FAIL, "\xdf" ),
-                       array( $FAIL, "\xef\xbf" ),
-                       array( $FAIL, "\xf7\xbf\xbf" ),
-                       array( $FAIL, "\xfb\xbf\xbf\xbf" ),
-                       array( $FAIL, "\xfd\xbf\xbf\xbf\xbf" ),
+                       'last byte missing 1' => array( $FAIL, "\xc0" ),
+                       'last byte missing 2' => array( $FAIL, "\xe0\x80" ),
+                       'last byte missing 3' => array( $FAIL, "\xf0\x80\x80" ),
+                       'last byte missing 4' => array( $FAIL, 
"\xf8\x80\x80\x80" ),
+                       'last byte missing 5' => array( $FAIL, 
"\xfc\x80\x80\x80\x80" ),
+                       'last byte missing 6' => array( $FAIL, "\xdf" ),
+                       'last byte missing 7' => array( $FAIL, "\xef\xbf" ),
+                       'last byte missing 8' => array( $FAIL, "\xf7\xbf\xbf" ),
+                       'last byte missing 9' => array( $FAIL, 
"\xfb\xbf\xbf\xbf" ),
+                       'last byte missing 10' => array( $FAIL, 
"\xfd\xbf\xbf\xbf\xbf" ),
 
-                       # impossible bytes
-                       array( $FAIL, "\xfe" ),
-                       array( $FAIL, "\xff" ),
-                       array( $FAIL, "\xfe\xfe\xff\xff" ),
+                       'impossible bytes 1' => array( $FAIL, "\xfe" ),
+                       'impossible bytes 2' => array( $FAIL, "\xff" ),
+                       'impossible bytes 3' => array( $FAIL, 
"\xfe\xfe\xff\xff" ),
 
                        /*
                        # The PHP implementation does not handle characters
                        # being represented in a form which is too long :(
 
-                       # overlong sequences
-                       array( $FAIL, "\xc0\xaf" ),
-                       array( $FAIL, "\xe0\x80\xaf" ),
-                       array( $FAIL, "\xf0\x80\x80\xaf" ),
-                       array( $FAIL, "\xf8\x80\x80\x80\xaf" ),
-                       array( $FAIL, "\xfc\x80\x80\x80\x80\xaf" ),
+                       'overlong sequence 1' => array( $FAIL, "\xc0\xaf" ),
+                       'overlong sequence 2' => array( $FAIL, "\xe0\x80\xaf" ),
+                       'overlong sequence 3' => array( $FAIL, 
"\xf0\x80\x80\xaf" ),
+                       'overlong sequence 4' => array( $FAIL, 
"\xf8\x80\x80\x80\xaf" ),
+                       'overlong sequence 5' => array( $FAIL, 
"\xfc\x80\x80\x80\x80\xaf" ),
 
-                       # Maximum overlong sequences
-                       array( $FAIL, "\xc1\xbf" ),
-                       array( $FAIL, "\xe0\x9f\xbf" ),
-                       array( $FAIL, "\xf0\x8F\xbf\xbf" ),
-                       array( $FAIL, "\xf8\x87\xbf\xbf" ),
-                       array( $FAIL, "\xfc\x83\xbf\xbf\xbf\xbf" ),
+                       'maximum overlong sequence 1' => array( $FAIL, 
"\xc1\xbf" ),
+                       'maximum overlong sequence 2' => array( $FAIL, 
"\xe0\x9f\xbf" ),
+                       'maximum overlong sequence 3' => array( $FAIL, 
"\xf0\x8F\xbf\xbf" ),
+                       'maximum overlong sequence 4' => array( $FAIL, 
"\xf8\x87\xbf\xbf" ),
+                       'maximum overlong sequence 5' => array( $FAIL, 
"\xfc\x83\xbf\xbf\xbf\xbf" ),
                        */
 
-                       # non characters
-                       array( $PASS, "\xef\xbf\xbe" ),
-                       array( $PASS, "\xef\xbf\xbf" ),
+                       'non characters 1' => array( $PASS, "\xef\xbf\xbe" ),
+                       'non characters 2' => array( $PASS, "\xef\xbf\xbf" ),
                );
        }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/65300
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I026eff69236187ce3d2ff2fc261a5e1d9cd88b24
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: JanZerebecki <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to