In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/44563783c331578157881d3a392d50dd3ea07885?hp=f41ee62e9d9e2e6909b863830aaeb4f1e06407e0>
- Log ----------------------------------------------------------------- commit 44563783c331578157881d3a392d50dd3ea07885 Author: Karl Williamson <[email protected]> Date: Fri Oct 28 08:46:53 2016 -0600 XS-APItest/t/utf8.t: Test with longest possible overlong As part of testing, certain malformations are perturbed to also be overlong to see that the combination of them is properly handled. To do this, the code will take a test case and calculate an overlong that is longer than it. However if the test case is as long as the overlong would be, this can't be done, and is skipped. This commit now uses a longer overlong than previously (now the maximum possible) so that fewer tests have to be skipped. M ext/XS-APItest/t/utf8.t commit f2c1c1486fc9dcf3cc17aeda650215584a00df4f Author: Karl Williamson <[email protected]> Date: Fri Oct 28 08:44:43 2016 -0600 XS-APItest/t/utf8.t: White-space only M ext/XS-APItest/t/utf8.t commit 1069c57cb1f4e6b94f8695843243749e9511303e Author: Karl Williamson <[email protected]> Date: Fri Oct 28 08:42:38 2016 -0600 XS-APItest/t/utf8.t: Fix EBCDIC bug This number needs to be adjusted for EBCDIC platforms M ext/XS-APItest/t/utf8.t commit 78a3c0f885993b7560c809640e932af91ba25136 Author: Karl Williamson <[email protected]> Date: Fri Oct 28 08:36:56 2016 -0600 XS-APItest/t/utf8.t: Move a common expression to $var The maximum byte length of a single code-points UTF-8 representation is used in a bunch of places. Calculate it once. M ext/XS-APItest/t/utf8.t commit f9913875f5edd15a92af1ff0a4775ebb3fdf927a Author: Karl Williamson <[email protected]> Date: Fri Oct 28 08:31:09 2016 -0600 XS-APItest/t/utf8.t: Fix wrong test on EBCDIC The I8 string doesn't work the same as UTF-8, as it only takes 5 bits from each continuation byte instead of 6. M ext/XS-APItest/t/utf8.t ----------------------------------------------------------------------- Summary of changes: ext/XS-APItest/t/utf8.t | 76 ++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index 6c6ed67..fc04dfc 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -380,6 +380,9 @@ my $first_continuation = (isASCII) ? 0x80 : 0xA0; my $final_continuation = 0xBF; my $start = (isASCII) ? 0xC2 : 0xC5; +my $max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence + # representing a single code point + my $continuation = $first_continuation - 1; while ($cp < 255) { @@ -429,7 +432,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $u < 0x200000 ? 4 : $u < 0x4000000 ? 5 : $u < 0x80000000 ? 6 : (($is64bit) - ? ($u < 0x1000000000 ? 7 : 13) + ? ($u < 0x1000000000 ? 7 : $max_bytes) : 7) ) : ($u < 0xA0 ? 1 : @@ -438,7 +441,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $u < 0x40000 ? 4 : $u < 0x400000 ? 5 : $u < 0x4000000 ? 6 : - $u < 0x40000000 ? 7 : 14 ); + $u < 0x40000000 ? 7 : $max_bytes ); } # If this test fails, subsequent ones are meaningless. @@ -1051,7 +1054,7 @@ my @malformations = ( (isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"), 2, $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0x7F : utf8::unicode_to_native(0xBF), + (isASCII) ? 0x7F : utf8::unicode_to_native(0x9F), 2, qr/overlong/ ], @@ -1163,11 +1166,11 @@ if (isASCII && ! $is64bit) { # 32-bit ASCII platform ], [ "overflow malformation, can tell on first byte", "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80", - 13, + $max_bytes, 0, # There is no way to allow this malformation $UTF8_GOT_OVERFLOW, $REPLACEMENT, - 13, + $max_bytes, qr/overflows/ ]; } @@ -1182,20 +1185,20 @@ else { (isASCII) ? "\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - (isASCII) ? 13 : 14, + $max_bytes, $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, 0, # NUL - (isASCII) ? 13 : 14, + $max_bytes, qr/overlong/, ], [ "overlong malformation, highest max-byte", (isASCII) # 2**36-1 on ASCII; 2**30-1 on EBCDIC ? "\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"), - (isASCII) ? 13 : 14, + $max_bytes, $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, (isASCII) ? 0xFFFFFFFFF : 0x3FFFFFFF, - (isASCII) ? 13 : 14, + $max_bytes, qr/overlong/, ]; @@ -1203,11 +1206,11 @@ else { push @malformations, [ "overflow malformation", I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"), - 14, + $max_bytes, 0, # There is no way to allow this malformation $UTF8_GOT_OVERFLOW, $REPLACEMENT, - 14, + $max_bytes, qr/overflows/ ]; } @@ -1217,11 +1220,11 @@ else { (isASCII) ? "\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0" : I8_to_native("\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - (isASCII) ? 13 : 14, + $max_bytes, 0, # There is no way to allow this malformation $UTF8_GOT_OVERFLOW, $REPLACEMENT, - (isASCII) ? 13 : 14, + $max_bytes, qr/overflows/ ]; } @@ -1725,7 +1728,7 @@ my @tests = ( # 32-bit machines $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x80000000, (isASCII) ? 7 :14, + 'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes, nonportable_regex(0x80000000) ], [ "requires at least 32 bits, and use SUPER-type flags, instead of ABOVE_31_BIT", @@ -1733,7 +1736,7 @@ my @tests = ( ? "\xfe\x82\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER, - 'utf8', 0x80000000, (isASCII) ? 7 :14, + 'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes, nonportable_regex(0x80000000) ], [ "overflow with warnings/disallow for more than 31 bits", @@ -1756,7 +1759,7 @@ my @tests = ( $UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0, - (! isASCII) ? 14 : ($is64bit) ? 13 : 7, + (! isASCII) ? $max_bytes : ($is64bit) ? $max_bytes : 7, # XXX qr/overflows/ ], ); @@ -1770,7 +1773,7 @@ if ($is64bit) { : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x1000000000, (isASCII) ? 13 : 14, + 'utf8', 0x1000000000, $max_bytes, qr/and( is)? not portable/ ]; if (! isASCII) { @@ -1779,35 +1782,35 @@ if ($is64bit) { I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x800000000, 14, + 'utf8', 0x800000000, $max_bytes, nonportable_regex(0x80000000) ], [ "requires at least 32 bits", I8_to_native("\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x10000000000, 14, + 'utf8', 0x10000000000, $max_bytes, nonportable_regex(0x10000000000) ], [ "requires at least 32 bits", I8_to_native("\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x200000000000, 14, + 'utf8', 0x200000000000, $max_bytes, nonportable_regex(0x20000000000) ], [ "requires at least 32 bits", I8_to_native("\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x4000000000000, 14, + 'utf8', 0x4000000000000, $max_bytes, nonportable_regex(0x4000000000000) ], [ "requires at least 32 bits", I8_to_native("\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x80000000000000, 14, + 'utf8', 0x80000000000000, $max_bytes, nonportable_regex(0x80000000000000) ], [ "requires at least 32 bits", @@ -1815,7 +1818,7 @@ if ($is64bit) { #IBM-1047 \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x1000000000000000, 14, + 'utf8', 0x1000000000000000, $max_bytes, nonportable_regex(0x1000000000000000) ]; } @@ -1982,10 +1985,10 @@ foreach my $test (@tests) { foreach my $overlong ("", "overlong") { - # Our hard-coded overlong starts with \xFE, so + # If we're already at the longest possible, we + # can't create an overlong (which would be longer) # can't handle anything larger. - next if $overlong - && ord native_to_I8(substr($bytes, 0, 1)) >= 0xFE; + next if $overlong && $expected_len >= $max_bytes; my @malformations; my @expected_errors; @@ -2026,12 +2029,13 @@ foreach my $test (@tests) { # overlong sequence. This should evaluate # to the exact same code point as the # original. - $this_bytes = "\xfe" - . (I8_to_native(chr $first_continuation) - x ( 6 - length($this_bytes))) - . $this_bytes; + $this_bytes + = I8_to_native("\xff") + . (I8_to_native(chr $first_continuation) + x ( $max_bytes - 1 - length($this_bytes))) + . $this_bytes; $this_length = length($this_bytes); - $this_expected_len = 7; + $this_expected_len = $max_bytes; push @expected_errors, $UTF8_GOT_LONG; } if ($malformations_name =~ /short/) { @@ -2069,10 +2073,10 @@ foreach my $test (@tests) { || $malformations_name; my $this_name = "utf8n_to_uvchr_error() $testname: " . (($disallow_flag) - ? 'disallowed' - : $disallowed - ? $disallowed - : 'allowed'); + ? 'disallowed' + : $disallowed + ? $disallowed + : 'allowed'); $this_name .= ", $eval_warn"; $this_name .= ", " . (($warn_flag) ? 'with warning flag' @@ -2163,7 +2167,7 @@ foreach my $test (@tests) { } } fail("Expected '$malformation' warning" - . "but didn't get it"); + . " but didn't get it"); } } -- Perl5 Master Repository
