In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/44563783c331578157881d3a392d50dd3ea07885?hp=f41ee62e9d9e2e6909b863830aaeb4f1e06407e0>

- Log -----------------------------------------------------------------
commit 44563783c331578157881d3a392d50dd3ea07885
Author: Karl Williamson <[email protected]>
Date:   Fri Oct 28 08:46:53 2016 -0600

    XS-APItest/t/utf8.t: Test with longest possible overlong
    
    As part of testing, certain malformations are perturbed to also be
    overlong to see that the combination of them is properly handled.  To do
    this, the code will take a test case and calculate an overlong that is
    longer than it.  However if the test case is as long as the overlong
    would be, this can't be done, and is skipped.  This commit now
    uses a longer overlong than previously (now the maximum possible) so
    that fewer tests have to be skipped.

M       ext/XS-APItest/t/utf8.t

commit f2c1c1486fc9dcf3cc17aeda650215584a00df4f
Author: Karl Williamson <[email protected]>
Date:   Fri Oct 28 08:44:43 2016 -0600

    XS-APItest/t/utf8.t: White-space only

M       ext/XS-APItest/t/utf8.t

commit 1069c57cb1f4e6b94f8695843243749e9511303e
Author: Karl Williamson <[email protected]>
Date:   Fri Oct 28 08:42:38 2016 -0600

    XS-APItest/t/utf8.t: Fix EBCDIC bug
    
    This number needs to be adjusted for EBCDIC platforms

M       ext/XS-APItest/t/utf8.t

commit 78a3c0f885993b7560c809640e932af91ba25136
Author: Karl Williamson <[email protected]>
Date:   Fri Oct 28 08:36:56 2016 -0600

    XS-APItest/t/utf8.t: Move a common expression to $var
    
    The maximum byte length of a single code-points UTF-8 representation is
    used in a bunch of places.  Calculate it once.

M       ext/XS-APItest/t/utf8.t

commit f9913875f5edd15a92af1ff0a4775ebb3fdf927a
Author: Karl Williamson <[email protected]>
Date:   Fri Oct 28 08:31:09 2016 -0600

    XS-APItest/t/utf8.t: Fix wrong test on EBCDIC
    
    The I8 string doesn't work the same as UTF-8, as it only takes 5 bits
    from each continuation byte instead of 6.

M       ext/XS-APItest/t/utf8.t
-----------------------------------------------------------------------

Summary of changes:
 ext/XS-APItest/t/utf8.t | 76 ++++++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index 6c6ed67..fc04dfc 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -380,6 +380,9 @@ my $first_continuation = (isASCII) ? 0x80 : 0xA0;
 my $final_continuation = 0xBF;
 my $start = (isASCII) ? 0xC2 : 0xC5;
 
+my $max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence
+                                     # representing a single code point
+
 my $continuation = $first_continuation - 1;
 
 while ($cp < 255) {
@@ -429,7 +432,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> 
utf8::unicode_to_native($b) }
                 $u < 0x200000       ? 4 :
                 $u < 0x4000000      ? 5 :
                 $u < 0x80000000     ? 6 : (($is64bit)
-                                        ? ($u < 0x1000000000 ? 7 : 13)
+                                        ? ($u < 0x1000000000 ? 7 : $max_bytes)
                                         : 7)
               )
             : ($u < 0xA0        ? 1 :
@@ -438,7 +441,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> 
utf8::unicode_to_native($b) }
                $u < 0x40000     ? 4 :
                $u < 0x400000    ? 5 :
                $u < 0x4000000   ? 6 :
-               $u < 0x40000000  ? 7 : 14 );
+               $u < 0x40000000  ? 7 : $max_bytes );
     }
 
     # If this test fails, subsequent ones are meaningless.
@@ -1051,7 +1054,7 @@ my @malformations = (
         (isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"),
         2,
         $UTF8_ALLOW_LONG, $UTF8_GOT_LONG,
-        (isASCII) ? 0x7F : utf8::unicode_to_native(0xBF),
+        (isASCII) ? 0x7F : utf8::unicode_to_native(0x9F),
         2,
         qr/overlong/
     ],
@@ -1163,11 +1166,11 @@ if (isASCII && ! $is64bit) {    # 32-bit ASCII platform
         ],
         [ "overflow malformation, can tell on first byte",
             "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80",
-            13,
+            $max_bytes,
             0,  # There is no way to allow this malformation
             $UTF8_GOT_OVERFLOW,
             $REPLACEMENT,
-            13,
+            $max_bytes,
             qr/overflows/
         ];
 }
@@ -1182,20 +1185,20 @@ else {
             (isASCII)
              ?              
"\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
              : 
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
-            (isASCII) ? 13 : 14,
+            $max_bytes,
             $UTF8_ALLOW_LONG, $UTF8_GOT_LONG,
             0,   # NUL
-            (isASCII) ? 13 : 14,
+            $max_bytes,
             qr/overlong/,
         ],
         [ "overlong malformation, highest max-byte",
             (isASCII)    # 2**36-1 on ASCII; 2**30-1 on EBCDIC
              ?              
"\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf"
              : 
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"),
-            (isASCII) ? 13 : 14,
+            $max_bytes,
             $UTF8_ALLOW_LONG, $UTF8_GOT_LONG,
             (isASCII) ? 0xFFFFFFFFF : 0x3FFFFFFF,
-            (isASCII) ? 13 : 14,
+            $max_bytes,
             qr/overlong/,
         ];
 
@@ -1203,11 +1206,11 @@ else {
         push @malformations,
         [ "overflow malformation",
             
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"),
-            14,
+            $max_bytes,
             0,  # There is no way to allow this malformation
             $UTF8_GOT_OVERFLOW,
             $REPLACEMENT,
-            14,
+            $max_bytes,
             qr/overflows/
         ];
     }
@@ -1217,11 +1220,11 @@ else {
                (isASCII)
                 ?              
"\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"
                 : 
I8_to_native("\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
-                (isASCII) ? 13 : 14,
+                $max_bytes,
                 0,  # There is no way to allow this malformation
                 $UTF8_GOT_OVERFLOW,
                 $REPLACEMENT,
-                (isASCII) ? 13 : 14,
+                $max_bytes,
                 qr/overflows/
             ];
     }
@@ -1725,7 +1728,7 @@ my @tests = (
         # 32-bit machines
         $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT,
         $UTF8_GOT_ABOVE_31_BIT,
-        'utf8', 0x80000000, (isASCII) ? 7 :14,
+        'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes,
         nonportable_regex(0x80000000)
     ],
     [ "requires at least 32 bits, and use SUPER-type flags, instead of 
ABOVE_31_BIT",
@@ -1733,7 +1736,7 @@ my @tests = (
          ? "\xfe\x82\x80\x80\x80\x80\x80"
          : 
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
         $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER,
-        'utf8', 0x80000000, (isASCII) ? 7 :14,
+        'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes,
         nonportable_regex(0x80000000)
     ],
     [ "overflow with warnings/disallow for more than 31 bits",
@@ -1756,7 +1759,7 @@ my @tests = (
         $UTF8_DISALLOW_ABOVE_31_BIT,
         $UTF8_GOT_ABOVE_31_BIT,
         'utf8', 0,
-        (! isASCII) ? 14 : ($is64bit) ? 13 : 7,
+        (! isASCII) ? $max_bytes : ($is64bit) ? $max_bytes : 7,   # XXX
         qr/overflows/
     ],
 );
@@ -1770,7 +1773,7 @@ if ($is64bit) {
             : 
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
             $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT,
             $UTF8_GOT_ABOVE_31_BIT,
-            'utf8', 0x1000000000, (isASCII) ? 13 : 14,
+            'utf8', 0x1000000000, $max_bytes,
             qr/and( is)? not portable/
         ];
     if (! isASCII) {
@@ -1779,35 +1782,35 @@ if ($is64bit) {
                 
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
                 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT,
                 $UTF8_GOT_ABOVE_31_BIT,
-                'utf8', 0x800000000, 14,
+                'utf8', 0x800000000, $max_bytes,
                 nonportable_regex(0x80000000)
             ],
             [ "requires at least 32 bits",
                 
I8_to_native("\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
                 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT,
                 $UTF8_GOT_ABOVE_31_BIT,
-                'utf8', 0x10000000000, 14,
+                'utf8', 0x10000000000, $max_bytes,
                 nonportable_regex(0x10000000000)
             ],
             [ "requires at least 32 bits",
                 
I8_to_native("\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
                 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT,
                 $UTF8_GOT_ABOVE_31_BIT,
-                'utf8', 0x200000000000, 14,
+                'utf8', 0x200000000000, $max_bytes,
                 nonportable_regex(0x20000000000)
             ],
             [ "requires at least 32 bits",
                 
I8_to_native("\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
                 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT,
                 $UTF8_GOT_ABOVE_31_BIT,
-                'utf8', 0x4000000000000, 14,
+                'utf8', 0x4000000000000, $max_bytes,
                 nonportable_regex(0x4000000000000)
             ],
             [ "requires at least 32 bits",
                 
I8_to_native("\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
                 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT,
                 $UTF8_GOT_ABOVE_31_BIT,
-                'utf8', 0x80000000000000, 14,
+                'utf8', 0x80000000000000, $max_bytes,
                 nonportable_regex(0x80000000000000)
             ],
             [ "requires at least 32 bits",
@@ -1815,7 +1818,7 @@ if ($is64bit) {
                    #IBM-1047  
\xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
                 $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT,
                 $UTF8_GOT_ABOVE_31_BIT,
-                'utf8', 0x1000000000000000, 14,
+                'utf8', 0x1000000000000000, $max_bytes,
                 nonportable_regex(0x1000000000000000)
             ];
     }
@@ -1982,10 +1985,10 @@ foreach my $test (@tests) {
 
                         foreach my $overlong ("", "overlong") {
 
-                            # Our hard-coded overlong starts with \xFE, so
+                            # If we're already at the longest possible, we
+                            # can't create an overlong (which would be longer)
                             # can't handle anything larger.
-                            next if $overlong
-                            && ord native_to_I8(substr($bytes, 0, 1)) >= 0xFE;
+                            next if $overlong && $expected_len >= $max_bytes;
 
                             my @malformations;
                             my @expected_errors;
@@ -2026,12 +2029,13 @@ foreach my $test (@tests) {
                                     # overlong sequence.  This should evaluate
                                     # to the exact same code point as the
                                     # original.
-                                    $this_bytes = "\xfe"
-                                             . (I8_to_native(chr 
$first_continuation)
-                                                  x ( 6 - length($this_bytes)))
-                                             . $this_bytes;
+                                    $this_bytes
+                                    = I8_to_native("\xff")
+                                    . (I8_to_native(chr $first_continuation)
+                                       x ( $max_bytes - 1 - 
length($this_bytes)))
+                                    . $this_bytes;
                                     $this_length = length($this_bytes);
-                                    $this_expected_len = 7;
+                                    $this_expected_len = $max_bytes;
                                     push @expected_errors, $UTF8_GOT_LONG;
                                 }
                                 if ($malformations_name =~ /short/) {
@@ -2069,10 +2073,10 @@ foreach my $test (@tests) {
                                           || $malformations_name;
                             my $this_name = "utf8n_to_uvchr_error() $testname: 
"
                                                         . (($disallow_flag)
-                                                            ? 'disallowed'
-                                                            : $disallowed
-                                                            ? $disallowed
-                                                            : 'allowed');
+                                                           ? 'disallowed'
+                                                           : $disallowed
+                                                             ? $disallowed
+                                                             : 'allowed');
                             $this_name .= ", $eval_warn";
                             $this_name .= ", " . (($warn_flag)
                                                 ? 'with warning flag'
@@ -2163,7 +2167,7 @@ foreach my $test (@tests) {
                                         }
                                     }
                                     fail("Expected '$malformation' warning"
-                                       . "but didn't get it");
+                                       . " but didn't get it");
 
                                 }
                             }

--
Perl5 Master Repository

Reply via email to