In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/aadb82e0ec97e7fc243dcacb81423f82c41eb512?hp=bd5630ab7f091ed4c39efcdbb5b9433bd6cf4fe3>

- Log -----------------------------------------------------------------
commit aadb82e0ec97e7fc243dcacb81423f82c41eb512
Author: Karl Williamson <k...@cpan.org>
Date:   Sat Oct 15 11:02:04 2016 -0600

    PATCH: [perl #129891] t/op/utf8decode.t failing
    
    This bug is a result of 32-bit vs 64-bit words, and is a problem in the
    test file and not the underlying code.
    
    The blamed commit changed things so that is a UTF-8 sequence has
    multiple malformations, a diagnostic is generated for each.  Some of the
    tests in utf8decode.t overflow on 32-bit words, but not 64.  The
    solution is to change the .t to also look for the extra overflow
    warnings on 32 bit machines.

M       t/op/utf8decode.t

commit 5ec712b17f589b0efc75ccd871d07947dd474a85
Author: Karl Williamson <k...@cpan.org>
Date:   Sat Oct 15 11:00:57 2016 -0600

    utf8.c: Silence a compiler warning
    
    Some compilers wrongly warn that this is used uninitialized.

M       utf8.c
-----------------------------------------------------------------------

Summary of changes:
 t/op/utf8decode.t | 44 +++++++++++++++++++++++++++++++++++---------
 utf8.c            |  3 ++-
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index 8de9154..90c233a 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -14,6 +14,8 @@ $|=1;
     my $ordwide = ord($wide);
     printf "# under use bytes ord(v256) = 0x%02x\n", $ordwide;
     skip_all('UTF-8-centric tests (not valid for UTF-EBCDIC)') if $ordwide == 
140;
+    # This could be ported to EBCDIC, but a lot of trouble.
+    # ext/XS-APItest/t/utf8.t contains comprehensive tests for both platforms
 
     if ($ordwide != 196) {
        printf "# v256 starts with 0x%02x\n", $ordwide;
@@ -22,12 +24,22 @@ $|=1;
 
 no utf8;
 
+my $is64bit = length sprintf("%x", ~0) > 8;
+
 foreach (<DATA>) {
     if (/^(?:\d+(?:\.\d+)?)\s/ || /^#/) {
        # print "# $_\n";
     } elsif (my ($id, $okay, $Unicode, $byteslen, $hex, $charslen, $experr)
             = /^(\d+\.\d+\.\d+[bu]?)   # ID
-               \s+(y|n|N-?\d+)         # expect to pass or fail
+               \s+(y|n|N-?\d+(?:,\d+)?)  # expect to pass or fail
+                                          # 'n' means expect one diagnostic
+                                          # 'N\d+'     means expect this
+                                          #            number of diagnostics
+                                          # 'N\d+,\d+' means expect the first
+                                          #            number of diagnostics
+                                          #            on a 32-bit system; the
+                                          #            second number on a
+                                          #            64-bit one
                 \s+([0-9a-f]{1,8}(?:,[0-9a-f]{1,8})*|-) # Unicode characters
                 \s+(\d+)                # number of octets
                 \s+([0-9a-f]{2}(?::[0-9a-f]{2})*)       # octets in hex
@@ -49,10 +61,12 @@ foreach (<DATA>) {
            isnt($experr, '', "Expected warning for $id provided");
            warnings_like(sub {unpack 'C0U*', $octets}, [qr/$experr/],
                         "Only expected warning for $id");
-       } elsif ($okay !~ /^N(-?\d+)/) {
+       } elsif ($okay !~ /^N-?(\d+)(?:,(\d+))?/) {
            is($okay, 'n', "Confused test description for $id");
        } else {
-           my $expect = $1;
+           my $expect32 = $1;
+            my $expect64 = $2 // $expect32;
+            my $expect = ($is64bit) ? $expect64 : $expect32;
            my @warnings;
 
            {
@@ -63,16 +77,26 @@ foreach (<DATA>) {
                unpack 'C0U*', $octets;
            }
 
+           unless (is(scalar @warnings, $expect, "Expected number of warnings 
for $id seen")) {
+                note(join "", "Got:\n", @warnings);
+            }
            isnt($experr, '', "Expected first warning for $id provided");
-           like($warnings[0], qr/$experr/, "Expected first warning for $id 
seen");
+
+            my $message;
+            if ($expect64 != $expect32 && ! $is64bit) {
+                like($warnings[0], qr/overflow/, "overflow warning for $id 
seen");
+                shift @warnings;
+                $message = "Expected first warning after overflow for $id 
seen";
+            }
+            else {
+                $message = "Expected first warning for $id seen";
+            }
+           like($warnings[0], qr/$experr/, $message);
            local $::TODO;
            if ($expect < 0) {
                $expect = -$expect;
                $::TODO = "Markus Kuhn states that $expect invalid sequences 
should be signalled";
            }
-           unless (is(scalar @warnings, $expect, "Expected number of warnings 
for $id seen")) {
-                note(join "", "Got:\n", @warnings);
-            }
 
        }
     } else {
@@ -85,6 +109,8 @@ done_testing();
 # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
 # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
 # version dated 2015-08-28.
+#
+# See the code that parses these lines for comments as to the column meanings
 
 __DATA__
 1      Correct UTF-8
@@ -143,8 +169,8 @@ __DATA__
 3.4.1 N15 -    30      
c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf
       -       unexpected non-continuation byte 0xe0, immediately after start 
byte 0xc0
 3.5    Impossible bytes (but not with Perl's extended UTF-8)
 3.5.1 n -      1       fe      -       1 byte, need 7
-3.5.2 n -      1       ff      -       1 byte, need 13
-3.5.3 N5 -     4       fe:fe:ff:ff     -       byte 0xfe
+3.5.2 N2,1 -   1       ff      -       1 byte, need 13
+3.5.3 N8,5 -   4       fe:fe:ff:ff     -       byte 0xfe
 4      Overlong sequences
 4.1    Examples of an overlong ASCII character
 4.1.1 n -      2       c0:af   -       overlong
diff --git a/utf8.c b/utf8.c
index 729650d..fb3acad 100644
--- a/utf8.c
+++ b/utf8.c
@@ -997,7 +997,8 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
      * too short one.  Otherwise the first two are set to 's0' and 'send', and
      * the third not used at all */
     U8 * adjusted_s0 = (U8 *) s0;
-    U8 * adjusted_send;
+    U8 * adjusted_send = NULL;  /* (Initialized to silence compilers' wrong
+                                   warning) */
     UV uv_so_far = 0;   /* (Initialized to silence compilers' wrong warning) */
 
     PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_ERROR;

--
Perl5 Master Repository

Reply via email to