In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/4787c1a50fd80944d298d2934b6ff28355efdb3d?hp=6de2dd46140d0d3ab6813e26940d7b74418b0260>

- Log -----------------------------------------------------------------
commit 4787c1a50fd80944d298d2934b6ff28355efdb3d
Author: Karl Williamson <[email protected]>
Date:   Tue Dec 6 22:28:59 2016 -0700

    lib/locale.t: Up the permissible failure % for os390
    
    Recent changes to the os390 locales have caused the number of failing
    ones to exceed the cutoff of acceptable bad locales before the .t shows
    failure.
    
    os390 has more problematic locales than typical, because it has locales
    for various IBM code pages, and some of these locales are incompatible
    with perl.  For example there is a CP 037 locale available which is
    supposed to allow os390 (running CP 1047) to emulate machines where CP
    037 is native.  But these two code pages have different positions for
    various critical characters that perl assumes have a particular ordinal
    value, e.g. '['.  That being placed differently in a locale means that
    patterns with bracketed character classes won't work in perl on that
    locale, and the locales fail locale.t
    
    This commit changes the acceptable failing rate to 10% (up from 5% most
    everywhere else) on os390, reflecting the higher percentage of base
    incompatibilities.

M       lib/locale.t

commit 88d057adfcdfd2053ec0c2f917724a34f52b436f
Author: Karl Williamson <[email protected]>
Date:   Tue Dec 6 21:34:32 2016 -0700

    t/test.pl: Clarify syntax for tests in t/lib/*

M       t/test.pl

commit 2db24202937a2afb49936f6d94ba13f35a277fba
Author: Karl Williamson <[email protected]>
Date:   Tue Dec 6 12:12:22 2016 -0700

    utf8.c: Remove unused variable
    
    The called function can cope with a NULL parameter here, so no need to
    use a dummy one.

M       utf8.c

commit b9a3b632ca566d1242692073ca3a9c5c0d6ced43
Author: Karl Williamson <[email protected]>
Date:   Tue Dec 6 12:10:34 2016 -0700

    APItest/t/utf8.t: Fix EBCDIC test
    
    One can tell if this code point is above Unicode from just the first
    byte, unlike what it was specifying.

M       ext/XS-APItest/t/utf8.t

commit 5dcbe30a8e534d19d84680e2b83e1d9a8b057ef5
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 3 12:01:15 2016 -0700

    toke.c: Swap 'if' and 'else' clauses
    
    It is easier to read if the trivial case comes before the much longer
    case, so complement the sense of this 'if', and swap the current 'else'
    and 'if' clauses

M       toke.c

commit 96970a3c8cbe5ad489ce831380f0e208315ea651
Author: Karl Williamson <[email protected]>
Date:   Mon Nov 28 18:01:42 2016 -0700

    APItest/t/handy.t: Slightly simplify
    
    This combines two adjacent 'if' blocks into a single one, as the 'if'
    clause is identical in both.

M       ext/XS-APItest/t/handy.t
-----------------------------------------------------------------------

Summary of changes:
 ext/XS-APItest/t/handy.t |  4 ----
 ext/XS-APItest/t/utf8.t  |  2 +-
 lib/locale.t             |  6 +++++-
 t/test.pl                |  5 +++--
 toke.c                   | 17 +++++++++++------
 utf8.c                   |  7 +++----
 6 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t
index a85f701e99..acf3af5b4a 100644
--- a/ext/XS-APItest/t/handy.t
+++ b/ext/XS-APItest/t/handy.t
@@ -242,10 +242,6 @@ foreach my $name (sort keys %properties) {
                 my $truth = truth($matches && (utf8::native_to_unicode($i) < 
128 || $i > 255));
                 is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == 
$truth (C locale)");
             }
-        }
-
-        if ($name ne 'vertws' && defined $utf8_locale) {
-            use locale;
 
             POSIX::setlocale( &POSIX::LC_ALL, $utf8_locale);
             $ret = truth eval "test_is${function}_LC_utf8('$char')";
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index 50e1359ff6..51997118a6 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -1769,7 +1769,7 @@ my @tests = (
         $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER,
         'utf8', 0x80000000,
         (isASCII) ? 7 : $max_bytes,
-        (isASCII) ? 1 : 8,
+        1,
         nonportable_regex(0x80000000)
     ],
     [ "overflow with warnings/disallow for more than 31 bits",
diff --git a/lib/locale.t b/lib/locale.t
index b49197b240..da8d10ecb0 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -38,9 +38,13 @@ our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
 # fail them unless at least this percentage of the tested locales fail.
 # On AIX machines, many locales call a no-break space a graphic.
 # (There aren't 1000 locales currently in existence, so 99.9 works)
+# EBCDIC os390 has more locales fail than normal, because it has locales that
+# move various critical characters like '['.
 my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix)
                                      ? 99.9
-                                     : 5;
+                                     : ($^O =~ / ^ ( os390 ) $ /ix)
+                                       ? 10
+                                       : 5;
 
 # The list of test numbers of the problematic tests.
 my %problematical_tests;
diff --git a/t/test.pl b/t/test.pl
index de2ada0328..98e7632e92 100644
--- a/t/test.pl
+++ b/t/test.pl
@@ -1078,8 +1078,9 @@ sub fresh_perl_like {
 # Each program is source code to run followed by an "EXPECT" line, followed
 # by the expected output.
 #
-# The code to run may begin with a command line switch such as -w or -0777
-# (alphanumerics only), and may contain (note the '# ' on each):
+# The first line of the code to run may be a command line switch such as -wE
+# or -0777 (alphanumerics only; only one cluster, beginning with a minus is
+# allowed).  Later lines may contain (note the '# ' on each):
 #   # TODO reason for todo
 #   # SKIP reason for skip
 #   # SKIP ?code to test if this should be skipped
diff --git a/toke.c b/toke.c
index 936eab5110..841b5f90ee 100644
--- a/toke.c
+++ b/toke.c
@@ -3775,9 +3775,17 @@ S_scan_const(pTHX_ char *start)
        } /* end if (backslash) */
 
     default_action:
-       /* If we started with encoded form, or already know we want it,
-          then encode the next character */
-       if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
+        /* Just copy the input to the output, though we may have to convert
+         * to/from UTF-8.
+         *
+         * If the input has the same representation in UTF-8 as not, it will be
+         * a single byte, and we don't care about UTF8ness; or if neither
+         * source nor output is UTF-8, just copy the byte */
+        if (NATIVE_BYTE_IS_INVARIANT((U8)(*s)) || (! this_utf8 && ! has_utf8))
+        {
+           *d++ = *s++;
+        }
+        else {
            STRLEN len  = 1;
 
            /* One might think that it is wasted effort in the case of the
@@ -3812,9 +3820,6 @@ S_scan_const(pTHX_ char *start)
 
            d = (char*)uvchr_to_utf8((U8*)d, nextuv);
        }
-       else {
-           *d++ = *s++;
-       }
     } /* while loop to process each character */
 
     /* terminate the string and set up the sv */
diff --git a/utf8.c b/utf8.c
index 074f738806..8ab38eaf06 100644
--- a/utf8.c
+++ b/utf8.c
@@ -4651,7 +4651,6 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
            return FALSE;
        }
        if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) {
-           STRLEN char_len;
            if (UNLIKELY(UTF8_IS_SUPER(s, e))) {
                 if (   ckWARN_d(WARN_NON_UNICODE)
                     || (   ckWARN_d(WARN_DEPRECATED)
@@ -4671,7 +4670,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
 #endif
                 )) {
                     /* A side effect of this function will be to warn */
-                    (void) utf8n_to_uvchr(s, e - s, &char_len, 
UTF8_WARN_SUPER);
+                    (void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_SUPER);
                     ok = FALSE;
                 }
            }
@@ -4680,7 +4679,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
                     /* This has a different warning than the one the called
                      * function would output, so can't just call it, unlike we
                      * do for the non-chars and above-unicodes */
-                   UV uv = utf8_to_uvchr_buf(s, e, &char_len);
+                   UV uv = utf8_to_uvchr_buf(s, e, NULL);
                    Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
                        "Unicode surrogate U+%04" UVXf " is illegal in UTF-8", 
uv);
                    ok = FALSE;
@@ -4688,7 +4687,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
            }
            else if (UNLIKELY(UTF8_IS_NONCHAR(s, e)) && 
(ckWARN_d(WARN_NONCHAR))) {
                 /* A side effect of this function will be to warn */
-                (void) utf8n_to_uvchr(s, e - s, &char_len, UTF8_WARN_NONCHAR);
+                (void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_NONCHAR);
                ok = FALSE;
            }
        }

--
Perl5 Master Repository

Reply via email to