In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/4787c1a50fd80944d298d2934b6ff28355efdb3d?hp=6de2dd46140d0d3ab6813e26940d7b74418b0260>
- Log ----------------------------------------------------------------- commit 4787c1a50fd80944d298d2934b6ff28355efdb3d Author: Karl Williamson <[email protected]> Date: Tue Dec 6 22:28:59 2016 -0700 lib/locale.t: Up the permissible failure % for os390 Recent changes to the os390 locales have caused the number of failing ones to exceed the cutoff of acceptable bad locales before the .t shows failure. os390 has more problematic locales than typical, because it has locales for various IBM code pages, and some of these locales are incompatible with perl. For example there is a CP 037 locale available which is supposed to allow os390 (running CP 1047) to emulate machines where CP 037 is native. But these two code pages have different positions for various critical characters that perl assumes have a particular ordinal value, e.g. '['. That being placed differently in a locale means that patterns with bracketed character classes won't work in perl on that locale, and the locales fail locale.t This commit changes the acceptable failing rate to 10% (up from 5% most everywhere else) on os390, reflecting the higher percentage of base incompatibilities. M lib/locale.t commit 88d057adfcdfd2053ec0c2f917724a34f52b436f Author: Karl Williamson <[email protected]> Date: Tue Dec 6 21:34:32 2016 -0700 t/test.pl: Clarify syntax for tests in t/lib/* M t/test.pl commit 2db24202937a2afb49936f6d94ba13f35a277fba Author: Karl Williamson <[email protected]> Date: Tue Dec 6 12:12:22 2016 -0700 utf8.c: Remove unused variable The called function can cope with a NULL parameter here, so no need to use a dummy one. M utf8.c commit b9a3b632ca566d1242692073ca3a9c5c0d6ced43 Author: Karl Williamson <[email protected]> Date: Tue Dec 6 12:10:34 2016 -0700 APItest/t/utf8.t: Fix EBCDIC test One can tell if this code point is above Unicode from just the first byte, unlike what it was specifying. M ext/XS-APItest/t/utf8.t commit 5dcbe30a8e534d19d84680e2b83e1d9a8b057ef5 Author: Karl Williamson <[email protected]> Date: Sat Dec 3 12:01:15 2016 -0700 toke.c: Swap 'if' and 'else' clauses It is easier to read if the trivial case comes before the much longer case, so complement the sense of this 'if', and swap the current 'else' and 'if' clauses M toke.c commit 96970a3c8cbe5ad489ce831380f0e208315ea651 Author: Karl Williamson <[email protected]> Date: Mon Nov 28 18:01:42 2016 -0700 APItest/t/handy.t: Slightly simplify This combines two adjacent 'if' blocks into a single one, as the 'if' clause is identical in both. M ext/XS-APItest/t/handy.t ----------------------------------------------------------------------- Summary of changes: ext/XS-APItest/t/handy.t | 4 ---- ext/XS-APItest/t/utf8.t | 2 +- lib/locale.t | 6 +++++- t/test.pl | 5 +++-- toke.c | 17 +++++++++++------ utf8.c | 7 +++---- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t index a85f701e99..acf3af5b4a 100644 --- a/ext/XS-APItest/t/handy.t +++ b/ext/XS-APItest/t/handy.t @@ -242,10 +242,6 @@ foreach my $name (sort keys %properties) { my $truth = truth($matches && (utf8::native_to_unicode($i) < 128 || $i > 255)); is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == $truth (C locale)"); } - } - - if ($name ne 'vertws' && defined $utf8_locale) { - use locale; POSIX::setlocale( &POSIX::LC_ALL, $utf8_locale); $ret = truth eval "test_is${function}_LC_utf8('$char')"; diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index 50e1359ff6..51997118a6 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -1769,7 +1769,7 @@ my @tests = ( $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER, 'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes, - (isASCII) ? 1 : 8, + 1, nonportable_regex(0x80000000) ], [ "overflow with warnings/disallow for more than 31 bits", diff --git a/lib/locale.t b/lib/locale.t index b49197b240..da8d10ecb0 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -38,9 +38,13 @@ our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; # fail them unless at least this percentage of the tested locales fail. # On AIX machines, many locales call a no-break space a graphic. # (There aren't 1000 locales currently in existence, so 99.9 works) +# EBCDIC os390 has more locales fail than normal, because it has locales that +# move various critical characters like '['. my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix) ? 99.9 - : 5; + : ($^O =~ / ^ ( os390 ) $ /ix) + ? 10 + : 5; # The list of test numbers of the problematic tests. my %problematical_tests; diff --git a/t/test.pl b/t/test.pl index de2ada0328..98e7632e92 100644 --- a/t/test.pl +++ b/t/test.pl @@ -1078,8 +1078,9 @@ sub fresh_perl_like { # Each program is source code to run followed by an "EXPECT" line, followed # by the expected output. # -# The code to run may begin with a command line switch such as -w or -0777 -# (alphanumerics only), and may contain (note the '# ' on each): +# The first line of the code to run may be a command line switch such as -wE +# or -0777 (alphanumerics only; only one cluster, beginning with a minus is +# allowed). Later lines may contain (note the '# ' on each): # # TODO reason for todo # # SKIP reason for skip # # SKIP ?code to test if this should be skipped diff --git a/toke.c b/toke.c index 936eab5110..841b5f90ee 100644 --- a/toke.c +++ b/toke.c @@ -3775,9 +3775,17 @@ S_scan_const(pTHX_ char *start) } /* end if (backslash) */ default_action: - /* If we started with encoded form, or already know we want it, - then encode the next character */ - if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { + /* Just copy the input to the output, though we may have to convert + * to/from UTF-8. + * + * If the input has the same representation in UTF-8 as not, it will be + * a single byte, and we don't care about UTF8ness; or if neither + * source nor output is UTF-8, just copy the byte */ + if (NATIVE_BYTE_IS_INVARIANT((U8)(*s)) || (! this_utf8 && ! has_utf8)) + { + *d++ = *s++; + } + else { STRLEN len = 1; /* One might think that it is wasted effort in the case of the @@ -3812,9 +3820,6 @@ S_scan_const(pTHX_ char *start) d = (char*)uvchr_to_utf8((U8*)d, nextuv); } - else { - *d++ = *s++; - } } /* while loop to process each character */ /* terminate the string and set up the sv */ diff --git a/utf8.c b/utf8.c index 074f738806..8ab38eaf06 100644 --- a/utf8.c +++ b/utf8.c @@ -4651,7 +4651,6 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) return FALSE; } if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) { - STRLEN char_len; if (UNLIKELY(UTF8_IS_SUPER(s, e))) { if ( ckWARN_d(WARN_NON_UNICODE) || ( ckWARN_d(WARN_DEPRECATED) @@ -4671,7 +4670,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) #endif )) { /* A side effect of this function will be to warn */ - (void) utf8n_to_uvchr(s, e - s, &char_len, UTF8_WARN_SUPER); + (void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_SUPER); ok = FALSE; } } @@ -4680,7 +4679,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) /* This has a different warning than the one the called * function would output, so can't just call it, unlike we * do for the non-chars and above-unicodes */ - UV uv = utf8_to_uvchr_buf(s, e, &char_len); + UV uv = utf8_to_uvchr_buf(s, e, NULL); Perl_warner(aTHX_ packWARN(WARN_SURROGATE), "Unicode surrogate U+%04" UVXf " is illegal in UTF-8", uv); ok = FALSE; @@ -4688,7 +4687,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) } else if (UNLIKELY(UTF8_IS_NONCHAR(s, e)) && (ckWARN_d(WARN_NONCHAR))) { /* A side effect of this function will be to warn */ - (void) utf8n_to_uvchr(s, e - s, &char_len, UTF8_WARN_NONCHAR); + (void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_NONCHAR); ok = FALSE; } } -- Perl5 Master Repository
