In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/679f623f366bc81022b7c77de4bc4302828303c6?hp=26c2b24e747153620ae90a402ebd0dab176e803d>
- Log ----------------------------------------------------------------- commit 679f623f366bc81022b7c77de4bc4302828303c6 Author: Karl Williamson <[email protected]> Date: Mon May 18 11:16:18 2015 -0600 lib/utf8.t: EBCDIC fixes Some of the test chose code points that did not match its assumptions as to their classifications. And some of the tests were extended to work on 1047 EBCDIC M lib/utf8.t commit 74f6d97dc11c4dcc2f63c8708c2f7b918f69bd37 Author: Karl Williamson <[email protected]> Date: Mon May 18 10:18:51 2015 -0600 t/op/split.t: Generalize for EBCDIC Whatever the bug was that caused some of these to need to be skipped, it's gone now. Also some of the tests are easily adapted to work on EBCDIC platforms. M t/op/split.t commit dc83bf8e644104953efa0f771ec775aba638af5a Author: Karl Williamson <[email protected]> Date: Wed May 6 21:05:19 2015 -0600 perlguts: Wrap macro name with C<> M pod/perlguts.pod ----------------------------------------------------------------------- Summary of changes: lib/utf8.t | 55 ++++++++++++++++++++++++++++++++++++++----------------- pod/perlguts.pod | 2 +- t/op/split.t | 19 +++++-------------- 3 files changed, 44 insertions(+), 32 deletions(-) diff --git a/lib/utf8.t b/lib/utf8.t index c09f96e..d90361d 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -124,10 +124,10 @@ no utf8; # Ironic, no? my $progfile = 'utf' . $$; END {unlink_all $progfile} - # If I'm right 60 is '>' in ASCII, ' ' in EBCDIC - # 173 is not punctuation in either ASCII or EBCDIC + # 64 is '@' in ASCII, ' ' in EBCDIC + # 193 is not punctuation in either ASCII nor EBCDIC my (@char); - foreach (60, 173, 257, 65532) { + foreach (64, 193, 257, 65532) { my $char = chr $_; utf8::encode($char); # I don't want to use map {ord} and I've no need to hardcode the UTF @@ -143,11 +143,11 @@ no utf8; # Ironic, no? # Now we've done all the UTF8 munching hopefully we're safe my @tests = ( ['check our detection program works', - 'my @a = ("'.chr(60).'\x2A", ""); $b = show @a', qr/^>60,42<><$/], + 'my @a = ("'.chr(64).'\x2A", ""); $b = show @a', qr/^>64,42<><$/], ['check literal 8 bit input', - '$a = "' . chr (173) . '"; $b = show $a', qr/^>173<$/], + '$a = "' . chr (193) . '"; $b = show $a', qr/^>193<$/], ['check no utf8; makes no change', - 'no utf8; $a = "' . chr (173) . '"; $b = show $a', qr/^>173<$/], + 'no utf8; $a = "' . chr (193) . '"; $b = show $a', qr/^>193<$/], # Now we do the real byte sequences that are valid UTF8 (map { ["the utf8 sequence for chr $_->[0]", @@ -270,15 +270,28 @@ BANG # "my" variable $strict::VERSION can't be in a package # SKIP: { - skip("Embedded UTF-8 does not work in EBCDIC", 1) if $::IS_EBCDIC; - ok('' eq runperl(prog => <<'CODE'), "change #17928"); - my $code = qq{ my \$\xe3\x83\x95\xe3\x83\xbc = 5; }; - { - use utf8; - eval $code; - print $@ if $@; + skip("Haven't bothered to port this to EBCDIC non-1047", 1) if $::IS_EBCDIC + && ord '^' != 95; + if ($::IS_ASCII) { + ok('' eq runperl(prog => <<'CODE'), "change #17928"); + my $code = qq{ my \$\xe3\x83\x95\xe3\x83\xbc = 5; }; + { + use utf8; + eval $code; + print $@ if $@; + } +CODE } + else { + ok('' eq runperl(prog => <<'CODE'), "change #17928"); + my $code = qq{ my \$\xCE\x47\x64\xCE\x48\x70 = 5; }; + { + use utf8; + eval $code; + print $@ if $@; + } CODE + } } { @@ -324,11 +337,19 @@ END } SKIP: { - skip("Embedded UTF-8 does not work in EBCDIC", 1) if $::IS_EBCDIC; + skip("Haven't bothered to port this to EBCDIC non-1047", 1) if $::IS_EBCDIC + && ord '^' != 95; use utf8; - is eval qq{q \xc3\xbc test \xc3\xbc . qq\xc2\xb7 test \xc2\xb7}, - ' test test ', - "utf8 quote delimiters [perl #16823]"; + if ($::IS_ASCII) { + is eval qq{q \xc3\xbc test \xc3\xbc . qq\xc2\xb7 test \xc2\xb7}, + ' test test ', + "utf8 quote delimiters [perl #16823]"; + } + else { + is eval qq{q \x8B\x70 test \x8B\x70 . qq\x80\x66 test \x80\x66}, + ' test test ', + "utf8 quote delimiters [perl #16823]"; + } } # Test the "internals". diff --git a/pod/perlguts.pod b/pod/perlguts.pod index a58d7ad..a667a10 100644 --- a/pod/perlguts.pod +++ b/pod/perlguts.pod @@ -2946,7 +2946,7 @@ Since just passing an SV to an XS function and copying the data of the SV is not enough to copy the UTF8 flags, even less right is just passing a S<C<char *>> to an XS function. -For full generality, use the L<perlapi/DO_UTF8> macro to see if the +For full generality, use the L<C<DO_UTF8>|perlapi/DO_UTF8> macro to see if the string in an SV is to be I<treated> as UTF-8. This takes into account if the call to the XS function is being made from within the scope of L<S<C<use bytes>>|bytes>. If so, the underlying bytes that comprise the diff --git a/t/op/split.t b/t/op/split.t index bcefcfa..fb73271 100644 --- a/t/op/split.t +++ b/t/op/split.t @@ -3,6 +3,7 @@ BEGIN { chdir 't' if -d 't'; require './test.pl'; + require './charset_tools.pl'; set_up_inc('../lib'); } @@ -264,15 +265,11 @@ is($cnt, scalar(@ary)); { my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; - SKIP: { - if ($::IS_EBCDIC) { - skip("EBCDIC", 1); - } else { + { # bug id 20000426.003 my ($a, $b, $c) = split(/\x40/, $s); ok($a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a); - } } my ($a, $b) = split(/\x{100}/, $s); @@ -281,13 +278,9 @@ is($cnt, scalar(@ary)); my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); ok($a eq "\x20\x40" && $b eq "\x40\x20"); - SKIP: { - if ($::IS_EBCDIC) { - skip("EBCDIC", 1); - } else { + { my ($a, $b) = split(/\x40\x{80}/, $s); ok($a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"); - } } my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); @@ -492,16 +485,14 @@ is($cnt, scalar(@ary)); my @results; my $expr; $expr = ' a b c '; - @results = split "\x20", $expr if $::IS_ASCII; - @results = split "\x40", $expr if $::IS_EBCDIC; + @results = split uni_to_native("\x20"), $expr; is @results, 3, "RT #116086: split on string of single hex-20: captured 3 elements"; is $results[0], 'a', "RT #116086: split on string of single hex-20: first element is non-empty"; $expr = " a \tb c "; - @results = split "\x20", $expr if $::IS_ASCII; - @results = split "\x40", $expr if $::IS_EBCDIC; + @results = split uni_to_native("\x20"), $expr; is @results, 3, "RT #116086: split on string of single hex-20: captured 3 elements"; is $results[0], 'a', -- Perl5 Master Repository
