In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/7adaf5b2acaa653a831bb68ed91d8d205fbf9fb2?hp=8226f5083483c32f65ab0be1501060f802b10c04>
- Log ----------------------------------------------------------------- commit 7adaf5b2acaa653a831bb68ed91d8d205fbf9fb2 Author: Karl Williamson <[email protected]> Date: Mon May 18 10:47:50 2015 -0600 lib/utf8.t: Add some tests to stress EBCDIC The tests for the Latin1 \xFF aren't a fair test of UTF-8 on EBCDIC platforms, because it is generally a UTF-8 invariant character, so is the same regardless of being in UTF-8 or not. This adds some tests where the UTF-EBCDIC version is 2 bytes (as well as the UTF-8 version). M lib/utf8.t commit 900924803afbe9797cd65fc2e423c8d52b014139 Author: Karl Williamson <[email protected]> Date: Mon May 18 10:02:08 2015 -0600 t/base/lex.t: Use more standard test for EBCDIC This makes it easier to grep for these things. The typical test is for the ord("A"), not some other character. Since this is in t/base, it doesn't use helper scripts. M t/base/lex.t commit 02f1786bedc74672bb8402e240d9c430e0dbb799 Author: Karl Williamson <[email protected]> Date: Fri May 15 13:34:07 2015 -0600 t/op/tr.t: Clarify skip tests message M t/op/tr.t commit 2890b6e4f939b25ff5a7d8f0778bde87ac9e86db Author: Karl Williamson <[email protected]> Date: Tue Aug 18 19:49:02 2015 -0600 op/chr.t: Better skip message M t/op/chr.t commit 83bcbc6111ed334f239986aa6a0f00a47b523a0c Author: Karl Williamson <[email protected]> Date: Fri Apr 3 12:06:39 2015 -0600 Various .t files: Use globals to see if on EBCDIC These globals are already available; by using them instead of rolling our own, it makes it easer to grep for these kinds of instances. M lib/bytes.t M lib/utf8.t M t/op/chr.t M t/op/lex.t M t/op/oct.t M t/op/split.t M t/op/sub_lval.t M t/op/tr.t M t/op/vec.t M t/op/ver.t ----------------------------------------------------------------------- Summary of changes: lib/bytes.t | 6 +++--- lib/utf8.t | 58 +++++++++++++++++++++++++++++++++++++++------------------ t/base/lex.t | 2 +- t/op/chr.t | 4 +++- t/op/lex.t | 2 +- t/op/oct.t | 4 ++-- t/op/split.t | 4 ++-- t/op/sub_lval.t | 2 +- t/op/tr.t | 8 +++----- t/op/vec.t | 3 +-- t/op/ver.t | 26 +++++++++++++------------- 11 files changed, 70 insertions(+), 49 deletions(-) diff --git a/lib/bytes.t b/lib/bytes.t index 8fc535f..4e50ff3 100644 --- a/lib/bytes.t +++ b/lib/bytes.t @@ -33,20 +33,20 @@ utf8::encode(my $c2_utf8 = $c2); { use bytes; - if (ord('A') == 193) { # EBCDIC? + if ($::IS_EBCDIC) { # EBCDIC? is(ord($c), 0x8c, "ord under use bytes looks at the 1st byte"); } else { is(ord($c), 0xc4, "ord under use bytes looks at the 1st byte"); } is(length($c), 2, "length under use bytes looks at bytes"); is(bytes::length($c), 2, "bytes::length under use bytes looks at bytes"); - if (ord('A') == 193) { # EBCDIC? + if ($::IS_EBCDIC) { # EBCDIC? is(bytes::ord($c), 0x8c, "bytes::ord under use bytes looks at the 1st byte"); } else { is(bytes::ord($c), 0xc4, "bytes::ord under use bytes looks at the 1st byte"); } # In z/OS \x41,\x8c are the codepoints corresponding to \x80,\xc4 respectively under ASCII platform - if (ord('A') == 193) { # EBCDIC? + if ($::IS_EBCDIC) { # EBCDIC? is(bytes::substr($c, 0, 1), "\x8c", "bytes::substr under use bytes looks at bytes"); is(bytes::index($c, "\x41"), 1, "bytes::index under use bytes looks at bytes"); is(bytes::rindex($c, "\x8c"), 0, "bytes::rindex under use bytes looks at bytes"); diff --git a/lib/utf8.t b/lib/utf8.t index bf722f3..c09f96e 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -270,7 +270,7 @@ BANG # "my" variable $strict::VERSION can't be in a package # SKIP: { - skip("Embedded UTF-8 does not work in EBCDIC", 1) if ord("A") == 193; + skip("Embedded UTF-8 does not work in EBCDIC", 1) if $::IS_EBCDIC; ok('' eq runperl(prog => <<'CODE'), "change #17928"); my $code = qq{ my \$\xe3\x83\x95\xe3\x83\xbc = 5; }; { @@ -324,7 +324,7 @@ END } SKIP: { - skip("Embedded UTF-8 does not work in EBCDIC", 1) if ord("A") == 193; + skip("Embedded UTF-8 does not work in EBCDIC", 1) if $::IS_EBCDIC; use utf8; is eval qq{q \xc3\xbc test \xc3\xbc . qq\xc2\xb7 test \xc2\xb7}, ' test test ', @@ -336,88 +336,110 @@ SKIP: { { my $a = "A"; my $b = chr(0x0FF); - my $c = chr(0x100); + my $c = chr(0x0DF); # FF is invariant in many EBCDIC pages, so is not a + # fair test of 'beyond'; but DF is variant (in all + # supported EBCDIC pages so far), so make 2 'beyond' + # tests + my $d = chr(0x100); ok( utf8::valid($a), "utf8::valid basic"); ok( utf8::valid($b), "utf8::valid beyond"); - ok( utf8::valid($c), "utf8::valid unicode"); + ok( utf8::valid($c), "utf8::valid beyond"); + ok( utf8::valid($d), "utf8::valid unicode"); ok(!utf8::is_utf8($a), "!utf8::is_utf8 basic"); ok(!utf8::is_utf8($b), "!utf8::is_utf8 beyond"); - ok( utf8::is_utf8($c), "utf8::is_utf8 unicode"); + ok(!utf8::is_utf8($c), "!utf8::is_utf8 beyond"); + ok( utf8::is_utf8($d), "utf8::is_utf8 unicode"); is(utf8::upgrade($a), 1, "utf8::upgrade basic"); - if (ord('A') == 193) { # EBCDIC. + if ($::IS_EBCDIC) { # EBCDIC. is(utf8::upgrade($b), 1, "utf8::upgrade beyond"); } else { is(utf8::upgrade($b), 2, "utf8::upgrade beyond"); } - is(utf8::upgrade($c), 2, "utf8::upgrade unicode"); + is(utf8::upgrade($c), 2, "utf8::upgrade beyond"); + is(utf8::upgrade($d), 2, "utf8::upgrade unicode"); is($a, "A", "basic"); is($b, "\xFF", "beyond"); - is($c, "\x{100}", "unicode"); + is($c, "\xDF", "beyond"); + is($d, "\x{100}", "unicode"); ok( utf8::valid($a), "utf8::valid basic"); ok( utf8::valid($b), "utf8::valid beyond"); - ok( utf8::valid($c), "utf8::valid unicode"); + ok( utf8::valid($c), "utf8::valid beyond"); + ok( utf8::valid($d), "utf8::valid unicode"); ok( utf8::is_utf8($a), "utf8::is_utf8 basic"); ok( utf8::is_utf8($b), "utf8::is_utf8 beyond"); - ok( utf8::is_utf8($c), "utf8::is_utf8 unicode"); + ok( utf8::is_utf8($c), "utf8::is_utf8 beyond"); + ok( utf8::is_utf8($d), "utf8::is_utf8 unicode"); is(utf8::downgrade($a), 1, "utf8::downgrade basic"); is(utf8::downgrade($b), 1, "utf8::downgrade beyond"); + is(utf8::downgrade($c), 1, "utf8::downgrade beyond"); is($a, "A", "basic"); is($b, "\xFF", "beyond"); + is($c, "\xDF", "beyond"); ok( utf8::valid($a), "utf8::valid basic"); ok( utf8::valid($b), "utf8::valid beyond"); + ok( utf8::valid($c), "utf8::valid beyond"); ok(!utf8::is_utf8($a), "!utf8::is_utf8 basic"); ok(!utf8::is_utf8($b), "!utf8::is_utf8 beyond"); + ok(!utf8::is_utf8($c), "!utf8::is_utf8 beyond"); utf8::encode($a); utf8::encode($b); utf8::encode($c); + utf8::encode($d); is($a, "A", "basic"); - if (ord('A') == 193) { # EBCDIC. + if ($::IS_EBCDIC) { # EBCDIC. is(length($b), 1, "beyond length"); } else { is(length($b), 2, "beyond length"); } - is(length($c), 2, "unicode length"); + is(length($c), 2, "beyond length"); + is(length($d), 2, "unicode length"); ok(utf8::valid($a), "utf8::valid basic"); ok(utf8::valid($b), "utf8::valid beyond"); - ok(utf8::valid($c), "utf8::valid unicode"); + ok(utf8::valid($c), "utf8::valid beyond"); + ok(utf8::valid($d), "utf8::valid unicode"); # encode() clears the UTF-8 flag (unlike upgrade()). ok(!utf8::is_utf8($a), "!utf8::is_utf8 basic"); ok(!utf8::is_utf8($b), "!utf8::is_utf8 beyond"); - ok(!utf8::is_utf8($c), "!utf8::is_utf8 unicode"); + ok(!utf8::is_utf8($c), "!utf8::is_utf8 beyond"); + ok(!utf8::is_utf8($d), "!utf8::is_utf8 unicode"); utf8::decode($a); utf8::decode($b); utf8::decode($c); + utf8::decode($d); is($a, "A", "basic"); is($b, "\xFF", "beyond"); - is($c, "\x{100}", "unicode"); + is($c, "\xDF", "beyond"); + is($d, "\x{100}", "unicode"); ok(utf8::valid($a), "!utf8::valid basic"); ok(utf8::valid($b), "!utf8::valid beyond"); - ok(utf8::valid($c), " utf8::valid unicode"); + ok(utf8::valid($c), "!utf8::valid beyond"); + ok(utf8::valid($d), " utf8::valid unicode"); ok(!utf8::is_utf8($a), "!utf8::is_utf8 basic"); - if (ord('A') == 193) { # EBCDIC. + if ($::IS_EBCDIC) { # EBCDIC. ok( utf8::is_utf8(pack('U',0x0ff)), " utf8::is_utf8 beyond"); } else { ok( utf8::is_utf8($b), " utf8::is_utf8 beyond"); # $b stays in UTF-8. } - ok( utf8::is_utf8($c), " utf8::is_utf8 unicode"); + ok( utf8::is_utf8($c), " utf8::is_utf8 beyond"); # $c stays in UTF-8. + ok( utf8::is_utf8($d), " utf8::is_utf8 unicode"); } { diff --git a/t/base/lex.t b/t/base/lex.t index 8862337..47c6be8 100644 --- a/t/base/lex.t +++ b/t/base/lex.t @@ -386,7 +386,7 @@ print "ok $test - call a function in package v10::foo\n"; $test++; print "not " unless (1?v65:"bar") eq chr(65); print "ok $test - colon detection after vstring does not break ? vstring :\n"; $test++; -if (ord("\t") == 9) { +if (ord("A") == 65) { print v35; print "not "; print v10; diff --git a/t/op/chr.t b/t/op/chr.t index 3546f02..5f57bad 100644 --- a/t/op/chr.t +++ b/t/op/chr.t @@ -49,7 +49,9 @@ is chr "-2.2", chr -2.2, 'chr "-2.2" eq chr -2.2'; # Check UTF-8 (not UTF-EBCDIC). SKIP: { - skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A'; + skip "ASCII centric tests", 21 if $::IS_EBCDIC; + # Too hard to convert these tests generically to EBCDIC code pages without + # using chr(), which is what we're testing. sub hexes { no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings diff --git a/t/op/lex.t b/t/op/lex.t index a4ce65c..c515449 100644 --- a/t/op/lex.t +++ b/t/op/lex.t @@ -129,7 +129,7 @@ fresh_perl_is( '* <null> ident' ); SKIP: { - skip "Different output on EBCDIC (presumably)", 2 if ord("A") != 65; + skip "Different output on EBCDIC (presumably)", 2 if $::IS_EBCDIC; fresh_perl_is( qq'"ab}"ax;&\0z\x8Ao}\x82x;', <<gibberish, Bareword found where operator expected at - line 1, near ""ab}"ax" diff --git a/t/op/oct.t b/t/op/oct.t index 2324655..84814b1 100644 --- a/t/op/oct.t +++ b/t/op/oct.t @@ -111,7 +111,7 @@ is(length, 5, is($_, "\0"."_"."7"."_"."7", "string concatenation with nul character"); chop, chop, chop, chop; is($_, "\0", "repeated chop() eliminated all but nul character"); -if (ord("\t") != 9) { +if ($::IS_EBCDIC) { is("\157_", "?_", "question mark is 111 in 1047, 037, && POSIX-BC"); } @@ -126,7 +126,7 @@ is(length, 5, is($_, "\0"."_"."7"."_"."7", "string concatenation with nul character"); chop, chop, chop, chop; is($_, "\0", "repeated chop() eliminated all but nul character"); -if (ord("\t") != 9) { +if ($::IS_EBCDIC) { is("\x61_", "/_", "/ is 97 in 1047, 037, && POSIX-BC"); } diff --git a/t/op/split.t b/t/op/split.t index 50579bf..bcefcfa 100644 --- a/t/op/split.t +++ b/t/op/split.t @@ -265,7 +265,7 @@ is($cnt, scalar(@ary)); my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; SKIP: { - if (ord('A') == 193) { + if ($::IS_EBCDIC) { skip("EBCDIC", 1); } else { # bug id 20000426.003 @@ -282,7 +282,7 @@ is($cnt, scalar(@ary)); ok($a eq "\x20\x40" && $b eq "\x40\x20"); SKIP: { - if (ord('A') == 193) { + if ($::IS_EBCDIC) { skip("EBCDIC", 1); } else { my ($a, $b) = split(/\x40\x{80}/, $s); diff --git a/t/op/sub_lval.t b/t/op/sub_lval.t index 9b0ad06..ab9faac 100644 --- a/t/op/sub_lval.t +++ b/t/op/sub_lval.t @@ -530,7 +530,7 @@ is($str, "Hi, world!"); $str = "Made w/ JavaScript"; sub veclv : lvalue { vec($str, 2, 32) } -if (ord('A') != 193) { +if ($::IS_ASCII) { veclv() = 0x5065726C; } else { # EBCDIC? diff --git a/t/op/tr.t b/t/op/tr.t index 508ab14..ffb3877 100644 --- a/t/op/tr.t +++ b/t/op/tr.t @@ -14,8 +14,6 @@ plan tests => 138; # This caused an asan failure due to a bad write past the end of the stack. eval { my $x; die 1..127, $x =~ y/// }; -my $Is_EBCDIC = (ord('i') == 0x89 & ord('J') == 0xd1); - $_ = "abcdefghijklmnopqrstuvwxyz"; tr/a-z/A-Z/; @@ -96,7 +94,7 @@ is(length $x, 3); $x =~ tr/A/B/; is(length $x, 3); -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII is($x, 256.66.258); } else { @@ -110,7 +108,7 @@ is($x, 256.193.258); $x =~ tr/A/B/; is(length $x, 3); -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII is($x, 256.193.258); } else { @@ -326,7 +324,7 @@ is($c, 8); is($a, "XXXXXXXX"); SKIP: { - skip "not EBCDIC", 4 unless $Is_EBCDIC; + skip "valid only on EBCDIC platforms", 4 unless $::IS_EBCDIC; $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/; is($c, 2); diff --git a/t/op/vec.t b/t/op/vec.t index 141a6da..33aedab 100644 --- a/t/op/vec.t +++ b/t/op/vec.t @@ -8,7 +8,6 @@ BEGIN { plan( tests => 35 ); -my $Is_EBCDIC = (ord('A') == 193) ? 1 : 0; is(vec($foo,0,1), 0); is(length($foo), undef); @@ -68,7 +67,7 @@ ok(! $@); $@ = undef; eval { vec($foo, 1, 8) = 13 }; ok(! $@); -if ($Is_EBCDIC) { +if ($::IS_EBCDIC) { is($foo, "\x8c\x0d\xff\x8a\x69"); } else { diff --git a/t/op/ver.t b/t/op/ver.t index e896e9e..144a352 100644 --- a/t/op/ver.t +++ b/t/op/ver.t @@ -19,7 +19,7 @@ is( $@, '', "use v5.5.640; $@"); require_ok('v5.5.640'); # printing characters should work -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII is('ok ',v111.107.32,'ASCII printing characters'); # hash keys too @@ -40,7 +40,7 @@ $x = v77; is('ok',$x,'poetry optimization'); # but not when dots are involved -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII $x = v77.78.79; } else { @@ -58,7 +58,7 @@ is( $@, '', "use 5.5.640; $@"); require_ok('5.5.640'); # hash keys too -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII $h{111.107.32} = "ok"; } else { @@ -66,7 +66,7 @@ else { } is('ok',$h{ok },'hash keys w/o v'); -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII $x = 77.78.79; } else { @@ -77,7 +77,7 @@ is($x, 'MNO','poetry optimization with dots w/o v'); is(1.20.300.4000, "\x{1}\x{14}\x{12c}\x{fa0}",'compare embedded \x{} string w/o v'); # test sprintf("%vd"...) etc -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII is(sprintf("%vd", "Perl"), '80.101.114.108', 'ASCII sprintf("%vd", "Perl")'); } else { @@ -86,7 +86,7 @@ else { is(sprintf("%vd", v1.22.333.4444), '1.22.333.4444', 'sprintf("%vd", v1.22.333.4444)'); -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII is(sprintf("%vx", "Perl"), '50.65.72.6c', 'ASCII sprintf("%vx", "Perl")'); } else { @@ -95,7 +95,7 @@ else { is(sprintf("%vX", 1.22.333.4444), '1.16.14D.115C','ASCII sprintf("%vX", 1.22.333.4444)'); -if (ord("\t") == 9) { # ASCII +if ($::IS_ASCII) { # ASCII is(sprintf("%#*vo", ":", "Perl"), '0120:0145:0162:0154', 'ASCII sprintf("%vo", "Perl")'); } else { @@ -112,42 +112,42 @@ is(sprintf("%vd", join("", map { chr } { use bytes; - if (ord("\t") == 9) { # ASCII + if ($::IS_ASCII) { # ASCII is(sprintf("%vd", "Perl"), '80.101.114.108', 'ASCII sprintf("%vd", "Perl") w/use bytes'); } else { is(sprintf("%vd", "Perl"), '215.133.153.147', 'EBCDIC sprintf("%vd", "Perl") w/use bytes'); } - if (ord("\t") == 9) { # ASCII + if ($::IS_ASCII) { # ASCII is(sprintf("%vd", 1.22.333.4444), '1.22.197.141.225.133.156', 'ASCII sprintf("%vd", v1.22.333.4444 w/use bytes'); } else { is(sprintf("%vd", 1.22.333.4444), '1.22.142.84.187.81.112', 'EBCDIC sprintf("%vd", v1.22.333.4444 w/use bytes'); } - if (ord("\t") == 9) { # ASCII + if ($::IS_ASCII) { # ASCII is(sprintf("%vx", "Perl"), '50.65.72.6c', 'ASCII sprintf("%vx", "Perl")'); } else { is(sprintf("%vx", "Perl"), 'd7.85.99.93', 'EBCDIC sprintf("%vx", "Perl")'); } - if (ord("\t") == 9) { # ASCII + if ($::IS_ASCII) { # ASCII is(sprintf("%vX", v1.22.333.4444), '1.16.C5.8D.E1.85.9C', 'ASCII sprintf("%vX", v1.22.333.4444)'); } else { is(sprintf("%vX", v1.22.333.4444), '1.16.8E.54.BB.51.70', 'EBCDIC sprintf("%vX", v1.22.333.4444)'); } - if (ord("\t") == 9) { # ASCII + if ($::IS_ASCII) { # ASCII is(sprintf("%#*vo", ":", "Perl"), '0120:0145:0162:0154', 'ASCII sprintf("%#*vo", ":", "Perl")'); } else { is(sprintf("%#*vo", ":", "Perl"), '0327:0205:0231:0223', 'EBCDIC sprintf("%#*vo", ":", "Perl")'); } - if (ord("\t") == 9) { # ASCII + if ($::IS_ASCII) { # ASCII is(sprintf("%*vb", "##", v1.22.333.4444), '1##10110##11000101##10001101##11100001##10000101##10011100', 'ASCII sprintf("%*vb", "##", v1.22.333.4444)'); -- Perl5 Master Repository
