In perl.git, the branch khw/ebcdic has been updated <http://perl5.git.perl.org/perl.git/commitdiff/6644288b2f54908195f15177940ecc0c5a2e4727?hp=11e66312239bc787b3f0096923f554ef5edc64c6>
- Log ----------------------------------------------------------------- commit 6644288b2f54908195f15177940ecc0c5a2e4727 Author: Karl Williamson <[email protected]> Date: Fri Apr 5 23:34:50 2013 -0600 t/uni/overload.t: EBCDIC fixes M t/uni/overload.t commit d277f1676236304752710b695879b2cb1131c954 Author: Karl Williamson <[email protected]> Date: Fri Apr 5 23:34:13 2013 -0600 t/uni/method.t: EBCDIC fixes M t/uni/method.t commit f3c289c2333a5179b1247e3b02d1f8aa630a18d9 Author: Karl Williamson <[email protected]> Date: Fri Apr 5 23:33:28 2013 -0600 t/op/utf8magic.t: EBCDIC fixes M t/op/utf8magic.t commit ae89c780f8dc88b1d0405f5a59fda83180d2a544 Author: Karl Williamson <[email protected]> Date: Fri Apr 5 23:32:57 2013 -0600 t/op/evalbytes.t: EBCDIC fixes M t/op/evalbytes.t commit cac0af4da0ce584d28bff733483dbe982f398f96 Author: Karl Williamson <[email protected]> Date: Fri Apr 5 23:32:25 2013 -0600 XXX more regcharclass, clean up M regen/regcharclass_multi_char_folds.pl ----------------------------------------------------------------------- Summary of changes: regen/regcharclass_multi_char_folds.pl | 8 ++- t/op/evalbytes.t | 5 +- t/op/utf8magic.t | 2 +- t/uni/method.t | 9 ++-- t/uni/overload.t | 86 ++++++++++++++++--------------- 5 files changed, 58 insertions(+), 52 deletions(-) diff --git a/regen/regcharclass_multi_char_folds.pl b/regen/regcharclass_multi_char_folds.pl index c93658d..fdb0bad 100644 --- a/regen/regcharclass_multi_char_folds.pl +++ b/regen/regcharclass_multi_char_folds.pl @@ -5,7 +5,8 @@ use warnings; use Unicode::UCD "prop_invmap"; # This returns an array of strings of the form -# "\x{foo}\x{bar}\x{baz}" +# "N\x{foo}\x{bar}\x{baz}" +# XXX # of the sequences of code points that are multi-character folds in the # current Unicode version. If the parameter is 1, all such folds are # returned. If the parameters is 0, only the ones containing exclusively @@ -41,7 +42,8 @@ sub gen_combinations ($;) { my $new_string = sprintf "$string\\x{%X}", $fold_ref->[$i][$j]; if ($i >= @$fold_ref - 1) { # Final level: just return it - push @ret, "\"$new_string\""; + # XXX N not necessary? + push @ret, "\"N$new_string\""; } else { # Generate the combinations for the next level with this one's push @ret, &gen_combinations($fold_ref, $new_string, $i + 1); @@ -76,7 +78,7 @@ sub multi_char_folds ($) { # Create a line that looks like "\x{foo}\x{bar}\x{baz}" of the code # points that make up the fold. my $fold = join "", map { sprintf "\\x{%X}", $_ } @{$folds_ref->[$i]}; - $fold = "\"$fold\""; + $fold = "\"N$fold\""; # Skip if something else already has this fold next if grep { $_ eq $fold } @folds; diff --git a/t/op/evalbytes.t b/t/op/evalbytes.t index 5a03e63..623422c 100644 --- a/t/op/evalbytes.t +++ b/t/op/evalbytes.t @@ -27,8 +27,9 @@ is evalbytes($upcode), "\xff\xfe", 'evalbytes on upgraded extra-ASCII'; use utf8; is evalbytes($code), "\xff\xfe", 'evalbytes ignores outer utf8 pragma'; } -is evalbytes "use utf8; '\xc4\x80'", chr 256, 'use utf8 within evalbytes'; -chop($upcode = "use utf8; '\xc4\x80'" . chr 256); +my $U_100 = byte_utf8a_to_utf8n("\xc4\x80"); +is evalbytes "use utf8; $U_100", chr 256, 'use utf8 within evalbytes'; +chop($upcode = "use utf8; $U_100" . chr 256); is evalbytes $upcode, chr 256, 'use utf8 within evalbytes on utf8 string'; eval { evalbytes chr 256 }; like $@, qr/Wide character/, 'evalbytes croaks on non-bytes'; diff --git a/t/op/utf8magic.t b/t/op/utf8magic.t index 55e921d..0851629 100644 --- a/t/op/utf8magic.t +++ b/t/op/utf8magic.t @@ -33,6 +33,6 @@ $str2 = "b"; utf8::encode $str2; is $::stored, "a", 'utf8::encode respects get-magic on POK scalars'; -tie $str2, "", "\xc4\x80"; +tie $str2, "", byte_utf8a_to_utf8n("\xc4\x80"); utf8::decode $str2; is $::stored, "\x{100}", 'utf8::decode respects set-magic'; diff --git a/t/uni/method.t b/t/uni/method.t index 4f9d72d..0979c55 100644 --- a/t/uni/method.t +++ b/t/uni/method.t @@ -111,8 +111,9 @@ is( ref Føø::Bær->new, 'Føø::Bær'); my $new_ascii = "new"; my $new_latin = "nèw"; -my $new_utf8 = "n\303\250w"; -my $newoct = "n\303\250w"; +my $e_with_grave = byte_utf8a_to_utf8n("\303\250"); +my $new_utf8 = "n${e_with_grave}w"; +my $newoct = "n${e_with_grave}w"; utf8::decode($new_utf8); like( Føø::Bær->$new_ascii, qr/Føø::Bær=HASH/u, "Can access \$new_ascii, [$new_ascii], stored in a scalar, as a method, through a UTF-8 package." ); @@ -121,7 +122,7 @@ like( Føø::Bær->$new_utf8, qr/Føø::Bær=HASH/u, "Can access \$new_utf8, [$n { local $@; eval { Føø::Bær->$newoct }; - like($@, qr/Can't locate object method "n\303\250w" via package "Føø::Bær"/u, "Can't access [$newoct], stored in a scalar, as a method through a UTF-8 package." ); + like($@, qr/Can't locate object method "n${e_with_grave}w" via package "Føø::Bær"/u, "Can't access [$newoct], stored in a scalar, as a method through a UTF-8 package." ); } @@ -138,7 +139,7 @@ like( $pkg_latin_1->$new_utf8, qr/Føø::Bær=HASH/u, "Can access \$new_utf8, [$ { local $@; eval { $pkg_latin_1->$newoct }; - like($@, qr/Can't locate object method "n\303\250w" via package "Føø::Bær"/u, "Can't access [$newoct], stored in a scalar, as a method, when the UTF-8 package name is also in a scalar."); + like($@, qr/Can't locate object method "n${e_with_grave}w" via package "Føø::Bær"/u, "Can't access [$newoct], stored in a scalar, as a method, when the UTF-8 package name is also in a scalar." ... [2 chars truncated] } ok !!Føø::Bær->can($new_ascii), "->can works for [$new_ascii]"; diff --git a/t/uni/overload.t b/t/uni/overload.t index bd87b66..d87a8d2 100644 --- a/t/uni/overload.t +++ b/t/uni/overload.t @@ -39,7 +39,7 @@ package main; # no feature "unicode_strings"; # Bug 34297 -foreach my $t ("ASCII", "B\366se") { +foreach my $t ("ASCII", "B" . latin1_to_native("\366") . "se") { my $length = length $t; my $u = UTF8Toggle->new($t); @@ -49,49 +49,51 @@ foreach my $t ("ASCII", "B\366se") { is (length $u, $length, "length of '$t'"); } -my $u = UTF8Toggle->new("\311"); +my $E_acute = latin1_to_native("\311"); +my $e_acute = latin1_to_native("\351"); +my $u = UTF8Toggle->new($E_acute); my $lc = lc $u; is (length $lc, 1); -is ($lc, "\311", "E acute -> e acute"); +is ($lc, $E_acute, "E acute -> e acute"); $lc = lc $u; is (length $lc, 1); -is ($lc, "\351", "E acute -> e acute"); +is ($lc, $e_acute, "E acute -> e acute"); $lc = lc $u; is (length $lc, 1); -is ($lc, "\311", "E acute -> e acute"); +is ($lc, $E_acute, "E acute -> e acute"); -$u = UTF8Toggle->new("\351"); +$u = UTF8Toggle->new($e_acute); my $uc = uc $u; is (length $uc, 1); -is ($uc, "\351", "e acute -> E acute"); +is ($uc, $e_acute, "e acute -> E acute"); $uc = uc $u; is (length $uc, 1); -is ($uc, "\311", "e acute -> E acute"); +is ($uc, $E_acute, "e acute -> E acute"); $uc = uc $u; is (length $uc, 1); -is ($uc, "\351", "e acute -> E acute"); +is ($uc, $e_acute, "e acute -> E acute"); -$u = UTF8Toggle->new("\311"); +$u = UTF8Toggle->new($E_acute); $lc = lcfirst $u; is (length $lc, 1); -is ($lc, "\311", "E acute -> e acute"); +is ($lc, $E_acute, "E acute -> e acute"); $lc = lcfirst $u; is (length $lc, 1); -is ($lc, "\351", "E acute -> e acute"); +is ($lc, $e_acute, "E acute -> e acute"); $lc = lcfirst $u; is (length $lc, 1); -is ($lc, "\311", "E acute -> e acute"); +is ($lc, $E_acute, "E acute -> e acute"); -$u = UTF8Toggle->new("\351"); +$u = UTF8Toggle->new($e_acute); $uc = ucfirst $u; is (length $uc, 1); -is ($uc, "\351", "e acute -> E acute"); +is ($uc, $e_acute, "e acute -> E acute"); $uc = ucfirst $u; is (length $uc, 1); -is ($uc, "\311", "e acute -> E acute"); +is ($uc, $E_acute, "e acute -> E acute"); $uc = ucfirst $u; is (length $uc, 1); -is ($uc, "\351", "e acute -> E acute"); +is ($uc, $e_acute, "e acute -> E acute"); my $have_setlocale = 0; eval { @@ -115,49 +117,49 @@ SKIP: { require locale; import locale; } } - my $u = UTF8Toggle->new("\311"); + my $u = UTF8Toggle->new($E_acute); my $lc = lc $u; is (length $lc, 1); - is ($lc, "\351", "E acute -> e acute"); + is ($lc, $e_acute, "E acute -> e acute"); $lc = lc $u; is (length $lc, 1); - is ($lc, "\351", "E acute -> e acute"); + is ($lc, $e_acute, "E acute -> e acute"); $lc = lc $u; is (length $lc, 1); - is ($lc, "\351", "E acute -> e acute"); + is ($lc, $e_acute, "E acute -> e acute"); - $u = UTF8Toggle->new("\351"); + $u = UTF8Toggle->new($e_acute); my $uc = uc $u; is (length $uc, 1); - is ($uc, "\311", "e acute -> E acute"); + is ($uc, $E_acute, "e acute -> E acute"); $uc = uc $u; is (length $uc, 1); - is ($uc, "\311", "e acute -> E acute"); + is ($uc, $E_acute, "e acute -> E acute"); $uc = uc $u; is (length $uc, 1); - is ($uc, "\311", "e acute -> E acute"); + is ($uc, $E_acute, "e acute -> E acute"); - $u = UTF8Toggle->new("\311"); + $u = UTF8Toggle->new($E_acute); $lc = lcfirst $u; is (length $lc, 1); - is ($lc, "\351", "E acute -> e acute"); + is ($lc, $e_acute, "E acute -> e acute"); $lc = lcfirst $u; is (length $lc, 1); - is ($lc, "\351", "E acute -> e acute"); + is ($lc, $e_acute, "E acute -> e acute"); $lc = lcfirst $u; is (length $lc, 1); - is ($lc, "\351", "E acute -> e acute"); + is ($lc, $e_acute, "E acute -> e acute"); - $u = UTF8Toggle->new("\351"); + $u = UTF8Toggle->new($e_acute); $uc = ucfirst $u; is (length $uc, 1); - is ($uc, "\311", "e acute -> E acute"); + is ($uc, $E_acute, "e acute -> E acute"); $uc = ucfirst $u; is (length $uc, 1); - is ($uc, "\311", "e acute -> E acute"); + is ($uc, $E_acute, "e acute -> E acute"); $uc = ucfirst $u; is (length $uc, 1); - is ($uc, "\311", "e acute -> E acute"); + is ($uc, $E_acute, "e acute -> E acute"); } } @@ -169,8 +171,8 @@ foreach my $operator ('print', 'syswrite', 'syswrite len', 'syswrite off', open my $fh, "+>$layer", $tmpfile or die $!; my $pad = $operator =~ /\boff\b/ ? "\243" : ""; my $trail = $operator =~ /\blen\b/ ? "!" : ""; - my $u = UTF8Toggle->new("$pad\311\n$trail"); - my $l = UTF8Toggle->new("$pad\351\n$trail", 1); + my $u = UTF8Toggle->new("$pad$E_acute\n$trail"); + my $l = UTF8Toggle->new("$pad$e_acute\n$trail", 1); if ($operator eq 'print') { no warnings 'utf8'; print $fh $u; @@ -208,17 +210,17 @@ foreach my $operator ('print', 'syswrite', 'syswrite len', 'syswrite off', seek $fh, 0, 0 or die $!; my $line; chomp ($line = <$fh>); - is ($line, "\311", "$operator $layer"); + is ($line, $E_acute, "$operator $layer"); chomp ($line = <$fh>); - is ($line, "\311", "$operator $layer"); + is ($line, $E_acute, "$operator $layer"); chomp ($line = <$fh>); - is ($line, "\311", "$operator $layer"); + is ($line, $E_acute, "$operator $layer"); chomp ($line = <$fh>); - is ($line, "\351", "$operator $layer"); + is ($line, $e_acute, "$operator $layer"); chomp ($line = <$fh>); - is ($line, "\351", "$operator $layer"); + is ($line, $e_acute, "$operator $layer"); chomp ($line = <$fh>); - is ($line, "\351", "$operator $layer"); + is ($line, $e_acute, "$operator $layer"); close $fh or die $!; } @@ -255,7 +257,7 @@ foreach my $b ($big, UTF8Toggle->new($big)) { } } -my $bits = "\311"; +my $bits = $E_acute; foreach my $pieces ($bits, UTF8Toggle->new($bits)) { like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); -- Perl5 Master Repository
