In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/28ffebafd3403d496952bd64c99bb9bd7cbe871f?hp=744ef08f7a0cf217b22effff01fb1d9f7f244aea>
- Log ----------------------------------------------------------------- commit 28ffebafd3403d496952bd64c99bb9bd7cbe871f Author: Karl Williamson <[email protected]> Date: Wed Jan 14 11:16:50 2015 -0700 perlhacktips: Fix typo M pod/perlhacktips.pod commit d1cef54a570d5ed7b6b443b65984965167a0a3ef Author: Karl Williamson <[email protected]> Date: Wed Jan 14 11:01:21 2015 -0700 t/charset_tools.pl: Improve function names 'latin1_to_native' and 'native_to_latin1' actually operate on all code points, not just the ranges implied by their names. This commit changes the names to reflect that, to 'uni_to_native' and 'native_to_uni'. The old names have only been available since 5.21, so there is no backcompat issue. M lib/charnames.t M t/charset_tools.pl M t/op/chop.t M t/op/index.t M t/op/lc.t M t/re/pat_advanced.t commit 7bc44f185b5a15a31bcd895d928f90a19774fadd Author: Karl Williamson <[email protected]> Date: Wed Jan 14 10:54:31 2015 -0700 Use more universal function in tests where makes sense By using a different function that is just as applicable, or more so, in these cases, we can avoid having to require a file. The functions that translate code points between character sets are always available and just as appropriate for the uses in the three tests t/re/fold_grind.t, t/re/reg_fold.t, and lib/unicore/TestProp.pl. Change to use it instead of the functions that take string inputs. This is slightly cleaner, as the file that contains it no longer has to be 'required'. And in some of the cases the code point function is a better choice, as there had to be translation between chr and ord before. M lib/unicore/mktables M t/re/fold_grind.t M t/re/reg_fold.t commit 91d6f8a5d8405333aefe332e7433af8e4ec3623c Author: Karl Williamson <[email protected]> Date: Wed Jan 14 10:51:09 2015 -0700 t/test.pl: Rmv obsolete comment The referred-to functions have been moved to a separate file, and are no longer contained in this one. M t/test.pl ----------------------------------------------------------------------- Summary of changes: lib/charnames.t | 2 +- lib/unicore/mktables | 7 +------ pod/perlhacktips.pod | 2 +- t/charset_tools.pl | 4 ++-- t/op/chop.t | 2 +- t/op/index.t | 4 ++-- t/op/lc.t | 6 +++--- t/re/fold_grind.t | 6 +++--- t/re/pat_advanced.t | 6 +++--- t/re/reg_fold.t | 4 ++-- t/test.pl | 4 +--- 11 files changed, 20 insertions(+), 27 deletions(-) diff --git a/lib/charnames.t b/lib/charnames.t index e115811..cd87350 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -733,7 +733,7 @@ is($_, 'foobar', 'Verify charnames.pm doesnt clobbers $_'); my $names = do "unicore/Name.pl"; ok(defined $names, "Verify can read 'unicore/Name.pl'"); -my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c; +my $non_ascii = native_to_uni($names) =~ tr/\0-\177//c; ok(! $non_ascii, "Verify all official names are ASCII-only"); # Verify that charnames propagate to eval("") diff --git a/lib/unicore/mktables b/lib/unicore/mktables index a21aa1a..742e2af 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -18606,11 +18606,6 @@ __DATA__ use strict; use warnings; -# If run outside the normal test suite on an ASCII platform, you can -# just create a latin1_to_native() function that just returns its -# inputs, because that's the only function used from charset_tools.pl -require "charset_tools.pl"; - # Test qr/\X/ and the \p{} regular expression constructs. This file is # constructed by mktables from the tables it generates, so if mktables is # buggy, this won't necessarily catch those bugs. Tests are generated for all @@ -18760,7 +18755,7 @@ sub Test_X($) { my $this_string = ""; my $this_display = ""; foreach my $code_point (@code_points) { - $this_string .= latin1_to_native(chr(hex $code_point)); + $this_string .= chr utf8::unicode_to_native(hex $code_point); $this_display .= "\\x{$code_point}"; } diff --git a/pod/perlhacktips.pod b/pod/perlhacktips.pod index 3d477da..40bb3a1 100644 --- a/pod/perlhacktips.pod +++ b/pod/perlhacktips.pod @@ -173,7 +173,7 @@ NUM2PTR().) =item * -Casting between data function pointers and data pointers +Casting between function pointers and data pointers Technically speaking casting between function pointers and data pointers is unportable and undefined, but practically speaking it seems diff --git a/t/charset_tools.pl b/t/charset_tools.pl index 6d70a37..0621a7a 100644 --- a/t/charset_tools.pl +++ b/t/charset_tools.pl @@ -8,7 +8,7 @@ $::IS_EBCDIC = ord 'A' == 193; # the set of 256 characters which is usually called Latin1. However, they # will work properly with any character input, not just Latin1. -sub native_to_latin1($) { +sub native_to_uni($) { my $string = shift; return $string if $::IS_ASCII; @@ -23,7 +23,7 @@ sub native_to_latin1($) { return $output; } -sub latin1_to_native($) { +sub uni_to_native($) { my $string = shift; return $string if $::IS_ASCII; diff --git a/t/op/chop.t b/t/op/chop.t index 827eb82..91c4fbe 100644 --- a/t/op/chop.t +++ b/t/op/chop.t @@ -183,7 +183,7 @@ ok($@ =~ /Can\'t modify.*chop.*in.*assignment/); eval 'chomp($x, $y) = (1, 2);'; ok($@ =~ /Can\'t modify.*chom?p.*in.*assignment/); -my @chars = ("N", latin1_to_native("\xd3"), substr ("\xd4\x{100}", 0, 1), chr 1296); +my @chars = ("N", uni_to_native("\xd3"), substr ("\xd4\x{100}", 0, 1), chr 1296); foreach my $start (@chars) { foreach my $end (@chars) { local $/ = $end; diff --git a/t/op/index.t b/t/op/index.t index 29a4771..243cc1b 100644 --- a/t/op/index.t +++ b/t/op/index.t @@ -93,8 +93,8 @@ is(rindex($a, "foo", ), 0); { my $search; my $text; - $search = "foo " . latin1_to_native("\xc9") . " bar"; - $text = "a" . latin1_to_native("\xa3\xa3") . "a $search $search quux"; + $search = "foo " . uni_to_native("\xc9") . " bar"; + $text = "a" . uni_to_native("\xa3\xa3") . "a $search $search quux"; my $text_utf8 = $text; utf8::upgrade($text_utf8); diff --git a/t/op/lc.t b/t/op/lc.t index 72c7a2a..716cb2a 100644 --- a/t/op/lc.t +++ b/t/op/lc.t @@ -108,13 +108,13 @@ is(fc($b) , "\x{101}\x{101}aa", 'fc'); # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is # \x{2BC}\x{E4} or MODIFIER LETTER APOSTROPHE and N. -is(latin1_to_native("\U\x{DF}aB\x{149}cD"), latin1_to_native("SSAB\x{2BC}NCD"), +is(uni_to_native("\U\x{DF}aB\x{149}cD"), uni_to_native("SSAB\x{2BC}NCD"), "multicharacter uppercase"); # The \x{DF} is its own lowercase, ditto for \x{149}. # There are no single character -> multiple characters lowercase mappings. -is(latin1_to_native("\L\x{DF}aB\x{149}cD"), latin1_to_native("\x{DF}ab\x{149}cd"), +is(uni_to_native("\L\x{DF}aB\x{149}cD"), uni_to_native("\x{DF}ab\x{149}cd"), "multicharacter lowercase"); # \x{DF} is LATIN SMALL LETTER SHARP S, its foldcase is ss or \x{73}\x{73}; @@ -122,7 +122,7 @@ is(latin1_to_native("\L\x{DF}aB\x{149}cD"), latin1_to_native("\x{DF}ab\x{149}cd" # \x{2BC}\x{6E} or MODIFIER LETTER APOSTROPHE and n. # Note that is this further tested in t/uni/fold.t -is(latin1_to_native("\F\x{DF}aB\x{149}cD"), latin1_to_native("ssab\x{2BC}ncd"), +is(uni_to_native("\F\x{DF}aB\x{149}cD"), uni_to_native("ssab\x{2BC}ncd"), "multicharacter foldcase"); diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t index 3fb11e5..a39affb 100644 --- a/t/re/fold_grind.t +++ b/t/re/fold_grind.t @@ -5,7 +5,7 @@ binmode STDOUT, ":utf8"; BEGIN { chdir 't' if -d 't'; @INC = '../lib'; - require './test.pl'; require './charset_tools.pl'; + require './test.pl'; require Config; import Config; skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX"); if ($^O eq 'dec_osf') { @@ -481,8 +481,8 @@ foreach my $test (sort { numerically } keys %tests) { next if @target > 1 && @pattern > 1; # Have to convert non-utf8 chars to native char set - @target = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @target; - @pattern = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @pattern; + @target = map { utf8::unicode_to_native($_) } @target; + @pattern = map { utf8::unicode_to_native($_) } @pattern; # Get in hex form. my @x_target = map { sprintf "\\x{%04X}", $_ } @target; diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 19d6fbc..e03b2f4 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1047,7 +1047,7 @@ sub run_tests { undef $w; my $Cedilla_Latin1 = "GAR" - . latin1_to_native("\xC7") + . uni_to_native("\xC7") . "ON"; my $Cedilla_utf8 = $Cedilla_Latin1; utf8::upgrade($Cedilla_utf8); @@ -1060,9 +1060,9 @@ sub run_tests { undef $w; my $NBSP_Latin1 = "NBSP" - . latin1_to_native("\xA0") + . uni_to_native("\xA0") . "SEPARATED" - . latin1_to_native("\xA0") + . uni_to_native("\xA0") . "SPACE"; my $NBSP_utf8 = $NBSP_Latin1; utf8::upgrade($NBSP_utf8); diff --git a/t/re/reg_fold.t b/t/re/reg_fold.t index 3e98866..a23a799 100644 --- a/t/re/reg_fold.t +++ b/t/re/reg_fold.t @@ -3,7 +3,7 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; - require './test.pl'; require './charset_tools.pl'; + require './test.pl'; skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec"); } @@ -158,7 +158,7 @@ push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"]; push @tests, q[my $c = "\x{212A}"; my $p = qr/(?:^[K_]+$)/i; utf8::upgrade($p); like $c, qr/$p/, 'Bug #78994: my $c = "\x{212A}"; my $p = qr/(?:^[K_]+$)/i; utf8::upgrade($p); $c =~ $p']; use charnames ":full"; -my $e_grave = latin1_to_native("\xE8"); +my $e_grave = chr utf8::unicode_to_native(0xE8); push @tests, q[my $re1 = "\N{WHITE SMILING FACE}";like $e_grave, qr/[\w$re1]/, 'my $re = "\N{WHITE SMILING FACE}"; $e_grave =~ qr/[\w$re]/']; push @tests, q[my $re2 = "\N{WHITE SMILING FACE}";like $e_grave, qr/\w|$re2/, 'my $re = "\N{WHITE SMILING FACE}"; $e_grave =~ qr/\w|$re/']; diff --git a/t/test.pl b/t/test.pl index 1d08f55..d30214b 100644 --- a/t/test.pl +++ b/t/test.pl @@ -1,7 +1,5 @@ # -# t/test.pl - most of Test::More functionality without the fuss, plus -# has mappings native_to_latin1 and latin1_to_native so that fewer tests -# on non ASCII-ish platforms need to be skipped +# t/test.pl - most of Test::More functionality without the fuss # NOTE: -- Perl5 Master Repository
