In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/202c41cbde95a3d4b1f80be3633fdee05b08fded?hp=6c52f6c9a260e34e5fe12222a09c91d217628a9f>
- Log ----------------------------------------------------------------- commit 202c41cbde95a3d4b1f80be3633fdee05b08fded Author: Steve Hay <[email protected]> Date: Sun Sep 1 15:01:54 2013 +0100 perldelta - CPAN::Meta::Requirements has been upgraded M pod/perldelta.pod commit 60f577e0304ce0cd93ca30edfeb534713ea7ffd9 Author: Steve Hay <[email protected]> Date: Sun Sep 1 14:59:01 2013 +0100 Upgrade Unicode::Collate from version 0.98 to 0.99 M Porting/Maintainers.pl M cpan/Unicode-Collate/Changes M cpan/Unicode-Collate/Collate.pm M cpan/Unicode-Collate/Collate.xs M cpan/Unicode-Collate/README M cpan/Unicode-Collate/mkheader M cpan/Unicode-Collate/t/illegal.t M cpan/Unicode-Collate/t/override.t M pod/perldelta.pod commit ad434879973009b368013b6390fb5691800a87bb Author: Steve Hay <[email protected]> Date: Sun Sep 1 14:57:33 2013 +0100 Upgrade Scalar-List-Utils from version 1.31 to 1.32 M Porting/Maintainers.pl M cpan/List-Util/Changes M cpan/List-Util/ListUtil.xs M cpan/List-Util/lib/List/Util.pm M cpan/List-Util/lib/List/Util/XS.pm M cpan/List-Util/lib/Scalar/Util.pm M cpan/List-Util/t/readonly.t M pod/perldelta.pod ----------------------------------------------------------------------- Summary of changes: Porting/Maintainers.pl | 4 +- cpan/List-Util/Changes | 7 + cpan/List-Util/ListUtil.xs | 5 +- cpan/List-Util/lib/List/Util.pm | 2 +- cpan/List-Util/lib/List/Util/XS.pm | 2 +- cpan/List-Util/lib/Scalar/Util.pm | 36 ++--- cpan/List-Util/t/readonly.t | 4 +- cpan/Unicode-Collate/Changes | 6 + cpan/Unicode-Collate/Collate.pm | 108 +++++++++---- cpan/Unicode-Collate/Collate.xs | 18 +-- cpan/Unicode-Collate/README | 2 +- cpan/Unicode-Collate/mkheader | 8 +- cpan/Unicode-Collate/t/illegal.t | 50 +++++- cpan/Unicode-Collate/t/override.t | 309 ++++++++++++++++++++++--------------- pod/perldelta.pod | 18 +++ 15 files changed, 379 insertions(+), 200 deletions(-) diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index 1bc5dea..0d799f6 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -1546,7 +1546,7 @@ use File::Glob qw(:case); 'Scalar-List-Utils' => { 'MAINTAINER' => 'gbarr', - 'DISTRIBUTION' => 'PEVANS/Scalar-List-Utils-1.31.tar.gz', + 'DISTRIBUTION' => 'PEVANS/Scalar-List-Utils-1.32.tar.gz', 'FILES' => q[cpan/List-Util], 'EXCLUDED' => [ qr{^inc/Module/}, @@ -1916,7 +1916,7 @@ use File::Glob qw(:case); 'Unicode::Collate' => { 'MAINTAINER' => 'sadahiro', - 'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.98.tar.gz', + 'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.99.tar.gz', 'FILES' => q[cpan/Unicode-Collate], 'EXCLUDED' => [ qr{N$}, diff --git a/cpan/List-Util/Changes b/cpan/List-Util/Changes index d7088a1..2c26c70 100644 --- a/cpan/List-Util/Changes +++ b/cpan/List-Util/Changes @@ -1,3 +1,10 @@ +1.32 -- Sun Aug 31 23:48 UTC 2013 + + * Skip pairmap()'s MULTICALL implementation 5.8.9 / 5.10.0 as it doesn't + work (RT87857) + * Comment on the fact that package "0" is defined but false (RT88201) + * TODO test in t/readonly.t now passes since 5.19.3 (RT88223) + 1.31 -- Wed Aug 14 20:38 UTC 2013 * Bugfix pairmap to return list length in scalar context diff --git a/cpan/List-Util/ListUtil.xs b/cpan/List-Util/ListUtil.xs index 7448a8f..c89bd57 100644 --- a/cpan/List-Util/ListUtil.xs +++ b/cpan/List-Util/ListUtil.xs @@ -529,7 +529,10 @@ PPCODE: bgv = gv_fetchpv("b", GV_ADD, SVt_PV); SAVESPTR(GvSV(agv)); SAVESPTR(GvSV(bgv)); -#ifdef dMULTICALL +/* This MULTICALL-based code appears to fail on perl 5.10.0 and 5.8.9 + * Skip it on those versions (RT#87857) + */ +#if defined(dMULTICALL) && (PERL_BCDVERSION > 0x5010000 || PERL_BCDVERSION < 0x5008009) if(!CvISXSUB(cv)) { // Since MULTICALL is about to move it SV **stack = PL_stack_base + ax; diff --git a/cpan/List-Util/lib/List/Util.pm b/cpan/List-Util/lib/List/Util.pm index f01bff2..042ef14 100644 --- a/cpan/List-Util/lib/List/Util.pm +++ b/cpan/List-Util/lib/List/Util.pm @@ -13,7 +13,7 @@ require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw(first min max minstr maxstr reduce sum sum0 shuffle pairmap pairgrep pairfirst pairs pairkeys pairvalues); -our $VERSION = "1.31"; +our $VERSION = "1.32"; our $XS_VERSION = $VERSION; $VERSION = eval $VERSION; diff --git a/cpan/List-Util/lib/List/Util/XS.pm b/cpan/List-Util/lib/List/Util/XS.pm index bb38ab2..d9916bd 100644 --- a/cpan/List-Util/lib/List/Util/XS.pm +++ b/cpan/List-Util/lib/List/Util/XS.pm @@ -2,7 +2,7 @@ package List::Util::XS; use strict; use List::Util; -our $VERSION = "1.31"; # FIXUP +our $VERSION = "1.32"; # FIXUP $VERSION = eval $VERSION; # FIXUP 1; diff --git a/cpan/List-Util/lib/Scalar/Util.pm b/cpan/List-Util/lib/Scalar/Util.pm index d10bbaa..7101c98 100644 --- a/cpan/List-Util/lib/Scalar/Util.pm +++ b/cpan/List-Util/lib/Scalar/Util.pm @@ -26,7 +26,7 @@ our @EXPORT_OK = qw( tainted weaken ); -our $VERSION = "1.31"; +our $VERSION = "1.32"; $VERSION = eval $VERSION; our @EXPORT_FAIL; @@ -80,9 +80,7 @@ so small such that being individual extensions would be wasteful. By default C<Scalar::Util> does not export any subroutines. The subroutines defined are -=over 4 - -=item blessed EXPR +=head2 blessed EXPR If EXPR evaluates to a blessed reference the name of the package that it is blessed into is returned. Otherwise C<undef> is returned. @@ -96,7 +94,11 @@ that it is blessed into is returned. Otherwise C<undef> is returned. $obj = bless [], "Foo"; $class = blessed $obj; # "Foo" -=item dualvar NUM, STRING +Take care when using this function simply as a truth test (such as in +C<if(blessed $ref)...>) because the package name C<"0"> is defined yet +false. + +=head2 dualvar NUM, STRING Returns a scalar that has the value NUM in a numeric context and the value STRING in a string context. @@ -105,7 +107,7 @@ value STRING in a string context. $num = $foo + 2; # 12 $str = $foo . " world"; # Hello world -=item isdual EXPR +=head2 isdual EXPR If EXPR is a scalar that is a dualvar, the result is true. @@ -132,7 +134,7 @@ You can capture its numeric and string content using: $err = dualvar $!, $!; $dual = isdual($err); # true -=item isvstring EXPR +=head2 isvstring EXPR If EXPR is a scalar which was coded as a vstring the result is true. @@ -140,12 +142,12 @@ If EXPR is a scalar which was coded as a vstring the result is true. $fmt = isvstring($vs) ? "%vd" : "%s"; #true printf($fmt,$vs); -=item looks_like_number EXPR +=head2 looks_like_number EXPR Returns true if perl thinks EXPR is a number. See L<perlapi/looks_like_number>. -=item openhandle FH +=head2 openhandle FH Returns FH if FH may be used as a filehandle and is open, or FH is a tied handle. Otherwise C<undef> is returned. @@ -155,7 +157,7 @@ handle. Otherwise C<undef> is returned. $fh = openhandle(*NOTOPEN); # undef $fh = openhandle("scalar"); # undef -=item readonly SCALAR +=head2 readonly SCALAR Returns true if SCALAR is readonly. @@ -164,7 +166,7 @@ Returns true if SCALAR is readonly. $readonly = foo($bar); # false $readonly = foo(0); # true -=item refaddr EXPR +=head2 refaddr EXPR If EXPR evaluates to a reference the internal memory address of the referenced value is returned. Otherwise C<undef> is returned. @@ -176,7 +178,7 @@ the referenced value is returned. Otherwise C<undef> is returned. $obj = bless {}, "Foo"; $addr = refaddr $obj; # eg 88123488 -=item reftype EXPR +=head2 reftype EXPR If EXPR evaluates to a reference the type of the variable referenced is returned. Otherwise C<undef> is returned. @@ -188,21 +190,21 @@ is returned. Otherwise C<undef> is returned. $obj = bless {}, "Foo"; $type = reftype $obj; # HASH -=item set_prototype CODEREF, PROTOTYPE +=head2 set_prototype CODEREF, PROTOTYPE Sets the prototype of the given function, or deletes it if PROTOTYPE is undef. Returns the CODEREF. set_prototype \&foo, '$$'; -=item tainted EXPR +=head2 tainted EXPR Return true if the result of EXPR is tainted $taint = tainted("constant"); # false $taint = tainted($ENV{PWD}); # true if running under -T -=item weaken REF +=head2 weaken REF REF will be turned into a weak reference. This means that it will not hold a reference count on the object it references. Also when the reference @@ -237,7 +239,7 @@ references to objects will be strong, causing the remaining objects to never be destroyed because there is now always a strong reference to them in the @object array. -=item isweak EXPR +=head2 isweak EXPR If EXPR is a scalar which is a weak reference the result is true. @@ -251,8 +253,6 @@ B<NOTE>: Copying a weak reference creates a normal, strong, reference. $copy = $ref; $weak = isweak($copy); # false -=back - =head1 DIAGNOSTICS Module use may give one of the following errors during import. diff --git a/cpan/List-Util/t/readonly.t b/cpan/List-Util/t/readonly.t index 42ed3d8..91385fd 100644 --- a/cpan/List-Util/t/readonly.t +++ b/cpan/List-Util/t/readonly.t @@ -45,7 +45,9 @@ sub try $var = 123; { - local $TODO = $Config::Config{useithreads} ? "doesn't work with threads" : undef; + # This used not to work with ithreads, but seems to be working since 5.19.3 + local $TODO = ( $Config::Config{useithreads} && $] < 5.019003 ) ? + "doesn't work with threads" : undef; ok( try ("abc"), 'reference a constant in a sub'); } ok( !try ($var), 'reference a non-constant in a sub'); diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes index 0fd0908..b8fa1a9e 100644 --- a/cpan/Unicode-Collate/Changes +++ b/cpan/Unicode-Collate/Changes @@ -1,5 +1,11 @@ Revision history for Perl module Unicode::Collate. +0.99 Sun Sep 1 12:46:14 2013 + - by default out-of-range values are treated as if it were U+FFFD + when UCA_Version >= 22. + - supported overriding out-of-range values (see 'overrideOut' in POD). + - modified tests: override.t, illegal.t in t. + 0.98 Sat Jun 15 19:44:06 2013 - typo (see [rt.cpan.org #85655] typo fixes) diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm index 388da67..48840ec 100644 --- a/cpan/Unicode-Collate/Collate.pm +++ b/cpan/Unicode-Collate/Collate.pm @@ -14,7 +14,7 @@ use File::Spec; no warnings 'utf8'; -our $VERSION = '0.98'; +our $VERSION = '0.99'; our $PACKAGE = __PACKAGE__; ### begin XS only ### @@ -106,7 +106,7 @@ my (%VariableOK); our @ChangeOK = qw/ alternate backwards level normalization rearrange katakana_before_hiragana upper_before_lower ignore_level2 - overrideHangul overrideCJK preprocess UCA_Version + overrideCJK overrideHangul overrideOut preprocess UCA_Version hangul_terminator variable identical highestFFFF minimalFFFE /; @@ -497,7 +497,7 @@ sub splitEnt # remove a code point marked as a completely ignorable. for (my $i = 0; $i < @src; $i++) { - if (_isIllegal($src[$i]) || $vers <= 20 && _isNonchar($src[$i])) { + if ($vers <= 20 && _isIllegal($src[$i])) { $src[$i] = undef; } elsif ($ver9) { $src[$i] = undef if $map->{ $src[$i] } @@ -621,25 +621,27 @@ sub getWt my $u = shift; my $map = $self->{mapping}; my $der = $self->{derivCode}; + my $out = $self->{overrideOut}; my $uXS = $self->{__useXS}; ### XS only return if !defined $u; return $self->varCE($HighestVCE) if $u eq 0xFFFF && $self->{highestFFFF}; return $self->varCE($minimalVCE) if $u eq 0xFFFE && $self->{minimalFFFE}; - return map($self->varCE($_), @{ $map->{$u} }) if $map->{$u}; + $u = 0xFFFD if $u !~ /;/ && 0x10FFFF < $u && !$out; + + my @ce; + if ($map->{$u}) { + @ce = @{ $map->{$u} }; # $u may be a contraction ### begin XS only ### - return map($self->varCE($_), _fetch_simple($u)) - if $uXS && _exists_simple($u); + } elsif ($uXS && _exists_simple($u)) { + @ce = _fetch_simple($u); ### end XS only ### - - # JCPS must not be a contraction, then it's a code point. - if (Hangul_SIni <= $u && $u <= Hangul_SFin) { + } elsif (Hangul_SIni <= $u && $u <= Hangul_SFin) { my $hang = $self->{overrideHangul}; - my @hangulCE; if ($hang) { - @hangulCE = map _pack_override($_, $u, $der), $hang->($u); + @ce = map _pack_override($_, $u, $der), $hang->($u); } elsif (!defined $hang) { - @hangulCE = $der->($u); + @ce = $der->($u); } else { my $max = $self->{maxlength}; my @decH = _decompHangul($u); @@ -665,25 +667,26 @@ sub getWt } } - @hangulCE = map({ + @ce = map({ $map->{$_} ? @{ $map->{$_} } : $uXS && _exists_simple($_) ? _fetch_simple($_) : ### XS only $der->($_); } @decH); } - return map $self->varCE($_), @hangulCE; + } elsif ($out && 0x10FFFF < $u) { + @ce = map _pack_override($_, $u, $der), $out->($u); } else { my $cjk = $self->{overrideCJK}; my $vers = $self->{UCA_Version}; if ($cjk && _isUIdeo($u, $vers)) { - my @cjkCE = map _pack_override($_, $u, $der), $cjk->($u); - return map $self->varCE($_), @cjkCE; - } - if ($vers == 8 && defined $cjk && _isUIdeo($u, 0)) { - return map $self->varCE($_), _uideoCE_8($u); + @ce = map _pack_override($_, $u, $der), $cjk->($u); + } elsif ($vers == 8 && defined $cjk && _isUIdeo($u, 0)) { + @ce = _uideoCE_8($u); + } else { + @ce = $der->($u); } - return map $self->varCE($_), $der->($u); } + return map $self->varCE($_), @ce; } @@ -1095,6 +1098,9 @@ The following revisions are supported. The default is 26. * Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden since C<UCA_Version> 22. +* Out-of-range codepoints (greater than U+10FFFF) are not ignored, +and can be overridden since C<UCA_Version> 22. + * Fully ignorable characters were ignored, and would not interrupt contractions with C<UCA_Version> 9 and 11. @@ -1216,7 +1222,8 @@ almost, but the latter has a problem that you should know which letter is next to C<c>. For a certain language where C<ch> as the next letter, C<"abch"> is greater than C<"abc\x{FFFF}">, but less than C<"abd">. -Note: This is equivalent to C<entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]'>. +Note: +This is equivalent to C<(entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]')>. Any other character than C<U+FFFF> can be tailored by C<entry>. =item identical @@ -1325,7 +1332,8 @@ then C<$a2> and C<$b2> at level 1, as followed. "b\x{FFFE}aaa" "bbb\x{FFFE}a" -Note: This is equivalent to C<entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]'>. +Note: +This is equivalent to C<(entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]')>. Any other character than C<U+FFFE> can be tailored by C<entry>. =item normalization @@ -1425,10 +1433,16 @@ ex. ignores all CJK unified ideographs. # where ->eq("Pe\x{4E00}rl", "Perl") is true # as U+4E00 is a CJK unified ideograph and to be ignorable. -If C<undef> is passed explicitly as the value for this key, -weights for CJK unified ideographs are treated as undefined. +If a false value (including C<undef>) is passed, C<overrideCJK> +has no effect. +C<$Collator-E<gt>change(overrideCJK =E<gt> 0)> resets the old one. + But assignment of weight for CJK unified ideographs in C<table> or C<entry> is still valid. +If C<undef> is passed explicitly as the value for this key, +weights for CJK unified ideographs are treated as undefined. +However when C<UCA_Version> E<gt> 8, C<(overrideCJK =E<gt> undef)> +has no special meaning. B<Note:> In addition to them, 12 CJK compatibility ideographs (C<U+FA0E>, C<U+FA0F>, C<U+FA11>, C<U+FA13>, C<U+FA14>, C<U+FA1F>, C<U+FA21>, C<U+FA23>, @@ -1452,12 +1466,54 @@ NFD and NFKD are not appropriate, since NFD and NFKD will decompose Hangul syllables before overriding. FCD may decompose Hangul syllables as the case may be. +If a false value (but not C<undef>) is passed, C<overrideHangul> +has no effect. +C<$Collator-E<gt>change(overrideHangul =E<gt> 0)> resets the old one. + If C<undef> is passed explicitly as the value for this key, weight for Hangul syllables is treated as undefined without decomposition into Hangul Jamo. But definition of weight for Hangul syllables in C<table> or C<entry> is still valid. +=item overrideOut + +-- see 7.1.1 Handling Ill-Formed Code Unit Sequences, UTS #10. + +Perl seems to allow out-of-range values (greater than 0x10FFFF). +By default, out-of-range values are replaced with C<U+FFFD> +(REPLACEMENT CHARACTER) when C<UCA_Version> E<gt>= 22, +or ignored when C<UCA_Version> E<lt>= 20. + +When C<UCA_Version> E<gt>= 22, the weights of out-of-range values +can be overridden. Though C<table> or C<entry> are available for them, +out-of-range values are too many. + +C<overrideOut> can perform it algorithmically. +This parameter works like C<overrideCJK>, so see there for examples. + +ex. ignores all out-of-range values. + + overrideOut => sub {()}, # CODEREF returning empty list + +If a false value (including C<undef>) is passed, C<overrideOut> +has no effect. +C<$Collator-E<gt>change(overrideOut =E<gt> 0)> resets the old one. + +UCA recommends that out-of-range values should not be ignored for security +reasons. Say, C<"pe\x{110000}rl"> should not be equal to C<"perl">. +However, C<U+FFFD> is wrongly mapped to a variable collation element +in DUCET for Unicode 6.0.0 to 6.2.0, that means out-of-range values will be +ignored when C<variable> isn't C<Non-ignorable>. + +Unicode 6.3.0 will correct the mapping of C<U+FFFD>. +see L<http://www.unicode.org/reports/tr10/tr10-27.html#Trailing_Weights>. +Such a correction is reproduced by this. + + overrideOut => sub { 0xFFFD }, # CODEREF returning a very large integer + +Since Unicode 6.3.0, C<(overrideOut =E<gt> sub { 0xFFFD })> may be unnecesssary. + =item preprocess -- see 5.4 Preprocessing, UTS #10. @@ -1559,7 +1615,7 @@ may be better to avoid namespace conflict. B<NOTE>: When XSUB is used, the DUCET is compiled on building this module, and it may save time at the run time. -Explicit saying C<table =E<gt> 'allkeys.txt'> (or using another table), +Explicit saying C<(table =E<gt> 'allkeys.txt')>, or using another table, or using C<ignoreChar>, C<ignoreName>, C<undefChar>, C<undefName> or C<rewrite> will prevent this module from using the compiled DUCET. @@ -1934,7 +1990,7 @@ module (see L<Unicode::Normalize>). If you need not it (say, in the case when you need not handle any combining characters), -assign C<normalization =E<gt> undef> explicitly. +assign C<(normalization =E<gt> undef)> explicitly. -- see 6.5 Avoiding Normalization, UTS #10. diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs index 27920ed..c339cc7 100644 --- a/cpan/Unicode-Collate/Collate.xs +++ b/cpan/Unicode-Collate/Collate.xs @@ -210,22 +210,8 @@ _isIllegal (sv) XSRETURN_YES; uv = SvUVX(sv); RETVAL = boolSV( - 0x10FFFF < uv /* out of range */ - ); -OUTPUT: - RETVAL - - -SV* -_isNonchar (sv) - SV* sv - PREINIT: - UV uv; - CODE: - /* should be called only if ! _isIllegal(sv). */ - uv = SvUVX(sv); - RETVAL = boolSV( - ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] (cf. utf8.c) */ + 0x10FFFF < uv /* out of range */ + || ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] (cf. utf8.c) */ || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */ || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */ ); diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README index 141de8a..1de2709 100644 --- a/cpan/Unicode-Collate/README +++ b/cpan/Unicode-Collate/README @@ -1,4 +1,4 @@ -Unicode/Collate version 0.98 +Unicode/Collate version 0.99 =============================== NAME diff --git a/cpan/Unicode-Collate/mkheader b/cpan/Unicode-Collate/mkheader index c92d7c9..c3d0ebf 100644 --- a/cpan/Unicode-Collate/mkheader +++ b/cpan/Unicode-Collate/mkheader @@ -135,11 +135,11 @@ foreach my $tbl (@tripletable) { my $null = $tbl->{null}; my $init = $tbl->{init}; - open FH, ">$file" or croak "$PACKAGE: $file can't be made"; - binmode FH; select FH; + open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made"; + binmode $fh_h; select $fh_h; my %val; - print FH << 'EOF'; + print << 'EOF'; /* * This file is auto-generated by mkheader. * Any changes here will be lost! @@ -189,7 +189,7 @@ EOF print "\n"; } print "};\n\n"; - close FH; + close $fh_h; } 1; diff --git a/cpan/Unicode-Collate/t/illegal.t b/cpan/Unicode-Collate/t/illegal.t index 5d7999d..7fa81e4 100644 --- a/cpan/Unicode-Collate/t/illegal.t +++ b/cpan/Unicode-Collate/t/illegal.t @@ -25,7 +25,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..65\n"; } +BEGIN { $| = 1; print "1..127\n"; } # 77 + 5 x @Versions my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -125,7 +125,7 @@ ok($nonch->lt("", "\x{FDD0}")); ok($nonch->lt("", "\x{FDEF}")); ok($nonch->lt("", "\x02")); ok($nonch->lt("", "\x{10FFFF}")); -ok($nonch->eq("", "\x{110000}")); +ok($nonch->lt("", "\x{110000}")); # 38..47 ok($nonch->lt("\x00", "\x01")); @@ -137,7 +137,7 @@ ok($nonch->lt("\x{DFFF}", "\x{FDD0}")); ok($nonch->lt("\x{FDD0}", "\x{FDEF}")); ok($nonch->lt("\x{FDEF}", "\x02")); ok($nonch->lt("\x02", "\x{10FFFF}")); -ok($nonch->gt("\x{10FFFF}", "\x{110000}")); +ok($nonch->lt("\x{10FFFF}", "\x{110000}")); # 48..51 ok($nonch->lt("A", "A\x{FFFF}")); @@ -178,3 +178,47 @@ for my $ret (@ret) { ok($match eq $ret); } +################## + +my $out = Unicode::Collate->new( + level => 1, + table => undef, + normalization => undef, + overrideOut => sub { 0xFFFD }, +); + +my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26); + +for my $v (@Versions) { + $out->change(UCA_Version => $v); + ok($out->cmp('', "\x{10FFFF}") == ($v >= 22 ? -1 : 0)); + ok($out->cmp('', "\x{110000}") == ($v >= 22 ? -1 : 0)); + ok($out->cmp('ABC', "\x{110000}") == ($v >= 22 ? -1 : 1)); + ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1)); + ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ? 0 : 0)); +} + +# x+66..x+77 +ok($out->lt('ABC', "\x{123456}")); +ok($out->lt("\x{FFFD}", "\x{123456}")); + +$out->change(overrideOut => sub {()}); + +ok($out->eq('', "\x{123456}")); +ok($out->gt('ABC', "\x{123456}")); +ok($out->gt("\x{FFFD}", "\x{123456}")); + +$out->change(overrideOut => undef); +ok($out->lt('', "\x{123456}")); +ok($out->eq("\x{FFFD}", "\x{123456}")); + +$out->change(overrideOut => sub { 0xFFFD }); + +ok($out->lt('', "\x{123456}")); +ok($out->lt('ABC', "\x{123456}")); +ok($out->lt("\x{FFFD}", "\x{123456}")); + +$out->change(overrideOut => 0); +ok($out->lt('', "\x{123456}")); +ok($out->eq("\x{FFFD}", "\x{123456}")); + diff --git a/cpan/Unicode-Collate/t/override.t b/cpan/Unicode-Collate/t/override.t index bc6a70c..025a369 100644 --- a/cpan/Unicode-Collate/t/override.t +++ b/cpan/Unicode-Collate/t/override.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..35\n"; } +BEGIN { $| = 1; print "1..65\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -30,129 +30,186 @@ ok(1); ######################### -##### 2..6 - -my $all_undef_8 = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideCJK => undef, - overrideHangul => undef, - UCA_Version => 8, -); - -# All in the Unicode code point order. -# No hangul decomposition. - -ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); -ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); -ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); -ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); -ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); - - -##### 7..11 - -my $all_undef_9 = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideCJK => undef, - overrideHangul => undef, - UCA_Version => 9, -); - -# CJK Ideo. < CJK ext A/B < Others. -# No hangul decomposition. - -ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); -ok($all_undef_9->lt("\x{3402}", "\x{20000}")); -ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); -ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); -ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned - -##### 12..16 - -my $ignoreHangul = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideHangul => sub {()}, - entry => <<'ENTRIES', -AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL -ENTRIES -); - -# All Hangul Syllables except U+AE00 are ignored. - -ok($ignoreHangul->eq("\x{AC00}", "")); -ok($ignoreHangul->lt("\x{AC00}", "\0")); -ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); -ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. -ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. - -##### 17..21 - -my $undefHangul = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideHangul => sub { - my $u = shift; - return $u == 0xAE00 ? 0x100 : undef; - } -); - -# All Hangul Syllables except U+AE00 are undefined. - -ok($undefHangul->lt("\x{AE00}", "r")); -ok($undefHangul->gt("\x{AC00}", "r")); -ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); -ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. -ok($undefHangul->lt("\x{AC00}", "\x{B000}")); - -##### 22..25 - -my $undefCJK = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideCJK => sub { - my $u = shift; - return $u == 0x4E00 ? 0x100 : undef; - } -); - -# All CJK Ideographs except U+4E00 are undefined. - -ok($undefCJK->lt("\x{4E00}", "r")); -ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned -ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); # 'r' is unassigned. -ok($undefCJK->lt("\x{5000}", "\x{6000}")); - -##### 26..30 - -my $cpHangul = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideHangul => sub { shift } -); - -ok($cpHangul->lt("\x{AC00}", "\x{AC01}")); -ok($cpHangul->lt("\x{AC01}", "\x{D7A3}")); -ok($cpHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned. -ok($cpHangul->lt("r", "\x{D7A4}")); -ok($cpHangul->lt("\x{D7A3}", "\x{4E00}")); - -##### 31..35 - -my $arrayHangul = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideHangul => sub { - my $u = shift; - return [$u, 0x20, 0x2, $u]; - } -); - -ok($arrayHangul->lt("\x{AC00}", "\x{AC01}")); -ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}")); -ok($arrayHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned. -ok($arrayHangul->lt("r", "\x{D7A4}")); -ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}")); +##### 2..31 + +{ + my $all_undef_8 = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => undef, + overrideHangul => undef, + UCA_Version => 8, + ); + # All in the Unicode code point order. + # No hangul decomposition. + + ok($all_undef_8->lt("\x{1100}", "\x{3402}")); + ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); + ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); + # U+ABFF: not assigned + + # a hangul syllable is decomposed into jamo. + $all_undef_8->change(overrideHangul => 0); + ok($all_undef_8->lt("\x{1100}", "\x{3402}")); + ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); + ok($all_undef_8->gt("\x{4E00}", "\x{AC00}")); + ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); + + # CJK defined < Jamo undefined + $all_undef_8->change(overrideCJK => 0); + ok($all_undef_8->gt("\x{1100}", "\x{3402}")); + ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); + ok($all_undef_8->gt("\x{4DFF}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); + ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); + + # CJK undefined > Jamo undefined + $all_undef_8->change(overrideCJK => undef); + ok($all_undef_8->lt("\x{1100}", "\x{3402}")); + ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); + ok($all_undef_8->gt("\x{4E00}", "\x{AC00}")); + ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); + + $all_undef_8->change(overrideHangul => undef); + ok($all_undef_8->lt("\x{1100}", "\x{3402}")); + ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); + ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); + ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); +} + +##### 32..38 + +{ + my $all_undef_9 = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => undef, + overrideHangul => undef, + UCA_Version => 9, + ); + # CJK Ideo. < CJK ext A/B < Others. + # No hangul decomposition. + + ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); + ok($all_undef_9->lt("\x{3402}", "\x{20000}")); + ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); + ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); + # U+ABFF: not assigned + + # a hangul syllable is decomposed into jamo. + $all_undef_9->change(overrideHangul => 0); + ok($all_undef_9->eq("\x{AC00}", "\x{1100}\x{1161}")); + ok($all_undef_9->lt("\x{AC00}", "\x{ABFF}")); +} + +##### 39..46 + +{ + my $ignoreHangul = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideHangul => sub {()}, + entry => 'AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL', + ); + # All Hangul Syllables except U+AE00 are ignored. + + ok($ignoreHangul->eq("\x{AC00}", "")); + ok($ignoreHangul->lt("\x{AC00}", "\0")); + ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); + ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. + ok($ignoreHangul->eq("Pe\x{AC00}rl", "Perl")); + ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); + # 'r' is unassigned. + + $ignoreHangul->change(overrideHangul => 0); + ok($ignoreHangul->eq("\x{AC00}", "\x{1100}\x{1161}")); + + $ignoreHangul->change(overrideHangul => undef); + ok($ignoreHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); +} + +##### 47..51 + +{ + my $undefHangul = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideHangul => sub { + my $u = shift; + return $u == 0xAE00 ? 0x100 : undef; + } + ); + # All Hangul Syllables except U+AE00 are undefined. + + ok($undefHangul->lt("\x{AE00}", "r")); + ok($undefHangul->gt("\x{AC00}", "r")); + ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); + ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. + ok($undefHangul->lt("\x{AC00}", "\x{B000}")); +} + +##### 52..55 + +{ + my $undefCJK = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => sub { + my $u = shift; + return $u == 0x4E00 ? 0x100 : undef; + } + ); + # All CJK Ideographs except U+4E00 are undefined. + + ok($undefCJK->lt("\x{4E00}", "r")); + ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned + ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); + ok($undefCJK->lt("\x{5000}", "\x{6000}")); +} + +##### 56..60 + +{ + my $cpHangul = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideHangul => sub { shift } + ); + + ok($cpHangul->lt("\x{AC00}", "\x{AC01}")); + ok($cpHangul->lt("\x{AC01}", "\x{D7A3}")); + ok($cpHangul->lt("\x{D7A3}", "r")); + ok($cpHangul->lt("r", "\x{D7A4}")); + ok($cpHangul->lt("\x{D7A3}", "\x{4E00}")); +} + +##### 61..65 + +{ + my $arrayHangul = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideHangul => sub { + my $u = shift; + return [$u, 0x20, 0x2, $u]; + } + ); + + ok($arrayHangul->lt("\x{AC00}", "\x{AC01}")); + ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}")); + ok($arrayHangul->lt("\x{D7A3}", "r")); + ok($arrayHangul->lt("r", "\x{D7A4}")); + ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}")); +} diff --git a/pod/perldelta.pod b/pod/perldelta.pod index f3459d8..84080f1 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -170,6 +170,12 @@ C<Carp> that expects C<Carp::Heavy> to provide subroutines. =item * +L<CPAN::Meta::Requirements> has been upgraded from version 2.122 to 2.123. + +TODO + +=item * + L<Data::Dumper> has been upgraded from version 2.148 to 2.149. This upgrade is part of a larger change to make the array interface 64-bit safe @@ -230,6 +236,12 @@ nonexistent array elements. =item * +L<List::Util> and L<Scalar::Util> have been upgraded from version 1.31 to 1.32. + +TODO + +=item * + L<Math::BigInt> has been upgraded from version 1.9992 to 1.9993. Cleaned up the L<Math::BigInt> and L<Math::BigFloat> documentation to @@ -292,6 +304,12 @@ nonexistent array elements. =item * +L<Unicode::Collate> has been upgraded from version 0.98 to 0.99. + +TODO + +=item * + L<warnings> has been upgraded from version 1.18 to 1.19. The C<syscalls> warnings category has been added to check for embedded NUL -- Perl5 Master Repository
