[perl.git] branch blead, updated. v5.19.3-289-g202c41c

Steve Hay Sun, 01 Sep 2013 07:42:10 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/202c41cbde95a3d4b1f80be3633fdee05b08fded?hp=6c52f6c9a260e34e5fe12222a09c91d217628a9f>


- Log -----------------------------------------------------------------
commit 202c41cbde95a3d4b1f80be3633fdee05b08fded
Author: Steve Hay <[email protected]>
Date:   Sun Sep 1 15:01:54 2013 +0100

    perldelta - CPAN::Meta::Requirements has been upgraded

M       pod/perldelta.pod

commit 60f577e0304ce0cd93ca30edfeb534713ea7ffd9
Author: Steve Hay <[email protected]>
Date:   Sun Sep 1 14:59:01 2013 +0100

    Upgrade Unicode::Collate from version 0.98 to 0.99

M       Porting/Maintainers.pl
M       cpan/Unicode-Collate/Changes
M       cpan/Unicode-Collate/Collate.pm
M       cpan/Unicode-Collate/Collate.xs
M       cpan/Unicode-Collate/README
M       cpan/Unicode-Collate/mkheader
M       cpan/Unicode-Collate/t/illegal.t
M       cpan/Unicode-Collate/t/override.t
M       pod/perldelta.pod

commit ad434879973009b368013b6390fb5691800a87bb
Author: Steve Hay <[email protected]>
Date:   Sun Sep 1 14:57:33 2013 +0100

    Upgrade Scalar-List-Utils from version 1.31 to 1.32

M       Porting/Maintainers.pl
M       cpan/List-Util/Changes
M       cpan/List-Util/ListUtil.xs
M       cpan/List-Util/lib/List/Util.pm
M       cpan/List-Util/lib/List/Util/XS.pm
M       cpan/List-Util/lib/Scalar/Util.pm
M       cpan/List-Util/t/readonly.t
M       pod/perldelta.pod
-----------------------------------------------------------------------

Summary of changes:
 Porting/Maintainers.pl             |   4 +-
 cpan/List-Util/Changes             |   7 +
 cpan/List-Util/ListUtil.xs         |   5 +-
 cpan/List-Util/lib/List/Util.pm    |   2 +-
 cpan/List-Util/lib/List/Util/XS.pm |   2 +-
 cpan/List-Util/lib/Scalar/Util.pm  |  36 ++---
 cpan/List-Util/t/readonly.t        |   4 +-
 cpan/Unicode-Collate/Changes       |   6 +
 cpan/Unicode-Collate/Collate.pm    | 108 +++++++++----
 cpan/Unicode-Collate/Collate.xs    |  18 +--
 cpan/Unicode-Collate/README        |   2 +-
 cpan/Unicode-Collate/mkheader      |   8 +-
 cpan/Unicode-Collate/t/illegal.t   |  50 +++++-
 cpan/Unicode-Collate/t/override.t  | 309 ++++++++++++++++++++++---------------
 pod/perldelta.pod                  |  18 +++
 15 files changed, 379 insertions(+), 200 deletions(-)

diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index 1bc5dea..0d799f6 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -1546,7 +1546,7 @@ use File::Glob qw(:case);
 
     'Scalar-List-Utils' => {
         'MAINTAINER'   => 'gbarr',
-        'DISTRIBUTION' => 'PEVANS/Scalar-List-Utils-1.31.tar.gz',
+        'DISTRIBUTION' => 'PEVANS/Scalar-List-Utils-1.32.tar.gz',
         'FILES'    => q[cpan/List-Util],
         'EXCLUDED' => [
             qr{^inc/Module/},
@@ -1916,7 +1916,7 @@ use File::Glob qw(:case);
 
     'Unicode::Collate' => {
         'MAINTAINER'   => 'sadahiro',
-        'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.98.tar.gz',
+        'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.99.tar.gz',
         'FILES'        => q[cpan/Unicode-Collate],
         'EXCLUDED'     => [
             qr{N$},
diff --git a/cpan/List-Util/Changes b/cpan/List-Util/Changes
index d7088a1..2c26c70 100644
--- a/cpan/List-Util/Changes
+++ b/cpan/List-Util/Changes
@@ -1,3 +1,10 @@
+1.32 -- Sun Aug 31 23:48 UTC 2013
+
+  * Skip pairmap()'s MULTICALL implementation 5.8.9 / 5.10.0 as it doesn't
+    work (RT87857)
+  * Comment on the fact that package "0" is defined but false (RT88201)
+  * TODO test in t/readonly.t now passes since 5.19.3 (RT88223)
+
 1.31 -- Wed Aug 14 20:38 UTC 2013
 
   * Bugfix pairmap to return list length in scalar context
diff --git a/cpan/List-Util/ListUtil.xs b/cpan/List-Util/ListUtil.xs
index 7448a8f..c89bd57 100644
--- a/cpan/List-Util/ListUtil.xs
+++ b/cpan/List-Util/ListUtil.xs
@@ -529,7 +529,10 @@ PPCODE:
     bgv = gv_fetchpv("b", GV_ADD, SVt_PV);
     SAVESPTR(GvSV(agv));
     SAVESPTR(GvSV(bgv));
-#ifdef dMULTICALL
+/* This MULTICALL-based code appears to fail on perl 5.10.0 and 5.8.9
+ * Skip it on those versions (RT#87857)
+ */
+#if defined(dMULTICALL) && (PERL_BCDVERSION > 0x5010000 || PERL_BCDVERSION < 
0x5008009)
     if(!CvISXSUB(cv)) {
        // Since MULTICALL is about to move it
        SV **stack = PL_stack_base + ax;
diff --git a/cpan/List-Util/lib/List/Util.pm b/cpan/List-Util/lib/List/Util.pm
index f01bff2..042ef14 100644
--- a/cpan/List-Util/lib/List/Util.pm
+++ b/cpan/List-Util/lib/List/Util.pm
@@ -13,7 +13,7 @@ require Exporter;
 
 our @ISA        = qw(Exporter);
 our @EXPORT_OK  = qw(first min max minstr maxstr reduce sum sum0 shuffle 
pairmap pairgrep pairfirst pairs pairkeys pairvalues);
-our $VERSION    = "1.31";
+our $VERSION    = "1.32";
 our $XS_VERSION = $VERSION;
 $VERSION    = eval $VERSION;
 
diff --git a/cpan/List-Util/lib/List/Util/XS.pm 
b/cpan/List-Util/lib/List/Util/XS.pm
index bb38ab2..d9916bd 100644
--- a/cpan/List-Util/lib/List/Util/XS.pm
+++ b/cpan/List-Util/lib/List/Util/XS.pm
@@ -2,7 +2,7 @@ package List::Util::XS;
 use strict;
 use List::Util;
 
-our $VERSION = "1.31";       # FIXUP
+our $VERSION = "1.32";       # FIXUP
 $VERSION = eval $VERSION;    # FIXUP
 
 1;
diff --git a/cpan/List-Util/lib/Scalar/Util.pm 
b/cpan/List-Util/lib/Scalar/Util.pm
index d10bbaa..7101c98 100644
--- a/cpan/List-Util/lib/Scalar/Util.pm
+++ b/cpan/List-Util/lib/Scalar/Util.pm
@@ -26,7 +26,7 @@ our @EXPORT_OK = qw(
   tainted
   weaken
 );
-our $VERSION    = "1.31";
+our $VERSION    = "1.32";
 $VERSION   = eval $VERSION;
 
 our @EXPORT_FAIL;
@@ -80,9 +80,7 @@ so small such that being individual extensions would be 
wasteful.
 By default C<Scalar::Util> does not export any subroutines. The
 subroutines defined are
 
-=over 4
-
-=item blessed EXPR
+=head2 blessed EXPR
 
 If EXPR evaluates to a blessed reference the name of the package
 that it is blessed into is returned. Otherwise C<undef> is returned.
@@ -96,7 +94,11 @@ that it is blessed into is returned. Otherwise C<undef> is 
returned.
    $obj    = bless [], "Foo";
    $class  = blessed $obj;              # "Foo"
 
-=item dualvar NUM, STRING
+Take care when using this function simply as a truth test (such as in
+C<if(blessed $ref)...>) because the package name C<"0"> is defined yet
+false.
+
+=head2 dualvar NUM, STRING
 
 Returns a scalar that has the value NUM in a numeric context and the
 value STRING in a string context.
@@ -105,7 +107,7 @@ value STRING in a string context.
     $num = $foo + 2;                    # 12
     $str = $foo . " world";             # Hello world
 
-=item isdual EXPR
+=head2 isdual EXPR
 
 If EXPR is a scalar that is a dualvar, the result is true.
 
@@ -132,7 +134,7 @@ You can capture its numeric and string content using:
     $err = dualvar $!, $!;
     $dual = isdual($err);               # true
 
-=item isvstring EXPR
+=head2 isvstring EXPR
 
 If EXPR is a scalar which was coded as a vstring the result is true.
 
@@ -140,12 +142,12 @@ If EXPR is a scalar which was coded as a vstring the 
result is true.
     $fmt  = isvstring($vs) ? "%vd" : "%s"; #true
     printf($fmt,$vs);
 
-=item looks_like_number EXPR
+=head2 looks_like_number EXPR
 
 Returns true if perl thinks EXPR is a number. See
 L<perlapi/looks_like_number>.
 
-=item openhandle FH
+=head2 openhandle FH
 
 Returns FH if FH may be used as a filehandle and is open, or FH is a tied
 handle. Otherwise C<undef> is returned.
@@ -155,7 +157,7 @@ handle. Otherwise C<undef> is returned.
     $fh = openhandle(*NOTOPEN);         # undef
     $fh = openhandle("scalar");         # undef
 
-=item readonly SCALAR
+=head2 readonly SCALAR
 
 Returns true if SCALAR is readonly.
 
@@ -164,7 +166,7 @@ Returns true if SCALAR is readonly.
     $readonly = foo($bar);              # false
     $readonly = foo(0);                 # true
 
-=item refaddr EXPR
+=head2 refaddr EXPR
 
 If EXPR evaluates to a reference the internal memory address of
 the referenced value is returned. Otherwise C<undef> is returned.
@@ -176,7 +178,7 @@ the referenced value is returned. Otherwise C<undef> is 
returned.
     $obj  = bless {}, "Foo";
     $addr = refaddr $obj;               # eg 88123488
 
-=item reftype EXPR
+=head2 reftype EXPR
 
 If EXPR evaluates to a reference the type of the variable referenced
 is returned. Otherwise C<undef> is returned.
@@ -188,21 +190,21 @@ is returned. Otherwise C<undef> is returned.
     $obj  = bless {}, "Foo";
     $type = reftype $obj;               # HASH
 
-=item set_prototype CODEREF, PROTOTYPE
+=head2 set_prototype CODEREF, PROTOTYPE
 
 Sets the prototype of the given function, or deletes it if PROTOTYPE is
 undef. Returns the CODEREF.
 
     set_prototype \&foo, '$$';
 
-=item tainted EXPR
+=head2 tainted EXPR
 
 Return true if the result of EXPR is tainted
 
     $taint = tainted("constant");       # false
     $taint = tainted($ENV{PWD});        # true if running under -T
 
-=item weaken REF
+=head2 weaken REF
 
 REF will be turned into a weak reference. This means that it will not
 hold a reference count on the object it references. Also when the reference
@@ -237,7 +239,7 @@ references to objects will be strong, causing the remaining 
objects to never
 be destroyed because there is now always a strong reference to them in the
 @object array.
 
-=item isweak EXPR
+=head2 isweak EXPR
 
 If EXPR is a scalar which is a weak reference the result is true.
 
@@ -251,8 +253,6 @@ B<NOTE>: Copying a weak reference creates a normal, strong, 
reference.
     $copy = $ref;
     $weak = isweak($copy);              # false
 
-=back
-
 =head1 DIAGNOSTICS
 
 Module use may give one of the following errors during import.
diff --git a/cpan/List-Util/t/readonly.t b/cpan/List-Util/t/readonly.t
index 42ed3d8..91385fd 100644
--- a/cpan/List-Util/t/readonly.t
+++ b/cpan/List-Util/t/readonly.t
@@ -45,7 +45,9 @@ sub try
 
 $var = 123;
 {
-    local $TODO = $Config::Config{useithreads} ? "doesn't work with threads" : 
undef;
+    # This used not to work with ithreads, but seems to be working since 5.19.3
+    local $TODO = ( $Config::Config{useithreads} && $] < 5.019003 ) ?
+      "doesn't work with threads" : undef;
     ok( try ("abc"), 'reference a constant in a sub');
 }
 ok( !try ($var), 'reference a non-constant in a sub');
diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes
index 0fd0908..b8fa1a9e 100644
--- a/cpan/Unicode-Collate/Changes
+++ b/cpan/Unicode-Collate/Changes
@@ -1,5 +1,11 @@
 Revision history for Perl module Unicode::Collate.
 
+0.99  Sun Sep  1 12:46:14 2013
+    - by default out-of-range values are treated as if it were U+FFFD
+      when UCA_Version >= 22.
+    - supported overriding out-of-range values (see 'overrideOut' in POD).
+    - modified tests: override.t, illegal.t in t.
+
 0.98  Sat Jun 15 19:44:06 2013
     - typo (see [rt.cpan.org #85655] typo fixes)
 
diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm
index 388da67..48840ec 100644
--- a/cpan/Unicode-Collate/Collate.pm
+++ b/cpan/Unicode-Collate/Collate.pm
@@ -14,7 +14,7 @@ use File::Spec;
 
 no warnings 'utf8';
 
-our $VERSION = '0.98';
+our $VERSION = '0.99';
 our $PACKAGE = __PACKAGE__;
 
 ### begin XS only ###
@@ -106,7 +106,7 @@ my (%VariableOK);
 our @ChangeOK = qw/
     alternate backwards level normalization rearrange
     katakana_before_hiragana upper_before_lower ignore_level2
-    overrideHangul overrideCJK preprocess UCA_Version
+    overrideCJK overrideHangul overrideOut preprocess UCA_Version
     hangul_terminator variable identical highestFFFF minimalFFFE
   /;
 
@@ -497,7 +497,7 @@ sub splitEnt
 
     # remove a code point marked as a completely ignorable.
     for (my $i = 0; $i < @src; $i++) {
-       if (_isIllegal($src[$i]) || $vers <= 20 && _isNonchar($src[$i])) {
+       if ($vers <= 20 && _isIllegal($src[$i])) {
            $src[$i] = undef;
        } elsif ($ver9) {
            $src[$i] = undef if $map->{ $src[$i] }
@@ -621,25 +621,27 @@ sub getWt
     my $u    = shift;
     my $map  = $self->{mapping};
     my $der  = $self->{derivCode};
+    my $out  = $self->{overrideOut};
     my $uXS  = $self->{__useXS}; ### XS only
 
     return if !defined $u;
     return $self->varCE($HighestVCE) if $u eq 0xFFFF && $self->{highestFFFF};
     return $self->varCE($minimalVCE) if $u eq 0xFFFE && $self->{minimalFFFE};
-    return map($self->varCE($_), @{ $map->{$u} }) if $map->{$u};
+    $u = 0xFFFD if $u !~ /;/ && 0x10FFFF < $u && !$out;
+
+    my @ce;
+    if ($map->{$u}) {
+       @ce = @{ $map->{$u} }; # $u may be a contraction
 ### begin XS only ###
-    return map($self->varCE($_), _fetch_simple($u))
-       if $uXS && _exists_simple($u);
+    } elsif ($uXS && _exists_simple($u)) {
+       @ce = _fetch_simple($u);
 ### end XS only ###
-
-    # JCPS must not be a contraction, then it's a code point.
-    if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
+    } elsif (Hangul_SIni <= $u && $u <= Hangul_SFin) {
        my $hang = $self->{overrideHangul};
-       my @hangulCE;
        if ($hang) {
-           @hangulCE = map _pack_override($_, $u, $der), $hang->($u);
+           @ce = map _pack_override($_, $u, $der), $hang->($u);
        } elsif (!defined $hang) {
-           @hangulCE = $der->($u);
+           @ce = $der->($u);
        } else {
            my $max  = $self->{maxlength};
            my @decH = _decompHangul($u);
@@ -665,25 +667,26 @@ sub getWt
                }
            }
 
-           @hangulCE = map({
+           @ce = map({
                    $map->{$_} ? @{ $map->{$_} } :
                $uXS && _exists_simple($_) ? _fetch_simple($_) : ### XS only
                    $der->($_);
                } @decH);
        }
-       return map $self->varCE($_), @hangulCE;
+    } elsif ($out && 0x10FFFF < $u) {
+       @ce = map _pack_override($_, $u, $der), $out->($u);
     } else {
        my $cjk  = $self->{overrideCJK};
        my $vers = $self->{UCA_Version};
        if ($cjk && _isUIdeo($u, $vers)) {
-           my @cjkCE = map _pack_override($_, $u, $der), $cjk->($u);
-           return map $self->varCE($_), @cjkCE;
-       }
-       if ($vers == 8 && defined $cjk && _isUIdeo($u, 0)) {
-           return map $self->varCE($_), _uideoCE_8($u);
+           @ce = map _pack_override($_, $u, $der), $cjk->($u);
+       } elsif ($vers == 8 && defined $cjk && _isUIdeo($u, 0)) {
+           @ce = _uideoCE_8($u);
+       } else {
+           @ce = $der->($u);
        }
-       return map $self->varCE($_), $der->($u);
     }
+    return map $self->varCE($_), @ce;
 }
 
 
@@ -1095,6 +1098,9 @@ The following revisions are supported.  The default is 26.
 * Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden
 since C<UCA_Version> 22.
 
+* Out-of-range codepoints (greater than U+10FFFF) are not ignored,
+and can be overridden since C<UCA_Version> 22.
+
 * Fully ignorable characters were ignored, and would not interrupt
 contractions with C<UCA_Version> 9 and 11.
 
@@ -1216,7 +1222,8 @@ almost, but the latter has a problem that you should know 
which letter is
 next to C<c>. For a certain language where C<ch> as the next letter,
 C<"abch"> is greater than C<"abc\x{FFFF}">, but less than C<"abd">.
 
-Note: This is equivalent to C<entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]'>.
+Note:
+This is equivalent to C<(entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]')>.
 Any other character than C<U+FFFF> can be tailored by C<entry>.
 
 =item identical
@@ -1325,7 +1332,8 @@ then C<$a2> and C<$b2> at level 1, as followed.
         "b\x{FFFE}aaa"
         "bbb\x{FFFE}a"
 
-Note: This is equivalent to C<entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]'>.
+Note:
+This is equivalent to C<(entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]')>.
 Any other character than C<U+FFFE> can be tailored by C<entry>.
 
 =item normalization
@@ -1425,10 +1433,16 @@ ex. ignores all CJK unified ideographs.
    # where ->eq("Pe\x{4E00}rl", "Perl") is true
    # as U+4E00 is a CJK unified ideograph and to be ignorable.
 
-If C<undef> is passed explicitly as the value for this key,
-weights for CJK unified ideographs are treated as undefined.
+If a false value (including C<undef>) is passed, C<overrideCJK>
+has no effect.
+C<$Collator-E<gt>change(overrideCJK =E<gt> 0)> resets the old one.
+
 But assignment of weight for CJK unified ideographs
 in C<table> or C<entry> is still valid.
+If C<undef> is passed explicitly as the value for this key,
+weights for CJK unified ideographs are treated as undefined.
+However when C<UCA_Version> E<gt> 8, C<(overrideCJK =E<gt> undef)>
+has no special meaning.
 
 B<Note:> In addition to them, 12 CJK compatibility ideographs (C<U+FA0E>,
 C<U+FA0F>, C<U+FA11>, C<U+FA13>, C<U+FA14>, C<U+FA1F>, C<U+FA21>, C<U+FA23>,
@@ -1452,12 +1466,54 @@ NFD and NFKD are not appropriate, since NFD and NFKD 
will decompose
 Hangul syllables before overriding. FCD may decompose Hangul syllables
 as the case may be.
 
+If a false value (but not C<undef>) is passed, C<overrideHangul>
+has no effect.
+C<$Collator-E<gt>change(overrideHangul =E<gt> 0)> resets the old one.
+
 If C<undef> is passed explicitly as the value for this key,
 weight for Hangul syllables is treated as undefined
 without decomposition into Hangul Jamo.
 But definition of weight for Hangul syllables
 in C<table> or C<entry> is still valid.
 
+=item overrideOut
+
+-- see 7.1.1 Handling Ill-Formed Code Unit Sequences, UTS #10.
+
+Perl seems to allow out-of-range values (greater than 0x10FFFF).
+By default, out-of-range values are replaced with C<U+FFFD>
+(REPLACEMENT CHARACTER) when C<UCA_Version> E<gt>= 22,
+or ignored when C<UCA_Version> E<lt>= 20.
+
+When C<UCA_Version> E<gt>= 22, the weights of out-of-range values
+can be overridden. Though C<table> or C<entry> are available for them,
+out-of-range values are too many.
+
+C<overrideOut> can perform it algorithmically.
+This parameter works like C<overrideCJK>, so see there for examples.
+
+ex. ignores all out-of-range values.
+
+  overrideOut => sub {()}, # CODEREF returning empty list
+
+If a false value (including C<undef>) is passed, C<overrideOut>
+has no effect.
+C<$Collator-E<gt>change(overrideOut =E<gt> 0)> resets the old one.
+
+UCA recommends that out-of-range values should not be ignored for security
+reasons. Say, C<"pe\x{110000}rl"> should not be equal to C<"perl">.
+However, C<U+FFFD> is wrongly mapped to a variable collation element
+in DUCET for Unicode 6.0.0 to 6.2.0, that means out-of-range values will be
+ignored when C<variable> isn't C<Non-ignorable>.
+
+Unicode 6.3.0 will correct the mapping of C<U+FFFD>.
+see L<http://www.unicode.org/reports/tr10/tr10-27.html#Trailing_Weights>.
+Such a correction is reproduced by this.
+
+  overrideOut => sub { 0xFFFD }, # CODEREF returning a very large integer
+
+Since Unicode 6.3.0, C<(overrideOut =E<gt> sub { 0xFFFD })> may be 
unnecesssary.
+
 =item preprocess
 
 -- see 5.4 Preprocessing, UTS #10.
@@ -1559,7 +1615,7 @@ may be better to avoid namespace conflict.
 
 B<NOTE>: When XSUB is used, the DUCET is compiled on building this
 module, and it may save time at the run time.
-Explicit saying C<table =E<gt> 'allkeys.txt'> (or using another table),
+Explicit saying C<(table =E<gt> 'allkeys.txt')>, or using another table,
 or using C<ignoreChar>, C<ignoreName>, C<undefChar>, C<undefName> or
 C<rewrite> will prevent this module from using the compiled DUCET.
 
@@ -1934,7 +1990,7 @@ module (see L<Unicode::Normalize>).
 
 If you need not it (say, in the case when you need not
 handle any combining characters),
-assign C<normalization =E<gt> undef> explicitly.
+assign C<(normalization =E<gt> undef)> explicitly.
 
 -- see 6.5 Avoiding Normalization, UTS #10.
 
diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs
index 27920ed..c339cc7 100644
--- a/cpan/Unicode-Collate/Collate.xs
+++ b/cpan/Unicode-Collate/Collate.xs
@@ -210,22 +210,8 @@ _isIllegal (sv)
        XSRETURN_YES;
     uv = SvUVX(sv);
     RETVAL = boolSV(
-          0x10FFFF < uv                   /* out of range */
-    );
-OUTPUT:
-    RETVAL
-
-
-SV*
-_isNonchar (sv)
-    SV* sv
-  PREINIT:
-    UV uv;
-  CODE:
-    /* should be called only if ! _isIllegal(sv). */
-    uv = SvUVX(sv);
-    RETVAL = boolSV(
-          ((uv & 0xFFFE) == 0xFFFE)       /* ??FFF[EF] (cf. utf8.c) */
+          0x10FFFF < uv                   /* out of range */
+       || ((uv & 0xFFFE) == 0xFFFE)       /* ??FFF[EF] (cf. utf8.c) */
        || (0xD800 <= uv && uv <= 0xDFFF)  /* unpaired surrogates */
        || (0xFDD0 <= uv && uv <= 0xFDEF)  /* other non-characters */
     );
diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README
index 141de8a..1de2709 100644
--- a/cpan/Unicode-Collate/README
+++ b/cpan/Unicode-Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.98
+Unicode/Collate version 0.99
 ===============================
 
 NAME
diff --git a/cpan/Unicode-Collate/mkheader b/cpan/Unicode-Collate/mkheader
index c92d7c9..c3d0ebf 100644
--- a/cpan/Unicode-Collate/mkheader
+++ b/cpan/Unicode-Collate/mkheader
@@ -135,11 +135,11 @@ foreach my $tbl (@tripletable) {
     my $null = $tbl->{null};
     my $init = $tbl->{init};
 
-    open FH, ">$file" or croak "$PACKAGE: $file can't be made";
-    binmode FH; select FH;
+    open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
+    binmode $fh_h; select $fh_h;
     my %val;
 
-    print FH << 'EOF';
+    print << 'EOF';
 /*
  * This file is auto-generated by mkheader.
  * Any changes here will be lost!
@@ -189,7 +189,7 @@ EOF
        print "\n";
     }
     print "};\n\n";
-    close FH;
+    close $fh_h;
 }
 
 1;
diff --git a/cpan/Unicode-Collate/t/illegal.t b/cpan/Unicode-Collate/t/illegal.t
index 5d7999d..7fa81e4 100644
--- a/cpan/Unicode-Collate/t/illegal.t
+++ b/cpan/Unicode-Collate/t/illegal.t
@@ -25,7 +25,7 @@ BEGIN {
 
 use strict;
 use warnings;
-BEGIN { $| = 1; print "1..65\n"; }
+BEGIN { $| = 1; print "1..127\n"; } # 77 + 5 x @Versions
 my $count = 0;
 sub ok ($;$) {
     my $p = my $r = shift;
@@ -125,7 +125,7 @@ ok($nonch->lt("", "\x{FDD0}"));
 ok($nonch->lt("", "\x{FDEF}"));
 ok($nonch->lt("", "\x02"));
 ok($nonch->lt("", "\x{10FFFF}"));
-ok($nonch->eq("", "\x{110000}"));
+ok($nonch->lt("", "\x{110000}"));
 
 # 38..47
 ok($nonch->lt("\x00",     "\x01"));
@@ -137,7 +137,7 @@ ok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
 ok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
 ok($nonch->lt("\x{FDEF}", "\x02"));
 ok($nonch->lt("\x02",     "\x{10FFFF}"));
-ok($nonch->gt("\x{10FFFF}", "\x{110000}"));
+ok($nonch->lt("\x{10FFFF}", "\x{110000}"));
 
 # 48..51
 ok($nonch->lt("A",   "A\x{FFFF}"));
@@ -178,3 +178,47 @@ for my $ret (@ret) {
     ok($match eq $ret);
 }
 
+##################
+
+my $out = Unicode::Collate->new(
+    level => 1,
+    table => undef,
+    normalization => undef,
+    overrideOut => sub { 0xFFFD },
+);
+
+my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26);
+
+for my $v (@Versions) {
+    $out->change(UCA_Version => $v);
+    ok($out->cmp('',           "\x{10FFFF}") == ($v >= 22 ? -1 : 0));
+    ok($out->cmp('',           "\x{110000}") == ($v >= 22 ? -1 : 0));
+    ok($out->cmp('ABC',        "\x{110000}") == ($v >= 22 ? -1 : 1));
+    ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1));
+    ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ?  0 : 0));
+}
+
+# x+66..x+77
+ok($out->lt('ABC',      "\x{123456}"));
+ok($out->lt("\x{FFFD}", "\x{123456}"));
+
+$out->change(overrideOut => sub {()});
+
+ok($out->eq('',         "\x{123456}"));
+ok($out->gt('ABC',      "\x{123456}"));
+ok($out->gt("\x{FFFD}", "\x{123456}"));
+
+$out->change(overrideOut => undef);
+ok($out->lt('',         "\x{123456}"));
+ok($out->eq("\x{FFFD}", "\x{123456}"));
+
+$out->change(overrideOut => sub { 0xFFFD });
+
+ok($out->lt('',         "\x{123456}"));
+ok($out->lt('ABC',      "\x{123456}"));
+ok($out->lt("\x{FFFD}", "\x{123456}"));
+
+$out->change(overrideOut => 0);
+ok($out->lt('',         "\x{123456}"));
+ok($out->eq("\x{FFFD}", "\x{123456}"));
+
diff --git a/cpan/Unicode-Collate/t/override.t 
b/cpan/Unicode-Collate/t/override.t
index bc6a70c..025a369 100644
--- a/cpan/Unicode-Collate/t/override.t
+++ b/cpan/Unicode-Collate/t/override.t
@@ -13,7 +13,7 @@ BEGIN {
 
 use strict;
 use warnings;
-BEGIN { $| = 1; print "1..35\n"; }
+BEGIN { $| = 1; print "1..65\n"; }
 my $count = 0;
 sub ok ($;$) {
     my $p = my $r = shift;
@@ -30,129 +30,186 @@ ok(1);
 
 #########################
 
-##### 2..6
-
-my $all_undef_8 = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideCJK => undef,
-  overrideHangul => undef,
-  UCA_Version => 8,
-);
-
-# All in the Unicode code point order.
-# No hangul decomposition.
-
-ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
-ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
-ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
-ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
-ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
-
-
-##### 7..11
-
-my $all_undef_9 = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideCJK => undef,
-  overrideHangul => undef,
-  UCA_Version => 9,
-);
-
-# CJK Ideo. < CJK ext A/B < Others.
-# No hangul decomposition.
-
-ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
-ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
-ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
-ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
-ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
-
-##### 12..16
-
-my $ignoreHangul = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideHangul => sub {()},
-  entry => <<'ENTRIES',
-AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL
-ENTRIES
-);
-
-# All Hangul Syllables except U+AE00 are ignored.
-
-ok($ignoreHangul->eq("\x{AC00}", ""));
-ok($ignoreHangul->lt("\x{AC00}", "\0"));
-ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
-ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
-ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
-
-##### 17..21
-
-my $undefHangul = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideHangul => sub {
-    my $u = shift;
-    return $u == 0xAE00 ? 0x100 : undef;
-  }
-);
-
-# All Hangul Syllables except U+AE00 are undefined.
-
-ok($undefHangul->lt("\x{AE00}", "r"));
-ok($undefHangul->gt("\x{AC00}", "r"));
-ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
-ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
-ok($undefHangul->lt("\x{AC00}", "\x{B000}"));
-
-##### 22..25
-
-my $undefCJK = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideCJK => sub {
-    my $u = shift;
-    return $u == 0x4E00 ? 0x100 : undef;
-  }
-);
-
-# All CJK Ideographs except U+4E00 are undefined.
-
-ok($undefCJK->lt("\x{4E00}", "r"));
-ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned
-ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); # 'r' is unassigned.
-ok($undefCJK->lt("\x{5000}", "\x{6000}"));
-
-##### 26..30
-
-my $cpHangul = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideHangul => sub { shift }
-);
-
-ok($cpHangul->lt("\x{AC00}", "\x{AC01}"));
-ok($cpHangul->lt("\x{AC01}", "\x{D7A3}"));
-ok($cpHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned.
-ok($cpHangul->lt("r", "\x{D7A4}"));
-ok($cpHangul->lt("\x{D7A3}", "\x{4E00}"));
-
-##### 31..35
-
-my $arrayHangul = Unicode::Collate->new(
-  table => undef,
-  normalization => undef,
-  overrideHangul => sub {
-    my $u = shift;
-    return [$u, 0x20, 0x2, $u];
-  }
-);
-
-ok($arrayHangul->lt("\x{AC00}", "\x{AC01}"));
-ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}"));
-ok($arrayHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned.
-ok($arrayHangul->lt("r", "\x{D7A4}"));
-ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}"));
+##### 2..31
+
+{
+    my $all_undef_8 = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideCJK => undef,
+       overrideHangul => undef,
+       UCA_Version => 8,
+    );
+    # All in the Unicode code point order.
+    # No hangul decomposition.
+
+    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
+    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
+    ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
+    # U+ABFF: not assigned
+
+    # a hangul syllable is decomposed into jamo.
+    $all_undef_8->change(overrideHangul => 0);
+    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
+    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
+    ok($all_undef_8->gt("\x{4E00}", "\x{AC00}"));
+    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
+
+    # CJK defined < Jamo undefined
+    $all_undef_8->change(overrideCJK => 0);
+    ok($all_undef_8->gt("\x{1100}", "\x{3402}"));
+    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
+    ok($all_undef_8->gt("\x{4DFF}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
+    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
+
+    # CJK undefined > Jamo undefined
+    $all_undef_8->change(overrideCJK => undef);
+    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
+    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
+    ok($all_undef_8->gt("\x{4E00}", "\x{AC00}"));
+    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
+
+    $all_undef_8->change(overrideHangul => undef);
+    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
+    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
+    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
+    ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
+}
+
+##### 32..38
+
+{
+    my $all_undef_9 = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideCJK => undef,
+       overrideHangul => undef,
+       UCA_Version => 9,
+    );
+    # CJK Ideo. < CJK ext A/B < Others.
+    # No hangul decomposition.
+
+    ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
+    ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
+    ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
+    ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}"));
+    # U+ABFF: not assigned
+
+    # a hangul syllable is decomposed into jamo.
+    $all_undef_9->change(overrideHangul => 0);
+    ok($all_undef_9->eq("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($all_undef_9->lt("\x{AC00}", "\x{ABFF}"));
+}
+
+##### 39..46
+
+{
+    my $ignoreHangul = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideHangul => sub {()},
+       entry => 'AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL',
+    );
+    # All Hangul Syllables except U+AE00 are ignored.
+
+    ok($ignoreHangul->eq("\x{AC00}", ""));
+    ok($ignoreHangul->lt("\x{AC00}", "\0"));
+    ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
+    ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not 
ignored.
+    ok($ignoreHangul->eq("Pe\x{AC00}rl", "Perl"));
+    ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl"));
+    # 'r' is unassigned.
+
+    $ignoreHangul->change(overrideHangul => 0);
+    ok($ignoreHangul->eq("\x{AC00}", "\x{1100}\x{1161}"));
+
+    $ignoreHangul->change(overrideHangul => undef);
+    ok($ignoreHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
+}
+
+##### 47..51
+
+{
+    my $undefHangul = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideHangul => sub {
+           my $u = shift;
+           return $u == 0xAE00 ? 0x100 : undef;
+       }
+    );
+    # All Hangul Syllables except U+AE00 are undefined.
+
+    ok($undefHangul->lt("\x{AE00}", "r"));
+    ok($undefHangul->gt("\x{AC00}", "r"));
+    ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
+    ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
+    ok($undefHangul->lt("\x{AC00}", "\x{B000}"));
+}
+
+##### 52..55
+
+{
+    my $undefCJK = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideCJK => sub {
+           my $u = shift;
+           return $u == 0x4E00 ? 0x100 : undef;
+       }
+    );
+    # All CJK Ideographs except U+4E00 are undefined.
+
+    ok($undefCJK->lt("\x{4E00}", "r"));
+    ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned
+    ok($undefCJK->lt("Pe\x{4E00}rl", "Perl"));
+    ok($undefCJK->lt("\x{5000}", "\x{6000}"));
+}
+
+##### 56..60
+
+{
+    my $cpHangul = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideHangul => sub { shift }
+    );
+
+    ok($cpHangul->lt("\x{AC00}", "\x{AC01}"));
+    ok($cpHangul->lt("\x{AC01}", "\x{D7A3}"));
+    ok($cpHangul->lt("\x{D7A3}", "r"));
+    ok($cpHangul->lt("r", "\x{D7A4}"));
+    ok($cpHangul->lt("\x{D7A3}", "\x{4E00}"));
+}
+
+##### 61..65
+
+{
+    my $arrayHangul = Unicode::Collate->new(
+       table => undef,
+       normalization => undef,
+       overrideHangul => sub {
+           my $u = shift;
+           return [$u, 0x20, 0x2, $u];
+       }
+    );
+
+    ok($arrayHangul->lt("\x{AC00}", "\x{AC01}"));
+    ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}"));
+    ok($arrayHangul->lt("\x{D7A3}", "r"));
+    ok($arrayHangul->lt("r", "\x{D7A4}"));
+    ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}"));
+}
 
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index f3459d8..84080f1 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -170,6 +170,12 @@ C<Carp> that expects C<Carp::Heavy> to provide subroutines.
 
 =item *
 
+L<CPAN::Meta::Requirements> has been upgraded from version 2.122 to 2.123.
+
+TODO
+
+=item *
+
 L<Data::Dumper> has been upgraded from version 2.148 to 2.149.
 
 This upgrade is part of a larger change to make the array interface 64-bit safe
@@ -230,6 +236,12 @@ nonexistent array elements.
 
 =item *
 
+L<List::Util> and L<Scalar::Util> have been upgraded from version 1.31 to 1.32.
+
+TODO
+
+=item *
+
 L<Math::BigInt> has been upgraded from version 1.9992 to 1.9993.
 
 Cleaned up the L<Math::BigInt> and L<Math::BigFloat> documentation to
@@ -292,6 +304,12 @@ nonexistent array elements.
 
 =item *
 
+L<Unicode::Collate> has been upgraded from version 0.98 to 0.99.
+
+TODO
+
+=item *
+
 L<warnings> has been upgraded from version 1.18 to 1.19.
 
 The C<syscalls> warnings category has been added to check for embedded NUL

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.19.3-289-g202c41c

Reply via email to