[perl.git] branch blead, updated. v5.21.0-229-gf11f9c4

Karl Williamson Sat, 31 May 2014 18:10:27 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/f11f9c4c96c7b47e180befa9fb272343566a7167?hp=0ed2b00b2bd6a650391433ad8733983692df43d5>


- Log -----------------------------------------------------------------
commit f11f9c4c96c7b47e180befa9fb272343566a7167
Author: Karl Williamson <[email protected]>
Date:   Sat May 31 17:11:10 2014 -0600

    perlebcdic: Clean-up
    
    There are much simpler ways to do some things than were given.  This
    also makes some clarifications, and removes obsolete text, shortens some
    too long verbatim lines.

M       pod/perlebcdic.pod
M       t/porting/known_pod_issues.dat

commit e30b2da5d01e80840b115621b2c798c01ff426a2
Author: Karl Williamson <[email protected]>
Date:   Sat May 31 17:09:53 2014 -0600

    utfebcdic.h: Add comments

M       utfebcdic.h

commit b985ae615210d15bfa67bddb2118de1c02c21935
Author: Karl Williamson <[email protected]>
Date:   Fri Nov 15 13:40:03 2013 -0700

    PATCH: [perl #120386]: av_len documentation
    
    I think I have incorporated everybody's concerns in this patch.

M       av.c

commit ca3d51ba62f0e2b46d3714c26711c8973a3724bb
Author: Karl Williamson <[email protected]>
Date:   Mon Oct 14 22:58:30 2013 -0600

    lib/utf8.pm: Document unicode_to_native() and inverse

M       lib/utf8.pm

commit 308a4ae118d0c5ca43889a96b89dd0b5be487b5c
Author: Karl Williamson <[email protected]>
Date:   Mon Oct 14 22:38:24 2013 -0600

    lib/utf8.pm: wrap code examples in pod with C<>

M       lib/utf8.pm

commit b6fb4de72917282df5b2b45712d7d3f9f1609943
Author: Karl Williamson <[email protected]>
Date:   Sat May 31 14:27:54 2014 -0600

    lib/locale.t: Add tests, fix test names
    
    It is unfortunate that tests are in two places in this file.  The reason
    is to avoid doing eval's (to avoid any perturbations that might cause)
    but be under the scope of two different locale forms.  This adds text to
    remind maintainers that they should create copies when adding tests, and
    includes the new tests that have previously been added to one place but
    not the other, and includes the new improved test names.

M       lib/locale.t

commit 40205cabcf46648032cafa4d149c2cbf7787179d
Author: Karl Williamson <[email protected]>
Date:   Sat May 31 14:11:35 2014 -0600

    lib/locale.t: Add some tests

M       lib/locale.t

commit 2575405ef7be50228c2f0ecad4d802ee784f6be3
Author: Karl Williamson <[email protected]>
Date:   Sat May 31 14:10:00 2014 -0600

    lib/locale.t: Improve test names; add some names

M       lib/locale.t

commit 7c641603528c1bd9ae833c58a4f65b49ae8d4726
Author: Karl Williamson <[email protected]>
Date:   Sat May 31 12:45:41 2014 -0600

    sv.c: Add comment

M       sv.c
-----------------------------------------------------------------------

Summary of changes:
 av.c                           |  11 +-
 lib/locale.t                   | 525 +++++++++++++++++++++++++++--------------
 lib/utf8.pm                    |  59 ++++-
 pod/perlebcdic.pod             | 122 +++-------
 sv.c                           |   3 +
 t/porting/known_pod_issues.dat |   1 -
 utfebcdic.h                    |   2 +
 7 files changed, 445 insertions(+), 278 deletions(-)

diff --git a/av.c b/av.c
index c08d2c2..0602525 100644
--- a/av.c
+++ b/av.c
@@ -760,11 +760,16 @@ The Perl equivalent for this is C<$#myarray>.
 
 (A slightly shorter form is C<av_tindex>.)
 
+=for apidoc av_tindex
+
+Same as L</av_top_index>.
+
 =for apidoc av_len
 
-Same as L</av_top_index>.  Returns the highest index in the array.  Note that 
the
-return value is +1 what its name implies it returns; and hence differs in
-meaning from what the similarly named L</sv_len> returns.
+Same as L</av_top_index>.  Note that, unlike what the name implies, it returns
+the highest index in the array, so to get the size of the array you need to use
+S<C<av_len(av) + 1>>.  This is unlike L</sv_len>, which returns what you would
+expect.
 
 =cut
 */
diff --git a/lib/locale.t b/lib/locale.t
index eea92e0..5e839bc 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -67,7 +67,7 @@ sub debugf {
     printf @_ if $debug;
 }
 
-$a = 'abc %';
+$a = 'abc %9';
 
 my $test_num = 0;
 
@@ -94,13 +94,15 @@ sub is_tainted { # hello, camel two.
 
 sub check_taint ($;$) {
     my $message_tail = $_[1] // "";
-    $message_tail = ": $message_tail" if $message_tail;
+
+    # Extra blanks are so aligns with taint_not output
+    $message_tail = ":     $message_tail" if $message_tail;
     ok is_tainted($_[0]), "verify that is tainted$message_tail";
 }
 
 sub check_taint_not ($;$) {
     my $message_tail = $_[1] // "";
-    $message_tail = ": $message_tail" if $message_tail;
+    $message_tail = ":  $message_tail" if $message_tail;
     ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
 }
 
@@ -110,33 +112,36 @@ check_taint_not   $&, "not tainted outside 'use locale'";
 
 use locale;    # engage locale and therefore locale taint.
 
-check_taint_not   $a, "\t\$a";
-
-check_taint       uc($a);
-check_taint       "\U$a";
-check_taint       ucfirst($a);
-check_taint       "\u$a";
-check_taint       lc($a);
-check_taint       fc($a);
-check_taint       "\L$a";
-check_taint       "\F$a";
-check_taint       lcfirst($a);
-check_taint       "\l$a";
-
-check_taint_not  sprintf('%e', 123.456);
-check_taint_not  sprintf('%f', 123.456);
-check_taint_not  sprintf('%g', 123.456);
-check_taint_not  sprintf('%d', 123.456);
-check_taint_not  sprintf('%x', 123.456);
+# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
+# ":notcharacters"
+
+check_taint_not   $a, '$a';
+
+check_taint       uc($a), 'uc($a)';
+check_taint       "\U$a", '"\U$a"';
+check_taint       ucfirst($a), 'ucfirst($a)';
+check_taint       "\u$a", '"\u$a"';
+check_taint       lc($a), 'lc($a)';
+check_taint       fc($a), 'fc($a)';
+check_taint       "\L$a", '"\L$a"';
+check_taint       "\F$a", '"\F$a"';
+check_taint       lcfirst($a), 'lcfirst($a)';
+check_taint       "\l$a", '"\l$a"';
+
+check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
+check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
+check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
+check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
+check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
 
 $_ = $a;       # untaint $_
 
 $_ = uc($a);   # taint $_
 
-check_taint      $_, "\t\$_";
+check_taint      $_, '$_ = uc($a)';
 
 /(\w)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(\\w)/ \$&";
+check_taint      $&, "\$& from /(\\w)/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
@@ -144,7 +149,7 @@ check_taint      $1, "\t\$1";
 check_taint_not  $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $&, "\$& from /(.)/";
 check_taint_not  $`, "\t\$`";
 check_taint_not  $', "\t\$'";
 check_taint_not  $+, "\t\$+";
@@ -152,7 +157,7 @@ check_taint_not  $1, "\t\$1";
 check_taint_not  $2, "\t\$2";
 
 /(\W)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(\\W)/ \$&";
+check_taint      $&, "\$& from /(\\W)/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
@@ -160,7 +165,7 @@ check_taint      $1, "\t\$1";
 check_taint_not  $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $&, "\$& from /(.)/";
 check_taint_not  $`, "\t\$`";
 check_taint_not  $', "\t\$'";
 check_taint_not  $+, "\t\$+";
@@ -168,7 +173,7 @@ check_taint_not  $1, "\t\$1";
 check_taint_not  $2, "\t\$2";
 
 /(\s)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(\\s)/ \$&";
+check_taint      $&, "\$& from /(\\s)/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
@@ -176,10 +181,43 @@ check_taint      $1, "\t\$1";
 check_taint_not  $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $&, "\$& from /(.)/";
 
 /(\S)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(\\S)/ \$&";
+check_taint      $&, "\$& from /(\\S)/";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\$& from /(.)/";
+
+"0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
+check_taint      $&, "\$& from /(\\d)/";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\$& from /(.)/";
+
+/(\D)/;        # taint $&, $`, $', $+, $1.
+check_taint      $&, "\$& from /(\\D)/";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\$& from /(.)/";
+
+/([[:alnum:]])/;       # taint $&, $`, $', $+, $1.
+check_taint      $&, "\$& from /([[:alnum:]])/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
@@ -187,39 +225,55 @@ check_taint      $1, "\t\$1";
 check_taint_not  $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $&, "\$& from /(.)/";
+
+/([[:^alnum:]])/;      # taint $&, $`, $', $+, $1.
+check_taint      $&, "\$& from /([[:^alnum:]])/";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 "a" =~ /(a)|(\w)/;     # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(a)|(\\w)/ \$&";
+check_taint      $&, "\$& from /(a)|(\\w)/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
 check_taint      $1, "\t\$1";
-ok($1 eq 'a', ("\t" x 4) . "\$1 is 'a'");
-ok(! defined $2, ("\t" x 4) . "\$2 is undefined");
+ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
+ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
 check_taint_not  $2, "\t\$2";
 check_taint_not  $3, "\t\$3";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $&, "\$& from /(.)/";
 
 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;   # no 
tainting because no locale dependence
-check_taint_not      $&, "\t/(\\N{CYRILLIC CAPITAL LETTER A})/i \$&";
+check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
 check_taint_not      $`, "\t\$`";
 check_taint_not      $', "\t\$'";
 check_taint_not      $+, "\t\$+";
 check_taint_not      $1, "\t\$1";
-ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\$1 is 'small cyrillic 
a'");
+ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic 
a'");
 check_taint_not      $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/./ \$&";
+check_taint_not  $&, "\$& from /./";
+
+"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;   # taints because depends on 
locale
+check_taint      $&, "\$& from /(\\N{KELVIN SIGN})/i";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not      $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $&, "\$& from /(.)/";
 
 "a:" =~ /(.)\b(.)/;    # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(.)\\b(.)/ \$&";
+check_taint      $&, "\$& from /(.)\\b(.)/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
@@ -228,10 +282,10 @@ check_taint      $2, "\t\$2";
 check_taint_not  $3, "\t\$3";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/./ \$&";
+check_taint_not  $&, "\$& from /./";
 
 "aa" =~ /(.)\B(.)/;    # taint $&, $`, $', $+, $1.
-check_taint      $&, "\t/(.)\\B(.)/ \$&";
+check_taint      $&, "\$& from /(.)\\B(.)/";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
 check_taint      $+, "\t\$+";
@@ -240,26 +294,26 @@ check_taint      $2, "\t\$2";
 check_taint_not  $3, "\t\$3";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/./ \$&";
+check_taint_not  $&, "\$& from /./";
 
 "aaa" =~ /(.).(\1)/i;  # notaint because not locale dependent
-check_taint_not      $&, "\t/(.).(\\1)/ \$&";
+check_taint_not      $&, "\$ & from /(.).(\\1)/";
 check_taint_not      $`, "\t\$`";
 check_taint_not      $', "\t\$'";
 check_taint_not      $+, "\t\$+";
 check_taint_not      $1, "\t\$1";
 check_taint_not      $2, "\t\$2";
-check_taint_not  $3, "\t\$3";
+check_taint_not      $3, "\t\$3";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&, "\t/./ \$&";
+check_taint_not  $&, "\$ & from /./";
 
 $_ = $a;       # untaint $_
 
-check_taint_not  $_, "\t\$_";
+check_taint_not  $_, 'untainting $_ works';
 
 /(b)/;         # this must not taint
-check_taint_not  $&, "\t/(b)/ \$&";
+check_taint_not  $&, "\$ & from /(b)/";
 check_taint_not  $`, "\t\$`";
 check_taint_not  $', "\t\$'";
 check_taint_not  $+, "\t\$+";
@@ -268,12 +322,12 @@ check_taint_not  $2, "\t\$2";
 
 $_ = $a;       # untaint $_
 
-check_taint_not  $_, "\t\$_";
+check_taint_not  $_, 'untainting $_ works';
 
 $b = uc($a);   # taint $b
 s/(.+)/$b/;    # this must taint only the $_
 
-check_taint      $_, "\t\$_";
+check_taint      $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is 
tainted';
 check_taint_not  $&, "\t\$&";
 check_taint_not  $`, "\t\$`";
 check_taint_not  $', "\t\$'";
@@ -284,7 +338,7 @@ check_taint_not  $2, "\t\$2";
 $_ = $a;       # untaint $_
 
 s/(.+)/b/;     # this must not taint
-check_taint_not  $_, "\t\$_";
+check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
 check_taint_not  $&, "\t\$&";
 check_taint_not  $`, "\t\$`";
 check_taint_not  $', "\t\$'";
@@ -295,13 +349,13 @@ check_taint_not  $2, "\t\$2";
 $b = $a;       # untaint $b
 
 ($b = $a) =~ s/\w/$&/;
-check_taint      $b, "\t\$b";  # $b should be tainted.
-check_taint_not  $a, "\t\$a";  # $a should be not.
+check_taint      $b, '$b from ($b = $a) =~ s/\w/$&/';  # $b should be tainted.
+check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';  # $a should be not.
 
 $_ = $a;       # untaint $_
 
 s/(\w)/\l$1/;  # this must taint
-check_taint      $_, "\t\$_";
+check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';        # this 
must taint
 check_taint      $&, "\t\$&";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
@@ -312,7 +366,7 @@ check_taint_not  $2, "\t\$2";
 $_ = $a;       # untaint $_
 
 s/(\w)/\L$1/;  # this must taint
-check_taint      $_, "\t\$_";
+check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
 check_taint      $&, "\t\$&";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
@@ -323,7 +377,7 @@ check_taint_not  $2, "\t\$2";
 $_ = $a;       # untaint $_
 
 s/(\w)/\u$1/;  # this must taint
-check_taint      $_, "\t\$_";
+check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
 check_taint      $&, "\t\$&";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
@@ -334,7 +388,7 @@ check_taint_not  $2, "\t\$2";
 $_ = $a;       # untaint $_
 
 s/(\w)/\U$1/;  # this must taint
-check_taint      $_, "\t\$_";
+check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
 check_taint      $&, "\t\$&";
 check_taint      $`, "\t\$`";
 check_taint      $', "\t\$'";
@@ -344,7 +398,7 @@ check_taint_not  $2, "\t\$2";
 
 # After all this tainting $a should be cool.
 
-check_taint_not  $a, "\t\$a";
+check_taint_not  $a, '$a still not tainted';
 
 "a" =~ /([a-z])/;
 check_taint_not $1, '"a" =~ /([a-z])/';
@@ -358,168 +412,283 @@ check_taint_not $1, '"foo.bar_baz" =~ 
/^(.*)[._](.*?)$/';
 
     use locale ':not_characters'; # engage restricted locale with different
                                   # tainting rules
-
-    check_taint_not   $a;
-
-    check_taint_not    uc($a);
-    check_taint_not    "\U$a";
-    check_taint_not    ucfirst($a);
-    check_taint_not    "\u$a";
-    check_taint_not    lc($a);
-    check_taint_not    fc($a);
-    check_taint_not    "\L$a";
-    check_taint_not    "\F$a";
-    check_taint_not    lcfirst($a);
-    check_taint_not    "\l$a";
-
-    check_taint_not  sprintf('%e', 123.456);
-    check_taint_not  sprintf('%f', 123.456);
-    check_taint_not  sprintf('%g', 123.456);
-    check_taint_not  sprintf('%d', 123.456);
-    check_taint_not  sprintf('%x', 123.456);
+    check_taint_not   $a, '$a';
+
+    check_taint_not   uc($a), 'uc($a)';
+    check_taint_not   "\U$a", '"\U$a"';
+    check_taint_not   ucfirst($a), 'ucfirst($a)';
+    check_taint_not   "\u$a", '"\u$a"';
+    check_taint_not   lc($a), 'lc($a)';
+    check_taint_not   fc($a), 'fc($a)';
+    check_taint_not   "\L$a", '"\L$a"';
+    check_taint_not   "\F$a", '"\F$a"';
+    check_taint_not   lcfirst($a), 'lcfirst($a)';
+    check_taint_not   "\l$a", '"\l$a"';
+
+    check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
+    check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
+    check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
+    check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
+    check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
 
     $_ = $a;   # untaint $_
 
-    $_ = uc($a);       # taint $_
+    $_ = uc($a);
 
-    check_taint_not    $_;
+    check_taint_not  $_, '$_ = uc($a)';
 
-    /(\w)/;    # taint $&, $`, $', $+, $1.
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
+    /(\w)/;
+    check_taint_not  $&, "\$& from /(\\w)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
     /(.)/;     # untaint $&, $`, $', $+, $1.
-    check_taint_not  $&;
-    check_taint_not  $`;
-    check_taint_not  $';
-    check_taint_not  $+;
-    check_taint_not  $1;
-    check_taint_not  $2;
-
-    /(\W)/;    # taint $&, $`, $', $+, $1.
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
-
-    /(\s)/;    # taint $&, $`, $', $+, $1.
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
-
-    /(\S)/;    # taint $&, $`, $', $+, $1.
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
+    check_taint_not  $&, "\$& from /(.)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    /(\W)/;
+    check_taint_not  $&, "\$& from /(\\W)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
-    $_ = $a;   # untaint $_
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    /(\s)/;
+    check_taint_not  $&, "\$& from /(\\s)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
-    check_taint_not  $_;
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
 
-    /(b)/;             # this must not taint
-    check_taint_not  $&;
-    check_taint_not  $`;
-    check_taint_not  $';
-    check_taint_not  $+;
-    check_taint_not  $1;
-    check_taint_not  $2;
+    /(\S)/;
+    check_taint_not  $&, "\$& from /(\\S)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
-    $_ = $a;   # untaint $_
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+
+    "0" =~ /(\d)/;
+    check_taint_not  $&, "\$& from /(\\d)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+
+    /(\D)/;
+    check_taint_not  $&, "\$& from /(\\D)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+
+    /([[:alnum:]])/;
+    check_taint_not  $&, "\$& from /([[:alnum:]])/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+
+    /([[:^alnum:]])/;
+    check_taint_not  $&, "\$& from /([[:^alnum:]])/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    "a" =~ /(a)|(\w)/;
+    check_taint_not  $&, "\$& from /(a)|(\\w)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
+    ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
+    check_taint_not  $2, "\t\$2";
+    check_taint_not  $3, "\t\$3";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+
+    "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
+    check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
+    check_taint_not      $`, "\t\$`";
+    check_taint_not      $', "\t\$'";
+    check_taint_not      $+, "\t\$+";
+    check_taint_not      $1, "\t\$1";
+    ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small 
cyrillic a'");
+    check_taint_not      $2, "\t\$2";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /./";
+
+    "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
+    check_taint_not  $&, "\$& from /(\\N{KELVIN SIGN})/i";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not      $2, "\t\$2";
 
-    check_taint_not  $_;
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /(.)/";
+
+    "a:" =~ /(.)\b(.)/;
+    check_taint_not  $&, "\$& from /(.)\\b(.)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+    check_taint_not  $3, "\t\$3";
 
-    $b = uc($a);       # taint $b
-    s/(.+)/$b/;        # this must taint only the $_
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /./";
 
-    check_taint_not    $_;
-    check_taint_not  $&;
-    check_taint_not  $`;
-    check_taint_not  $';
-    check_taint_not  $+;
-    check_taint_not  $1;
-    check_taint_not  $2;
+    "aa" =~ /(.)\B(.)/;
+    check_taint_not  $&, "\$& from /(.)\\B(.)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+    check_taint_not  $3, "\t\$3";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$& from /./";
+
+    "aaa" =~ /(.).(\1)/i;      # notaint because not locale dependent
+    check_taint_not      $&, "\$ & from /(.).(\\1)/";
+    check_taint_not      $`, "\t\$`";
+    check_taint_not      $', "\t\$'";
+    check_taint_not      $+, "\t\$+";
+    check_taint_not      $1, "\t\$1";
+    check_taint_not      $2, "\t\$2";
+    check_taint_not      $3, "\t\$3";
+
+    /(.)/;     # untaint $&, $`, $', $+, $1.
+    check_taint_not  $&, "\$ & from /./";
 
     $_ = $a;   # untaint $_
 
-    s/(.+)/b/; # this must not taint
-    check_taint_not  $_;
-    check_taint_not  $&;
-    check_taint_not  $`;
-    check_taint_not  $';
-    check_taint_not  $+;
-    check_taint_not  $1;
-    check_taint_not  $2;
+    check_taint_not  $_, 'untainting $_ works';
+
+    /(b)/;
+    check_taint_not  $&, "\$ & from /(b)/";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
+
+    $_ = $a;   # untaint $_
+
+    check_taint_not  $_, 'untainting $_ works';
+
+    s/(.+)/b/;
+    check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
+    check_taint_not  $&, "\t\$&";
+    check_taint_not  $`, "\t\$`";
+    check_taint_not  $', "\t\$'";
+    check_taint_not  $+, "\t\$+";
+    check_taint_not  $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
     $b = $a;   # untaint $b
 
     ($b = $a) =~ s/\w/$&/;
-    check_taint_not    $b;     # $b should be tainted.
-    check_taint_not  $a;       # $a should be not.
+    check_taint_not     $b, '$b from ($b = $a) =~ s/\w/$&/';
+    check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';
 
     $_ = $a;   # untaint $_
 
-    s/(\w)/\l$1/;      # this must taint
-    check_taint_not    $_;
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
+    s/(\w)/\l$1/;
+    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this 
must taint
+    check_taint_not     $&, "\t\$&";
+    check_taint_not     $`, "\t\$`";
+    check_taint_not     $', "\t\$'";
+    check_taint_not     $+, "\t\$+";
+    check_taint_not     $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
     $_ = $a;   # untaint $_
 
-    s/(\w)/\L$1/;      # this must taint
-    check_taint_not    $_;
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
+    s/(\w)/\L$1/;
+    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
+    check_taint_not     $&, "\t\$&";
+    check_taint_not     $`, "\t\$`";
+    check_taint_not     $', "\t\$'";
+    check_taint_not     $+, "\t\$+";
+    check_taint_not     $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
     $_ = $a;   # untaint $_
 
-    s/(\w)/\u$1/;      # this must taint
-    check_taint_not    $_;
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
+    s/(\w)/\u$1/;
+    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
+    check_taint_not     $&, "\t\$&";
+    check_taint_not     $`, "\t\$`";
+    check_taint_not     $', "\t\$'";
+    check_taint_not     $+, "\t\$+";
+    check_taint_not     $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
     $_ = $a;   # untaint $_
 
-    s/(\w)/\U$1/;      # this must taint
-    check_taint_not    $_;
-    check_taint_not    $&;
-    check_taint_not    $`;
-    check_taint_not    $';
-    check_taint_not    $+;
-    check_taint_not    $1;
-    check_taint_not  $2;
+    s/(\w)/\U$1/;
+    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
+    check_taint_not     $&, "\t\$&";
+    check_taint_not     $`, "\t\$`";
+    check_taint_not     $', "\t\$'";
+    check_taint_not     $+, "\t\$+";
+    check_taint_not     $1, "\t\$1";
+    check_taint_not  $2, "\t\$2";
 
     # After all this tainting $a should be cool.
 
-    check_taint_not  $a;
+    check_taint_not  $a, '$a still not tainted';
 
     "a" =~ /([a-z])/;
     check_taint_not $1, '"a" =~ /([a-z])/';
     "foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
     check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
+
 }
 
 # Here are in scope of 'use locale'
diff --git a/lib/utf8.pm b/lib/utf8.pm
index 43c7277..4980c7c 100644
--- a/lib/utf8.pm
+++ b/lib/utf8.pm
@@ -2,7 +2,7 @@ package utf8;
 
 $utf8::hint_bits = 0x00800000;
 
-our $VERSION = '1.13';
+our $VERSION = '1.14';
 
 sub import {
     $^H |= $utf8::hint_bits;
@@ -42,6 +42,14 @@ utf8 - Perl pragma to enable/disable UTF-8 (or UTF-EBCDIC) 
in source code
  utf8::encode($string);  # "\x{100}"  becomes "\xc4\x80"
  utf8::decode($string);  # "\xc4\x80" becomes "\x{100}"
 
+ # Convert a code point from the platform native character set to
+ # Unicode, and vice-versa.
+ $unicode = utf8::native_to_unicode(ord('A')); # returns 65 on both
+                                               # ASCII and EBCDIC
+                                               # platforms
+ $native = utf8::unicode_to_native(65);       # returns 65 on ASCII
+                                              # platforms; 193 on EBCDIC
+
  $flag = utf8::is_utf8($string); # since Perl 5.8.1
  $flag = utf8::valid($string);
 
@@ -99,7 +107,7 @@ you should not say that unless you really want to have UTF-8 
source code.
 
 =over 4
 
-=item * $num_octets = utf8::upgrade($string)
+=item * C<$num_octets = utf8::upgrade($string)>
 
 Converts in-place the internal representation of the string from an octet
 sequence in the native encoding (Latin-1 or EBCDIC) to I<UTF-X>. The
@@ -114,7 +122,7 @@ B<Note that this function does not handle arbitrary 
encodings.>
 Therefore Encode is recommended for the general purposes; see also
 L<Encode>.
 
-=item * $success = utf8::downgrade($string[, $fail_ok])
+=item * C<$success = utf8::downgrade($string[, $fail_ok])>
 
 Converts in-place the internal representation of the string from
 I<UTF-X> to the equivalent octet sequence in the native encoding (Latin-1
@@ -135,7 +143,7 @@ B<Note that this function does not handle arbitrary 
encodings.>
 Therefore Encode is recommended for the general purposes; see also
 L<Encode>.
 
-=item * utf8::encode($string)
+=item * C<utf8::encode($string)>
 
 Converts in-place the character sequence to the corresponding octet
 sequence in I<UTF-X>. That is, every (possibly wide) character gets
@@ -144,14 +152,14 @@ individual I<UTF-X> bytes of the character.  The UTF8 
flag is turned off.
 Returns nothing.
 
  my $a = "\x{100}"; # $a contains one character, with ord 0x100
- utf8::encode($a);  # $a contains two characters, with ords 0xc4 and
-                    # 0x80
+ utf8::encode($a);  # $a contains two characters, with ords (on
+                    # ASCII platforms) 0xc4 and 0x80
 
 B<Note that this function does not handle arbitrary encodings.>
 Therefore Encode is recommended for the general purposes; see also
 L<Encode>.
 
-=item * $success = utf8::decode($string)
+=item * C<$success = utf8::decode($string)>
 
 Attempts to convert in-place the octet sequence encoded as I<UTF-X> to the
 corresponding character sequence. That is, it replaces each sequence of
@@ -161,20 +169,47 @@ turned on only if the source string contains 
multiple-byte I<UTF-X>
 characters.  If I<$string> is invalid as I<UTF-X>, returns false;
 otherwise returns true.
 
-    my $a = "\xc4\x80"; # $a contains two characters, with ords
-                        # 0xc4 and 0x80
-    utf8::decode($a);   # $a contains one character, with ord 0x100
+ my $a = "\xc4\x80"; # $a contains two characters, with ords
+                     # 0xc4 and 0x80
+ utf8::decode($a);   # On ASCII platforms, $a contains one char,
+                     # with ord 0x100.   On EBCDIC platforms, $a
+                     # is unchanged and the function returns FALSE.
+
+(C<"\xc4\x80"> is not a valid sequence of bytes in any UTF-8-encoded
+character(s) in the EBCDIC code pages that Perl supports, which is why the
+above example returns failure on them.  What does decode into C<\x{100}>
+depends on the platform.  It is C<"\x8C\x41"> in IBM-1047.)
 
 B<Note that this function does not handle arbitrary encodings.>
 Therefore Encode is recommended for the general purposes; see also
 L<Encode>.
 
-=item * $flag = utf8::is_utf8($string)
+=item * C<$unicode = utf8::native_to_unicode($code_point)>
+
+This takes an unsigned integer (which represents the ordinal number of a
+character (or a code point) on the platform the program is being run on) and
+returns its Unicode equivalent value.  Since ASCII platforms natively use the
+Unicode code points, this function returns its input on them.  On EBCDIC
+platforms it converts from EBCIDC to Unicode.
+
+A meaningless value will currently be returned if the input is not an unsigned
+integer.
+
+=item * C<$native = utf8::unicode_to_native($code_point)>
+
+This is the inverse of C<utf8::native_to_unicode()>, converting the other
+direction.  Again, on ASCII platforms, this returns its input, but on EBCDIC
+platforms it will find the native platform code point, given any Unicode one.
+
+A meaningless value will currently be returned if the input is not an unsigned
+integer.
+
+=item * C<$flag = utf8::is_utf8($string)>
 
 (Since Perl 5.8.1)  Test whether I<$string> is marked internally as encoded in
 UTF-8.  Functionally the same as Encode::is_utf8().
 
-=item * $flag = utf8::valid($string)
+=item * C<$flag = utf8::valid($string)>
 
 [INTERNAL] Test whether I<$string> is in a consistent state regarding
 UTF-8.  Will return true if it is well-formed UTF-8 and has the UTF-8 flag
diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod
index 28f5981..c7efd98 100644
--- a/pod/perlebcdic.pod
+++ b/pod/perlebcdic.pod
@@ -125,7 +125,7 @@ This causes a problem with the semantics of the pack/unpack 
"U", which
 are supposed to pack Unicode code points to characters and back to numbers.
 The problem is: which code points to use for code points less than 256?
 (for 256 and over there's no problem: Unicode code points are used)
-In EBCDIC, for the low 256 the EBCDIC code points are used.  This
+In EBCDIC, the EBCDIC code points are used for the low 256.  This
 means that the equivalences
 
     pack("U", ord($character)) eq $character
@@ -142,12 +142,8 @@ equal I<A with acute> or chr(101), and unpack("U", "A") 
would equal
 
 =item *
 
-Many of the remaining problems seem to be related to case-insensitive matching
-
-=item *
-
 The extensions Unicode::Collate and Unicode::Normalized are not
-supported under EBCDIC, likewise for the encoding pragma.
+supported under EBCDIC, likewise for the (now deprecated) encoding pragma.
 
 =back
 
@@ -269,7 +265,8 @@ might want to write:
 
  open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!";
  while (<FH>) {
-     if 
(/(.{29})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/)
+     if (/(.{29})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)
+                                                     \s+(\d+)\.?(\d*)/x)
      {
          if ($7 ne '' && $9 ne '') {
              printf(
@@ -310,7 +307,8 @@ Or, in order to retain the UTF-x code points in hexadecimal:
 
  open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!";
  while (<FH>) {
-     if 
(/(.{29})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/)
+     if (/(.{29})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)
+                                                     \s+(\d+)\.?(\d*)/x)
      {
          if ($7 ne '' && $9 ne '') {
              printf(
@@ -330,8 +328,8 @@ Or, in order to retain the UTF-x code points in hexadecimal:
 
 
                           ISO
-                         8859-1             POS-
-                         CCSID  CCSID CCSID IX-
+                         8859-1             POS-         CCSID
+                         CCSID  CCSID CCSID IX-          1047
   chr                     0819   0037 1047  BC  UTF-8  UTF-EBCDIC
  ---------------------------------------------------------------------
  <NUL>                       0    0    0    0    0        0
@@ -973,17 +971,23 @@ work on any platform as follows:
 
     sub Is_c0 {
         my $char = substr(shift,0,1);
-        return $char =~ /[[:cntrl:]]/
-               && $char =~ /[[:ascii:]]/
-               && ! Is_delete($char);
+        return $char =~ /[[:cntrl:]]/a && ! Is_delete($char);
+
+        # Alternatively:
+        # return $char =~ /[[:cntrl:]]/
+        #        && $char =~ /[[:ascii:]]/
+        #        && ! Is_delete($char);
     }
 
     sub Is_print_ascii {
         my $char = substr(shift,0,1);
 
-        return $char =~ /[[:print:]]/ && $char =~ /[[:ascii:]]/;
+        return $char =~ /[[:print:]]/a;
 
         # Alternatively:
+        # return $char =~ /[[:print:]]/ && $char =~ /[[:ascii:]]/;
+
+        # Or
         # return $char
         #      =~ /[ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/;
     }
@@ -1012,7 +1016,8 @@ to use the characters in the range explicitly:
 
     sub Is_latin_1 {
         my $char = substr(shift,0,1);
-        $char =~ /[Â 
Â¡Â¢Â£Â¤Â¥Â¦Â§Â¨Â©ÂªÂ«Â¬ÂÂ®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹ÂºÂ»Â¼Â½Â¾Â¿ÃÃÃÃÃÃ
ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ 
Ã¡Ã¢Ã£Ã¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«Ã¬ÃÃ®Ã¯Ã°Ã±Ã²Ã³Ã´ÃµÃ¶Ã·Ã¸Ã¹ ... [15 chars truncated]
+        $char =~ /[Â 
Â¡Â¢Â£Â¤Â¥Â¦Â§Â¨Â©ÂªÂ«Â¬ÂÂ®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹ÂºÂ»Â¼Â½Â¾Â¿ÃÃÃÃÃÃ
ÃÃÃÃÃÃÃÃÃÃ]
+                  [ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ 
Ã¡Ã¢Ã£Ã¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«Ã¬ÃÃ®Ã¯Ã°Ã±Ã²Ã³Ã´ÃµÃ¶Ã·Ã¸Ã¹ÃºÃ»Ã¼Ã½Ã¾Ã¿]/x;
     }
 
 Although that form may run into trouble in network transit (due to the
@@ -1112,65 +1117,24 @@ may also be expressed as either of:
     http://www.pvhp.com/%7epvhp/
 
 where 7E is the hexadecimal ASCII code point for '~'.  Here is an example
-of decoding such a URL under CCSID 1047:
+of decoding such a URL in any EBCDIC code page:
 
     $url = 'http://www.pvhp.com/%7Epvhp/';
-    # this array assumes code page 1047
-    my @a2e_1047 = (
-          0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 21, 11, 12, 13, 14, 15,
-         16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
-         64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97,
-        240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
-        124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214,
-        215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109,
-        121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150,
-        151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7,
-         32, 33, 34, 35, 36, 37,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27,
-         48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,255,
-         65,170, 74,177,159,178,106,181,187,180,154,138,176,202,175,188,
-        144,143,234,250,190,160,182,179,157,218,155,139,183,184,185,171,
-        100,101, 98,102, 99,103,158,104,116,113,114,115,120,117,118,119,
-        172,105,237,238,235,239,236,191,128,253,254,251,252,186,174, 89,
-         68, 69, 66, 70, 67, 71,156, 72, 84, 81, 82, 83, 88, 85, 86, 87,
-        140, 73,205,206,203,207,204,225,112,221,222,219,220,141,142,223
-    );
-    $url =~ s/%([0-9a-fA-F]{2})/pack("c",$a2e_1047[hex($1)])/ge;
+    $url =~ s/%([0-9a-fA-F]{2})/
+              pack("c",utf8::unicode_to_native(hex($1)))/xge;
 
 Conversely, here is a partial solution for the task of encoding such
-a URL under the 1047 code page:
+a URL in any EBCDIC code page:
 
     $url = 'http://www.pvhp.com/~pvhp/';
-    # this array assumes code page 1047
-    my @e2a_1047 = (
-          0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15,
-         16, 17, 18, 19,157, 10,  8,135, 24, 25,146,143, 28, 29, 30, 31,
-        128,129,130,131,132,133, 23, 27,136,137,138,139,140,  5,  6,  7,
-        144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26,
-         32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124,
-         38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94,
-         45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63,
-        248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34,
-        216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177,
-        176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164,
-        181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174,
-        172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215,
-        123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245,
-        125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255,
-         92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213,
-         48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159
-    );
     # The following regular expression does not address the
     # mappings for: ('.' => '%2E', '/' => '%2F', ':' => '%3A')
     $url =~ s/([\t "#%&\(\),;<=>\?\@\[\\\]^`{|}~])/
-                sprintf("%%%02X",$e2a_1047[ord($1)])/xge;
+               sprintf("%%%02X",utf8::native_to_unicode(ord($1)))/xge;
 
 where a more complete solution would split the URL into components
 and apply a full s/// substitution only to the appropriate parts.
 
-In the remaining examples a @e2a or @a2e array may be employed
-but the assignment will not be shown explicitly.  For code page 1047
-you could use the @a2e_1047 or @e2a_1047 arrays just shown.
-
 =head2 uu encoding and decoding
 
 The C<u> template to pack() or unpack() will render EBCDIC data in EBCDIC
@@ -1196,19 +1160,17 @@ following will print "Yes indeed\n" on either an ASCII 
or EBCDIC computer:
         print "indeed\n";
     }
 
-Here is a very spartan uudecoder that will work on EBCDIC provided
-that the @e2a array is filled in appropriately:
+Here is a very spartan uudecoder that will work on EBCDIC:
 
     #!/usr/local/bin/perl
-    @e2a = ( # this must be filled in
-           );
     $_ = <> until ($mode,$file) = /^begin\s*(\d*)\s*(\S*)/;
     open(OUT, "> $file") if $file ne "";
     while(<>) {
         last if /^end/;
         next if /[a-z]/;
-        next unless int(((($e2a[ord()] - 32 ) & 077) + 2) / 3) ==
-            int(length() / 4);
+        next unless int((((utf8::native_to_unicode(ord()) - 32 ) & 077)
+                                                               + 2) / 3)
+                    == int(length() / 4);
         print OUT unpack("u", $_);
     }
     close(OUT);
@@ -1224,36 +1186,28 @@ the printable set using:
     $qp_string =~ s/([=\x00-\x1F\x80-\xFF])/sprintf("=%02X",ord($1))/ge;
 
 Whereas a QP encoder that works on both ASCII and EBCDIC platforms
-would look somewhat like the following (where the EBCDIC branch @e2a
-array is omitted for brevity):
+would look somewhat like the following:
 
-    if (ord('A') == 65) {    # ASCII
-        $delete = "\x7F";    # ASCII
-        @e2a = (0 .. 255)    # ASCII to ASCII identity map
-    }
-    else {                   # EBCDIC
-        $delete = "\x07";    # EBCDIC
-        @e2a =               # EBCDIC to ASCII map (as shown above)
-    }
+    $delete = utf8::unicode_to_native(ord("\x7F"));
     $qp_string =~
-      s/([^ !"\#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~$delete])/
-         sprintf("=%02X",$e2a[ord($1)])/xge;
+      s/([^[:print:]$delete])/
+         sprintf("=%02X",utf8::native_to_unicode(ord($1)))/xage;
 
 (although in production code the substitutions might be done
-in the EBCDIC branch with the @e2a array and separately in the
+in the EBCDIC branch with the function call and separately in the
 ASCII branch without the expense of the identity map).
 
 Such QP strings can be decoded with:
 
     # This QP decoder is limited to ASCII only
-    $string =~ s/=([0-9A-Fa-f][0-9A-Fa-f])/chr hex $1/ge;
+    $string =~ s/=([[:xdigit:][[:xdigit:])/chr hex $1/ge;
     $string =~ s/=[\n\r]+$//;
 
 Whereas a QP decoder that works on both ASCII and EBCDIC platforms
-would look somewhat like the following (where the @a2e array is
-omitted for brevity):
+would look somewhat like the following:
 
-    $string =~ s/=([0-9A-Fa-f][0-9A-Fa-f])/chr $a2e[hex $1]/ge;
+    $string =~ s/=([[:xdigit:][:xdigit:]])/
+                                chr utf8::native_to_unicode(hex $1)/xge;
     $string =~ s/=[\n\r]+$//;
 
 =head2 Caesarean ciphers
diff --git a/sv.c b/sv.c
index bb7481a..13ea53c 100644
--- a/sv.c
+++ b/sv.c
@@ -1584,6 +1584,9 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
        if (newlen < minlen)
            newlen = minlen;
 #ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
+
+        /* Don't round up on the first allocation, as odds are pretty good that
+         * the initial request is accurate as to what is really needed */
         if (SvLEN(sv)) {
             newlen = PERL_STRLEN_ROUNDUP(newlen);
         }
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index 4f3d2c0..b53fc85 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -247,7 +247,6 @@ pod/perldebtut.pod  Verbatim line length including indents 
exceeds 79 by    22
 pod/perldebug.pod      Verbatim line length including indents exceeds 79 by    
3
 pod/perldsc.pod        Verbatim line length including indents exceeds 79 by    
4
 pod/perldtrace.pod     Verbatim line length including indents exceeds 79 by    
26
-pod/perlebcdic.pod     Verbatim line length including indents exceeds 79 by    
3
 pod/perlfunc.pod       ? Should you be using F<...> or maybe L<...> instead of 
1
 pod/perlgit.pod        Verbatim line length including indents exceeds 79 by    
12
 pod/perlgpl.pod        Verbatim line length including indents exceeds 79 by    
50
diff --git a/utfebcdic.h b/utfebcdic.h
index 7eec66f..23b4ae2 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -84,6 +84,8 @@
  * pages.  Best is to convert to I8 before sending them, as the I8
  * representation is the same no matter what the underlying code page is.
  *
+ * tr16 also says that NEL and LF be swapped.  We don't do that.
+ *
  * EBCDIC characters above 0xFF are the same as Unicode in Perl's
  * implementation of all 3 encodings, so for those Step 1 is trivial.
  *

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.21.0-229-gf11f9c4

Reply via email to