[perl.git] branch khw/ebcdic, updated. v5.17.10-216-g11e6631

Karl Williamson Fri, 05 Apr 2013 15:46:16 -0700

In perl.git, the branch khw/ebcdic has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/11e66312239bc787b3f0096923f554ef5edc64c6?hp=f10c945af3d902f93b141af04a0dadeccc7e26b3>


- Log -----------------------------------------------------------------
commit 11e66312239bc787b3f0096923f554ef5edc64c6
Author: Karl Williamson <[email protected]>
Date:   Fri Apr 5 16:22:36 2013 -0600

    XXX rebase, document

M       dist/IO/t/io_utf8argv.t
M       t/op/pack.t
M       t/re/pat.t
M       t/test.pl

commit 6d22a7f4cbd5d7e21bbe6715273a69daa263a663
Author: Karl Williamson <[email protected]>
Date:   Fri Apr 5 16:20:20 2013 -0600

    lib/utf8.pm: Fix pod verbatim line wrap

M       lib/utf8.pm
M       t/porting/known_pod_issues.dat
-----------------------------------------------------------------------

Summary of changes:
 dist/IO/t/io_utf8argv.t        |    2 +-
 lib/utf8.pm                    |   28 +++++++++++++++-------------
 t/op/pack.t                    |    4 ++--
 t/porting/known_pod_issues.dat |    1 -
 t/re/pat.t                     |    2 +-
 t/test.pl                      |   18 +++++++++++++-----
 6 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/dist/IO/t/io_utf8argv.t b/dist/IO/t/io_utf8argv.t
index d106d4d..ec3e415 100644
--- a/dist/IO/t/io_utf8argv.t
+++ b/dist/IO/t/io_utf8argv.t
@@ -16,7 +16,7 @@ plan(tests => 2);
 
 open my $fh, ">:raw", 'io_utf8argv';
 # Needs a function utf8_bytes_to_native
-print $fh utf8a_to_utf8n(
+print $fh byte_utf8a_to_utf8n(
    "\xce\x9c\xe1\xbd\xb7\xce\xb1\x20\xcf\x80\xe1\xbd\xb1\xcf\x80\xce".
    "\xb9\xce\xb1\x2c\x20\xce\xbc\xe1\xbd\xb0\x20\xcf\x80\xce\xbf\xce".
    "\xb9\xe1\xbd\xb0\x20\xcf\x80\xe1\xbd\xb1\xcf\x80\xce\xb9\xce\xb1".
diff --git a/lib/utf8.pm b/lib/utf8.pm
index 1d6992c..90492fe 100644
--- a/lib/utf8.pm
+++ b/lib/utf8.pm
@@ -29,22 +29,22 @@ utf8 - Perl pragma to enable/disable UTF-8 (or UTF-EBCDIC) 
in source code
 
 =head1 SYNOPSIS
 
-    use utf8;
-    no utf8;
+ use utf8;
+ no utf8;
 
-    # Convert the internal representation of a Perl scalar to/from UTF-8.
+ # Convert the internal representation of a Perl scalar to/from UTF-8.
 
-    $num_octets = utf8::upgrade($string);
-    $success    = utf8::downgrade($string[, FAIL_OK]);
+ $num_octets = utf8::upgrade($string);
+ $success    = utf8::downgrade($string[, FAIL_OK]);
 
-    # Change each character of a Perl scalar to/from a series of
-    # characters that represent the UTF-8 bytes of each original character.
+ # Change each character of a Perl scalar to/from a series of
+ # characters that represent the UTF-8 bytes of each original character.
 
-    utf8::encode($string);  # "\x{100}"  becomes "\xc4\x80"
-    utf8::decode($string);  # "\xc4\x80" becomes "\x{100}"
+ utf8::encode($string);  # "\x{100}"  becomes "\xc4\x80"
+ utf8::decode($string);  # "\xc4\x80" becomes "\x{100}"
 
-    $flag = utf8::is_utf8(STRING); # since Perl 5.8.1
-    $flag = utf8::valid(STRING);
+ $flag = utf8::is_utf8(STRING); # since Perl 5.8.1
+ $flag = utf8::valid(STRING);
 
 =head1 DESCRIPTION
 
@@ -145,7 +145,8 @@ individual I<UTF-X> bytes of the character.  The UTF8 flag 
is turned off.
 Returns nothing.
 
     my $a = "\x{100}"; # $a contains one character, with ord 0x100
-    utf8::encode($a);  # $a contains two characters, with ords 0xc4 and 0x80
+    utf8::encode($a);  # $a contains two characters, with ords 0xc4
+                       # and 0x80
 
 B<Note that this function does not handle arbitrary encodings.>
 Therefore Encode is recommended for the general purposes; see also
@@ -161,7 +162,8 @@ turned on only if the source string contains multiple-byte 
I<UTF-X>
 characters.  If I<$string> is invalid as I<UTF-X>, returns false;
 otherwise returns true.
 
-    my $a = "\xc4\x80"; # $a contains two characters, with ords 0xc4 and 0x80
+    my $a = "\xc4\x80"; # $a contains two characters, with ords 0xc4
+                        # and 0x80
     utf8::decode($a);   # $a contains one character, with ord 0x100
 
 B<Note that this function does not handle arbitrary encodings.>
diff --git a/t/op/pack.t b/t/op/pack.t
index 858fe3b..4853be9 100644
--- a/t/op/pack.t
+++ b/t/op/pack.t
@@ -1530,7 +1530,7 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # 
defaulting to $_
 #    }
 }
 
-my $U_1FFC_utf8 = utf8a_to_utf8n("\341\277\274");
+my $U_1FFC_utf8 = byte_utf8a_to_utf8n("\341\277\274");
 {
     # U0 and C0 must be scoped
     my (@x) = unpack("a(U0)U", "b$U_1FFC_utf8");
@@ -1786,7 +1786,7 @@ my $U_1FFC_utf8 = utf8a_to_utf8n("\341\277\274");
     is(pack("A*", $high), "\xfeb");
     is(pack("Z*", $high), "\xfeb\x00");
 
-    utf8::upgrade($high = utf8a_to_utf8n("\xc3\xbe") . "b");
+    utf8::upgrade($high = byte_utf8a_to_utf8n("\xc3\xbe") . "b");
     is(pack("U0a2", $high), latin1_to_native("\xfe"));
     is(pack("U0A2", $high), latin1_to_native("\xfe"));
     is(pack("U0Z1", $high), latin1_to_native("\x00"));
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index 46790cb..efd1c11 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -318,7 +318,6 @@ lib/strict.pm       Verbatim line length including indents 
exceeds 79 by    1
 lib/tie/array.pm       Verbatim line length including indents exceeds 79 by    
1
 lib/tie/hash.pm        Verbatim line length including indents exceeds 79 by    
3
 lib/tie/scalar.pm      Verbatim line length including indents exceeds 79 by    
1
-lib/utf8.pm    Verbatim line length including indents exceeds 79 by    4
 lib/version.pod        Verbatim line length including indents exceeds 79 by    
1
 lib/version/internals.pod      Verbatim line length including indents exceeds 
79 by    2
 lib/vmsish.pm  Verbatim line length including indents exceeds 79 by    1
diff --git a/t/re/pat.t b/t/re/pat.t
index 1d378a6..73e6ce2 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -1246,7 +1246,7 @@ EOP
        # consider the UTF8ness of the previous and current pattern
        # string, as well as the physical bytes of the pattern string
 
-       for my $s (utf8a_to_utf8n("\xc4\x80"), "\x{100}") {
+       for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") {
            ok($s =~ /^$s$/, "re-compile check is UTF8-aware");
        }
     }
diff --git a/t/test.pl b/t/test.pl
index 027bea8..f2bdbd3 100644
--- a/t/test.pl
+++ b/t/test.pl
@@ -1560,7 +1560,7 @@ sub ord_native_to_latin1 {
     return utf8::native_to_unicode($ord);
 }
 
-sub utf8a_to_utf8n {
+sub byte_utf8a_to_utf8n {
     # Convert a UTF-8 byte sequence into the platform's native UTF-8
     # equivalent, like UTF-EBCDIC.
 
@@ -1588,15 +1588,19 @@ sub utf8a_to_utf8n {
     );
 
     my $string = shift;
-    die "Input to utf8a-to_utf8n() must not be flagged UTF-8"
+    die "Input to byte_utf8a-to_utf8n() must not be flagged UTF-8"
                                                     if utf8::is_utf8($string);
-    return $string if ord('^') == 94;   # ASCII, Latin1
+    #return $string if ord('^') == 94;   # ASCII, Latin1
 
     my $length = length($string);
+    #diag($string);
+    #diag($length);
     my $out = "";
     for ($i = 0; $i < $length; $i++) {
-        my $byte = ord substr($in, $i, 1);
+        my $byte = ord substr($string, $i, 1);
         my $byte_count = $utf8_skip[$byte];
+        #diag($byte);
+        #diag($byte_count);
 
         die "Illegal start byte" if $byte_count < 0;
         if ($i + $byte_count > $length) {
@@ -1615,13 +1619,15 @@ sub utf8a_to_utf8n {
         my $ord = $byte & (0x1F >> ($byte_count - 2));
         my $bytes_remaining = $byte_count - 1;
         while ($bytes_remaining > 0) {
-            $byte = ord substr($in, ++$i, 1);
+            $byte = ord substr($string, ++$i, 1);
             unless (($byte & 0xC0) == 0x80) {
                 die sprintf "byte '%X' is not a valid continuation", $byte;
             }
             $ord = $ord << 6 | ($byte & 0x3f);
             $bytes_remaining--;
         }
+        #diag($byte);
+        #diag($ord);
 
         my $expected_bytes = $ord < 0x80          
                              ? 1
@@ -1647,6 +1653,8 @@ sub utf8a_to_utf8n {
         $out .= chr utf8::unicode_to_native($ord);
     }
 
+    utf8::encode($out); # Turn off utf8 flag.
+    #diag($out);
     return $out;
 }
 

--
Perl5 Master Repository

[perl.git] branch khw/ebcdic, updated. v5.17.10-216-g11e6631

Reply via email to