In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/8ff713d9d0097d24f522a287d497b801fedd19ce?hp=120921acd4cf27bb932a725a8cf5c957652b22eb>
- Log ----------------------------------------------------------------- commit 8ff713d9d0097d24f522a287d497b801fedd19ce Author: Karl Williamson <[email protected]> Date: Sun Jan 22 10:55:43 2017 -0700 lib/utf8.t: Generalize for EBCDIC This had an ASCII-specific test which has been failing on EBCDIC platforms. M lib/utf8.t commit 5d4414ef5fb50c1a13cfc41ab798de251e78c07c Author: Karl Williamson <[email protected]> Date: Sun Jan 22 10:53:41 2017 -0700 Move I8 test helpers to common file This moves the code that helps in testing I8 (which is the same as UTF-8 on non-EBCDIC platforms) to t/charset_tools.pl, away from the .t where they previously were. This means these can now be used in other .t's. M ext/XS-APItest/t/utf8.t M t/charset_tools.pl ----------------------------------------------------------------------- Summary of changes: ext/XS-APItest/t/utf8.t | 43 ++++++++----------------------------------- lib/utf8.t | 5 ++++- t/charset_tools.pl | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index c7f2c1d65f..ab3c21829d 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -2,6 +2,12 @@ use strict; use Test::More; + +BEGIN { + use_ok('XS::APItest'); + require 'charset_tools.pl'; +}; + $|=1; no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit @@ -27,42 +33,9 @@ sub output_warnings(@) { # This test file can't use byte_utf8a_to_utf8n() from t/charset_tools.pl # because that uses the same functions we are testing here. So UTF-EBCDIC -# strings are hard-coded as I8 strings in this file instead, and we use array -# lookup to translate into the appropriate code page. - -my @i8_to_native = ( # Only code page 1047 so far. -# _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F -0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, -0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, -0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, -0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, -0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, -0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, -0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, -0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, -0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, -0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, -0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, -0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, -0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, -0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, -0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, -0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE, -); - -my @native_to_i8; -for (my $i = 0; $i < 256; $i++) { - $native_to_i8[$i8_to_native[$i]] = $i; -} +# strings are hard-coded as I8 strings in this file instead, and we use the +# translation functions to/from I8 from that file instead. -*I8_to_native = (isASCII) - ? sub { return shift } - : sub { return join "", map { chr $i8_to_native[ord $_] } - split "", shift }; -*native_to_I8 = (isASCII) - ? sub { return shift } - : sub { return join "", map { chr $native_to_i8[ord $_] } - split "", shift }; sub start_byte_to_cont($) { # Extract the code point information from the input UTF-8 start byte, and diff --git a/lib/utf8.t b/lib/utf8.t index 6b28eae948..e5f9547a8a 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -140,6 +140,9 @@ no utf8; # Ironic, no? = join " . ", map {sprintf 'chr (%d)', ord $_} split //, $char; push @char, [$_, $char, $charsubst, $char_as_ord]; } + my $malformed = $::IS_ASCII + ? "\xE1\xA0" + : I8_to_native("\xE6\xA0"); # Now we've done all the UTF8 munching hopefully we're safe my @tests = ( ['check our detection program works', @@ -162,7 +165,7 @@ no utf8; # Ironic, no? # "out of memory" error. We really need the "" [rather than qq() # or q()] to get the best explosion. ["!Feed malformed utf8 into perl.", <<"BANG", - use utf8; %a = ("\xE1\xA0"=>"sterling"); + use utf8; %a = ("$malformed" =>"sterling"); print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n"; BANG qr/^Malformed UTF-8 character: .*? \(too short; \d bytes? available, need \d\).*start\d+,end$/sm diff --git a/t/charset_tools.pl b/t/charset_tools.pl index 0621a7ae96..6e88a37531 100644 --- a/t/charset_tools.pl +++ b/t/charset_tools.pl @@ -139,4 +139,38 @@ sub byte_utf8a_to_utf8n { return $out; } +my @i8_to_native = ( # Only code page 1047 so far. +# _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, +0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, +0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, +0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, +0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, +0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE, +); + +my @native_to_i8; +for (my $i = 0; $i < 256; $i++) { + $native_to_i8[$i8_to_native[$i]] = $i; +} + +*I8_to_native = ($::IS_ASCII) + ? sub { return shift } + : sub { return join "", map { chr $i8_to_native[ord $_] } + split "", shift }; +*native_to_I8 = ($::IS_ASCII) + ? sub { return shift } + : sub { return join "", map { chr $native_to_i8[ord $_] } + split "", shift }; + 1 -- Perl5 Master Repository
