Change 18496 by jhi@lyta on 2003/01/16 20:36:23
Fix 'use encoding' I/O for code points 0x80..0xFF;
code changes from Inaba Hiroto; test tweaks by jhi.
Affected files ...
... //depot/perl/MANIFEST#966 edit
... //depot/perl/doio.c#212 edit
... //depot/perl/ext/Encode/encoding.pm#17 edit
... //depot/perl/ext/Encode/t/enc_eucjp.t#1 add
... //depot/perl/ext/Encode/t/enc_utf8.t#2 edit
... //depot/perl/pp.c#362 edit
... //depot/perl/sv.c#612 edit
... //depot/perl/sv.h#131 edit
... //depot/perl/t/uni/tr_utf8.t#5 edit
Differences ...
==== //depot/perl/MANIFEST#966 (text) ====
Index: perl/MANIFEST
--- perl/MANIFEST#965~18485~ Wed Jan 15 12:10:57 2003
+++ perl/MANIFEST Thu Jan 16 12:36:23 2003
@@ -259,6 +259,7 @@
ext/Encode/t/CJKT.t test script
ext/Encode/t/Encode.t test script
ext/Encode/t/Encoder.t test script
+ext/Encode/t/enc_eucjp.t test script
ext/Encode/t/enc_utf8.t test script
ext/Encode/t/encoding.t test script
ext/Encode/t/fallback.t test script
==== //depot/perl/doio.c#212 (text) ====
Index: perl/doio.c
--- perl/doio.c#211~18456~ Tue Jan 7 01:20:22 2003
+++ perl/doio.c Thu Jan 16 12:36:23 2003
@@ -1268,7 +1268,8 @@
default:
if (PerlIO_isutf8(fp)) {
if (!SvUTF8(sv))
- sv_utf8_upgrade(sv = sv_mortalcopy(sv));
+ sv_utf8_upgrade_flags(sv = sv_mortalcopy(sv),
+ SV_GMAGIC|SV_UTF8_NO_ENCODING);
}
else if (DO_UTF8(sv)) {
if (!sv_utf8_downgrade((sv = sv_mortalcopy(sv)), TRUE)
==== //depot/perl/ext/Encode/encoding.pm#17 (text) ====
Index: perl/ext/Encode/encoding.pm
--- perl/ext/Encode/encoding.pm#16~18478~ Mon Jan 13 15:10:55 2003
+++ perl/ext/Encode/encoding.pm Thu Jan 16 12:36:23 2003
@@ -29,8 +29,7 @@
Carp::croak("Unknown encoding '$name'");
}
unless ($arg{Filter}) {
- ${^ENCODING} = $enc # this is all you need, actually.
- unless $name =~ /^(?:utf-?(?:8|16|32)|ucs-?(?:2|4))(?:[bl]e)?$/i;
+ ${^ENCODING} = $enc;
$HAS_PERLIO or return 1;
for my $h (qw(STDIN STDOUT)){
if ($arg{$h}){
==== //depot/perl/ext/Encode/t/enc_eucjp.t#1 (text) ====
Index: perl/ext/Encode/t/enc_eucjp.t
--- /dev/null Tue May 5 13:32:27 1998
+++ perl/ext/Encode/t/enc_eucjp.t Thu Jan 16 12:36:23 2003
@@ -0,0 +1,66 @@
+# This is the twin of enc_utf8.t, the only difference is that
+# this has "use encoding 'euc-jp'".
+
+BEGIN {
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ unless (find PerlIO::Layer 'perlio') {
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
+ }
+}
+
+use encoding 'euc-jp';
+
+my @c = (127, 128, 255, 256);
+
+print "1.." . (scalar @c + 1) . "\n";
+
+my @f;
+
+for my $i (0..$#c) {
+ push @f, "f$i";
+ open(F, ">f$i") or die "$0: failed to open 'f$i' for writing: $!";
+ binmode(F, ":utf8");
+ print F chr($c[$i]);
+ close F;
+}
+
+my $t = 1;
+
+for my $i (0..$#c) {
+ open(F, "<f$i") or die "$0: failed to open 'f$i' for reading: $!";
+ binmode(F, ":utf8");
+ my $c = <F>;
+ my $o = ord($c);
+ print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o
+!= $c[$i]\n";
+ $t++;
+}
+
+my $f = "f" . @f;
+
+push @f, $f;
+open(F, ">$f") or die "$0: failed to open '$f' for writing: $!";
+binmode(F, ":raw"); # Output raw bytes.
+print F chr(128); # Output illegal UTF-8.
+close F;
+open(F, $f) or die "$0: failed to open '$f' for reading: $!";
+binmode(F, ":encoding(utf-8)");
+{
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
+}
+print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
+ "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " .
+unpack("H*", $a) . "\n";
+
+END {
+ 1 while unlink @f;
+}
==== //depot/perl/ext/Encode/t/enc_utf8.t#2 (text) ====
Index: perl/ext/Encode/t/enc_utf8.t
--- perl/ext/Encode/t/enc_utf8.t#1~18479~ Mon Jan 13 15:13:02 2003
+++ perl/ext/Encode/t/enc_utf8.t Thu Jan 16 12:36:23 2003
@@ -1,3 +1,6 @@
+# This is the twin of enc_eucjp.t, the only difference is that
+# this has "use encoding 'utf8'".
+
BEGIN {
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
@@ -37,11 +40,11 @@
binmode(F, ":utf8");
my $c = <F>;
my $o = ord($c);
- print $o == $c[$i] ? "ok $t\n" : "not ok $t # $o != $c[$i]\n";
+ print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$$i]:
+$o != $c[$i]\n";
$t++;
}
-my $f = "f4";
+my $f = "f" . @f;
push @f, $f;
open(F, ">$f") or die "$0: failed to open '$f' for writing: $!";
@@ -56,7 +59,7 @@
eval { <F> }; # This should get caught.
}
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
- "ok $t\n" : "not ok $t: $a\n";
+ "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " .
+unpack("H*", $a) . "\n";
END {
1 while unlink @f;
==== //depot/perl/pp.c#362 (text) ====
Index: perl/pp.c
--- perl/pp.c#361~18280~ Tue Dec 10 13:30:10 2002
+++ perl/pp.c Thu Jan 16 12:36:23 2003
@@ -3278,8 +3278,19 @@
*tmps++ = (char)value;
*tmps = '\0';
(void)SvPOK_only(TARG);
- if (PL_encoding)
+ if (PL_encoding && !IN_BYTES) {
sv_recode_to_utf8(TARG, PL_encoding);
+ tmps = SvPVX(TARG);
+ if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
+ memEQ(tmps, "\xef\xbf\xbd\0", 4)) {
+ SvGROW(TARG,3);
+ SvCUR_set(TARG, 2);
+ *tmps++ = (U8)UTF8_EIGHT_BIT_HI(value);
+ *tmps++ = (U8)UTF8_EIGHT_BIT_LO(value);
+ *tmps = '\0';
+ SvUTF8_on(TARG);
+ }
+ }
XPUSHs(TARG);
RETURN;
}
==== //depot/perl/sv.c#612 (text) ====
Index: perl/sv.c
--- perl/sv.c#611~18456~ Tue Jan 7 01:20:22 2003
+++ perl/sv.c Thu Jan 16 12:36:23 2003
@@ -3395,7 +3395,7 @@
sv_force_normal_flags(sv, 0);
}
- if (PL_encoding)
+ if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING))
sv_recode_to_utf8(sv, PL_encoding);
else { /* Assume Latin-1/EBCDIC */
/* This function could be much more efficient if we
==== //depot/perl/sv.h#131 (text) ====
Index: perl/sv.h
--- perl/sv.h#130~18419~ Fri Jan 3 15:45:34 2003
+++ perl/sv.h Thu Jan 16 12:36:23 2003
@@ -1030,6 +1030,7 @@
#define SV_IMMEDIATE_UNREF 1
#define SV_GMAGIC 2
#define SV_COW_DROP_PV 4
+#define SV_UTF8_NO_ENCODING 8
/* We are about to replace the SV's current value. So if it's copy on write
we need to normalise it. Use the SV_COW_DROP_PV flag hint to say that
==== //depot/perl/t/uni/tr_utf8.t#5 (text) ====
Index: perl/t/uni/tr_utf8.t
--- perl/t/uni/tr_utf8.t#4~18480~ Mon Jan 13 17:25:14 2003
+++ perl/t/uni/tr_utf8.t Thu Jan 16 12:36:23 2003
@@ -62,7 +62,6 @@
# [perl 16843]
my $line = 'abcdefghijklmnopqrstuvwxyz$0123456789';
$line =~
tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
-# is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9",
"[perl #16843]");
- ok(1, "TODO: Encode 1.84 broke the test for perl #16843");
+ is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9",
+"[perl #16843]");
}
__END__
End of Patch.