Change 24720 by [EMAIL PROTECTED] on 2005/06/07 09:23:58
Subject: [PATCH] Re: [perl #36130] chr(-1) should probably return undef
From: Jarkko Hietaniemi <[EMAIL PROTECTED]>
Date: Mon, 06 Jun 2005 20:28:35 +0300
Message-ID: <[EMAIL PROTECTED]>
Affected files ...
... //depot/perl/pod/perlfunc.pod#472 edit
... //depot/perl/pp.c#459 edit
... //depot/perl/t/op/chr.t#2 edit
Differences ...
==== //depot/perl/pod/perlfunc.pod#472 (text) ====
Index: perl/pod/perlfunc.pod
--- perl/pod/perlfunc.pod#471~24691~ Fri Jun 3 02:29:36 2005
+++ perl/pod/perlfunc.pod Tue Jun 7 02:23:58 2005
@@ -742,6 +742,10 @@
to 255 (inclusive) are by default not encoded in UTF-8 Unicode for
backward compatibility reasons (but see L<encoding>).
+Negative values give the Unicode replacement character (chr(0xfffd)),
+except under the L<bytes> pragma, where the low eight bits of the value
+(truncated to an integer) are used.
+
If NUMBER is omitted, uses C<$_>.
For the reverse, use L</ord>.
==== //depot/perl/pp.c#459 (text) ====
Index: perl/pp.c
--- perl/pp.c#458~24717~ Tue Jun 7 01:18:59 2005
+++ perl/pp.c Tue Jun 7 02:23:58 2005
@@ -3356,7 +3356,20 @@
{
dSP; dTARGET;
char *tmps;
- UV value = POPu;
+ UV value;
+
+ if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
+ ||
+ (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
+ if (IN_BYTES) {
+ value = POPu; /* chr(-1) eq chr(0xff), etc. */
+ } else {
+ (void) POPs; /* Ignore the argument value. */
+ value = UNICODE_REPLACEMENT;
+ }
+ } else {
+ value = POPu;
+ }
SvUPGRADE(TARG,SVt_PV);
==== //depot/perl/t/op/chr.t#2 (text) ====
Index: perl/t/op/chr.t
--- perl/t/op/chr.t#1~24706~ Sat Jun 4 09:55:27 2005
+++ perl/t/op/chr.t Tue Jun 7 02:23:58 2005
@@ -6,7 +6,7 @@
require "test.pl";
}
-plan tests => 26;
+plan tests => 34;
# Note that t/op/ord.t already tests for chr() <-> ord() rountripping.
@@ -19,11 +19,24 @@
is(chr(128), "\x80");
is(chr(255), "\xFF");
-# is(chr(-1), undef); # Shouldn't it be?
+is(chr(-0.1), "\x{FFFD}"); # The U+FFFD Unicode replacement character.
+is(chr(-1 ), "\x{FFFD}");
+is(chr(-2 ), "\x{FFFD}");
+is(chr(-3.0), "\x{FFFD}");
+{
+ use bytes; # Backward compatibility.
+ is(chr(-0.1), "\x00");
+ is(chr(-1 ), "\xFF");
+ is(chr(-2 ), "\xFE");
+ is(chr(-3.0), "\xFD");
+}
# Check UTF-8.
-sub hexes { join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0]))) }
+sub hexes {
+ no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings
+ join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0])));
+}
# The following code points are some interesting steps in UTF-8.
is(hexes( 0x100), "c4 80");
End of Patch.