Change 29985: Integrate:

Nicholas Clark Thu, 25 Jan 2007 15:00:19 -0800

Change 29985 by [EMAIL PROTECTED] on 2007/01/25 22:55:28

        Integrate:
        [ 28193]
        Subject: [PATCH] strange encodings upsets pp_chr
        From: SADAHIRO Tomoyuki <[EMAIL PROTECTED]>
        Date: Sun, 14 May 2006 19:57:28 +0900
        Message-Id: <[EMAIL PROTECTED]>
        
        [ 28215]
        Subject: Re: [perl #39145] win32, @_ and fork crashing in dounwind
        From: Dave Mitchell <[EMAIL PROTECTED]>
        Date: Wed, 17 May 2006 17:38:16 +0100
        Message-ID: <[EMAIL PROTECTED]>
        
        (Dave's fix, plus a test taken from [perl #39145])
        
        [ 28248]
        [perl #32041] SEGV with complicated regexp and long string
        PL_reg_maxiter was wrapping to a negative value


Affected files ...

... //depot/maint-5.8/perl/MANIFEST#295 integrate
... //depot/maint-5.8/perl/op.c#182 integrate
... //depot/maint-5.8/perl/pp.c#128 integrate
... //depot/maint-5.8/perl/regexec.c#80 integrate
... //depot/maint-5.8/perl/sv.c#319 integrate
... //depot/maint-5.8/perl/t/op/chr.t#2 integrate
... //depot/maint-5.8/perl/t/op/fork.t#3 integrate
... //depot/maint-5.8/perl/t/op/ord.t#3 integrate
... //depot/maint-5.8/perl/t/uni/chr.t#1 branch

Differences ...

==== //depot/maint-5.8/perl/MANIFEST#295 (text) ====
Index: perl/MANIFEST
--- perl/MANIFEST#294~29961~    2007-01-24 14:12:24.000000000 -0800
+++ perl/MANIFEST       2007-01-25 14:55:28.000000000 -0800
@@ -2943,6 +2943,7 @@
 t/test.pl                      Simple testing library
 t/uni/case.pl                  See if Unicode casing works
 t/uni/chomp.t                  See if Unicode chomp works
+t/uni/chr.t                    See if Unicode chr works
 t/uni/class.t                  See if Unicode classes work (\p)
 t/uni/fold.t                   See if Unicode folding works
 t/uni/lower.t                  See if Unicode casing works

==== //depot/maint-5.8/perl/pp.c#128 (text) ====
Index: perl/pp.c
--- perl/pp.c#127~29967~        2007-01-25 02:34:50.000000000 -0800
+++ perl/pp.c   2007-01-25 14:55:28.000000000 -0800
@@ -3353,20 +3353,21 @@
     *tmps++ = (char)value;
     *tmps = '\0';
     (void)SvPOK_only(TARG);
+
     if (PL_encoding && !IN_BYTES) {
         sv_recode_to_utf8(TARG, PL_encoding);
        tmps = SvPVX(TARG);
        if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
-           memEQ(tmps, "\xef\xbf\xbd\0", 4)) {
-           SvGROW(TARG, 3);
+           UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
+           SvGROW(TARG, 2);
            tmps = SvPVX(TARG);
-           SvCUR_set(TARG, 2);
-           *tmps++ = (U8)UTF8_EIGHT_BIT_HI(value);
-           *tmps++ = (U8)UTF8_EIGHT_BIT_LO(value);
+           SvCUR_set(TARG, 1);
+           *tmps++ = (char)value;
            *tmps = '\0';
-           SvUTF8_on(TARG);
+           SvUTF8_off(TARG);
        }
     }
+
     XPUSHs(TARG);
     RETURN;
 }

==== //depot/maint-5.8/perl/regexec.c#80 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#79~29981~    2007-01-25 13:31:37.000000000 -0800
+++ perl/regexec.c      2007-01-25 14:55:28.000000000 -0800
@@ -3241,6 +3241,9 @@
                        *that* much linear. */
                if (!PL_reg_maxiter) {
                    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * 
(scan->flags>>4);
+                   /* possible overflow for long strings and many CURLYX's */
+                   if (PL_reg_maxiter < 0)
+                       PL_reg_maxiter = I32_MAX;
                    PL_reg_leftiter = PL_reg_maxiter;
                }
                if (PL_reg_leftiter-- == 0) {

==== //depot/maint-5.8/perl/sv.c#319 (text) ====
Index: perl/sv.c
--- perl/sv.c#318~29984~        2007-01-25 14:41:11.000000000 -0800
+++ perl/sv.c   2007-01-25 14:55:28.000000000 -0800
@@ -9668,6 +9668,8 @@
                ncx->blk_sub.olddepth   = cx->blk_sub.olddepth;
                ncx->blk_sub.hasargs    = cx->blk_sub.hasargs;
                ncx->blk_sub.lval       = cx->blk_sub.lval;
+               ncx->blk_sub.oldcomppad = (PAD*)ptr_table_fetch(PL_ptr_table,
+                                          cx->blk_sub.oldcomppad);
                break;
            case CXt_EVAL:
                ncx->blk_eval.old_in_eval = cx->blk_eval.old_in_eval;

==== //depot/maint-5.8/perl/t/op/chr.t#2 (text) ====
Index: perl/t/op/chr.t
--- perl/t/op/chr.t#1~25503~    2005-09-19 14:14:58.000000000 -0700
+++ perl/t/op/chr.t     2007-01-25 14:55:28.000000000 -0800
@@ -21,30 +21,32 @@
 
 # is(chr(-1), undef); # Shouldn't it be?
 
-# Check UTF-8.
+# Check UTF-8 (not UTF-EBCDIC).
+SKIP: {
+    skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A';
 
 sub hexes { join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0]))) }
 
 # The following code points are some interesting steps in UTF-8.
-is(hexes(   0x100), "c4 80");
-is(hexes(   0x7FF), "df bf");
-is(hexes(   0x800), "e0 a0 80");
-is(hexes(   0xFFF), "e0 bf bf");
-is(hexes(  0x1000), "e1 80 80");
-is(hexes(  0xCFFF), "ec bf bf");
-is(hexes(  0xD000), "ed 80 80");
-is(hexes(  0xD7FF), "ed 9f bf");
-is(hexes(  0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin)
-is(hexes(  0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end)
-is(hexes(  0xE000), "ee 80 80");
-is(hexes(  0xFFFF), "ef bf bf");
-is(hexes( 0x10000), "f0 90 80 80");
-is(hexes( 0x3FFFF), "f0 bf bf bf");
-is(hexes( 0x40000), "f1 80 80 80");
-is(hexes( 0xFFFFF), "f3 bf bf bf");
-is(hexes(0x100000), "f4 80 80 80");
-is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point
-is(hexes(0x110000), "f4 90 80 80");
-is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding
-is(hexes(0x200000), "f8 88 80 80 80");
-
+    is(hexes(   0x100), "c4 80");
+    is(hexes(   0x7FF), "df bf");
+    is(hexes(   0x800), "e0 a0 80");
+    is(hexes(   0xFFF), "e0 bf bf");
+    is(hexes(  0x1000), "e1 80 80");
+    is(hexes(  0xCFFF), "ec bf bf");
+    is(hexes(  0xD000), "ed 80 80");
+    is(hexes(  0xD7FF), "ed 9f bf");
+    is(hexes(  0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin)
+    is(hexes(  0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end)
+    is(hexes(  0xE000), "ee 80 80");
+    is(hexes(  0xFFFF), "ef bf bf");
+    is(hexes( 0x10000), "f0 90 80 80");
+    is(hexes( 0x3FFFF), "f0 bf bf bf");
+    is(hexes( 0x40000), "f1 80 80 80");
+    is(hexes( 0xFFFFF), "f3 bf bf bf");
+    is(hexes(0x100000), "f4 80 80 80");
+    is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point
+    is(hexes(0x110000), "f4 90 80 80");
+    is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding
+    is(hexes(0x200000), "f8 88 80 80 80");
+}

==== //depot/maint-5.8/perl/t/op/fork.t#3 (xtext) ====
Index: perl/t/op/fork.t
--- perl/t/op/fork.t#2~21489~   2003-10-19 04:14:32.000000000 -0700
+++ perl/t/op/fork.t    2007-01-25 14:55:28.000000000 -0800
@@ -444,3 +444,9 @@
 }
 EXPECT
 1
+########
+# [perl #39145] Perl_dounwind() crashing with Win32's fork() emulation
+sub { @_ = 3; fork ? die "1\n" : die "1\n" }->(2);
+EXPECT
+1
+1

==== //depot/maint-5.8/perl/t/op/ord.t#3 (xtext) ====
Index: perl/t/op/ord.t
--- perl/t/op/ord.t#2~19951~    2003-07-03 01:47:35.000000000 -0700
+++ perl/t/op/ord.t     2007-01-25 14:55:28.000000000 -0800
@@ -6,7 +6,7 @@
     require "test.pl";
 }
 
-plan tests => 7;
+plan tests => 35;
 
 # compile time evaluation
 
@@ -33,3 +33,36 @@
 $x = "\x{1234}";
 is(ord($x), 0x1234, 'runtime ord \x{....}');
 
+{
+    no warnings 'utf8'; # avoid Unicode warnings
+
+# The following code points are some interesting steps.
+    is(ord(chr(   0x100)),    0x100, '0x0100');
+    is(ord(chr(   0x3FF)),    0x3FF, 'last two-byte char in UTF-EBCDIC');
+    is(ord(chr(   0x400)),    0x400, 'first three-byte char in UTF-EBCDIC');
+    is(ord(chr(   0x7FF)),    0x7FF, 'last two-byte char in UTF-8');
+    is(ord(chr(   0x800)),    0x800, 'first three-byte char in UTF-8');
+    is(ord(chr(   0xFFF)),    0xFFF, '0x0FFF');
+    is(ord(chr(  0x1000)),   0x1000, '0x1000');
+    is(ord(chr(  0x3FFF)),   0x3FFF, 'last three-byte char in UTF-EBCDIC');
+    is(ord(chr(  0x4000)),   0x4000, 'first four-byte char in UTF-EBCDIC');
+    is(ord(chr(  0xCFFF)),   0xCFFF, '0xCFFF');
+    is(ord(chr(  0xD000)),   0xD000, '0xD000');
+    is(ord(chr(  0xD7FF)),   0xD7FF, '0xD7FF');
+    is(ord(chr(  0xD800)),   0xD800, 'surrogate begin (not strict utf-8)');
+    is(ord(chr(  0xDFFF)),   0xDFFF, 'surrogate end (not strict utf-8)');
+    is(ord(chr(  0xE000)),   0xE000, '0xE000');
+    is(ord(chr(  0xFDD0)),   0xFDD0, 'first additional noncharacter in BMP');
+    is(ord(chr(  0xFDEF)),   0xFDEF, 'last additional noncharacter in BMP');
+    is(ord(chr(  0xFFFE)),   0xFFFE, '0xFFFE');
+    is(ord(chr(  0xFFFF)),   0xFFFF, 'last three-byte char in UTF-8');
+    is(ord(chr( 0x10000)),  0x10000, 'first four-byte char in UTF-8');
+    is(ord(chr( 0x3FFFF)),  0x3FFFF, 'last four-byte char in UTF-EBCDIC');
+    is(ord(chr( 0x40000)),  0x40000, 'first five-byte char in UTF-EBCDIC');
+    is(ord(chr( 0xFFFFF)),  0xFFFFF, '0xFFFFF');
+    is(ord(chr(0x100000)), 0x100000, '0x100000');
+    is(ord(chr(0x10FFFF)), 0x10FFFF, 'Unicode last code point');
+    is(ord(chr(0x110000)), 0x110000, '0x110000');
+    is(ord(chr(0x1FFFFF)), 0x1FFFFF, 'last four-byte char in UTF-8');
+    is(ord(chr(0x200000)), 0x200000, 'first five-byte char in UTF-8');
+}

==== //depot/maint-5.8/perl/t/uni/chr.t#1 (text) ====
Index: perl/t/uni/chr.t
--- /dev/null   2007-01-16 11:55:45.526841103 -0800
+++ perl/t/uni/chr.t    2007-01-25 14:55:28.000000000 -0800
@@ -0,0 +1,41 @@
+
+BEGIN {
+    if ($ENV{'PERL_CORE'}){
+        chdir 't';
+        @INC = '../lib';
+    }
+    require Config; import Config;
+    if ($Config{'extensions'} !~ /\bEncode\b/) {
+      print "1..0 # Skip: Encode was not built\n";
+      exit 0;
+    }
+    if (ord("A") == 193) {
+        print "1..0 # Skip: EBCDIC\n";
+        exit 0;
+    }
+    unless (PerlIO::Layer->find('perlio')){
+        print "1..0 # Skip: PerlIO required\n";
+        exit 0;
+    }
+    if ($ENV{PERL_CORE_MINITEST}) {
+        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
+        exit 0;
+    }
+    $| = 1;
+}
+
+use strict;
+use Test::More tests => 6;
+use Encode;
+
+use encoding 'johab';
+
+ok(chr(0x7f) eq "\x7f");
+ok(chr(0x80) eq "\x80");
+ok(chr(0xff) eq "\xff");
+
+for my $i (127, 128, 255) {
+    ok(chr($i) eq pack('C', $i));
+}
+
+__END__
End of Patch.

Change 29985: Integrate:

Reply via email to