In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/841a5e1869a65f80379c03832eaf9887546a9622?hp=d93e3b8ce477b5ca3fdd199244bc7d258ae60fdd>

- Log -----------------------------------------------------------------
commit 841a5e1869a65f80379c03832eaf9887546a9622
Author: Father Chrysostomos <[email protected]>
Date:   Wed Jan 1 06:06:30 2014 -0800

    pp.c: Simplify lc and uc stringification code
    
    Originally, lc and uc would not warn about undef, due to an implemen-
    tation detail.
    
    The implementation changed in 673061948, and extra code was added to
    keep the behaviour the same.
    
    Commit 0a0ffbced enabled the warnings about undef, but did so by added
    even more code in the midst of the blocks that existed solely to avoid
    the warning.
    
    We can just delete those blocks and put in a simple stringification.

M       pp.c

commit 6006ebd02a4b62ba7535f6398bbb2d33a9ef46cc
Author: Father Chrysostomos <[email protected]>
Date:   Wed Jan 1 05:56:15 2014 -0800

    pp.c: Improve self-referential comment
    
    pp.c:pp_lc has this:
    
            /* Here is where we would do context-sensitive actions.  See the
             * commit message for this comment for why there isn't any */
    
    If I try to look up the commit that added the comment, I get this:
    
    commit 06b5486afd6f58eb7fdf8c5c8cdb8520a4c87f40
    Author: Karl Williamson <[email protected]>
    Date:   Fri Nov 11 10:13:28 2011 -0700
    
        pp.c: White-space only
    
        This outdents and reflows comments as a result of the removal of a
        surrounding block
    
    86510fb15 was the commit that added the comment, whose commit message
    contains the explanation, so cite that directly.

M       pp.c

commit 5cd5e2d6301836ca9b0f94e9a100e697bd374cd8
Author: Father Chrysostomos <[email protected]>
Date:   Wed Jan 1 05:51:36 2014 -0800

    Reënable in-place lc/uc
    
    It used to be that this code:
    
    for("$foo") {
      lc $_;
      ...
    }
    
    would modify $_, allowing other code in the ‘for’ block to see the
    changes (bug #43207).  Commit 17fa077605 fixed that by changing the
    logic that determined whether lc/uc(first) could modify the sca-
    lar in place.
    
    In doing so, it stopped in-place modification from happening at all,
    because the condition became SvPADTMP && SvTEMP, which never happens.
    
    (SvPADTMP unually indicates an operator return value stored in a pad;
    i.e., a scalar that will next be used by the same operator again to
    return another value.  SvTEMP indicates that the REFCNT will go down
    shortly, usually a temporary value created solely for the sake of
    returning something.)
    
    Now that bug #78194 is fixed, for("$foo") no longer exposes a PADTMP
    to the following code, so we *can* now assume (as was done erroneously
    before) that PADTMP indicates something like lc("$foo$bar") and modify
    pp_stringify’s return value in place.
    
    Also, we can extend this to apply to TEMP variables that have a ref-
    erence count of 1, since they cannot be in use elsewhere.  We skip
    TEMP variables with set-magic, because they could be tied, and
    SvSETMAGIC would have a side effect.  (That could happen with
    lc(delete $h{tied_elem}).)
    
    Previously, this was skipped for uc and lc for overloaded references,
    since stringification could change the utf8ness.  That is no longer
    sufficient.  As of Perl 5.16, typeglobs and non-overloaded blessed
    references can also enable their utf8 flag upon stringification, if
    the stash or glob names contains wide characters.  So I changed the
    !SvAMAGIC (not overloaded) to SvPOK (is a string already), which will
    cover most cases where this optimisation helps.  The two tests added
    to the end of lc.t fail with !SvAMAGIC.

M       pp.c
M       t/op/lc.t

commit 4b3427080c3b11e528bd84c7509af7fc8a60dcb1
Author: Father Chrysostomos <[email protected]>
Date:   Mon Dec 30 21:29:49 2013 -0800

    lc.t: More tests for #43207

M       t/op/lc.t
-----------------------------------------------------------------------

Summary of changes:
 pp.c      | 64 +++++++++++++++++++--------------------------------------------
 t/op/lc.t | 34 ++++++++++++++++++++++++++-------
 2 files changed, 46 insertions(+), 52 deletions(-)

diff --git a/pp.c b/pp.c
index 4175808..dd4d89a 100644
--- a/pp.c
+++ b/pp.c
@@ -3480,20 +3480,15 @@ PP(pp_ucfirst)
                     * UTF-8 or not, but in either case is the number of bytes 
*/
     bool tainted = FALSE;
 
-    SvGETMAGIC(source);
-    if (SvOK(source)) {
-       s = (const U8*)SvPV_nomg_const(source, slen);
-    } else {
-       if (ckWARN(WARN_UNINITIALIZED))
-           report_uninit(source);
-       s = (const U8*)"";
-       slen = 0;
-    }
+    s = (const U8*)SvPV_const(source, slen);
 
     /* We may be able to get away with changing only the first character, in
      * place, but not if read-only, etc.  Later we may discover more reasons to
      * not convert in-place. */
-    inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
+    inplace = !SvREADONLY(source)
+          && (  SvPADTMP(source)
+             || (  SvTEMP(source) && !SvSMAGICAL(source)
+                && SvREFCNT(source) == 1));
 
     /* First calculate what the changed first character should be.  This 
affects
      * whether we can just swap it out, leaving the rest of the string 
unchanged,
@@ -3706,8 +3701,11 @@ PP(pp_uc)
 
     SvGETMAGIC(source);
 
-    if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
-       && SvTEMP(source) && !DO_UTF8(source)
+    if ((SvPADTMP(source)
+        ||
+       (SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
+       && !SvREADONLY(source) && SvPOK(source)
+       && !DO_UTF8(source)
        && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
 
        /* We can convert in place.  The reason we can't if in UNI_8_BIT is to
@@ -3725,21 +3723,7 @@ PP(pp_uc)
 
        dest = TARG;
 
-       /* The old implementation would copy source into TARG at this point.
-          This had the side effect that if source was undef, TARG was now
-          an undefined SV with PADTMP set, and they don't warn inside
-          sv_2pv_flags(). However, we're now getting the PV direct from
-          source, which doesn't have PADTMP set, so it would warn. Hence the
-          little games.  */
-
-       if (SvOK(source)) {
-           s = (const U8*)SvPV_nomg_const(source, len);
-       } else {
-           if (ckWARN(WARN_UNINITIALIZED))
-               report_uninit(source);
-           s = (const U8*)"";
-           len = 0;
-       }
+       s = (const U8*)SvPV_nomg_const(source, len);
        min = len + 1;
 
        SvUPGRADE(dest, SVt_PV);
@@ -3952,8 +3936,12 @@ PP(pp_lc)
 
     SvGETMAGIC(source);
 
-    if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
-       && SvTEMP(source) && !DO_UTF8(source)) {
+    if (   (  SvPADTMP(source)
+          || (  SvTEMP(source) && !SvSMAGICAL(source)
+             && SvREFCNT(source) == 1  )
+          )
+       && !SvREADONLY(source) && SvPOK(source)
+       && !DO_UTF8(source)) {
 
        /* We can convert in place, as lowercasing anything in the latin1 range
         * (or else DO_UTF8 would have been on) doesn't lengthen it */
@@ -3965,21 +3953,7 @@ PP(pp_lc)
 
        dest = TARG;
 
-       /* The old implementation would copy source into TARG at this point.
-          This had the side effect that if source was undef, TARG was now
-          an undefined SV with PADTMP set, and they don't warn inside
-          sv_2pv_flags(). However, we're now getting the PV direct from
-          source, which doesn't have PADTMP set, so it would warn. Hence the
-          little games.  */
-
-       if (SvOK(source)) {
-           s = (const U8*)SvPV_nomg_const(source, len);
-       } else {
-           if (ckWARN(WARN_UNINITIALIZED))
-               report_uninit(source);
-           s = (const U8*)"";
-           len = 0;
-       }
+       s = (const U8*)SvPV_nomg_const(source, len);
        min = len + 1;
 
        SvUPGRADE(dest, SVt_PV);
@@ -4005,7 +3979,7 @@ PP(pp_lc)
                                 cBOOL(IN_LOCALE_RUNTIME), &tainted);
 
            /* Here is where we would do context-sensitive actions.  See the
-            * commit message for this comment for why there isn't any */
+            * commit message for 86510fb15 for why there isn't any */
 
            if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
 
diff --git a/t/op/lc.t b/t/op/lc.t
index ae15625..66f365b 100644
--- a/t/op/lc.t
+++ b/t/op/lc.t
@@ -10,7 +10,7 @@ BEGIN {
 
 use feature qw( fc );
 
-plan tests => 129;
+plan tests => 134;
 
 is(lc(undef),     "", "lc(undef) is ''");
 is(lcfirst(undef), "", "lcfirst(undef) is ''");
@@ -256,17 +256,26 @@ for (1, 4, 9, 16, 25) {
 }
 
 # bug #43207
-my $temp = "Hello";
+my $temp = "HellO";
 for ("$temp") {
     lc $_;
-    is($_, "Hello");
+    is($_, "HellO", '[perl #43207] lc($_) modifying $_');
 }
-
-# bug #43207
-my $temp = "Hello";
 for ("$temp") {
     fc $_;
-    is($_, "Hello");
+    is($_, "HellO", '[perl #43207] fc($_) modifying $_');
+}
+for ("$temp") {
+    uc $_;
+    is($_, "HellO", '[perl #43207] uc($_) modifying $_');
+}
+for ("$temp") {
+    ucfirst $_;
+    is($_, "HellO", '[perl #43207] ucfirst($_) modifying $_');
+}
+for ("$temp") {
+    lcfirst $_;
+    is($_, "HellO", '[perl #43207] lcfirst($_) modifying $_');
 }
 
 # new in Unicode 5.1.0
@@ -293,3 +302,14 @@ fresh_perl_like(<<'constantfolding', qr/^(\d+),\1\z/, {},
     }
 constantfolding
     'folded uc() in string eval uses the right hints');
+
+# In-place lc/uc should not corrupt string buffers when given a non-utf8-
+# flagged thingy that stringifies to utf8
+$h{k} = bless[], "\x{3b0}\x{3b0}\x{3b0}bcde"; # U+03B0 grows with uc()
+   # using delete marks it as TEMP, so uc-in-place is permitted
+like uc delete $h{k}, qr "^(?:\x{3a5}\x{308}\x{301}){3}BCDE=ARRAY\(.*\)",
+    'uc(TEMP ref) does not produce a corrupt string';
+$h{k} = bless[], "\x{130}bcde"; # U+0130 grows with lc()
+   # using delete marks it as TEMP, so uc-in-place is permitted
+like lc delete $h{k}, qr "^i\x{307}bcde=array\(.*\)",
+    'lc(TEMP ref) does not produce a corrupt string';

--
Perl5 Master Repository

Reply via email to