In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/83f78d1a27d5727dabfc8bcc2b961cb405b831e9?hp=5d4e58dcdb5d3b0a6dfc17071d845e7fc69ff071>

- Log -----------------------------------------------------------------
commit 83f78d1a27d5727dabfc8bcc2b961cb405b831e9
Author: Father Chrysostomos <[email protected]>
Date:   Sun Dec 4 10:47:40 2011 -0800

    Adjust substr offsets when using, not when creating, lvalue
    
    When substr() occurs in potential lvalue context, the offsets are
    adjusted to the current string (negative being converted to positive,
    lengths reaching beyond the end of the string being shortened, etc.)
    as soon as the special lvalue to be returned is created.
    
    When that lvalue is assigned to, the original scalar is stringified
    once more.
    
    That implementation results in two bugs:
    
    1) Fetch is called twice in a simple substr() assignment (except in
    void context, due to the special optimisation of commit 24fcb59fc).
    
    2) These two calls are not equivalent:
    
    $SIG{__WARN__} = sub { warn "w ",shift};
    sub myprint { print @_; $_[0] = 1 }
    print substr("", 2);
    myprint substr("", 2);
    
    The second one dies.  The first one only warns.  That’s mean.  The
    error is also wrong, sometimes, if the original string is going to get
    longer before the substr lvalue is actually used.
    
    The behaviour of \substr($str, -1) if $str changes length is com-
    pletely undocumented.  Before 5.10, it was documented as being unreli-
    able and subject to change.
    
    What this commit does is make the lvalue returned by substr remember
    the original arguments and only adjust the offsets when the assign-
    ment happens.
    
    This means that the following now prints z, instead of xyz (which is
    actually what I would expect):
    
    $str = "a";
    $substr = \substr($str,-1);
    $str = "xyz";
    print $substr;

M       dump.c
M       embed.fnc
M       embed.h
M       mg.c
M       pp.c
M       proto.h
M       sv.h
M       t/op/substr.t

commit 7ba26d48121ff365601a73eefc7798693a3a9118
Author: Father Chrysostomos <[email protected]>
Date:   Sat Dec 3 11:02:32 2011 -0800

    Clarify docs for sv_usepvn_flags
    
    Note that the string must be the start of a mallocked block of memory,
    and not a pointer to the middle of it.

M       sv.c
-----------------------------------------------------------------------

Summary of changes:
 dump.c        |    1 +
 embed.fnc     |    6 ++
 embed.h       |    3 +
 mg.c          |   37 ++++++++++---
 pp.c          |  171 ++++++++++++++++++++++++++++++++-------------------------
 proto.h       |    8 +++
 sv.c          |    4 +-
 sv.h          |    2 +
 t/op/substr.t |   47 +++++++++++++++-
 9 files changed, 194 insertions(+), 85 deletions(-)

diff --git a/dump.c b/dump.c
index 3cb7167..2c635de 100644
--- a/dump.c
+++ b/dump.c
@@ -1941,6 +1941,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, 
I32 nest, I32 maxnest, bo
            Perl_dump_indent(aTHX_ level, file, "  TARGOFF = %"IVdf"\n", 
(IV)LvTARGOFF(sv));
            Perl_dump_indent(aTHX_ level, file, "  TARGLEN = %"IVdf"\n", 
(IV)LvTARGLEN(sv));
            Perl_dump_indent(aTHX_ level, file, "  TARG = 0x%"UVxf"\n", 
PTR2UV(LvTARG(sv)));
+           Perl_dump_indent(aTHX_ level, file, "  FLAGS = %"IVdf"\n", 
(IV)LvFLAGS(sv));
            if (LvTYPE(sv) != 't' && LvTYPE(sv) != 'T')
                do_sv_dump(level+1, file, LvTARG(sv), nest+1, maxnest,
                    dumpops, pvlim);
diff --git a/embed.fnc b/embed.fnc
index 4d2b666..470b11d 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1391,6 +1391,12 @@ Apd      |UV     |to_utf8_upper  |NN const U8 *p|NN U8* 
ustrp|NULLOK STRLEN *lenp
 Apd    |UV     |to_utf8_title  |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
 Ampd   |UV     |to_utf8_fold   |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
 AMp    |UV     |_to_utf8_fold_flags|NN const U8 *p|NN U8* ustrp|NULLOK STRLEN 
*lenp|U8 flags
+#if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C)
+p      |bool   |translate_substr_offsets|STRLEN curlen|IV pos1_iv \
+                                        |bool pos1_is_uv|IV len_iv \
+                                        |bool len_is_uv|NN STRLEN *posp \
+                                        |NN STRLEN *lenp
+#endif
 #if defined(UNLINK_ALL_VERSIONS)
 Ap     |I32    |unlnk          |NN const char* f
 #endif
diff --git a/embed.h b/embed.h
index 03aefc1..d05dd8a 100644
--- a/embed.h
+++ b/embed.h
@@ -1346,6 +1346,9 @@
 #define save_magic(a,b)                S_save_magic(aTHX_ a,b)
 #define unwind_handler_stack(a)        S_unwind_handler_stack(aTHX_ a)
 #  endif
+#  if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C)
+#define translate_substr_offsets(a,b,c,d,e,f,g)        
Perl_translate_substr_offsets(aTHX_ a,b,c,d,e,f,g)
+#  endif
 #  if defined(PERL_IN_MRO_C)
 #define mro_clean_isarev(a,b,c,d,e)    S_mro_clean_isarev(aTHX_ a,b,c,d,e)
 #define mro_gather_and_rename(a,b,c,d,e)       S_mro_gather_and_rename(aTHX_ 
a,b,c,d,e)
diff --git a/mg.c b/mg.c
index fa4b446..c55ca63 100644
--- a/mg.c
+++ b/mg.c
@@ -2163,16 +2163,24 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg)
     const char * const tmps = SvPV_const(lsv,len);
     STRLEN offs = LvTARGOFF(sv);
     STRLEN rem = LvTARGLEN(sv);
+    const bool negoff = LvFLAGS(sv) & 1;
+    const bool negrem = LvFLAGS(sv) & 2;
 
     PERL_ARGS_ASSERT_MAGIC_GETSUBSTR;
     PERL_UNUSED_ARG(mg);
 
+    if (!translate_substr_offsets(
+           SvUTF8(lsv) ? sv_len_utf8(lsv) : len,
+           negoff ? -(IV)offs : (IV)offs, !negoff,
+           negrem ? -(IV)rem  : (IV)rem,  !negrem, &offs, &rem
+    )) {
+       Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
+       sv_setsv_nomg(sv, &PL_sv_undef);
+       return 0;
+    }
+
     if (SvUTF8(lsv))
        offs = sv_pos_u2b_flags(lsv, offs, &rem, SV_CONST_RETURN);
-    if (offs > len)
-       offs = len;
-    if (rem > len - offs)
-       rem = len - offs;
     sv_setpvn(sv, tmps + offs, rem);
     if (SvUTF8(lsv))
         SvUTF8_on(sv);
@@ -2183,11 +2191,13 @@ int
 Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg)
 {
     dVAR;
-    STRLEN len;
+    STRLEN len, lsv_len, oldtarglen, newtarglen;
     const char * const tmps = SvPV_const(sv, len);
     SV * const lsv = LvTARG(sv);
     STRLEN lvoff = LvTARGOFF(sv);
     STRLEN lvlen = LvTARGLEN(sv);
+    const bool negoff = LvFLAGS(sv) & 1;
+    const bool neglen = LvFLAGS(sv) & 2;
 
     PERL_ARGS_ASSERT_MAGIC_SETSUBSTR;
     PERL_UNUSED_ARG(mg);
@@ -2197,25 +2207,36 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg)
        Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
                            "Attempt to use reference as lvalue in substr"
        );
+    if (SvUTF8(lsv)) lsv_len = sv_len_utf8(lsv);
+    else (void)SvPV_nomg(lsv,lsv_len);
+    if (!translate_substr_offsets(
+           lsv_len,
+           negoff ? -(IV)lvoff : (IV)lvoff, !negoff,
+           neglen ? -(IV)lvlen : (IV)lvlen, !neglen, &lvoff, &lvlen
+    ))
+       Perl_croak(aTHX_ "substr outside of string");
+    oldtarglen = lvlen;
     if (DO_UTF8(sv)) {
        sv_utf8_upgrade(lsv);
        lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN);
        sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0);
-       LvTARGLEN(sv) = sv_len_utf8(sv);
+       newtarglen = sv_len_utf8(sv);
        SvUTF8_on(lsv);
     }
     else if (lsv && SvUTF8(lsv)) {
        const char *utf8;
        lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN);
-       LvTARGLEN(sv) = len;
+       newtarglen = len;
        utf8 = (char*)bytes_to_utf8((U8*)tmps, &len);
        sv_insert_flags(lsv, lvoff, lvlen, utf8, len, 0);
        Safefree(utf8);
     }
     else {
        sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0);
-       LvTARGLEN(sv) = len;
+       newtarglen = len;
     }
+    if (!neglen) LvTARGLEN(sv) = newtarglen;
+    if (negoff)  LvTARGOFF(sv) += newtarglen - oldtarglen;
 
     return 0;
 }
diff --git a/pp.c b/pp.c
index 0ecd144..d55c7fd 100644
--- a/pp.c
+++ b/pp.c
@@ -2954,6 +2954,73 @@ PP(pp_length)
     RETURN;
 }
 
+/* Returns false if substring is completely outside original string.
+   No length is indicated by len_iv = 0 and len_is_uv = 0.  len_is_uv must
+   always be true for an explicit 0.
+*/
+bool
+Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
+                                   bool pos1_is_uv, IV len_iv,
+                                   bool len_is_uv, STRLEN *posp,
+                                   STRLEN *lenp)
+{
+    IV pos2_iv;
+    int    pos2_is_uv;
+
+    PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
+
+    if (!pos1_is_uv && pos1_iv < 0 && curlen) {
+       pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+       pos1_iv += curlen;
+    }
+    if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
+       return FALSE;
+
+    if (len_iv || len_is_uv) {
+       if (!len_is_uv && len_iv < 0) {
+           pos2_iv = curlen + len_iv;
+           if (curlen)
+               pos2_is_uv = curlen-1 > ~(UV)len_iv;
+           else
+               pos2_is_uv = 0;
+       } else {  /* len_iv >= 0 */
+           if (!pos1_is_uv && pos1_iv < 0) {
+               pos2_iv = pos1_iv + len_iv;
+               pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
+           } else {
+               if ((UV)len_iv > curlen-(UV)pos1_iv)
+                   pos2_iv = curlen;
+               else
+                   pos2_iv = pos1_iv+len_iv;
+               pos2_is_uv = 1;
+           }
+       }
+    }
+    else {
+       pos2_iv = curlen;
+       pos2_is_uv = 1;
+    }
+
+    if (!pos2_is_uv && pos2_iv < 0) {
+       if (!pos1_is_uv && pos1_iv < 0)
+           return FALSE;
+       pos2_iv = 0;
+    }
+    else if (!pos1_is_uv && pos1_iv < 0)
+       pos1_iv = 0;
+
+    if ((UV)pos2_iv < (UV)pos1_iv)
+       pos2_iv = pos1_iv;
+    if ((UV)pos2_iv > curlen)
+       pos2_iv = curlen;
+
+    /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
+    *posp = (STRLEN)( (UV)pos1_iv );
+    *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
+
+    return TRUE;
+}
+
 PP(pp_substr)
 {
     dVAR; dSP; dTARGET;
@@ -2963,11 +3030,9 @@ PP(pp_substr)
     SV *   pos_sv;
     IV     pos1_iv;
     int    pos1_is_uv;
-    IV     pos2_iv;
-    int    pos2_is_uv;
     SV *   len_sv;
     IV     len_iv = 0;
-    int    len_is_uv = 1;
+    int    len_is_uv = 0;
     I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
     const bool rvalue = (GIMME_V != G_VOID);
     const char *tmps;
@@ -2984,7 +3049,7 @@ PP(pp_substr)
        }
        if ((len_sv = POPs)) {
            len_iv    = SvIV(len_sv);
-           len_is_uv = SvIOK_UV(len_sv);
+           len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
        }
        else num_args--;
     }
@@ -3006,15 +3071,27 @@ PP(pp_substr)
        }
        else if (DO_UTF8(sv))
            repl_need_utf8_upgrade = TRUE;
-       lvalue = 0;
     }
-    if (lvalue) {
-       tmps = NULL; /* unused */
-       SvGETMAGIC(sv);
-       if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen);
-       else curlen = 0;
+    else if (lvalue) {
+       SV * ret;
+       ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
+       sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
+       LvTYPE(ret) = 'x';
+       LvTARG(ret) = SvREFCNT_inc_simple(sv);
+       LvTARGOFF(ret) =
+           pos1_is_uv || pos1_iv >= 0
+               ? (STRLEN)(UV)pos1_iv
+               : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
+       LvTARGLEN(ret) =
+           len_is_uv || len_iv > 0
+               ? (STRLEN)(UV)len_iv
+               : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
+
+       SPAGAIN;
+       PUSHs(ret);    /* avoid SvSETMAGIC here */
+       RETURN;
     }
-    else tmps = SvPV_const(sv, curlen);
+    tmps = SvPV_const(sv, curlen);
     if (DO_UTF8(sv)) {
         utf8_curlen = sv_len_utf8(sv);
        if (utf8_curlen == curlen)
@@ -3025,72 +3102,16 @@ PP(pp_substr)
     else
        utf8_curlen = 0;
 
-    if (!pos1_is_uv && pos1_iv < 0 && curlen) {
-       pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
-       pos1_iv += curlen;
-    }
-    if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
-       goto bound_fail;
-
-    if (num_args > 2) {
-       if (!len_is_uv && len_iv < 0) {
-           pos2_iv = curlen + len_iv;
-           if (curlen)
-               pos2_is_uv = curlen-1 > ~(UV)len_iv;
-           else
-               pos2_is_uv = 0;
-       } else {  /* len_iv >= 0 */
-           if (!pos1_is_uv && pos1_iv < 0) {
-               pos2_iv = pos1_iv + len_iv;
-               pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
-           } else {
-               if ((UV)len_iv > curlen-(UV)pos1_iv)
-                   pos2_iv = curlen;
-               else
-                   pos2_iv = pos1_iv+len_iv;
-               pos2_is_uv = 1;
-           }
-       }
-    }
-    else {
-       pos2_iv = curlen;
-       pos2_is_uv = 1;
-    }
-
-    if (!pos2_is_uv && pos2_iv < 0) {
-       if (!pos1_is_uv && pos1_iv < 0)
-           goto bound_fail;
-       pos2_iv = 0;
-    }
-    else if (!pos1_is_uv && pos1_iv < 0)
-       pos1_iv = 0;
-
-    if ((UV)pos2_iv < (UV)pos1_iv)
-       pos2_iv = pos1_iv;
-    if ((UV)pos2_iv > curlen)
-       pos2_iv = curlen;
-
     {
-       /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
-       const STRLEN pos = (STRLEN)( (UV)pos1_iv );
-       const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
-       STRLEN byte_len = len;
-       STRLEN byte_pos = utf8_curlen
-           ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
+       STRLEN pos, len, byte_len, byte_pos;
 
-       if (lvalue) {
-           SV * ret;
-           ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
-           sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
-           LvTYPE(ret) = 'x';
-           LvTARG(ret) = SvREFCNT_inc_simple(sv);
-           LvTARGOFF(ret) = pos;
-           LvTARGLEN(ret) = len;
+       if (!translate_substr_offsets(
+               curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
+       )) goto bound_fail;
 
-           SPAGAIN;
-           PUSHs(ret);    /* avoid SvSETMAGIC here */
-           RETURN;
-       }
+       byte_len = len;
+       byte_pos = utf8_curlen
+           ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
 
        tmps += byte_pos;
 
@@ -3134,7 +3155,7 @@ PP(pp_substr)
     RETURN;
 
 bound_fail:
-    if (lvalue || repl)
+    if (repl)
        Perl_croak(aTHX_ "substr outside of string");
     Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
     RETPUSHUNDEF;
diff --git a/proto.h b/proto.h
index d324314..ff84a22 100644
--- a/proto.h
+++ b/proto.h
@@ -5601,6 +5601,14 @@ STATIC void      S_save_magic(pTHX_ I32 mgs_ix, SV *sv)
 
 STATIC void    S_unwind_handler_stack(pTHX_ const void *p);
 #endif
+#if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C)
+PERL_CALLCONV bool     Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV 
pos1_iv, bool pos1_is_uv, IV len_iv, bool len_is_uv, STRLEN *posp, STRLEN *lenp)
+                       __attribute__nonnull__(pTHX_6)
+                       __attribute__nonnull__(pTHX_7);
+#define PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS      \
+       assert(posp); assert(lenp)
+
+#endif
 #if defined(PERL_IN_MRO_C)
 STATIC void    S_mro_clean_isarev(pTHX_ HV * const isa, const char * const 
name, const STRLEN len, HV * const exceptions, U32 flags)
                        __attribute__nonnull__(pTHX_1)
diff --git a/sv.c b/sv.c
index 0aebda2..ae97f1d 100644
--- a/sv.c
+++ b/sv.c
@@ -4606,7 +4606,9 @@ Perl_sv_sethek(pTHX_ register SV *const sv, const HEK 
*const hek)
 Tells an SV to use C<ptr> to find its string value.  Normally the
 string is stored inside the SV but sv_usepvn allows the SV to use an
 outside string.  The C<ptr> should point to memory that was allocated
-by C<malloc>.  The string length, C<len>, must be supplied.  By default
+by C<malloc>.  It must be the start of a mallocked block
+of memory, and not a pointer to the middle of it.  The
+string length, C<len>, must be supplied.  By default
 this function will realloc (i.e. move) the memory pointed to by C<ptr>,
 so that pointer should not be freed or used by the programmer after
 giving it to sv_usepvn, and neither should any pointers from "behind"
diff --git a/sv.h b/sv.h
index 33a61be..4282144 100644
--- a/sv.h
+++ b/sv.h
@@ -482,6 +482,7 @@ struct xpvlv {
     SV*                xlv_targ;
     char       xlv_type;       /* k=keys .=pos x=substr v=vec /=join/re
                                 * y=alem/helem/iter t=tie T=tied HE */
+    char       xlv_flags;      /* 1 = negative offset  2 = negative len */
 };
 
 /* This structure works in 3 ways - regular scalar, GV with GP, or fast
@@ -1325,6 +1326,7 @@ the scalar's value cannot change unless written to.
 #define LvTARG(sv)     ((XPVLV*)  SvANY(sv))->xlv_targ
 #define LvTARGOFF(sv)  ((XPVLV*)  SvANY(sv))->xlv_targoff
 #define LvTARGLEN(sv)  ((XPVLV*)  SvANY(sv))->xlv_targlen
+#define LvFLAGS(sv)    ((XPVLV*)  SvANY(sv))->xlv_flags
 
 #define IoIFP(sv)      (sv)->sv_u.svu_fp
 #define IoOFP(sv)      ((XPVIO*)  SvANY(sv))->xio_ofp
diff --git a/t/op/substr.t b/t/op/substr.t
index 2673fc7..f93b64c 100644
--- a/t/op/substr.t
+++ b/t/op/substr.t
@@ -23,7 +23,7 @@ $SIG{__WARN__} = sub {
 
 BEGIN { require './test.pl'; }
 
-plan(360);
+plan(380);
 
 run_tests() unless caller;
 
@@ -635,6 +635,51 @@ is($x, "\x{100}\x{200}\xFFb");
        is($_, 'YYYY'); 
        is($x, 'aYYYYef');
     }
+    $x = "abcdef";
+    for (substr($x,1)) {
+       is($_, 'bcdef');
+       $_ = 'XX';
+       is($_, 'XX');
+       is($x, 'aXX');
+       $x .= "frompswiggle";
+       is $_, "XXfrompswiggle";
+    }
+    $x = "abcdef";
+    for (substr($x,1,-1)) {
+       is($_, 'bcde');
+       $_ = 'XX';
+       is($_, 'XX');
+       is($x, 'aXXf');
+       $x .= "frompswiggle";
+       is $_, "XXffrompswiggl";
+    }
+    $x = "abcdef";
+    for (substr($x,-5,3)) {
+       is($_, 'bcd');
+       $_ = 'XX';   # now $_ is substr($x, -4, 2)
+       is($_, 'XX');
+       is($x, 'aXXef');
+       $x .= "frompswiggle";
+       is $_, "gg";
+    }
+    $x = "abcdef";
+    for (substr($x,-5)) {
+       is($_, 'bcdef');
+       $_ = 'XX';  # now substr($x, -2)
+       is($_, 'XX');
+       is($x, 'aXX');
+       $x .= "frompswiggle";
+       is $_, "le";
+    }
+    $x = "abcdef";
+    for (substr($x,-5,-1)) {
+       is($_, 'bcde');
+       $_ = 'XX';  # now substr($x, -3, -1)
+       is($_, 'XX');
+       is($x, 'aXXf');
+       $x .= "frompswiggle";
+       is $_, "gl";
+    }
 }
 
 # [perl #24200] string corruption with lvalue sub

--
Perl5 Master Repository

Reply via email to