In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/622c613e12cef84c93c5df207a321fe13d0f2a7f?hp=c9fe309b988eb6db0705de33e24381da2b3761eb>

- Log -----------------------------------------------------------------
commit 622c613e12cef84c93c5df207a321fe13d0f2a7f
Author: David Mitchell <[email protected]>
Date:   Fri Aug 4 14:28:15 2017 +0100

    PVLV-as-REGEXP: avoid PVX double free
    
    With v5.27.2-30-gdf6b4bd, I changed the way that PVLVs store a regexp
    value (by making the xpv_len field point to a regexp struct). There was a
    bug in this, which caused the PVX buffer to be double freed.
    
    Several REGEXP SVs can share a PVX buffer. Only one of them will have a
    non-zero xpv_len field, and that SV gets to free the buffer.
    
    After the commit above, the non-zero xpv_len was triggering an extra free.
    
    This was showing up in smokes as failures in re/recompile.t when invoked
    with PERL_DESTRUCT_LEVEL=2 (which t/TEST does).

M       sv.c
M       t/op/qr.t

commit 89042fa4090fc5634ff775e753c58b0827ad6af8
Author: David Mitchell <[email protected]>
Date:   Fri Aug 4 14:00:26 2017 +0100

    sv_dump(): display regex LEN and LV-as-RX regexp
    
    When the len field of a REGEXP isn't usurped, display it (it used to
    always be skipped for REGEXPs).
    
    When it's usurped by a PVLV to point to a 'struct regexp', display it as
    a pointer.

M       dump.c
M       ext/Devel-Peek/t/Peek.t

commit 89699a04a6346cff31d7d8cdd6e39556b846dcf6
Author: David Mitchell <[email protected]>
Date:   Fri Aug 4 13:12:55 2017 +0100

    Perl_reg_temp_copy(): rename args.
    
    This function copies a regexp SV. Rename its args to ssv and dsv to
    match a usual convention in other functions such as sv_catsv().
    
    Similarly rename the two local vars holding ReANY(ssv/dsv) to srx, drx.
    
    This is less confusing than having four vars called rx, ret_x, r, ret.
    
    Also update the comments explaining what the function does.

M       embed.fnc
M       proto.h
M       regcomp.c
-----------------------------------------------------------------------

Summary of changes:
 dump.c                  |  7 ++++-
 embed.fnc               |  2 +-
 ext/Devel-Peek/t/Peek.t |  5 +++-
 proto.h                 |  4 +--
 regcomp.c               | 75 +++++++++++++++++++++++++++----------------------
 sv.c                    | 12 ++++++--
 t/op/qr.t               | 18 +++++++++++-
 7 files changed, 82 insertions(+), 41 deletions(-)

diff --git a/dump.c b/dump.c
index fa5f0baf91..1fa242204e 100644
--- a/dump.c
+++ b/dump.c
@@ -1836,7 +1836,12 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, 
I32 nest, I32 maxnest, bo
                 PerlIO_printf(file, "\n");
             }
            Perl_dump_indent(aTHX_ level, file, "  CUR = %" IVdf "\n", 
(IV)SvCUR(sv));
-           if (!re)
+           if (re && type == SVt_PVLV)
+                /* LV-as-REGEXP usurps len field to store poiunter to
+                 * regexp struct */
+               Perl_dump_indent(aTHX_ level, file, "  REGEXP = 0x%" UVxf "\n",
+                   PTR2UV(((XPV*)SvANY(sv))->xpv_len_u.xpvlenu_rx));
+            else
                Perl_dump_indent(aTHX_ level, file, "  LEN = %" IVdf "\n",
                                       (IV)SvLEN(sv));
 #ifdef PERL_COPY_ON_WRITE
diff --git a/embed.fnc b/embed.fnc
index 2dd73bfde0..77e898de9b 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1298,7 +1298,7 @@ Ap        |I32    |pregexec       |NN REGEXP * const 
prog|NN char* stringarg \
 Ap     |void   |pregfree       |NULLOK REGEXP* r
 Ap     |void   |pregfree2      |NN REGEXP *rx
 : FIXME - is anything in re using this now?
-EXp    |REGEXP*|reg_temp_copy  |NULLOK REGEXP* ret_x|NN REGEXP* rx
+EXp    |REGEXP*|reg_temp_copy  |NULLOK REGEXP* dsv|NN REGEXP* ssv
 Ap     |void   |regfree_internal|NN REGEXP *const rx
 #if defined(USE_ITHREADS)
 Ap     |void*  |regdupe_internal|NN REGEXP * const r|NN CLONE_PARAMS* param
diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t
index db9354bd0e..58dc109d97 100644
--- a/ext/Devel-Peek/t/Peek.t
+++ b/ext/Devel-Peek/t/Peek.t
@@ -362,7 +362,7 @@ do_test('reference to regexp',
     FLAGS = \\(OBJECT,POK,FAKE,pPOK\\)
     PV = $ADDR "\\(\\?\\^:tic\\)"
     CUR = 8
-    LEN = 0                                    # $] < 5.017006
+    LEN = 0
     STASH = $ADDR\\t"Regexp"'
 . ($] < 5.013 ? '' :
 '
@@ -389,6 +389,7 @@ do_test('reference to regexp',
       FLAGS = \(POK,pPOK\)
       PV = $ADDR "\(\?\^:tic\)"
       CUR = 8
+      LEN = \d+
       COMPFLAGS = 0x0 \(\)
       EXTFLAGS = 0x680000 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\)
 (?:      ENGINE = $ADDR \(STANDARD\)
@@ -1164,6 +1165,7 @@ do_test('UTF-8 in a regular expression',
     FLAGS = \(OBJECT,POK,FAKE,pPOK,UTF8\)
     PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 
"\(\?\^u:\\\\\\\\x\{100\}\)"\]
     CUR = 13
+    LEN = 0
     STASH = $ADDR      "Regexp"
     COMPFLAGS = 0x0 \(\)
     EXTFLAGS = $ADDR \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\)
@@ -1188,6 +1190,7 @@ do_test('UTF-8 in a regular expression',
       FLAGS = \(POK,pPOK,UTF8\)
       PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 
"\(\?\^u:\\\\\\\\x\{100\}\)"\]
       CUR = 13
+      LEN = \d+
       COMPFLAGS = 0x0 \(\)
       EXTFLAGS = $ADDR \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\)
 (?:      ENGINE = $ADDR \(STANDARD\)
diff --git a/proto.h b/proto.h
index 5988bf6b97..efbc52ba27 100644
--- a/proto.h
+++ b/proto.h
@@ -2675,9 +2675,9 @@ PERL_CALLCONV void        
Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I
 PERL_CALLCONV SV*      Perl_reg_qr_package(pTHX_ REGEXP * const rx);
 #define PERL_ARGS_ASSERT_REG_QR_PACKAGE        \
        assert(rx)
-PERL_CALLCONV REGEXP*  Perl_reg_temp_copy(pTHX_ REGEXP* ret_x, REGEXP* rx);
+PERL_CALLCONV REGEXP*  Perl_reg_temp_copy(pTHX_ REGEXP* dsv, REGEXP* ssv);
 #define PERL_ARGS_ASSERT_REG_TEMP_COPY \
-       assert(rx)
+       assert(ssv)
 PERL_CALLCONV SV*      Perl_regclass_swash(pTHX_ const regexp *prog, const 
struct regnode *node, bool doinit, SV **listsvp, SV **altsvp);
 #define PERL_ARGS_ASSERT_REGCLASS_SWASH        \
        assert(node)
diff --git a/regcomp.c b/regcomp.c
index e5037fc046..5a9e56b080 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19535,9 +19535,17 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
         Safefree(r->recurse_locinput);
 }
 
+
 /*  reg_temp_copy()
 
-    This is a hacky workaround to the structural issue of match results
+    Copy ssv to dsv, both of which should of type SVt_REGEXP or SVt_PVLV,
+    except that dsv will be created if NULL.
+
+    This function is used in two main ways. First to implement
+        $r = qr/....; $s = $$r;
+
+    Secondly, it is used as a hacky workaround to the structural issue of
+    match results
     being stored in the regexp structure which is in turn stored in
     PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
     could be PL_curpm in multiple contexts, and could require multiple
@@ -19553,79 +19561,80 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
 
 
 REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
 {
-    struct regexp *ret;
-    struct regexp *const r = ReANY(rx);
-    const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
+    struct regexp *drx;
+    struct regexp *const srx = ReANY(ssv);
+    const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
-    if (!ret_x)
-       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    if (!dsv)
+       dsv = (REGEXP*) newSV_type(SVt_REGEXP);
     else {
-       SvOK_off((SV *)ret_x);
+       SvOK_off((SV *)dsv);
        if (islv) {
            /* For PVLVs, the head (sv_any) points to an XPVLV, while
              * the LV's xpvlenu_rx will point to a regexp body, which
              * we allocate here */
            REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
-           assert(!SvPVX(ret_x));
-            ((XPV*)SvANY(ret_x))->xpv_len_u.xpvlenu_rx = temp->sv_any;
+           assert(!SvPVX(dsv));
+            ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
            temp->sv_any = NULL;
            SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
            SvREFCNT_dec_NN(temp);
            /* SvCUR still resides in the xpvlv struct, so the regexp copy-
               ing below will not set it. */
-           SvCUR_set(ret_x, SvCUR(rx));
+           SvCUR_set(dsv, SvCUR(ssv));
        }
     }
     /* This ensures that SvTHINKFIRST(sv) is true, and hence that
        sv_force_normal(sv) is called.  */
-    SvFAKE_on(ret_x);
-    ret = ReANY(ret_x);
+    SvFAKE_on(dsv);
+    drx = ReANY(dsv);
 
-    SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
-    SvPV_set(ret_x, RX_WRAPPED(rx));
+    SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8);
+    SvPV_set(dsv, RX_WRAPPED(ssv));
     /* We share the same string buffer as the original regexp, on which we
        hold a reference count, incremented when mother_re is set below.
        The string pointer is copied here, being part of the regexp struct.
      */
-    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+    memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
           sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
     if (!islv)
-        SvLEN_set(ret_x, 0);
-    if (r->offs) {
-        const I32 npar = r->nparens+1;
-        Newx(ret->offs, npar, regexp_paren_pair);
-        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+        SvLEN_set(dsv, 0);
+    if (srx->offs) {
+        const I32 npar = srx->nparens+1;
+        Newx(drx->offs, npar, regexp_paren_pair);
+        Copy(srx->offs, drx->offs, npar, regexp_paren_pair);
     }
-    if (r->substrs) {
+    if (srx->substrs) {
         int i;
-        Newx(ret->substrs, 1, struct reg_substr_data);
-       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+        Newx(drx->substrs, 1, struct reg_substr_data);
+       StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
 
         for (i = 0; i < 2; i++) {
-            SvREFCNT_inc_void(ret->substrs->data[i].substr);
-            SvREFCNT_inc_void(ret->substrs->data[i].utf8_substr);
+            SvREFCNT_inc_void(drx->substrs->data[i].substr);
+            SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
         }
 
        /* check_substr and check_utf8, if non-NULL, point to either their
           anchored or float namesakes, and don't hold a second reference.  */
     }
-    RX_MATCH_COPIED_off(ret_x);
+    RX_MATCH_COPIED_off(dsv);
 #ifdef PERL_ANY_COW
-    ret->saved_copy = NULL;
+    drx->saved_copy = NULL;
 #endif
-    ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
-    SvREFCNT_inc_void(ret->qr_anoncv);
-    if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+    drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
+    SvREFCNT_inc_void(drx->qr_anoncv);
+    if (srx->recurse_locinput)
+        Newxz(drx->recurse_locinput,srx->nparens + 1,char *);
 
-    return ret_x;
+    return dsv;
 }
 #endif
 
+
 /* regfree_internal()
 
    Free the private data in a regexp. This is overloadable by
diff --git a/sv.c b/sv.c
index b32db9632b..055f891016 100644
--- a/sv.c
+++ b/sv.c
@@ -6624,7 +6624,6 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
            goto freescalar;
        case SVt_REGEXP:
            /* FIXME for plugins */
-         freeregexp:
            pregfree2((REGEXP*) sv);
            goto freescalar;
        case SVt_PVCV:
@@ -6703,7 +6702,16 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
            }
            else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV**  */
                SvREFCNT_dec(LvTARG(sv));
-           if (isREGEXP(sv)) goto freeregexp;
+           if (isREGEXP(sv)) {
+                /* SvLEN points to a regex body. Free the body, then
+                 * set SvLEN to whatever value was in the now-freed
+                 * regex body. The PVX buffer is shared by multiple re's
+                 * and only freed once, by the re whose len in non-null */
+                STRLEN len = ReANY(sv)->xpv_len;
+                pregfree2((REGEXP*) sv);
+                SvLEN_set((sv), len);
+                goto freescalar;
+            }
             /* FALLTHROUGH */
        case SVt_PVGV:
            if (isGV_with_GP(sv)) {
diff --git a/t/op/qr.t b/t/op/qr.t
index 2944a0e04d..32b9e3b23b 100644
--- a/t/op/qr.t
+++ b/t/op/qr.t
@@ -7,7 +7,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan(tests => 33);
+plan(tests => 34);
 
 sub r {
     return qr/Good/;
@@ -119,3 +119,19 @@ sub {
     utf8::upgrade($$r1);
     like "xxx", $r1, "RT #131821 utf8::upgrade: case insensitive";
 }
+
+# after v5.27.2-30-gdf6b4bd, this was double-freeing the PVX buffer
+# and would crash under valgrind or similar. The eval ensures that the
+# regex any children are freed.
+
+{
+    my %h;
+    eval q{
+        sub {
+           my $r = qr/abc/;
+           $_[0] = $$r;
+        }->($h{foo});
+        1;
+    };
+}
+pass("PVLV-as-REGEXP double-free of PVX");

--
Perl5 Master Repository

Reply via email to