In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/622c613e12cef84c93c5df207a321fe13d0f2a7f?hp=c9fe309b988eb6db0705de33e24381da2b3761eb>
- Log ----------------------------------------------------------------- commit 622c613e12cef84c93c5df207a321fe13d0f2a7f Author: David Mitchell <[email protected]> Date: Fri Aug 4 14:28:15 2017 +0100 PVLV-as-REGEXP: avoid PVX double free With v5.27.2-30-gdf6b4bd, I changed the way that PVLVs store a regexp value (by making the xpv_len field point to a regexp struct). There was a bug in this, which caused the PVX buffer to be double freed. Several REGEXP SVs can share a PVX buffer. Only one of them will have a non-zero xpv_len field, and that SV gets to free the buffer. After the commit above, the non-zero xpv_len was triggering an extra free. This was showing up in smokes as failures in re/recompile.t when invoked with PERL_DESTRUCT_LEVEL=2 (which t/TEST does). M sv.c M t/op/qr.t commit 89042fa4090fc5634ff775e753c58b0827ad6af8 Author: David Mitchell <[email protected]> Date: Fri Aug 4 14:00:26 2017 +0100 sv_dump(): display regex LEN and LV-as-RX regexp When the len field of a REGEXP isn't usurped, display it (it used to always be skipped for REGEXPs). When it's usurped by a PVLV to point to a 'struct regexp', display it as a pointer. M dump.c M ext/Devel-Peek/t/Peek.t commit 89699a04a6346cff31d7d8cdd6e39556b846dcf6 Author: David Mitchell <[email protected]> Date: Fri Aug 4 13:12:55 2017 +0100 Perl_reg_temp_copy(): rename args. This function copies a regexp SV. Rename its args to ssv and dsv to match a usual convention in other functions such as sv_catsv(). Similarly rename the two local vars holding ReANY(ssv/dsv) to srx, drx. This is less confusing than having four vars called rx, ret_x, r, ret. Also update the comments explaining what the function does. M embed.fnc M proto.h M regcomp.c ----------------------------------------------------------------------- Summary of changes: dump.c | 7 ++++- embed.fnc | 2 +- ext/Devel-Peek/t/Peek.t | 5 +++- proto.h | 4 +-- regcomp.c | 75 +++++++++++++++++++++++++++---------------------- sv.c | 12 ++++++-- t/op/qr.t | 18 +++++++++++- 7 files changed, 82 insertions(+), 41 deletions(-) diff --git a/dump.c b/dump.c index fa5f0baf91..1fa242204e 100644 --- a/dump.c +++ b/dump.c @@ -1836,7 +1836,12 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo PerlIO_printf(file, "\n"); } Perl_dump_indent(aTHX_ level, file, " CUR = %" IVdf "\n", (IV)SvCUR(sv)); - if (!re) + if (re && type == SVt_PVLV) + /* LV-as-REGEXP usurps len field to store poiunter to + * regexp struct */ + Perl_dump_indent(aTHX_ level, file, " REGEXP = 0x%" UVxf "\n", + PTR2UV(((XPV*)SvANY(sv))->xpv_len_u.xpvlenu_rx)); + else Perl_dump_indent(aTHX_ level, file, " LEN = %" IVdf "\n", (IV)SvLEN(sv)); #ifdef PERL_COPY_ON_WRITE diff --git a/embed.fnc b/embed.fnc index 2dd73bfde0..77e898de9b 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1298,7 +1298,7 @@ Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \ Ap |void |pregfree |NULLOK REGEXP* r Ap |void |pregfree2 |NN REGEXP *rx : FIXME - is anything in re using this now? -EXp |REGEXP*|reg_temp_copy |NULLOK REGEXP* ret_x|NN REGEXP* rx +EXp |REGEXP*|reg_temp_copy |NULLOK REGEXP* dsv|NN REGEXP* ssv Ap |void |regfree_internal|NN REGEXP *const rx #if defined(USE_ITHREADS) Ap |void* |regdupe_internal|NN REGEXP * const r|NN CLONE_PARAMS* param diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index db9354bd0e..58dc109d97 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -362,7 +362,7 @@ do_test('reference to regexp', FLAGS = \\(OBJECT,POK,FAKE,pPOK\\) PV = $ADDR "\\(\\?\\^:tic\\)" CUR = 8 - LEN = 0 # $] < 5.017006 + LEN = 0 STASH = $ADDR\\t"Regexp"' . ($] < 5.013 ? '' : ' @@ -389,6 +389,7 @@ do_test('reference to regexp', FLAGS = \(POK,pPOK\) PV = $ADDR "\(\?\^:tic\)" CUR = 8 + LEN = \d+ COMPFLAGS = 0x0 \(\) EXTFLAGS = 0x680000 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\) (?: ENGINE = $ADDR \(STANDARD\) @@ -1164,6 +1165,7 @@ do_test('UTF-8 in a regular expression', FLAGS = \(OBJECT,POK,FAKE,pPOK,UTF8\) PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 "\(\?\^u:\\\\\\\\x\{100\}\)"\] CUR = 13 + LEN = 0 STASH = $ADDR "Regexp" COMPFLAGS = 0x0 \(\) EXTFLAGS = $ADDR \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\) @@ -1188,6 +1190,7 @@ do_test('UTF-8 in a regular expression', FLAGS = \(POK,pPOK,UTF8\) PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 "\(\?\^u:\\\\\\\\x\{100\}\)"\] CUR = 13 + LEN = \d+ COMPFLAGS = 0x0 \(\) EXTFLAGS = $ADDR \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\) (?: ENGINE = $ADDR \(STANDARD\) diff --git a/proto.h b/proto.h index 5988bf6b97..efbc52ba27 100644 --- a/proto.h +++ b/proto.h @@ -2675,9 +2675,9 @@ PERL_CALLCONV void Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I PERL_CALLCONV SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx); #define PERL_ARGS_ASSERT_REG_QR_PACKAGE \ assert(rx) -PERL_CALLCONV REGEXP* Perl_reg_temp_copy(pTHX_ REGEXP* ret_x, REGEXP* rx); +PERL_CALLCONV REGEXP* Perl_reg_temp_copy(pTHX_ REGEXP* dsv, REGEXP* ssv); #define PERL_ARGS_ASSERT_REG_TEMP_COPY \ - assert(rx) + assert(ssv) PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ const regexp *prog, const struct regnode *node, bool doinit, SV **listsvp, SV **altsvp); #define PERL_ARGS_ASSERT_REGCLASS_SWASH \ assert(node) diff --git a/regcomp.c b/regcomp.c index e5037fc046..5a9e56b080 100644 --- a/regcomp.c +++ b/regcomp.c @@ -19535,9 +19535,17 @@ Perl_pregfree2(pTHX_ REGEXP *rx) Safefree(r->recurse_locinput); } + /* reg_temp_copy() - This is a hacky workaround to the structural issue of match results + Copy ssv to dsv, both of which should of type SVt_REGEXP or SVt_PVLV, + except that dsv will be created if NULL. + + This function is used in two main ways. First to implement + $r = qr/....; $s = $$r; + + Secondly, it is used as a hacky workaround to the structural issue of + match results being stored in the regexp structure which is in turn stored in PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern could be PL_curpm in multiple contexts, and could require multiple @@ -19553,79 +19561,80 @@ Perl_pregfree2(pTHX_ REGEXP *rx) REGEXP * -Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx) +Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv) { - struct regexp *ret; - struct regexp *const r = ReANY(rx); - const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV; + struct regexp *drx; + struct regexp *const srx = ReANY(ssv); + const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV; PERL_ARGS_ASSERT_REG_TEMP_COPY; - if (!ret_x) - ret_x = (REGEXP*) newSV_type(SVt_REGEXP); + if (!dsv) + dsv = (REGEXP*) newSV_type(SVt_REGEXP); else { - SvOK_off((SV *)ret_x); + SvOK_off((SV *)dsv); if (islv) { /* For PVLVs, the head (sv_any) points to an XPVLV, while * the LV's xpvlenu_rx will point to a regexp body, which * we allocate here */ REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP); - assert(!SvPVX(ret_x)); - ((XPV*)SvANY(ret_x))->xpv_len_u.xpvlenu_rx = temp->sv_any; + assert(!SvPVX(dsv)); + ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any; temp->sv_any = NULL; SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL; SvREFCNT_dec_NN(temp); /* SvCUR still resides in the xpvlv struct, so the regexp copy- ing below will not set it. */ - SvCUR_set(ret_x, SvCUR(rx)); + SvCUR_set(dsv, SvCUR(ssv)); } } /* This ensures that SvTHINKFIRST(sv) is true, and hence that sv_force_normal(sv) is called. */ - SvFAKE_on(ret_x); - ret = ReANY(ret_x); + SvFAKE_on(dsv); + drx = ReANY(dsv); - SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8); - SvPV_set(ret_x, RX_WRAPPED(rx)); + SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8); + SvPV_set(dsv, RX_WRAPPED(ssv)); /* We share the same string buffer as the original regexp, on which we hold a reference count, incremented when mother_re is set below. The string pointer is copied here, being part of the regexp struct. */ - memcpy(&(ret->xpv_cur), &(r->xpv_cur), + memcpy(&(drx->xpv_cur), &(srx->xpv_cur), sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur)); if (!islv) - SvLEN_set(ret_x, 0); - if (r->offs) { - const I32 npar = r->nparens+1; - Newx(ret->offs, npar, regexp_paren_pair); - Copy(r->offs, ret->offs, npar, regexp_paren_pair); + SvLEN_set(dsv, 0); + if (srx->offs) { + const I32 npar = srx->nparens+1; + Newx(drx->offs, npar, regexp_paren_pair); + Copy(srx->offs, drx->offs, npar, regexp_paren_pair); } - if (r->substrs) { + if (srx->substrs) { int i; - Newx(ret->substrs, 1, struct reg_substr_data); - StructCopy(r->substrs, ret->substrs, struct reg_substr_data); + Newx(drx->substrs, 1, struct reg_substr_data); + StructCopy(srx->substrs, drx->substrs, struct reg_substr_data); for (i = 0; i < 2; i++) { - SvREFCNT_inc_void(ret->substrs->data[i].substr); - SvREFCNT_inc_void(ret->substrs->data[i].utf8_substr); + SvREFCNT_inc_void(drx->substrs->data[i].substr); + SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr); } /* check_substr and check_utf8, if non-NULL, point to either their anchored or float namesakes, and don't hold a second reference. */ } - RX_MATCH_COPIED_off(ret_x); + RX_MATCH_COPIED_off(dsv); #ifdef PERL_ANY_COW - ret->saved_copy = NULL; + drx->saved_copy = NULL; #endif - ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx); - SvREFCNT_inc_void(ret->qr_anoncv); - if (r->recurse_locinput) - Newxz(ret->recurse_locinput,r->nparens + 1,char *); + drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv); + SvREFCNT_inc_void(drx->qr_anoncv); + if (srx->recurse_locinput) + Newxz(drx->recurse_locinput,srx->nparens + 1,char *); - return ret_x; + return dsv; } #endif + /* regfree_internal() Free the private data in a regexp. This is overloadable by diff --git a/sv.c b/sv.c index b32db9632b..055f891016 100644 --- a/sv.c +++ b/sv.c @@ -6624,7 +6624,6 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) goto freescalar; case SVt_REGEXP: /* FIXME for plugins */ - freeregexp: pregfree2((REGEXP*) sv); goto freescalar; case SVt_PVCV: @@ -6703,7 +6702,16 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) } else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ SvREFCNT_dec(LvTARG(sv)); - if (isREGEXP(sv)) goto freeregexp; + if (isREGEXP(sv)) { + /* SvLEN points to a regex body. Free the body, then + * set SvLEN to whatever value was in the now-freed + * regex body. The PVX buffer is shared by multiple re's + * and only freed once, by the re whose len in non-null */ + STRLEN len = ReANY(sv)->xpv_len; + pregfree2((REGEXP*) sv); + SvLEN_set((sv), len); + goto freescalar; + } /* FALLTHROUGH */ case SVt_PVGV: if (isGV_with_GP(sv)) { diff --git a/t/op/qr.t b/t/op/qr.t index 2944a0e04d..32b9e3b23b 100644 --- a/t/op/qr.t +++ b/t/op/qr.t @@ -7,7 +7,7 @@ BEGIN { require './test.pl'; } -plan(tests => 33); +plan(tests => 34); sub r { return qr/Good/; @@ -119,3 +119,19 @@ sub { utf8::upgrade($$r1); like "xxx", $r1, "RT #131821 utf8::upgrade: case insensitive"; } + +# after v5.27.2-30-gdf6b4bd, this was double-freeing the PVX buffer +# and would crash under valgrind or similar. The eval ensures that the +# regex any children are freed. + +{ + my %h; + eval q{ + sub { + my $r = qr/abc/; + $_[0] = $$r; + }->($h{foo}); + 1; + }; +} +pass("PVLV-as-REGEXP double-free of PVX"); -- Perl5 Master Repository
