In perl.git, the branch maint-5.24 has been updated <http://perl5.git.perl.org/perl.git/commitdiff/78b60e17093d684bb07f20fc953075801b1ce207?hp=e9bd40cdfc5ef693fe27c440ca36462b72c39444>
- Log ----------------------------------------------------------------- commit 78b60e17093d684bb07f20fc953075801b1ce207 Author: Dan Collins <[email protected]> Date: Fri Jun 17 19:40:57 2016 -0400 Add tests for regex recursion d5a00e4af introduced a bug in reginsert that was fixed by da7cf1cc7, originally documented in [perl #128109]. This patch adds two regression tests for the testcase reported by Jan Goyvaerts in [perl #128420]. M t/re/re_tests commit 4539ae3741e1a4de9a54e5bc52524b40abc2d58a Author: Yves Orton <[email protected]> Date: Tue May 10 09:44:31 2016 +0200 fix #128109 - do not move RExC_open_parens[0] in reginsert In d5a00e4af6b155495be31a35728b8fef8e671ebe I merged GOSUB and GOSTART, part of which involved making RExC_open_parens[0] refer to the start of the pattern, and RExC_close_parens[0] refer to the end of the pattern. This tripped up in reginsert in a subtle way: the start of the pattern cannot and should not move in reginsert(). Unlike a paren that might be at the start of the pattern which should move when something is inserted in front of it, the start is a fixed point and should never move. This patch fixes this up, and adds an assert to check that reginsert() is not called once study_chunk() starts, as reginsert() does not adjust RExC_recurse. This was noticed by hv while debugging [perl #128085], thanks hugo! M regcomp.c commit 5edefd5d2abb0a8696bd2159e1f17b5975257818 Author: Yves Orton <[email protected]> Date: Mon May 9 08:52:16 2016 +0200 fix #128085 - SIGSEGV in S_regmatch with S_study_chunk: Assertion "!frame" failed. The goto target should have been before the if (frame) block. Clearly this code is not well tested in our test suite. This patch does NOT include tests. 
M regcomp.c ----------------------------------------------------------------------- Summary of changes: regcomp.c | 25 ++++++++++++++----------- t/re/re_tests | 2 ++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/regcomp.c b/regcomp.c index be6cb96a70..cbaad1e33a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -222,6 +222,7 @@ struct RExC_state_t { #endif bool seen_unfolded_sharp_s; bool strict; + bool study_started; }; #define RExC_flags (pRExC_state->flags) @@ -288,6 +289,7 @@ struct RExC_state_t { #define RExC_frame_last (pRExC_state->frame_last) #define RExC_frame_count (pRExC_state->frame_count) #define RExC_strict (pRExC_state->strict) +#define RExC_study_started (pRExC_state->study_started) /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set * a flag to disable back-off on the fixed/floating substrings - if it's @@ -4102,6 +4104,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, GET_RE_DEBUG_FLAGS_DECL; PERL_ARGS_ASSERT_STUDY_CHUNK; + RExC_study_started= 1; if ( depth == 0 ) { @@ -5899,15 +5902,10 @@ Perl_re_printf( aTHX_ "LHS=%"UVuf" RHS=%"UVuf"\n", /* Else: zero-length, ignore. */ scan = regnext(scan); } - /* If we are exiting a recursion we can unset its recursed bit - * and allow ourselves to enter it again - no danger of an - * infinite loop there. - if (stopparen > -1 && recursed) { - DEBUG_STUDYDATA("unset:", data,depth); - PAREN_UNSET( recursed, stopparen); - } - */ + + finish: if (frame) { + /* we need to unwind recursion. 
*/ depth = depth - 1; DEBUG_STUDYDATA("frame-end:",data,depth); @@ -5924,7 +5922,6 @@ Perl_re_printf( aTHX_ "LHS=%"UVuf" RHS=%"UVuf"\n", goto fake_study_recurse; } - finish: assert(!frame); DEBUG_STUDYDATA("pre-fin:",data,depth); @@ -6883,6 +6880,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_contains_locale = 0; RExC_contains_i = 0; RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT); + RExC_study_started = 0; pRExC_state->runtime_code_qr = NULL; RExC_frame_head= NULL; RExC_frame_last= NULL; @@ -18241,7 +18239,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) RExC_size += size; return; } - + assert(!RExC_study_started); /* I believe we should never use reginsert once we have started + studying. If this is wrong then we need to adjust RExC_recurse + below like we do with RExC_open_parens/RExC_close_parens. */ src = RExC_emit; RExC_emit += size; dst = RExC_emit; @@ -18252,7 +18252,10 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) * iow it is 1 more than the number of parens seen in * the pattern so far. */ for ( paren=0 ; paren < RExC_npar ; paren++ ) { - if ( RExC_open_parens[paren] >= opnd ) { + /* note, RExC_open_parens[0] is the start of the + * regex, it can't move. RExC_close_parens[0] is the end + * of the regex, it *can* move. */ + if ( paren && RExC_open_parens[paren] >= opnd ) { /*DEBUG_PARSE_FMT("open"," - %d",size);*/ RExC_open_parens[paren] += size; } else { diff --git a/t/re/re_tests b/t/re/re_tests index 34ac94a213..7e8522da98 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1966,6 +1966,8 @@ ab(?#Comment){2}c abbc y $& abbc .{1}?? 
- c - Nested quantifiers .{1}?+ - c - Nested quantifiers (?:.||)(?|)000000000@ 000000000@ y $& 000000000@ # [perl #126405] +aa$|a(?R)a|a aaa y $& aaa # [perl 128420] recursive matches +(?:\1|a)([bcd])\1(?:(?R)|e)\1 abbaccaddedcb y $& abbaccaddedcb # [perl 128420] recursive match with backreferences # Keep these lines at the end of the file # vim: softtabstop=0 noexpandtab -- Perl5 Master Repository
