if this patch causes trouble please revert it without hesitation On 7 June 2012 09:00, Yves Orton <[email protected]> wrote: > In perl.git, the branch blead has been updated > > <http://perl5.git.perl.org/perl.git/commitdiff/3b6759a6b10290b6fea26fb98b50fbf5ce4466b5?hp=ae0a0fb2ce70e35111ba07e2a578edd8fcc989f8> > > - Log ----------------------------------------------------------------- > commit 3b6759a6b10290b6fea26fb98b50fbf5ce4466b5 > Author: Yves Orton <[email protected]> > Date: Wed Jun 6 08:53:05 2012 +0200 > > optimise (?:|) and related NOTHING like constructs out of the compiled > optree > > A pattern like (?:|) causes the regex engine to do extra work even though > it is equivelent to matching nothing. This optimises such sequences into > more efficient opcodes that do less work, and in some cases optimises > them away entirely. > ----------------------------------------------------------------------- > > Summary of changes: > regcomp.c | 141 > ++++++++++++++++++++++++++++++++++++++++++++++++++----------- > 1 files changed, 116 insertions(+), 25 deletions(-) > > diff --git a/regcomp.c b/regcomp.c > index 5ec3bc4..906add7 100644 > --- a/regcomp.c > +++ b/regcomp.c > @@ -3372,21 +3372,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, > regnode **scanp, > * in which case we do some bookkeeping, > otherwise we update > * the end pointer. */ > if ( !first ) { > + first = cur; > + trietype = noper_trietype; > if ( noper_trietype == NOTHING ) { > #if !defined(DEBUGGING) && !defined(NOJUMPTRIE) > regnode * const noper_next = regnext( > noper ); > - U8 noper_next_type = (noper_next && > noper_next != tail) ? OP(noper_next) : 0; > + U8 noper_next_type = (noper_next && > noper_next!=tail) ? OP(noper_next) : 0; > U8 noper_next_trietype = > noper_next_type ? TRIE_TYPE( noper_next_type ) :0; > #endif > > - if ( noper_next_trietype ) { > - first = cur; > + if ( noper_next_trietype ) > trietype = noper_next_trietype; > - } > - } else { > - first = cur; > - trietype = noper_trietype; > - } > + } > } else { > if ( trietype == NOTHING ) > trietype = noper_trietype; > @@ -3436,24 +3433,46 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, > regnode **scanp, > "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur)); > > }); > - if ( last && trietype != NOTHING ) { > - /* the last branch of the sequence was part of a > trie, > - * so we have to construct it here outside of > the loop > - */ > - made= make_trie( pRExC_state, startbranch, > first, scan, tail, count, trietype, depth+1 ); > + if ( last ) { > + if ( trietype != NOTHING ) { > + /* the last branch of the sequence was part > of a trie, > + * so we have to construct it here outside > of the loop > + */ > + made= make_trie( pRExC_state, startbranch, > first, scan, tail, count, trietype, depth+1 ); > #ifdef TRIE_STUDY_OPT > - if ( ((made == MADE_EXACT_TRIE && > - startbranch == first) > - || ( first_non_open == first )) && > - depth==0 ) { > - flags |= SCF_TRIE_RESTUDY; > - if ( startbranch == first > - && scan == tail ) > - { > - RExC_seen &=~REG_TOP_LEVEL_BRANCHES; > + if ( ((made == MADE_EXACT_TRIE && > + startbranch == first) > + || ( first_non_open == first )) && > + depth==0 ) { > + flags |= SCF_TRIE_RESTUDY; > + if ( startbranch == first > + && scan == tail ) > + { > + RExC_seen &=~REG_TOP_LEVEL_BRANCHES; > + } > } > - } > #endif > + } else { > + /* at this point we know whatever we have is > a NOTHING sequence/branch > + * AND if 'startbranch' is 'first' then we > can turn the whole thing into a NOTHING > + */ > + if ( startbranch == first ) { > + regnode *opt; > + /* the entire thing is a NOTHING > sequence, something like this: > + * (?:|) So we can turn it into a plain > NOTHING op. */ > + DEBUG_TRIE_COMPILE_r({ > + regprop(RExC_rx, mysv, cur); > + PerlIO_printf( Perl_debug_log, > + "%*s- %s (%d) <NOTHING BRANCH > SEQUENCE>\n", (int)depth * 2 + 2, > + "", SvPV_nolen_const( mysv > ),REG_NODE_NUM(cur)); > + > + }); > + OP(startbranch)= NOTHING; > + NEXT_OFF(startbranch)= tail - > startbranch; > + for ( opt= startbranch + 1; opt < tail ; > opt++ ) > + OP(opt)= OPTIMIZED; > + } > + } > } /* end if ( last) */ > } /* TRIE_MAXBUF is non zero */ > > @@ -4378,6 +4397,32 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode > **scanp, > /* Lookbehind, or need to calculate parens/evals/stclass: */ > && (scan->flags || data || (flags & SCF_DO_STCLASS)) > && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) { > + if ( OP(scan) == UNLESSM && > + scan->flags == 0 && > + OP(NEXTOPER(NEXTOPER(scan))) == NOTHING && > + OP(regnext(NEXTOPER(NEXTOPER(scan)))) == SUCCEED > + ) { > + regnode *opt; > + regnode *upto= regnext(scan); > + DEBUG_PARSE_r({ > + SV * const mysv_val=sv_newmortal(); > + DEBUG_STUDYDATA("OPFAIL",data,depth); > + > + /*DEBUG_PARSE_MSG("opfail");*/ > + regprop(RExC_rx, mysv_val, upto); > + PerlIO_printf(Perl_debug_log, "~ replace with OPFAIL > pointed at %s (%"IVdf") offset %"IVdf"\n", > + SvPV_nolen_const(mysv_val), > + (IV)REG_NODE_NUM(upto), > + (IV)(upto - scan) > + ); > + }); > + OP(scan) = OPFAIL; > + NEXT_OFF(scan) = upto - scan; > + for (opt= scan + 1; opt < upto ; opt++) > + OP(opt) = OPTIMIZED; > + scan= upto; > + continue; > + } > if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY > || OP(scan) == UNLESSM ) > { > @@ -4542,8 +4587,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode > **scanp, > } > } > } > - > - > } > #endif > } > @@ -8412,9 +8455,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 > *flagp,U32 depth) > } > break; > } > + DEBUG_PARSE_r(if (!SIZE_ONLY) { > + SV * const mysv_val1=sv_newmortal(); > + SV * const mysv_val2=sv_newmortal(); > + DEBUG_PARSE_MSG("lsbr"); > + regprop(RExC_rx, mysv_val1, lastbr); > + regprop(RExC_rx, mysv_val2, ender); > + PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to > ender %s (%"IVdf") offset %"IVdf"\n", > + SvPV_nolen_const(mysv_val1), > + (IV)REG_NODE_NUM(lastbr), > + SvPV_nolen_const(mysv_val2), > + (IV)REG_NODE_NUM(ender), > + (IV)(ender - lastbr) > + ); > + }); > REGTAIL(pRExC_state, lastbr, ender); > > if (have_branch && !SIZE_ONLY) { > + char is_nothing= 1; > if (depth==1) > RExC_seen |= REG_TOP_LEVEL_BRANCHES; > > @@ -8423,11 +8481,44 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 > *flagp,U32 depth) > const U8 op = PL_regkind[OP(br)]; > if (op == BRANCH) { > REGTAIL_STUDY(pRExC_state, NEXTOPER(br), ender); > + if (OP(NEXTOPER(br)) != NOTHING || regnext(NEXTOPER(br)) > != ender) > + is_nothing= 0; > } > else if (op == BRANCHJ) { > REGTAIL_STUDY(pRExC_state, NEXTOPER(NEXTOPER(br)), ender); > + /* for now we always disable this optimisation * / > + if (OP(NEXTOPER(NEXTOPER(br))) != NOTHING || > regnext(NEXTOPER(NEXTOPER(br))) != ender) > + */ > + is_nothing= 0; > } > } > + if (is_nothing) { > + br= PL_regkind[OP(ret)] != BRANCH ? regnext(ret) : ret; > + DEBUG_PARSE_r(if (!SIZE_ONLY) { > + SV * const mysv_val1=sv_newmortal(); > + SV * const mysv_val2=sv_newmortal(); > + DEBUG_PARSE_MSG("NADA"); > + regprop(RExC_rx, mysv_val1, ret); > + regprop(RExC_rx, mysv_val2, ender); > + PerlIO_printf(Perl_debug_log, "~ converting ret %s > (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", > + SvPV_nolen_const(mysv_val1), > + (IV)REG_NODE_NUM(ret), > + SvPV_nolen_const(mysv_val2), > + (IV)REG_NODE_NUM(ender), > + (IV)(ender - ret) > + ); > + }); > + OP(br)= NOTHING; > + if (OP(ender) == TAIL) { > + NEXT_OFF(br)= 0; > + RExC_emit= br + 1; > + } else { > + regnode *opt; > + for ( opt= br + 1; opt < ender ; opt++ ) > + OP(opt)= OPTIMIZED; > + NEXT_OFF(br)= ender - br; > + } > + } > } > } > > > -- > Perl5 Master Repository
-- perl -Mre=debug -e "/just|another|perl|hacker/"
