In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/105c827d9a0f19a772c7b179e2997d842a095460?hp=7654aaa1134e4908a6a6dc0b85d360816e79faa3>
- Log ----------------------------------------------------------------- commit 105c827d9a0f19a772c7b179e2997d842a095460 Author: Tony Cook <[email protected]> Date: Thu Sep 28 14:40:24 2017 +1000 (perl #124256) disallow \K in lookahead and lookbehind \K can cause infinite loops in matching in these, and we're not sure how it really should behave, so forbid it. ----------------------------------------------------------------------- Summary of changes: pod/perldiag.pod | 5 +++++ regcomp.c | 42 +++++++++++++++++++++++++++++++----------- t/lib/croak/regcomp | 9 +++++++++ 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index f9c023d148..2fa4b62257 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3277,6 +3277,11 @@ line. See L<perlrun> for more details. (P) The regular expression parser is confused. +=item \K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/%s/ + +(F) Your regular expression used C<\K> in a lookhead or lookbehind +assertion, which isn't permitted. + =item Label not found for "last %s" (F) You named a loop to break out of, but you're not currently in a loop diff --git a/regcomp.c b/regcomp.c index cf9246473f..aba6648da5 100644 --- a/regcomp.c +++ b/regcomp.c @@ -182,6 +182,7 @@ struct RExC_state_t { through */ U32 study_chunk_recursed_bytes; /* bytes in bitmap */ I32 in_lookbehind; + I32 in_lookahead; I32 contains_locale; I32 override_recoding; #ifdef EBCDIC @@ -273,6 +274,7 @@ struct RExC_state_t { #define RExC_study_chunk_recursed_bytes \ (pRExC_state->study_chunk_recursed_bytes) #define RExC_in_lookbehind (pRExC_state->in_lookbehind) +#define RExC_in_lookahead (pRExC_state->in_lookahead) #define RExC_contains_locale (pRExC_state->contains_locale) #ifdef EBCDIC # define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) @@ -7622,6 +7624,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_seen = 0; RExC_maxlen = 0; RExC_in_lookbehind = 0; + RExC_in_lookahead = 0; RExC_seen_zerolen = *exp == '^' ? -1 : 0; #ifdef EBCDIC RExC_recode_x_to_native = 0; @@ -11078,6 +11081,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) *flagp = 0; /* Tentatively. */ + if (RExC_in_lookbehind) { + RExC_in_lookbehind++; + } + if (RExC_in_lookahead) { + RExC_in_lookahead++; + } + /* Having this true makes it feasible to have a lot fewer tests for the * parse pointer being in scope. For example, we can write * while(isFOO(*RExC_parse)) RExC_parse++; @@ -11542,10 +11552,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if (RExC_parse >= RExC_end) { vFAIL("Sequence (?... not terminated"); } - - /* FALLTHROUGH */ + RExC_seen_zerolen++; + break; case '=': /* (?=...) */ RExC_seen_zerolen++; + RExC_in_lookahead++; break; case '!': /* (?!...) */ RExC_seen_zerolen++; @@ -12344,6 +12355,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if (RExC_in_lookbehind) { RExC_in_lookbehind--; } + if (RExC_in_lookahead) { + RExC_in_lookahead--; + } if (after_freeze > RExC_npar) RExC_npar = after_freeze; return(ret); @@ -13429,15 +13443,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) *flagp |= SIMPLE; goto finish_meta_pat; case 'K': - RExC_seen_zerolen++; - ret = reg_node(pRExC_state, KEEPS); - *flagp |= SIMPLE; - /* XXX:dmq : disabling in-place substitution seems to - * be necessary here to avoid cases of memory corruption, as - * with: C<$_="x" x 80; s/x\K/y/> -- rgs - */ - RExC_seen |= REG_LOOKBEHIND_SEEN; - goto finish_meta_pat; + if (!RExC_in_lookbehind && !RExC_in_lookahead) { + RExC_seen_zerolen++; + ret = reg_node(pRExC_state, KEEPS); + *flagp |= SIMPLE; + /* XXX:dmq : disabling in-place substitution seems to + * be necessary here to avoid cases of memory corruption, as + * with: C<$_="x" x 80; s/x\K/y/> -- rgs + */ + RExC_seen |= REG_LOOKBEHIND_SEEN; + goto finish_meta_pat; + } + else { + ++RExC_parse; /* advance past the 'K' */ + vFAIL("\\K not permitted in lookahead/lookbehind"); + } case 'Z': ret = reg_node(pRExC_state, SEOL); *flagp |= SIMPLE; diff --git a/t/lib/croak/regcomp b/t/lib/croak/regcomp index 0ba705e915..fc410829b6 100644 --- a/t/lib/croak/regcomp +++ b/t/lib/croak/regcomp @@ -70,3 +70,12 @@ qr/((a))/; EXPECT Too many nested open parens in regex; marked by <-- HERE in m/(( <-- HERE a))/ at - line 3. ######## +# NAME \K not permitted in lookahead +$x =~ /(?=a\Ka)a/; +EXPECT +\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?=a\K <-- HERE a)a/ at - line 1. +######## +# NAME \K not permitted in lookbehind +$x =~ /(?<=a\Ka)a/; +EXPECT +\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?<=a\K <-- HERE a)a/ at - line 1. -- Perl5 Master Repository
