In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/8a6d8ec6fe627c401c6c759edd38bbb10e4b56e9?hp=a53bfdae91fb2d719e69761f2d2f84c5d8a47753>
- Log ----------------------------------------------------------------- commit 8a6d8ec6fe627c401c6c759edd38bbb10e4b56e9 Author: Hugo van der Sanden <[email protected]> Date: Sat Feb 7 16:31:04 2015 +0000 [perl #123755] including unknown char in error requires care AFL (<http://lcamtuf.coredump.cx/afl>) found that when producing the error message for /(??/ we hit an assert because we've stepped past the end of the pattern string. Code inspection found that we also do that in other branches, and we also need to check UTF more carefully. ----------------------------------------------------------------------- Summary of changes: regcomp.c | 11 ++++++++--- t/re/pat.t | 13 ++++++++++++- t/re/re_tests | 2 +- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/regcomp.c b/regcomp.c index 0d6d344..e069a15 100644 --- a/regcomp.c +++ b/regcomp.c @@ -521,6 +521,10 @@ static const scan_data_t zero_scan_data = UTF8fARG(UTF, offset, RExC_precomp), \ UTF8fARG(UTF, RExC_end - RExC_precomp - offset, RExC_precomp + offset) +/* Used to point after bad bytes for an error message, but avoid skipping + * past a nul byte. */ +#define SKIP_IF_CHAR(s) (!*(s) ? 0 : UTF ? UTF8SKIP(s) : 1) + /* * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given * arg. Show regex, up to a maximum length. If it's too long, chop and add @@ -9705,7 +9709,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state) /*NOTREACHED*/ default: fail_modifiers: - RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1; + RExC_parse += SKIP_IF_CHAR(RExC_parse); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized", UTF8fARG(UTF, RExC_parse-seqstart, seqstart)); @@ -9953,7 +9957,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) nextchar(pRExC_state); return ret; } - RExC_parse++; + --RExC_parse; + RExC_parse += SKIP_IF_CHAR(RExC_parse); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart); @@ -10176,7 +10181,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case '?': /* (??...) */ is_logical = 1; if (*RExC_parse != '{') { - RExC_parse++; + RExC_parse += SKIP_IF_CHAR(RExC_parse); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ vFAIL2utf8f( "Sequence (%"UTF8f"...) not recognized", diff --git a/t/re/pat.t b/t/re/pat.t index 3d52554..53972fe 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -22,7 +22,7 @@ BEGIN { skip_all_without_unicode_tables(); } -plan tests => 759; # Update this when adding/deleting tests. +plan tests => 765; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1635,6 +1635,17 @@ EOP ok(1, "did not crash"); ok($match, "[bbb...] resolved as character class, not subscript"); } + + { # [perl #123755] + for my $pat ('(??', '(?P', '(?i-') { + eval qq{ qr/$pat/ }; + ok(1, "qr/$pat/ did not crash"); + eval qq{ qr/${pat}\x{123}/ }; + my $e = $@; + like($e, qr{\x{123}}, + "qr/${pat}x/ shows x in error even if it's a wide character"); + } + } } # End of sub run_tests 1; diff --git a/t/re/re_tests b/t/re/re_tests index ce8d0cf..41cda56 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1102,7 +1102,7 @@ X(?<=foo.)[YZ] ..XfooXY.. y pos 8 (?P<n>foo)(??{ $+{n} }) snofooefoofoowaa yM $+{n} foo miniperl cannot load Tie::Hash::NamedCapture (?P<=n>foo|bar|baz) - c - Group name must start with a non-digit word character (?P<!n>foo|bar|baz) - c - Group name must start with a non-digit word character -(?PX<n>foo|bar|baz) - c - Sequence (?PX<...) not recognized +(?PX<n>foo|bar|baz) - c - Sequence (?PX...) not recognized /(?'n'foo|bar|baz)/ snofooewa y $1 foo /(?'n'foo|bar|baz)/ snofooewa yM $+{n} foo miniperl cannot load Tie::Hash::NamedCapture /(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa yM $+{n} foo miniperl cannot load Tie::Hash::NamedCapture -- Perl5 Master Repository
