In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/d9a72fccda5cacaccd2671769c010f3cda59ef8a?hp=49f9fecb5d7481850ac2474c72c1013a4c763226>
- Log ----------------------------------------------------------------- commit d9a72fccda5cacaccd2671769c010f3cda59ef8a Author: Yves Orton <[email protected]> Date: Sun Sep 28 12:17:37 2014 +0200 Add tests for a51d618a fix of RT #122283 Add a new re debug mode for outputing stuff useful for testing. In this case we count the number of times that we go through study_chunk. With a51d618a we should do 5 times (or less) when we traverse the test pattern. Without a51d618a we recurse 11 times. In the case of RT #122283 we would do gazilions of recursions, so many I never let it run to finish. / (?(DEFINE)(?<foo>foo)) (?(DEFINE)(?<bar>(?&foo)bar)) (?(DEFINE)(?<baz>(?&bar)baz)) (?(DEFINE)(?<bop>(?&baz)bop)) /x I say "or less" because you could argue that since these defines are never called, we should not actually recurse at all, and should maybe just compile this as a simple empty pattern. ----------------------------------------------------------------------- Summary of changes: ext/re/re.pm | 16 +++++++++++++++- ext/re/t/regop.pl | 5 +++-- ext/re/t/regop.t | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- regcomp.c | 19 ++++++++++++++----- regcomp.h | 3 +++ 5 files changed, 89 insertions(+), 9 deletions(-) diff --git a/ext/re/re.pm b/ext/re/re.pm index ea7e3d0..c2d6eed 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -4,7 +4,7 @@ package re; use strict; use warnings; -our $VERSION = "0.26"; +our $VERSION = "0.27"; our @ISA = qw(Exporter); our @EXPORT_OK = ('regmust', qw(is_regexp regexp_pattern @@ -57,6 +57,7 @@ my %flags = ( TRIEC => 0x000004, DUMP => 0x000008, FLAGS => 0x000010, + TEST => 0x000020, EXECUTE => 0x00FF00, INTUIT => 0x000100, @@ -396,6 +397,14 @@ Detailed info about trie compilation. Dump the final program out after it is compiled and optimised. +=item FLAGS + +Dump the flags associated with the program + +=item TEST + +Print output intended for testing the internals of the compile process + =back =item Execute related options @@ -448,6 +457,10 @@ Enable debugging of the recursion stack in the engine. Enabling or disabling this option automatically does the same for debugging states as well. This output from this can be quite large. +=item GPOS + +Enable debugging of the \G modifier. + =item OPTIMISEM Enable enhanced optimisation debugging and start-point optimisations. @@ -473,6 +486,7 @@ debug options. Almost definitely only useful to people hacking on the offsets part of the debug engine. + =back =item Other useful flags diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl index 961af39..86976ee 100644 --- a/ext/re/t/regop.pl +++ b/ext/re/t/regop.pl @@ -1,4 +1,4 @@ -use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC); +use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC TEST); my @tests=( XY => 'X(A|[B]Q||C|D)Y' , foobar => '[f][o][o][b][a][r]', @@ -7,7 +7,8 @@ my @tests=( 'D:\\dev/perl/ver/28321_/perl.exe'=> '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i', 'q'=>'[q]', - "path_sep:\t8490" => '^(\\S{1,9}):\\s*(\\d+)$' + "path_sep:\t8490" => '^(\\S{1,9}):\\s*(\\d+)$', + '' => '(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE)(?<baz>(?&bar)baz))(?(DEFINE)(?<bop>(?&baz)bop))', ); while (@tests) { my ($str,$pat)=splice @tests,0,2; diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t index 8ed2029..6397d4e 100644 --- a/ext/re/t/regop.t +++ b/ext/re/t/regop.t @@ -55,7 +55,7 @@ foreach my $testout ( @tests ) { # that the tests for this result set are finished. # If you add a test make sure you update $NUM_SECTS # the commented output is just for legacy/debugging purposes -BEGIN{ $NUM_SECTS= 7 } +BEGIN{ $NUM_SECTS= 8 } __END__ #Compiling REx "X(A|[B]Q||C|D)Y" @@ -282,3 +282,56 @@ Freeing REx: "[q]" floating ":" at 1..9 (checking floating) stclass ANYOF[\x{00}-\x{06}\a\b\x{0E}-\x{1F}\x{21}-\x{FF}][{utf8}0100-167F 1681-1FFF 200B-2027 202A-202E 2030-205E 2060-2FFF 3001-INFINITY] anchored(SBOL) min ... [5 chars truncated] %MATCHED% synthetic stclass +--- +#Compiling REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... +#Got 532 bytes for offset annotations. +study_chunk_recursed_count: 5 +#Final program: +# 1: DEFINEP (3) +# 3: IFTHEN (14) +# 5: OPEN1 'foo' (7) +# 7: EXACT <foo> (9) +# 9: CLOSE1 'foo' (14) +# 11: LONGJMP (13) +# 13: TAIL (14) +# 14: DEFINEP (16) +# 16: IFTHEN (30) +# 18: OPEN2 'bar' (20) +# 20: GOSUB1[-15] (23) +# 23: EXACT <bar> (25) +# 25: CLOSE2 'bar' (30) +# 27: LONGJMP (29) +# 29: TAIL (30) +# 30: DEFINEP (32) +# 32: IFTHEN (46) +# 34: OPEN3 'baz' (36) +# 36: GOSUB2[-18] (39) +# 39: EXACT <baz> (41) +# 41: CLOSE3 'baz' (46) +# 43: LONGJMP (45) +# 45: TAIL (46) +# 46: DEFINEP (48) +# 48: IFTHEN (62) +# 50: OPEN4 'bop' (52) +# 52: GOSUB3[-18] (55) +# 55: EXACT <bop> (57) +# 57: CLOSE4 'bop' (62) +# 59: LONGJMP (61) +# 61: TAIL (62) +# 62: END (0) +minlen 0 +#Offsets: [66] +# 1:3[0] 3:10[0] 5:17[1] 7:18[3] 9:21[1] 11:21[0] 13:22[0] 14:25[0] 16:32[0] 18:39[1] 20:41[3] 23:47[3] 25:50[1] 27:50[0] 29:51[0] 30:54[0] 32:61[0] 34:68[1] 36:70[3] 39:76[3] 41:79[1] 43:79[0 ... [96 chars truncated] +#Matching REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... against "" +# 0 <> <> | 1:DEFINEP(3) +# 0 <> <> | 3:IFTHEN(14) +# 0 <> <> | 14:DEFINEP(16) +# 0 <> <> | 16:IFTHEN(30) +# 0 <> <> | 30:DEFINEP(32) +# 0 <> <> | 32:IFTHEN(46) +# 0 <> <> | 46:DEFINEP(48) +# 0 <> <> | 48:IFTHEN(62) +# 0 <> <> | 62:END(0) +#Match successful! +%MATCHED% +#Freeing REx: "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... diff --git a/regcomp.c b/regcomp.c index 555cca1..2b69938 100644 --- a/regcomp.c +++ b/regcomp.c @@ -171,9 +171,11 @@ struct RExC_state_t { const char *lastparse; I32 lastnum; AV *paren_name_list; /* idx -> name */ + U32 study_chunk_recursed_count; #define RExC_lastparse (pRExC_state->lastparse) #define RExC_lastnum (pRExC_state->lastnum) #define RExC_paren_name_list (pRExC_state->paren_name_list) +#define RExC_study_chunk_recursed_count (pRExC_state->study_chunk_recursed_count) #endif }; @@ -3637,6 +3639,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, fake_study_recurse: + DEBUG_r( + RExC_study_chunk_recursed_count++; + ); while ( scan && OP(scan) != END && scan < last ){ UV min_subtract = 0; /* How mmany chars to subtract from the minimum node length to get a real minimum (because @@ -3646,8 +3651,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, DEBUG_OPTIMISE_MORE_r( { PerlIO_printf(Perl_debug_log, - "%*sstudy_chunk stopparen=%ld depth=%lu recursed_depth=%lu ", + "%*sstudy_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu ", ((int) depth*2), "", (long)stopparen, + (unsigned long)RExC_study_chunk_recursed_count, (unsigned long)depth, (unsigned long)recursed_depth); if (recursed_depth) { U32 i; @@ -4179,9 +4185,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * However if we are not in SCF_DO_SUBSTR mode then there is * no point in doing this, and it can cause a serious slowdown. * See RT #122283. - * Note the !is_inf and !is_inf_internal flags may be - * superfluous for this decision, however I am including the - * logic anyway as I am pretty sure it wont cause any harm. * Note also that this was a workaround for the core problem * which was that during compilation logic the excessive * recursion resulted in slowly consuming all the memory on @@ -6798,6 +6801,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, reStudy: r->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0; + DEBUG_r( + RExC_study_chunk_recursed_count= 0; + ); Zero(r->substrs, 1, struct reg_substr_data); if (RExC_study_chunk_recursed) Zero(RExC_study_chunk_recursed, @@ -7267,7 +7273,10 @@ reStudy: } Newxz(r->offs, RExC_npar, regexp_paren_pair); /* assume we don't need to swap parens around before we match */ - + DEBUG_TEST_r({ + PerlIO_printf(Perl_debug_log,"study_chunk_recursed_count: %lu\n", + RExC_study_chunk_recursed_count); + }); DEBUG_DUMP_r({ DEBUG_RExC_seen(); PerlIO_printf(Perl_debug_log,"Final program:\n"); diff --git a/regcomp.h b/regcomp.h index d4d3a29..1d41d6e 100644 --- a/regcomp.h +++ b/regcomp.h @@ -856,6 +856,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_COMPILE_TRIE 0x000004 #define RE_DEBUG_COMPILE_DUMP 0x000008 #define RE_DEBUG_COMPILE_FLAGS 0x000010 +#define RE_DEBUG_COMPILE_TEST 0x000020 /* Execute */ #define RE_DEBUG_EXECUTE_MASK 0x00FF00 @@ -891,6 +892,8 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x ) #define DEBUG_FLAGS_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x ) +#define DEBUG_TEST_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_COMPILE_TEST) x ) /* Execute */ #define DEBUG_EXECUTE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x ) -- Perl5 Master Repository
