In perl.git, the branch smoke-me/fast_glob has been updated <http://perl5.git.perl.org/perl.git/commitdiff/c6f39ea30d2a587ec137d62da7c36a0fe4010481?hp=33252c318625f3c6c89b816ee88481940e3e6f95>
- Log ----------------------------------------------------------------- commit c6f39ea30d2a587ec137d62da7c36a0fe4010481 Author: Yves Orton <[email protected]> Date: Mon May 8 14:49:15 2017 +0200 Add microoptimisation for M_ONE and M_SET As per feedback from Jilles Tjoelker <[email protected]> in the FreeBSD community. M ext/File-Glob/bsd_glob.c commit 6608867fb450b5748f05a49116dadd31085d67c3 Author: Yves Orton <[email protected]> Date: Mon May 8 14:48:55 2017 +0200 update comments as per feedback from FreeBSD devs M ext/File-Glob/bsd_glob.c ----------------------------------------------------------------------- Summary of changes: ext/File-Glob/bsd_glob.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/ext/File-Glob/bsd_glob.c b/ext/File-Glob/bsd_glob.c index d577d0164c..e96fb7356a 100644 --- a/ext/File-Glob/bsd_glob.c +++ b/ext/File-Glob/bsd_glob.c @@ -563,8 +563,12 @@ glob0(const Char *pattern, glob_t *pglob) break; case BG_STAR: pglob->gl_flags |= GLOB_MAGCHAR; - /* collapse adjacent stars to one, - * to avoid exponential behavior + /* Collapse adjacent stars to one. + * This is required to ensure that a pattern like + * "a**" matches a name like "a", as without this + * check when the first star matched everything it would + * cause the second star to return a match fail. + * As long ** is folded here this does not happen. */ if (bufnext == patbuf || bufnext[-1] != M_ALL) *bufnext++ = M_ALL; @@ -909,15 +913,20 @@ globextend(const Char *path, glob_t *pglob, size_t *limitp) /* - * pattern matching function for filenames. Each occurrence of the * - * pattern causes a recursion level. + * pattern matching function for filenames using state machine to avoid + * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack + * without additional callframes, and to do cleanly prune the backtracking + * state when multiple '*' (start) matches are included in the patter. * - * Note, this function differs from the original as per the discussion - * here: https://research.swtch.com/glob + * Thanks to Russ Cox for the improved state machine logic to avoid quadratic + * matching on failure. * - * Basically we removed the recursion and made it use the algorithm - * from Russ Cox to not go quadratic on cases like a file called ("a" x 100) . "x" - * matched against a pattern like "a*a*a*a*a*a*a*y". + * https://research.swtch.com/glob + * + * An example would be a pattern + * ("a*" x 100) . "y" + * against a file name like + * ("a" x 100) . "x" * */ static int @@ -941,13 +950,19 @@ match(Char *name, Char *pat, Char *patend, int nocase) nextp = pat - 1; break; case M_ONE: + /* since * matches leftmost-shortest first * + * if we encounter the EOS then backtracking * + * will not help, so we can exit early here. */ if (*name++ == BG_EOS) - goto fail; + return 0; break; case M_SET: ok = 0; + /* since * matches leftmost-shortest first * + * if we encounter the EOS then backtracking * + * will not help, so we can exit early here. */ if ((k = *name++) == BG_EOS) - goto fail; + return 0; if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS) ++pat; while (((c = *pat++) & M_MASK) != M_END) -- Perl5 Master Repository
