In perl.git, the branch smoke-me/fast_glob has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/c6f39ea30d2a587ec137d62da7c36a0fe4010481?hp=33252c318625f3c6c89b816ee88481940e3e6f95>

- Log -----------------------------------------------------------------
commit c6f39ea30d2a587ec137d62da7c36a0fe4010481
Author: Yves Orton <[email protected]>
Date:   Mon May 8 14:49:15 2017 +0200

    Add microoptimisation for M_ONE and M_SET
    
    As per feedback from Jilles Tjoelker <[email protected]> in the FreeBSD
    community.

M       ext/File-Glob/bsd_glob.c

commit 6608867fb450b5748f05a49116dadd31085d67c3
Author: Yves Orton <[email protected]>
Date:   Mon May 8 14:48:55 2017 +0200

    update comments as per feedback from FreeBSD devs

M       ext/File-Glob/bsd_glob.c
-----------------------------------------------------------------------

Summary of changes:
 ext/File-Glob/bsd_glob.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/ext/File-Glob/bsd_glob.c b/ext/File-Glob/bsd_glob.c
index d577d0164c..e96fb7356a 100644
--- a/ext/File-Glob/bsd_glob.c
+++ b/ext/File-Glob/bsd_glob.c
@@ -563,8 +563,12 @@ glob0(const Char *pattern, glob_t *pglob)
                        break;
                case BG_STAR:
                        pglob->gl_flags |= GLOB_MAGCHAR;
-                       /* collapse adjacent stars to one,
-                        * to avoid exponential behavior
+                        /* Collapse adjacent stars to one.
+                         * This is required to ensure that a pattern like
+                         * "a**" matches a name like "a", as without this
+                         * check when the first star matched everything it 
would
+                         * cause the second star to return a match fail.
+                         * As long ** is folded here this does not happen.
                         */
                        if (bufnext == patbuf || bufnext[-1] != M_ALL)
                                *bufnext++ = M_ALL;
@@ -909,15 +913,20 @@ globextend(const Char *path, glob_t *pglob, size_t 
*limitp)
 
 
 /*
- * pattern matching function for filenames.  Each occurrence of the *
- * pattern causes a recursion level.
+ * pattern matching function for filenames using state machine to avoid
+ * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
+ * without additional callframes, and to do cleanly prune the backtracking
+ * state when multiple '*' (start) matches are included in the patter.
  *
- * Note, this function differs from the original as per the discussion
- * here: https://research.swtch.com/glob
+ * Thanks to Russ Cox for the improved state machine logic to avoid quadratic
+ * matching on failure.
  *
- * Basically we removed the recursion and made it use the algorithm
- * from Russ Cox to not go quadratic on cases like a file called ("a" x 100) . 
"x"
- * matched against a pattern like "a*a*a*a*a*a*a*y".
+ * https://research.swtch.com/glob
+ *
+ * An example would be a pattern
+ *  ("a*" x 100) . "y"
+ * against a file name like
+ *  ("a" x 100) . "x"
  *
  */
 static int
@@ -941,13 +950,19 @@ match(Char *name, Char *pat, Char *patend, int nocase)
                        nextp = pat - 1;
                        break;
                case M_ONE:
+                        /* since * matches leftmost-shortest first   *
+                         * if we encounter the EOS then backtracking *
+                         * will not help, so we can exit early here. */
                        if (*name++ == BG_EOS)
-                               goto fail;
+                                return 0;
                        break;
                case M_SET:
                        ok = 0;
+                        /* since * matches leftmost-shortest first   *
+                         * if we encounter the EOS then backtracking *
+                         * will not help, so we can exit early here. */
                        if ((k = *name++) == BG_EOS)
-                               goto fail;
+                                return 0;
                        if ((negate_range = ((*pat & M_MASK) == M_NOT)) != 
BG_EOS)
                                ++pat;
                        while (((c = *pat++) & M_MASK) != M_END)

--
Perl5 Master Repository

Reply via email to