spectral created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Previously, we would receive a path like abc/def/ghi and "walk up" the 
directory
  hierarchy, checking abc/def, abc, and `b''` to see if they were in the set of
  prefixes that this matcher covered. We did this indiscriminately - we 
generated
  all of these paths even if the set of prefixes the matcher covered was
  completely empty, which is the case for a lot of repos at my company (the 
narrow
  matcher we use is usually non-recursive).
  
  This brings the time for a rebase in one of my repos from 12.20s to 10.87s. In
  this particular repo, this is entirely due to the `len(prefix_set) == 0` 
check,
  as I do not have any recursive patterns in the narrowspec.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9488

AFFECTED FILES
  mercurial/match.py

CHANGE DETAILS

diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -563,6 +563,36 @@
         return b'<predicatenmatcher pred=%s>' % s
 
 
+def path_or_parents_in_set(path, prefix_set):
+    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
+    l = len(prefix_set)
+    if l == 0:
+        return False
+    if path in prefix_set:
+        return True
+    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
+    # "walk up" the directory hierarchy instead, with the assumption that most
+    # directory hierarchies are relatively shallow and hash lookup is cheap.
+    if l > 5:
+        return any(
+                parentdir in prefix_set for parentdir in 
pathutil.finddirs(path)
+        )
+
+    # FIXME: Ideally we'd never get to this point if this is the case - we'd
+    # recognize ourselves as an 'always' matcher and skip this.
+    if b'' in prefix_set:
+        return True
+
+    if pycompat.ispy3:
+        sl = ord(b'/')
+    else:
+        sl = '/'
+
+    # We already checked that path isn't in prefix_set exactly, so
+    # `path[len(pf)] should never raise IndexError.
+    return any(path.startswith(pf) and path[len(pf)] == sl for pf in 
prefix_set)
+
+
 class patternmatcher(basematcher):
     r"""Matches a set of (kind, pat, source) against a 'root' directory.
 
@@ -611,12 +641,8 @@
         if self._prefix and dir in self._fileset:
             return b'all'
         return (
-            dir in self._fileset
-            or dir in self._dirs
-            or any(
-                parentdir in self._fileset
-                for parentdir in pathutil.finddirs(dir)
-            )
+            dir in self._dirs
+            or path_or_parents_in_set(dir, self._fileset)
         )
 
     def visitchildrenset(self, dir):
@@ -698,12 +724,9 @@
         if self._prefix and dir in self._roots:
             return b'all'
         return (
-            dir in self._roots
-            or dir in self._dirs
+            dir in self._dirs
             or dir in self._parents
-            or any(
-                parentdir in self._roots for parentdir in 
pathutil.finddirs(dir)
-            )
+            or path_or_parents_in_set(dir, self._roots)
         )
 
     @propertycache
@@ -726,11 +749,8 @@
         # visitdir, that's handled below.
         if (
             b'' in self._roots
-            or dir in self._roots
             or dir in self._dirs
-            or any(
-                parentdir in self._roots for parentdir in 
pathutil.finddirs(dir)
-            )
+            or path_or_parents_in_set(dir, self._roots)
         ):
             return b'this'
 



To: spectral, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to