D1224: merge: cache unknown dir checks (issue5716)

2017-12-01 Thread mbthomas (Mark Thomas)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGb85962350bb3: merge: cache unknown dir checks (issue5716) 
(authored by mbthomas, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D1224?vs=3473=4056

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

AFFECTED FILES
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -653,7 +653,7 @@
 and repo.dirstate.normalize(f) not in repo.dirstate
 and mctx[f2].cmp(wctx[f]))
 
-def _checkunknowndirs(repo, f):
+class _unknowndirschecker(object):
 """
 Look for any unknown files or directories that may have a path conflict
 with a file.  If any path prefix of the file exists as a file or link,
@@ -663,23 +663,42 @@
 Returns the shortest path at which a conflict occurs, or None if there is
 no conflict.
 """
+def __init__(self):
+# A set of paths known to be good.  This prevents repeated checking of
+# dirs.  It will be updated with any new dirs that are checked and 
found
+# to be safe.
+self._unknowndircache = set()
 
-# Check for path prefixes that exist as unknown files.
-for p in reversed(list(util.finddirs(f))):
-if (repo.wvfs.audit.check(p)
-and repo.wvfs.isfileorlink(p)
-and repo.dirstate.normalize(p) not in repo.dirstate):
-return p
+# A set of paths that are known to be absent.  This prevents repeated
+# checking of subdirectories that are known not to exist. It will be
+# updated with any new dirs that are checked and found to be absent.
+self._missingdircache = set()
 
-# Check if the file conflicts with a directory containing unknown files.
-if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
-# Does the directory contain any files that are not in the dirstate?
-for p, dirs, files in repo.wvfs.walk(f):
-for fn in files:
-relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
-if relf not in repo.dirstate:
-return f
-return None
+def __call__(self, repo, f):
+# Check for path prefixes that exist as unknown files.
+for p in reversed(list(util.finddirs(f))):
+if p in self._missingdircache:
+return
+if p in self._unknowndircache:
+continue
+if repo.wvfs.audit.check(p):
+if (repo.wvfs.isfileorlink(p)
+and repo.dirstate.normalize(p) not in repo.dirstate):
+return p
+if not repo.wvfs.lexists(p):
+self._missingdircache.add(p)
+return
+self._unknowndircache.add(p)
+
+# Check if the file conflicts with a directory containing unknown 
files.
+if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
+# Does the directory contain any files that are not in the 
dirstate?
+for p, dirs, files in repo.wvfs.walk(f):
+for fn in files:
+relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
+if relf not in repo.dirstate:
+return f
+return None
 
 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce):
 """
@@ -701,12 +720,13 @@
 elif config == 'warn':
 warnconflicts.update(conflicts)
 
+checkunknowndirs = _unknowndirschecker()
 for f, (m, args, msg) in actions.iteritems():
 if m in ('c', 'dc'):
 if _checkunknownfile(repo, wctx, mctx, f):
 fileconflicts.add(f)
 elif pathconfig and f not in wctx:
-path = _checkunknowndirs(repo, f)
+path = checkunknowndirs(repo, f)
 if path is not None:
 pathconflicts.add(path)
 elif m == 'dg':



To: mbthomas, #hg-reviewers, durin42, krbullock
Cc: durin42, sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-11-14 Thread mbthomas (Mark Thomas)
mbthomas updated this revision to Diff 3473.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D1224?vs=3083=3473

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

AFFECTED FILES
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -653,7 +653,7 @@
 and repo.dirstate.normalize(f) not in repo.dirstate
 and mctx[f2].cmp(wctx[f]))
 
-def _checkunknowndirs(repo, f):
+class _unknowndirschecker(object):
 """
 Look for any unknown files or directories that may have a path conflict
 with a file.  If any path prefix of the file exists as a file or link,
@@ -663,23 +663,42 @@
 Returns the shortest path at which a conflict occurs, or None if there is
 no conflict.
 """
+def __init__(self):
+# A set of paths known to be good.  This prevents repeated checking of
+# dirs.  It will be updated with any new dirs that are checked and 
found
+# to be safe.
+self._unknowndircache = set()
 
-# Check for path prefixes that exist as unknown files.
-for p in reversed(list(util.finddirs(f))):
-if (repo.wvfs.audit.check(p)
-and repo.wvfs.isfileorlink(p)
-and repo.dirstate.normalize(p) not in repo.dirstate):
-return p
+# A set of paths that are known to be absent.  This prevents repeated
+# checking of subdirectories that are known not to exist. It will be
+# updated with any new dirs that are checked and found to be absent.
+self._missingdircache = set()
 
-# Check if the file conflicts with a directory containing unknown files.
-if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
-# Does the directory contain any files that are not in the dirstate?
-for p, dirs, files in repo.wvfs.walk(f):
-for fn in files:
-relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
-if relf not in repo.dirstate:
-return f
-return None
+def __call__(self, repo, f):
+# Check for path prefixes that exist as unknown files.
+for p in reversed(list(util.finddirs(f))):
+if p in self._missingdircache:
+return
+if p in self._unknowndircache:
+continue
+if repo.wvfs.audit.check(p):
+if (repo.wvfs.isfileorlink(p)
+and repo.dirstate.normalize(p) not in repo.dirstate):
+return p
+if not repo.wvfs.lexists(p):
+self._missingdircache.add(p)
+return
+self._unknowndircache.add(p)
+
+# Check if the file conflicts with a directory containing unknown 
files.
+if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
+# Does the directory contain any files that are not in the 
dirstate?
+for p, dirs, files in repo.wvfs.walk(f):
+for fn in files:
+relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
+if relf not in repo.dirstate:
+return f
+return None
 
 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce):
 """
@@ -701,12 +720,13 @@
 elif config == 'warn':
 warnconflicts.update(conflicts)
 
+checkunknowndirs = _unknowndirschecker()
 for f, (m, args, msg) in actions.iteritems():
 if m in ('c', 'dc'):
 if _checkunknownfile(repo, wctx, mctx, f):
 fileconflicts.add(f)
 elif pathconfig and f not in wctx:
-path = _checkunknowndirs(repo, f)
+path = checkunknowndirs(repo, f)
 if path is not None:
 pathconflicts.add(path)
 elif m == 'dg':



To: mbthomas, #hg-reviewers, durin42
Cc: durin42, sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-11-13 Thread durin42 (Augie Fackler)
durin42 requested changes to this revision.
durin42 added a comment.
This revision now requires changes to proceed.


  needs rebased, otherwise looks fine

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

To: mbthomas, #hg-reviewers, durin42
Cc: durin42, sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-11-02 Thread mbthomas (Mark Thomas)
mbthomas added a comment.


  Yes, fine to punt to after 4.4.  With https://phab.mercurial-scm.org/D1223 
the checks are disabled, so we're back to the old behaviour for path conflicts, 
but the perf is ok.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

To: mbthomas, #hg-reviewers
Cc: durin42, sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-10-30 Thread durin42 (Augie Fackler)
durin42 added a comment.


  Is it okay to punt on this until after 4.4?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

To: mbthomas, #hg-reviewers
Cc: durin42, sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-10-25 Thread mbthomas (Mark Thomas)
mbthomas added a comment.


  Updated as requested.
  
  The whole of `_checkunknownfiles` is probably a reasonable candidate for 
native code if we want to speed up `update`.  It's the second most expensive 
part, after actually updating the files.  The function is also pretty isolated 
- it accesses the dirstate (which may also be native code), and in some cases 
it checks the file contents on disk against the destination revision, but 
that's pretty rare.  It might even be possible to do some of it in parallel.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

To: mbthomas, #hg-reviewers
Cc: sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-10-25 Thread mbthomas (Mark Thomas)
mbthomas updated this revision to Diff 3083.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D1224?vs=3078=3083

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

AFFECTED FILES
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -653,7 +653,7 @@
 and repo.dirstate.normalize(f) not in repo.dirstate
 and mctx[f2].cmp(wctx[f]))
 
-def _checkunknowndirs(repo, f):
+class _unknowndirschecker(object):
 """
 Look for any unknown files or directories that may have a path conflict
 with a file.  If any path prefix of the file exists as a file or link,
@@ -663,23 +663,42 @@
 Returns the shortest path at which a conflict occurs, or None if there is
 no conflict.
 """
+def __init__(self):
+# A set of paths known to be good.  This prevents repeated checking of
+# dirs.  It will be updated with any new dirs that are checked and 
found
+# to be safe.
+self._unknowndircache = set()
 
-# Check for path prefixes that exist as unknown files.
-for p in reversed(list(util.finddirs(f))):
-if (repo.wvfs.audit.check(p)
-and repo.wvfs.isfileorlink(p)
-and repo.dirstate.normalize(p) not in repo.dirstate):
-return p
+# A set of paths that are known to be absent.  This prevents repeated
+# checking of subdirectories that are known not to exist. It will be
+# updated with any new dirs that are checked and found to be absent.
+self._missingdircache = set()
 
-# Check if the file conflicts with a directory containing unknown files.
-if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
-# Does the directory contain any files that are not in the dirstate?
-for p, dirs, files in repo.wvfs.walk(f):
-for fn in files:
-relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
-if relf not in repo.dirstate:
-return f
-return None
+def __call__(self, repo, f):
+# Check for path prefixes that exist as unknown files.
+for p in reversed(list(util.finddirs(f))):
+if p in self._missingdircache:
+return
+if p in self._unknowndircache:
+continue
+if repo.wvfs.audit.check(p):
+if (repo.wvfs.isfileorlink(p)
+and repo.dirstate.normalize(p) not in repo.dirstate):
+return p
+if not repo.wvfs.lexists(p):
+self._missingdircache.add(p)
+return
+self._unknowndircache.add(p)
+
+# Check if the file conflicts with a directory containing unknown 
files.
+if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
+# Does the directory contain any files that are not in the 
dirstate?
+for p, dirs, files in repo.wvfs.walk(f):
+for fn in files:
+relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
+if relf not in repo.dirstate:
+return f
+return None
 
 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce):
 """
@@ -700,12 +719,13 @@
 elif config == 'warn':
 warnconflicts.update(conflicts)
 
+checkunknowndirs = _unknowndirschecker()
 for f, (m, args, msg) in actions.iteritems():
 if m in ('c', 'dc'):
 if _checkunknownfile(repo, wctx, mctx, f):
 fileconflicts.add(f)
 elif f not in wctx:
-path = _checkunknowndirs(repo, f)
+path = checkunknowndirs(repo, f)
 if path is not None:
 pathconflicts.add(path)
 elif m == 'dg':



To: mbthomas, #hg-reviewers
Cc: sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-10-24 Thread sid0 (Siddharth Agarwal)
sid0 added a comment.


  Could you make this a class with unknowndircache and missingdircache being 
private members?
  
  How much do you think writing some of this code in native code (e.g. Rust) 
would help?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

To: mbthomas, #hg-reviewers
Cc: sid0, krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D1224: merge: cache unknown dir checks (issue5716)

2017-10-24 Thread mbthomas (Mark Thomas)
mbthomas added a comment.


  There is still some cost to doing the check, so it's not a complete reversal 
of issue5716, but it helps a lot.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1224

To: mbthomas, #hg-reviewers
Cc: krbullock, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel