D1224: merge: cache unknown dir checks (issue5716)
This revision was automatically updated to reflect the committed changes. Closed by commit rHGb85962350bb3: merge: cache unknown dir checks (issue5716) (authored by mbthomas, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D1224?vs=3473=4056 REVISION DETAIL https://phab.mercurial-scm.org/D1224 AFFECTED FILES mercurial/merge.py CHANGE DETAILS diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -653,7 +653,7 @@ and repo.dirstate.normalize(f) not in repo.dirstate and mctx[f2].cmp(wctx[f])) -def _checkunknowndirs(repo, f): +class _unknowndirschecker(object): """ Look for any unknown files or directories that may have a path conflict with a file. If any path prefix of the file exists as a file or link, @@ -663,23 +663,42 @@ Returns the shortest path at which a conflict occurs, or None if there is no conflict. """ +def __init__(self): +# A set of paths known to be good. This prevents repeated checking of +# dirs. It will be updated with any new dirs that are checked and found +# to be safe. +self._unknowndircache = set() -# Check for path prefixes that exist as unknown files. -for p in reversed(list(util.finddirs(f))): -if (repo.wvfs.audit.check(p) -and repo.wvfs.isfileorlink(p) -and repo.dirstate.normalize(p) not in repo.dirstate): -return p +# A set of paths that are known to be absent. This prevents repeated +# checking of subdirectories that are known not to exist. It will be +# updated with any new dirs that are checked and found to be absent. +self._missingdircache = set() -# Check if the file conflicts with a directory containing unknown files. -if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): -# Does the directory contain any files that are not in the dirstate? -for p, dirs, files in repo.wvfs.walk(f): -for fn in files: -relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) -if relf not in repo.dirstate: -return f -return None +def __call__(self, repo, f): +# Check for path prefixes that exist as unknown files. +for p in reversed(list(util.finddirs(f))): +if p in self._missingdircache: +return +if p in self._unknowndircache: +continue +if repo.wvfs.audit.check(p): +if (repo.wvfs.isfileorlink(p) +and repo.dirstate.normalize(p) not in repo.dirstate): +return p +if not repo.wvfs.lexists(p): +self._missingdircache.add(p) +return +self._unknowndircache.add(p) + +# Check if the file conflicts with a directory containing unknown files. +if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): +# Does the directory contain any files that are not in the dirstate? +for p, dirs, files in repo.wvfs.walk(f): +for fn in files: +relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) +if relf not in repo.dirstate: +return f +return None def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce): """ @@ -701,12 +720,13 @@ elif config == 'warn': warnconflicts.update(conflicts) +checkunknowndirs = _unknowndirschecker() for f, (m, args, msg) in actions.iteritems(): if m in ('c', 'dc'): if _checkunknownfile(repo, wctx, mctx, f): fileconflicts.add(f) elif pathconfig and f not in wctx: -path = _checkunknowndirs(repo, f) +path = checkunknowndirs(repo, f) if path is not None: pathconflicts.add(path) elif m == 'dg': To: mbthomas, #hg-reviewers, durin42, krbullock Cc: durin42, sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
mbthomas updated this revision to Diff 3473. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D1224?vs=3083=3473 REVISION DETAIL https://phab.mercurial-scm.org/D1224 AFFECTED FILES mercurial/merge.py CHANGE DETAILS diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -653,7 +653,7 @@ and repo.dirstate.normalize(f) not in repo.dirstate and mctx[f2].cmp(wctx[f])) -def _checkunknowndirs(repo, f): +class _unknowndirschecker(object): """ Look for any unknown files or directories that may have a path conflict with a file. If any path prefix of the file exists as a file or link, @@ -663,23 +663,42 @@ Returns the shortest path at which a conflict occurs, or None if there is no conflict. """ +def __init__(self): +# A set of paths known to be good. This prevents repeated checking of +# dirs. It will be updated with any new dirs that are checked and found +# to be safe. +self._unknowndircache = set() -# Check for path prefixes that exist as unknown files. -for p in reversed(list(util.finddirs(f))): -if (repo.wvfs.audit.check(p) -and repo.wvfs.isfileorlink(p) -and repo.dirstate.normalize(p) not in repo.dirstate): -return p +# A set of paths that are known to be absent. This prevents repeated +# checking of subdirectories that are known not to exist. It will be +# updated with any new dirs that are checked and found to be absent. +self._missingdircache = set() -# Check if the file conflicts with a directory containing unknown files. -if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): -# Does the directory contain any files that are not in the dirstate? -for p, dirs, files in repo.wvfs.walk(f): -for fn in files: -relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) -if relf not in repo.dirstate: -return f -return None +def __call__(self, repo, f): +# Check for path prefixes that exist as unknown files. +for p in reversed(list(util.finddirs(f))): +if p in self._missingdircache: +return +if p in self._unknowndircache: +continue +if repo.wvfs.audit.check(p): +if (repo.wvfs.isfileorlink(p) +and repo.dirstate.normalize(p) not in repo.dirstate): +return p +if not repo.wvfs.lexists(p): +self._missingdircache.add(p) +return +self._unknowndircache.add(p) + +# Check if the file conflicts with a directory containing unknown files. +if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): +# Does the directory contain any files that are not in the dirstate? +for p, dirs, files in repo.wvfs.walk(f): +for fn in files: +relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) +if relf not in repo.dirstate: +return f +return None def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce): """ @@ -701,12 +720,13 @@ elif config == 'warn': warnconflicts.update(conflicts) +checkunknowndirs = _unknowndirschecker() for f, (m, args, msg) in actions.iteritems(): if m in ('c', 'dc'): if _checkunknownfile(repo, wctx, mctx, f): fileconflicts.add(f) elif pathconfig and f not in wctx: -path = _checkunknowndirs(repo, f) +path = checkunknowndirs(repo, f) if path is not None: pathconflicts.add(path) elif m == 'dg': To: mbthomas, #hg-reviewers, durin42 Cc: durin42, sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
durin42 requested changes to this revision. durin42 added a comment. This revision now requires changes to proceed. needs rebased, otherwise looks fine REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1224 To: mbthomas, #hg-reviewers, durin42 Cc: durin42, sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
mbthomas added a comment. Yes, fine to punt to after 4.4. With https://phab.mercurial-scm.org/D1223 the checks are disabled, so we're back to the old behaviour for path conflicts, but the perf is ok. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1224 To: mbthomas, #hg-reviewers Cc: durin42, sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
durin42 added a comment. Is it okay to punt on this until after 4.4? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1224 To: mbthomas, #hg-reviewers Cc: durin42, sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
mbthomas added a comment. Updated as requested. The whole of `_checkunknownfiles` is probably a reasonable candidate for native code if we want to speed up `update`. It's the second most expensive part, after actually updating the files. The function is also pretty isolated - it accesses the dirstate (which may also be native code), and in some cases it checks the file contents on disk against the destination revision, but that's pretty rare. It might even be possible to do some of it in parallel. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1224 To: mbthomas, #hg-reviewers Cc: sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
mbthomas updated this revision to Diff 3083. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D1224?vs=3078=3083 REVISION DETAIL https://phab.mercurial-scm.org/D1224 AFFECTED FILES mercurial/merge.py CHANGE DETAILS diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -653,7 +653,7 @@ and repo.dirstate.normalize(f) not in repo.dirstate and mctx[f2].cmp(wctx[f])) -def _checkunknowndirs(repo, f): +class _unknowndirschecker(object): """ Look for any unknown files or directories that may have a path conflict with a file. If any path prefix of the file exists as a file or link, @@ -663,23 +663,42 @@ Returns the shortest path at which a conflict occurs, or None if there is no conflict. """ +def __init__(self): +# A set of paths known to be good. This prevents repeated checking of +# dirs. It will be updated with any new dirs that are checked and found +# to be safe. +self._unknowndircache = set() -# Check for path prefixes that exist as unknown files. -for p in reversed(list(util.finddirs(f))): -if (repo.wvfs.audit.check(p) -and repo.wvfs.isfileorlink(p) -and repo.dirstate.normalize(p) not in repo.dirstate): -return p +# A set of paths that are known to be absent. This prevents repeated +# checking of subdirectories that are known not to exist. It will be +# updated with any new dirs that are checked and found to be absent. +self._missingdircache = set() -# Check if the file conflicts with a directory containing unknown files. -if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): -# Does the directory contain any files that are not in the dirstate? -for p, dirs, files in repo.wvfs.walk(f): -for fn in files: -relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) -if relf not in repo.dirstate: -return f -return None +def __call__(self, repo, f): +# Check for path prefixes that exist as unknown files. +for p in reversed(list(util.finddirs(f))): +if p in self._missingdircache: +return +if p in self._unknowndircache: +continue +if repo.wvfs.audit.check(p): +if (repo.wvfs.isfileorlink(p) +and repo.dirstate.normalize(p) not in repo.dirstate): +return p +if not repo.wvfs.lexists(p): +self._missingdircache.add(p) +return +self._unknowndircache.add(p) + +# Check if the file conflicts with a directory containing unknown files. +if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): +# Does the directory contain any files that are not in the dirstate? +for p, dirs, files in repo.wvfs.walk(f): +for fn in files: +relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) +if relf not in repo.dirstate: +return f +return None def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce): """ @@ -700,12 +719,13 @@ elif config == 'warn': warnconflicts.update(conflicts) +checkunknowndirs = _unknowndirschecker() for f, (m, args, msg) in actions.iteritems(): if m in ('c', 'dc'): if _checkunknownfile(repo, wctx, mctx, f): fileconflicts.add(f) elif f not in wctx: -path = _checkunknowndirs(repo, f) +path = checkunknowndirs(repo, f) if path is not None: pathconflicts.add(path) elif m == 'dg': To: mbthomas, #hg-reviewers Cc: sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
sid0 added a comment. Could you make this a class with unknowndircache and missingdircache being private members? How much do you think writing some of this code in native code (e.g. Rust) would help? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1224 To: mbthomas, #hg-reviewers Cc: sid0, krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1224: merge: cache unknown dir checks (issue5716)
mbthomas added a comment. There is still some cost to doing the check, so it's not a complete reversal of issue5716, but it helps a lot. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1224 To: mbthomas, #hg-reviewers Cc: krbullock, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel