D10909: status: Extend read_dir caching to directories with ignored files

2021-06-24 Thread SimonSapin
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  See code comments

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10909

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/on_disk.rs
  rust/hg-core/src/dirstate_tree/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate_tree/status.rs 
b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -190,18 +190,21 @@
 // This happens for example with `hg status -mard`.
 return true;
 }
-if let Some(cached_mtime) = cached_directory_mtime {
-// The dirstate contains a cached mtime for this directory, set by
-// a previous run of the `status` algorithm which found this
-// directory eligible for `read_dir` caching.
-if let Some(meta) = directory_metadata {
-if let Ok(current_mtime) = meta.modified() {
-if current_mtime == cached_mtime.into() {
-// The mtime of that directory has not changed since
-// then, which means that the
-// results of `read_dir` should also
-// be unchanged.
-return true;
+if !self.options.list_ignored
+&& self.ignore_patterns_have_changed == Some(false)
+{
+if let Some(cached_mtime) = cached_directory_mtime {
+// The dirstate contains a cached mtime for this directory, set
+// by a previous run of the `status` algorithm which found this
+// directory eligible for `read_dir` caching.
+if let Some(meta) = directory_metadata {
+if let Ok(current_mtime) = meta.modified() {
+if current_mtime == cached_mtime.into() {
+// The mtime of that directory has not changed
+// since then, which means that the results of
+// `read_dir` should also be unchanged.
+return true;
+}
 }
 }
 }
@@ -209,8 +212,8 @@
 false
 }
 
-/// Returns whether the filesystem directory was found to have any entry
-/// that does not have a corresponding dirstate tree node.
+/// Returns whether all child entries of the filesystem directory have a
+/// corresponding dirstate node or are ignored.
 fn traverse_fs_directory_and_dirstate(
 ,
 has_ignored_ancestor: bool,
@@ -248,11 +251,10 @@
 })
 .collect::>()?;
 
-// Conservatively don’t let the caller assume that there aren’t
-// any, since we don’t know.
-let directory_has_any_fs_only_entry = true;
+// We don’t know, so conservatively say this isn’t the case
+let children_all_have_dirstate_node_or_are_ignored = false;
 
-return Ok(directory_has_any_fs_only_entry);
+return Ok(children_all_have_dirstate_node_or_are_ignored);
 }
 
 let mut fs_entries = if let Ok(entries) = self.read_dir(
@@ -295,28 +297,32 @@
 .par_bridge()
 .map(|pair| {
 use itertools::EitherOrBoth::*;
-let is_fs_only = pair.is_right();
+let has_dirstate_node_or_is_ignored;
 match pair {
-Both(dirstate_node, fs_entry) => self
-.traverse_fs_and_dirstate(
+Both(dirstate_node, fs_entry) => {
+self.traverse_fs_and_dirstate(
 _entry.full_path,
 _entry.metadata,
 dirstate_node,
 has_ignored_ancestor,
-)?,
+)?;
+has_dirstate_node_or_is_ignored = true
+}
 Left(dirstate_node) => {
-self.traverse_dirstate_only(dirstate_node)?
+self.traverse_dirstate_only(dirstate_node)?;
+has_dirstate_node_or_is_ignored = true;
 }
 Right(fs_entry) => {
 let hg_path = directory_hg_path.join(_entry.base_name);
 let is_ignored =
 has_ignored_ancestor || (self.ignore_fn)(_path);
 self.traverse_fs_only(is_ignored, hg_path, fs_entry);
+has_dirstate_node_or_is_ignored = is_ignored;
 }
 }
-Ok(is_fs_only)
+Ok(has_dirstate_node_or_is_ignored)
 })
-.try_reduce(|| false, |a, b| Ok(a || b))
+.try_reduce(|| true, |a, b| Ok(a && b))
 }
 
 fn 

D10907: dirstate-v2: Drop cached read_dir results after .hgignore changes

2021-06-24 Thread SimonSapin
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Soon we’ll want the status algorithm to be able to skip `std::fs::read_dir` in
  more cases, notabling when listing unknown files but not ignored files.
  
  When ignore patterns change (which we detect by their hash, added to the
  dirstate-v2 format in a previous changeset), a formerly-ignored file could
  become unknown without changing its parent directory’s modification time.
  Therefore we remove any directory mtime from the dirstate, effictively
  invalidating the existing caches.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10907

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate_tree/status.rs 
b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -6,7 +6,6 @@
 use crate::dirstate_tree::dirstate_map::NodeRef;
 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
 use crate::dirstate_tree::on_disk::Timestamp;
-use crate::dirstate_tree::path_with_basename::WithBasename;
 use crate::matchers::get_ignore_function;
 use crate::matchers::Matcher;
 use crate::utils::files::get_bytes_from_os_string;
@@ -70,6 +69,7 @@
 outcome: Default::default(),
 ignore_patterns_have_changed: patterns_changed,
 new_cachable_directories: Default::default(),
+outated_cached_directories: Default::default(),
 filesystem_time_at_status_start: filesystem_now(_dir).ok(),
 };
 let is_at_repo_root = true;
@@ -91,18 +91,22 @@
 )?;
 let mut outcome = common.outcome.into_inner().unwrap();
 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
+let outdated = common.outated_cached_directories.into_inner().unwrap();
 
 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
+|| !outdated.is_empty()
 || !new_cachable.is_empty();
 
+// Remove outdated mtimes before adding new mtimes, in case a given
+// directory is both
+for path in  {
+let node = dmap.get_or_insert(path)?;
+if let NodeData::CachedDirectory { .. } =  {
+node.data = NodeData::None
+}
+}
 for (path, mtime) in _cachable {
-let node = DirstateMap::get_or_insert_node(
-dmap.on_disk,
- dmap.root,
-path,
-WithBasename::to_cow_owned,
-|_| {},
-)?;
+let node = dmap.get_or_insert(path)?;
 match  {
 NodeData::Entry(_) => {} // Don’t overwrite an entry
 NodeData::CachedDirectory { .. } | NodeData::None => {
@@ -123,6 +127,7 @@
 ignore_fn: IgnoreFnType<'a>,
 outcome: Mutex>,
 new_cachable_directories: Mutex, Timestamp)>>,
+outated_cached_directories: Mutex>>,
 
 /// Whether ignore files like `.hgignore` have changed since the previous
 /// time a `status()` call wrote their hash to the dirstate. `None` means
@@ -155,6 +160,22 @@
 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
 }
 
+fn check_for_outdated_directory_cache(
+,
+dirstate_node: <'tree, 'on_disk>,
+) -> Result<(), DirstateV2ParseError> {
+if self.ignore_patterns_have_changed == Some(true)
+&& dirstate_node.cached_directory_mtime().is_some()
+{
+self.outated_cached_directories.lock().unwrap().push(
+dirstate_node
+.full_path_borrowed(self.dmap.on_disk)?
+.detach_from_tree(),
+)
+}
+Ok(())
+}
+
 /// If this returns true, we can get accurate results by only using
 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
 /// need to call `read_dir`.
@@ -304,6 +325,7 @@
 dirstate_node: NodeRef<'tree, 'on_disk>,
 has_ignored_ancestor: bool,
 ) -> Result<(), DirstateV2ParseError> {
+self.check_for_outdated_directory_cache(_node)?;
 let hg_path = _node.full_path_borrowed(self.dmap.on_disk)?;
 let file_type = fs_metadata.file_type();
 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
@@ -521,6 +543,7 @@
 ,
 dirstate_node: NodeRef<'tree, 'on_disk>,
 ) -> Result<(), DirstateV2ParseError> {
+self.check_for_outdated_directory_cache(_node)?;
 self.mark_removed_or_deleted_if_file(
 _node.full_path_borrowed(self.dmap.on_disk)?,
 dirstate_node.state()?,
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs 
b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -495,6 +495,19 @@
 }
 }
 
+   

D10908: status: Move some `is_ignored` computation earlier

2021-06-24 Thread SimonSapin
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This refactor should make have no observable behavior difference, it will make
  an upcoming changeset easier.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10908

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate_tree/status.rs 
b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -307,11 +307,12 @@
 Left(dirstate_node) => {
 self.traverse_dirstate_only(dirstate_node)?
 }
-Right(fs_entry) => self.traverse_fs_only(
-has_ignored_ancestor,
-directory_hg_path,
-fs_entry,
-),
+Right(fs_entry) => {
+let hg_path = directory_hg_path.join(_entry.base_name);
+let is_ignored =
+has_ignored_ancestor || (self.ignore_fn)(_path);
+self.traverse_fs_only(is_ignored, hg_path, fs_entry);
+}
 }
 Ok(is_fs_only)
 })
@@ -394,7 +395,9 @@
 } else {
 // `node.entry.is_none()` indicates a "directory"
 // node, but the filesystem has a file
-self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path)
+let is_ignored =
+has_ignored_ancestor || (self.ignore_fn)(_path);
+self.mark_unknown_or_ignored(is_ignored, hg_path)
 }
 }
 
@@ -586,16 +589,13 @@
 /// Something in the filesystem has no corresponding dirstate node
 fn traverse_fs_only(
 ,
-has_ignored_ancestor: bool,
-directory_hg_path: ,
+is_ignored: bool,
+hg_path: HgPathBuf,
 fs_entry: ,
 ) {
-let hg_path = directory_hg_path.join(_entry.base_name);
 let file_type = fs_entry.metadata.file_type();
 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
 if file_type.is_dir() {
-let is_ignored =
-has_ignored_ancestor || (self.ignore_fn)(_path);
 let traverse_children = if is_ignored {
 // Descendants of an ignored directory are all ignored
 self.options.list_ignored
@@ -612,9 +612,11 @@
 is_at_repo_root,
 ) {
 children_fs_entries.par_iter().for_each(|child_fs_entry| {
+let child_hg_path =
+hg_path.join(_fs_entry.base_name);
 self.traverse_fs_only(
-is_ignored,
-_path,
+is_ignored || (self.ignore_fn)(_hg_path),
+child_hg_path,
 child_fs_entry,
 )
 })
@@ -625,7 +627,7 @@
 }
 } else if file_or_symlink && self.matcher.matches(_path) {
 self.mark_unknown_or_ignored(
-has_ignored_ancestor,
+is_ignored,
 ::InMemory(_path),
 )
 }
@@ -633,10 +635,9 @@
 
 fn mark_unknown_or_ignored(
 ,
-has_ignored_ancestor: bool,
+is_ignored: bool,
 hg_path: <'_, 'on_disk>,
 ) {
-let is_ignored = has_ignored_ancestor || (self.ignore_fn)(_path);
 if is_ignored {
 if self.options.list_ignored {
 self.outcome



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D10906: rhg: fallback if tweakdefaults or statuscopies is enabled with status

2021-06-24 Thread pulkit (Pulkit Goyal)
pulkit created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  `rhg status` is experimental right now and does not support all 
functionalities.
  While the long term target is to implement them, for now we add a fallback to
  have all tests pass with `rhg status` enabled.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10906

AFFECTED FILES
  rust/rhg/src/commands/status.rs

CHANGE DETAILS

diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -141,6 +141,30 @@
 ));
 }
 
+// TODO: lift this limitation
+match invocation.config.get_bool(b"ui", b"tweakdefaults") {
+Ok(val) => {
+if val {
+return Err(CommandError::unsupported(
+"ui.tweakdefaults is not yet supported with rhg status",
+));
+}
+}
+Err(_) => {}
+};
+
+// TODO: lift this limitation
+match invocation.config.get_bool(b"ui", b"statuscopies") {
+Ok(val) => {
+if val {
+return Err(CommandError::unsupported(
+"ui.statuscopies is not yet supported with rhg status",
+));
+}
+}
+Err(_) => {}
+};
+
 let ui = invocation.ui;
 let args = invocation.subcommand_args;
 let display_states = if args.is_present("all") {



To: pulkit, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D10905: stream: double check that self.vfs is *not* in the vfsmap

2021-06-24 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The stream clone logic allows for writing any content to any file under 
various
  vfs. This is *not* suitable for *vfs*, since writing in `.hg/` directly allow 
to
  modify the configuration and is a great and simple gateway for remote code
  execution.

REPOSITORY
  rHG Mercurial

BRANCH
  stable

REVISION DETAIL
  https://phab.mercurial-scm.org/D10905

AFFECTED FILES
  mercurial/streamclone.py

CHANGE DETAILS

diff --git a/mercurial/streamclone.py b/mercurial/streamclone.py
--- a/mercurial/streamclone.py
+++ b/mercurial/streamclone.py
@@ -560,6 +560,12 @@
 def _emit2(repo, entries, totalfilesize):
 """actually emit the stream bundle"""
 vfsmap = _makemap(repo)
+# we keep repo.vfs out of the on purpose, ther are too many danger there
+# (eg: .hg/hgrc),
+#
+# this assert is duplicated (from _makemap) as author might think this is
+# fine, while this is really not fine.
+assert repo.vfs not in vfsmap.values()
 progress = repo.ui.makeprogress(
 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
 )
@@ -685,6 +691,12 @@
 progress.update(0)
 
 vfsmap = _makemap(repo)
+# we keep repo.vfs out of the on purpose, ther are too many danger
+# there (eg: .hg/hgrc),
+#
+# this assert is duplicated (from _makemap) as author might think this
+# is fine, while this is really not fine.
+assert repo.vfs not in vfsmap.values()
 
 with repo.transaction(b'clone'):
 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())



To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel