# HG changeset patch # User Georges Racinet <graci...@anybox.fr> # Date 1538060596 -7200 # Thu Sep 27 17:03:16 2018 +0200 # Node ID d8c9571755a64e1fc3429587dfd3949b9862eceb # Parent d3d4b4b5f725124ef9e93cf74d779a7a05aa11b7 # EXP-Topic rustancestors-rfc rust: pure Rust lazyancestors iterator
This is the first of a patch series aiming to provide an alternative implementation in the Rust programming language of the _lazyancestorsiter from the ancestor module. This iterator has been brought to our attention by the people at Octobus, as a potential good candidate for incremental "oxydation" (rewriting in Rust), because it has shown performance issues lately and it merely deals with ints (revision numbers) obtained by calling the index, whih should be directly callable from Rust code, being itself implemented as a C extension. The idea behind this series is to provide a minimal example of Rust code collaborating with existing C and Python code. To open the way to gradually rewriting more of Mercurial's python code in Rust, without being forced to pay a large initial cost of rewriting the existing fast core into Rust. This patch does not introduce any bindings to other Mercurial code yet, but someone with a rustc/cargo installation may chdir into mercurial/rust and run the tests by issuing: cargo test The algorithm is a bit simplified (see details in docstrings), and at its simplest becomes rather trivial, showcasing that Rust has batteries included too: BinaryHeap, the Rust analog of Python's heapq does actually all the work. The implementation can be further optimized and be made more idiomatic Rust. diff -r d3d4b4b5f725 -r d8c9571755a6 mercurial/rust/Cargo.lock --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/rust/Cargo.lock Thu Sep 27 17:03:16 2018 +0200 @@ -0,0 +1,4 @@ +[[package]] +name = "hgancestors" +version = "0.1.0" + diff -r d3d4b4b5f725 -r d8c9571755a6 mercurial/rust/Cargo.toml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/rust/Cargo.toml Thu Sep 27 17:03:16 2018 +0200 @@ -0,0 +1,4 @@ +[package] +name = "hgancestors" +version = "0.1.0" +authors = ["Georges Racinet <graci...@anybox.fr>"] diff -r d3d4b4b5f725 -r d8c9571755a6 mercurial/rust/rustfmt.toml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/rust/rustfmt.toml Thu Sep 27 17:03:16 2018 +0200 @@ -0,0 +1,3 @@ +max_width = 79 +wrap_comments = true +error_on_line_overflow = true diff -r d3d4b4b5f725 -r d8c9571755a6 mercurial/rust/src/ancestors.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/rust/src/ancestors.rs Thu Sep 27 17:03:16 2018 +0200 @@ -0,0 +1,203 @@ +// ancestors.rs +// +// Copyright 2018 Georges Racinet <graci...@anybox.fr> +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Rust versions of generic DAG ancestors algorithms for Mercurial + +use std::iter::Iterator; +use std::collections::{BinaryHeap, HashSet}; + +/// The simplest expression of what we need of Mercurial DAGs. +/// +/// As noted in revlog.c, revision numbers are actually encoded in +/// 4 bytes, and are liberally converted to ints, whence the i32. It would +/// probably be a good idea to make this trait generic over a suitable integer +/// type. +pub trait Graph { + fn parents(&self, i32) -> (i32, i32); +} + +/// Iterator over the ancestors of a given list of revisions +/// This is a generic type, defined and implemented for any Graph, so that +/// it's easy to +/// +/// - unit test in pure Rust +/// - bind to main Mercurial code, potentially in several ways and have these +/// bindings evolve over time +pub struct AncestorsIterator<G: Graph> { + graph: G, + visit: BinaryHeap<i32>, + seen: HashSet<i32>, + stoprev: i32, +} + +impl<G: Graph> AncestorsIterator<G> { + /// Constructor. + /// + /// We actually expect initrevs to be i64, and we coerce them to the + /// i32 of our internal representations. The reason is that from C + /// code, they actually come as such. + /// Here it would be better to take any iterable over our internal + /// revision numbers, and have the conversion be made from the caller. + pub fn new( + graph: G, + initrevs: &Vec<i64>, + stoprev: i64, + inclusive: bool, + ) -> Self { + let stop32 = stoprev as i32; + // because in iteration, contrary to what happens in ancestor.py, we + // compare with stoprev before pushing, we have to prefilter in the + // constructor too. + let deref_filter = initrevs.iter() + .map(|&x| x as i32).filter(|x| stop32 <= *x); + + if inclusive { + return AncestorsIterator { + visit: deref_filter.clone().collect(), + seen: deref_filter.collect(), + stoprev: stop32, + graph: graph, + }; + } + let mut this = AncestorsIterator { + visit: BinaryHeap::new(), + seen: HashSet::new(), + stoprev: stop32, + graph: graph, + }; + this.seen.insert(-1); // TODO nullrev constant + for rev in deref_filter { + this.conditionally_push_parents(rev); + } + this + } + + #[inline] + fn conditionally_push_rev(&mut self, rev: i32) { + if self.stoprev <= rev && !self.seen.contains(&rev) { + self.seen.insert(rev); + self.visit.push(rev); + } + } + + #[inline] + fn conditionally_push_parents(&mut self, rev: i32) { + let parents = self.graph.parents(rev); + self.conditionally_push_rev(parents.0); + self.conditionally_push_rev(parents.1); + } +} + +/// Main implementation. +/// +/// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` +/// with a few non crucial differences: +/// +/// - there's no filtering of invalid parent revisions. Actually, it should be +/// consistent and more efficient to filter them from the end caller. +/// - we don't use the equivalent of `heapq.heapreplace()`, but we should, for +/// the same reasons (using `peek_mut`) +/// - we don't have the optimization for adjacent revs (case where p1 == rev-1) +/// - we save a few pushes by comparing with `stoprev` before pushing +impl<G: Graph> Iterator for AncestorsIterator<G> { + type Item = i32; + + fn next(&mut self) -> Option<i32> { + let current = match self.visit.pop() { + None => { + return None; + } + Some(i) => i, + }; + self.conditionally_push_parents(current); + Some(current) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[derive(Clone, Debug)] + enum Stub { + Some, + } + + /// This is the same as the dict from test-ancestors.py + impl Graph for Stub { + fn parents(&self, rev: i32) -> (i32, i32) { + match rev { + 0 => (-1, -1), + 1 => (0, -1), + 2 => (1, -1), + 3 => (1, -1), + 4 => (2, -1), + 5 => (4, -1), + 6 => (4, -1), + 7 => (4, -1), + 8 => (-1, -1), + 9 => (6, 7), + 10 => (5, -1), + 11 => (3, 7), + 12 => (9, -1), + 13 => (8, -1), + _ => panic!("Unexpected acces in graph"), + } + } + } + + fn list_ancestors<G: Graph + Clone>( + graph: &G, + initrevs: &Vec<i64>, + stoprev: i64, + inclusive: bool, + ) -> Vec<i32> { + AncestorsIterator::new((*graph).clone(), initrevs, stoprev, inclusive) + .collect() + } + + #[test] + /// Same tests as test-ancestor.py, without membership + /// (see also test-ancestor.py.out) + fn test_list_ancestor() { + let stub1 = &Stub::Some; + assert_eq!(list_ancestors(stub1, &vec![], 0, false), vec![]); + assert_eq!( + list_ancestors(stub1, &vec![11, 13], 0, false), + vec![8, 7, 4, 3, 2, 1, 0] + ); + assert_eq!(list_ancestors(stub1, &vec![1, 3], 0, false), vec![1, 0]); + assert_eq!( + list_ancestors(stub1, &vec![11, 13], 0, true), + vec![13, 11, 8, 7, 4, 3, 2, 1, 0] + ); + assert_eq!(list_ancestors(stub1, &vec![11, 13], 6, false), vec![8, 7]); + assert_eq!( + list_ancestors(stub1, &vec![11, 13], 6, true), + vec![13, 11, 8, 7] + ); + assert_eq!( + list_ancestors(stub1, &vec![11, 13], 11, true), + vec![13, 11] + ); + assert_eq!(list_ancestors(stub1, &vec![11, 13], 12, true), vec![13]); + assert_eq!( + list_ancestors(stub1, &vec![10, 1], 0, true), + vec![10, 5, 4, 2, 1, 0] + ); + } + + #[test] + /// Corner case that's not directly in test-ancestors.py, but + /// that happens quite often, as demonstrated by running the whole suite. + /// For instance, run tests/test-obsolete-checkheads.t + fn test_nullrev_input() { + let mut iter = AncestorsIterator::new(Stub::Some, &vec![-1], 0, false); + assert_eq!(iter.next(), None) + } +} diff -r d3d4b4b5f725 -r d8c9571755a6 mercurial/rust/src/lib.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/rust/src/lib.rs Thu Sep 27 17:03:16 2018 +0200 @@ -0,0 +1,7 @@ +// Copyright 2018 Georges Racinet <graci...@anybox.fr> +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +mod ancestors; +pub use ancestors::{AncestorsIterator, Graph}; _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel