D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-29 Thread yuja (Yuya Nishihara)
yuja added a comment.


  > +impl DirsMultiset {
  > +/// Initializes the multiset from a dirstate or a manifest.
  > +///
  > +/// If `skip_state` is provided, skips dirstate entries with equal state.
  > +pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
  > +let mut multiset = DirsMultiset {
  > +inner: HashMap::new(),
  > +};
  > +
  > +match iterable {
  > +DirsIterable::Dirstate(vec) => {
  > +for (ref filename, DirstateEntry { state, .. }) in vec {
  > +// This `if` is optimized out of the loop
  > +if let Some(skip) = skip_state {
  > +if skip != state {
  > +multiset.add_path(filename);
  > +}
  > +} else {
  > +multiset.add_path(filename);
  > +}
  > +}
  > +}
  > +DirsIterable::Manifest(vec) => {
  > +for ref filename in vec {
  > +multiset.add_path(filename);
  > +}
  > +}
  > +}
  > +
  > +multiset
  > +}
  
  Could be `from_dirstate(vec, skip_state)` and `from_vec(vec)` since the
  `skip_state` argument only applies to the `Dirstate` variant.
  
  > +/// Returns the slice up to the next directory name from right to left,
  > +/// without trailing slash
  > +fn find_dir(path: &[u8]) -> &[u8] {
  > +let mut path = path;
  > +loop {
  > +if let Some(new_pos) = path.len().checked_sub(1) {
  > +if path[new_pos] == b'/' {
  > +break &path[..new_pos];
  > +}
  > +path = &path[..new_pos];
  > +} else {
  > +break &[];
  > +}
  > +}
  > +}
  
  Maybe use Iterator::rposition()?
  
    let p = path.iter().rposition(|&c| c == b'/').unwrap_or(0);
    &path[..p]
  
  Anyway, we'll probably want an iterator which yields parent directories.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers, kevincox
Cc: yuja, martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-29 Thread Yuya Nishihara
> +impl DirsMultiset {
> +/// Initializes the multiset from a dirstate or a manifest.
> +///
> +/// If `skip_state` is provided, skips dirstate entries with equal state.
> +pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
> +let mut multiset = DirsMultiset {
> +inner: HashMap::new(),
> +};
> +
> +match iterable {
> +DirsIterable::Dirstate(vec) => {
> +for (ref filename, DirstateEntry { state, .. }) in vec {
> +// This `if` is optimized out of the loop
> +if let Some(skip) = skip_state {
> +if skip != state {
> +multiset.add_path(filename);
> +}
> +} else {
> +multiset.add_path(filename);
> +}
> +}
> +}
> +DirsIterable::Manifest(vec) => {
> +for ref filename in vec {
> +multiset.add_path(filename);
> +}
> +}
> +}
> +
> +multiset
> +}

Could be `from_dirstate(vec, skip_state)` and `from_vec(vec)` since the
`skip_state` argument only applies to the `Dirstate` variant.

> +/// Returns the slice up to the next directory name from right to left,
> +/// without trailing slash
> +fn find_dir(path: &[u8]) -> &[u8] {
> +let mut path = path;
> +loop {
> +if let Some(new_pos) = path.len().checked_sub(1) {
> +if path[new_pos] == b'/' {
> +break &path[..new_pos];
> +}
> +path = &path[..new_pos];
> +} else {
> +break &[];
> +}
> +}
> +}


Maybe use Iterator::rposition()?

```
let p = path.iter().rposition(|&c| c == b'/').unwrap_or(0);
&path[..p]
```

Anyway, we'll probably want an iterator which yields parent directories.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-28 Thread Raphaël Gomès
Closed by commit rHG2dcee6497b0b: rust-dirstate: add "dirs" Rust 
implementation (authored by Alphare).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs 
Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6393?vs=15670&id=15694

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -15,8 +15,10 @@
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+dirs_multiset::DirsMultiset,
 parsers::{pack_dirstate, parse_dirstate},
-CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+DirstateVec,
 };
 mod filepatterns;
 mod utils;
@@ -73,6 +75,12 @@
 BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+PathNotFound(Vec),
+EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
 fn from(e: std::io::Error) -> Self {
 DirstatePackError::CorruptedEntry(e.to_string())
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,3 +1,4 @@
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@
 }
 
 pub type CopyVec<'a> = Vec>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+Dirstate(DirstateVec),
+Manifest(Vec>),
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs 
b/rust/hg-core/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,355 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès 
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to count the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+type Target = HashMap<Vec<u8>, u32>;
+
+fn deref(&self) -> &Self::Target {
+&self.inner
+}
+}
+
+impl DirsMultiset {
+/// Initializes the multiset from a dirstate or a manifest.
+///
+/// If `skip_state` is provided, skips dirstate entries with equal state.
+pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
+let mut multiset = DirsMultiset {
+inner: HashMap::new(),
+};
+
+match iterable {
+DirsIterable::Dirstate(vec) => {
+for (ref filename, DirstateEntry { state, .. }) in vec {
+// This `if` is optimized out of the loop
+if let Some(skip) = skip_state {
+if skip != state {
+multiset.add_path(filename);
+}
+} else {
+multiset.add_path(filename);
+}
+}
+}
+DirsIterable::Manifest(vec) => {
+for ref filename in vec {
+multiset.add_path(filename);
+}
+}
+}
+
+multiset
+}
+
+/// Returns the slice up to the next directory name from right to left,
+/// without trailing slash
+fn find_dir(path: &[u8]) -> &[u8] {
+let mut path = path;
+loop {
+if let Some(new_pos) = path.len().checked_sub(1) {
+if path[new_pos] == b'/' {
+break &path[..new_pos];
+}
+path = &path[..new_pos];
+} else {
+break &[];
+}
+}
+}
+
+/// Increases the count of deepest directory contained in the path.
+///
+/// If the directory is not yet in the map, adds its parents.
+pub fn add_path(&mut self, path: &[u8]) {
+let mut pos = path.len();
+
+loop {
+let subpath = Self::find_dir(&path[..pos]);
+if let Some(val) = self.inner.get_mut(subpath) {
+*val += 1;
+break;
+}
+self.inner.insert(subpath.to_owned(), 1);
+
+pos = subpath.len();
+if pos == 0 {
+

D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-28 Thread kevincox (Kevin Cox)
kevincox added inline comments.

INLINE COMMENTS

> martinvonz wrote in dirs_multiset.rs:43
> Maybe Kevin meant something like this?
> 
>   if skip_state == None | skip_state == Some(state) {
>   multiset.add_path(filename);
>   }

No, you are right. I misread the condition.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers, kevincox
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-27 Thread martinvonz (Martin von Zweigbergk)
martinvonz added inline comments.

INLINE COMMENTS

> Alphare wrote in dirs_multiset.rs:43
> Unless I'm missing something, this would render the `if` statement useless.

Maybe Kevin meant something like this?

  if skip_state == None | skip_state == Some(state) {
  multiset.add_path(filename);
  }

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers, kevincox
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-27 Thread Raphaël Gomès
Alphare added a comment.


  In D6393#95768, @kevincox wrote:
  
  > Sorry. I was busy. In general don't worry about blocking on me, I can't
  > promise any sort of reasonable response time. Worst case I can review the
  > changes after submission and changes can be made afterwards.
  
  Sure, no problem, thanks.
  I've also sneaked in some `&[u8]` instead of `Vec<u8>` in my last update.

INLINE COMMENTS

> kevincox wrote in dirs_multiset.rs:43
> You can replace the nested if with:
> 
>   if skip_state == Some(state) {

Unless I'm missing something, this would render the `if` statement useless.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers, kevincox
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-27 Thread Raphaël Gomès
Alphare marked an inline comment as done.
Alphare updated this revision to Diff 15670.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6393?vs=15508&id=15670

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -15,8 +15,10 @@
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+dirs_multiset::DirsMultiset,
 parsers::{pack_dirstate, parse_dirstate},
-CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+DirstateVec,
 };
 mod filepatterns;
 mod utils;
@@ -73,6 +75,12 @@
 BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+PathNotFound(Vec),
+EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
 fn from(e: std::io::Error) -> Self {
 DirstatePackError::CorruptedEntry(e.to_string())
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,3 +1,4 @@
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@
 }
 
 pub type CopyVec<'a> = Vec>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+Dirstate(DirstateVec),
+Manifest(Vec>),
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs 
b/rust/hg-core/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,355 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès 
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to count the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+type Target = HashMap<Vec<u8>, u32>;
+
+fn deref(&self) -> &Self::Target {
+&self.inner
+}
+}
+
+impl DirsMultiset {
+/// Initializes the multiset from a dirstate or a manifest.
+///
+/// If `skip_state` is provided, skips dirstate entries with equal state.
+pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
+let mut multiset = DirsMultiset {
+inner: HashMap::new(),
+};
+
+match iterable {
+DirsIterable::Dirstate(vec) => {
+for (ref filename, DirstateEntry { state, .. }) in vec {
+// This `if` is optimized out of the loop
+if let Some(skip) = skip_state {
+if skip != state {
+multiset.add_path(filename);
+}
+} else {
+multiset.add_path(filename);
+}
+}
+}
+DirsIterable::Manifest(vec) => {
+for ref filename in vec {
+multiset.add_path(filename);
+}
+}
+}
+
+multiset
+}
+
+/// Returns the slice up to the next directory name from right to left,
+/// without trailing slash
+fn find_dir(path: &[u8]) -> &[u8] {
+let mut path = path;
+loop {
+if let Some(new_pos) = path.len().checked_sub(1) {
+if path[new_pos] == b'/' {
+break &path[..new_pos];
+}
+path = &path[..new_pos];
+} else {
+break &[];
+}
+}
+}
+
+/// Increases the count of deepest directory contained in the path.
+///
+/// If the directory is not yet in the map, adds its parents.
+pub fn add_path(&mut self, path: &[u8]) {
+let mut pos = path.len();
+
+loop {
+let subpath = Self::find_dir(&path[..pos]);
+if let Some(val) = self.inner.get_mut(subpath) {
+*val += 1;
+break;
+}
+self.inner.insert(subpath.to_owned(), 1);
+
+pos = subpath.len();
+if pos == 0 {
+break;
+}
+}
+}
+
+/// Decreases the count of deepest directory contained in the path.
+///
+/// If it is the only referen

D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-24 Thread kevincox (Kevin Cox)
kevincox added a comment.
kevincox accepted this revision.


  Sorry. I was busy. In general don't worry about blocking on me, I can't
  promise any sort of reasonable response time. Worst case I can review the
  changes after submission and changes can be made afterwards.

INLINE COMMENTS

> dirs_multiset.rs:43
> +if let Some(skip) = skip_state {
> +if skip != state {
> +multiset.add_path(filename);

You can replace the nested if with:

  if skip_state == Some(state) {

> dirs_multiset.rs:63
> +/// without trailing slash, from right to left.
> +fn find_dir(path: &[u8], mut pos: usize) -> Option<&[u8]> {
> +loop {

I would remove the `pos` argument. IIUC the following two calls are currently identical.

  find_dir(path, n);
  find_dir(path[..n], n);

If you remove the second argument then you can just always remove the last 
component of the path. This will also allow you to have a more clear doc 
comment. Right now I find it a little confusing.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers, kevincox
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-17 Thread Raphaël Gomès
Alphare added a comment.


  @kevincox, do you have any comments about this change?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-14 Thread Raphaël Gomès
Alphare updated this revision to Diff 15508.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6393?vs=15341&id=15508

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6393/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -15,8 +15,10 @@
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+dirs_multiset::DirsMultiset,
 parsers::{pack_dirstate, parse_dirstate},
-CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+DirstateVec,
 };
 mod filepatterns;
 mod utils;
@@ -73,6 +75,12 @@
 BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+PathNotFound(Vec),
+EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
 fn from(e: std::io::Error) -> Self {
 DirstatePackError::CorruptedEntry(e.to_string())
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,3 +1,4 @@
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@
 }
 
 pub type CopyVec<'a> = Vec>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+Dirstate(DirstateVec),
+Manifest(Vec>),
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs 
b/rust/hg-core/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,354 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès 
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to count the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+type Target = HashMap<Vec<u8>, u32>;
+
+fn deref(&self) -> &Self::Target {
+&self.inner
+}
+}
+
+impl DirsMultiset {
+/// Initializes the multiset from a dirstate or a manifest.
+///
+/// If `skip_state` is provided, skips dirstate entries with equal state.
+pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
+let mut multiset = DirsMultiset {
+inner: HashMap::new(),
+};
+
+match iterable {
+DirsIterable::Dirstate(vec) => {
+for (filename, DirstateEntry { state, .. }) in vec {
+// This `if` is optimized out of the loop
+if let Some(skip) = skip_state {
+if skip != state {
+multiset.add_path(filename);
+}
+} else {
+multiset.add_path(filename);
+}
+}
+}
+DirsIterable::Manifest(vec) => {
+for filename in vec {
+multiset.add_path(filename);
+}
+}
+}
+
+multiset
+}
+
+/// Returns (maybe) a slice of path containing the next directory name
+/// without trailing slash, from right to left.
+fn find_dir(path: &[u8], mut pos: usize) -> &[u8] {
+loop {
+if let Some(new_pos) = pos.checked_sub(1) {
+if path[new_pos] == b'/' {
+break &path[..new_pos];
+}
+pos = new_pos;
+} else {
+break &[];
+}
+}
+}
+
+/// Increases the count of deepest directory contained in the path.
+///
+/// If the directory is not yet in the map, adds its parents.
+pub fn add_path(&mut self, path: Vec) {
+let mut pos = path.len();
+
+loop {
+let subpath = Self::find_dir(&path, pos);
+if let Some(val) = self.inner.get_mut(subpath) {
+*val += 1;
+break;
+}
+self.inner.insert(subpath.to_owned(), 1);
+
+pos = subpath.len();
+if pos == 0 {
+break;
+}
+}
+}
+
+/// Decreases the count of deepest directory contained in the path.
+///
+/// If it is the only reference, decreases all parents until one is
+/// removed.
+///

D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-05 Thread Raphaël Gomès
Alphare updated this revision to Diff 15341.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6393?vs=15160&id=15341

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -15,8 +15,10 @@
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+dirs_multiset::DirsMultiset,
 parsers::{pack_dirstate, parse_dirstate},
-CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+DirstateVec,
 };
 mod filepatterns;
 
@@ -72,6 +74,12 @@
 BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+PathNotFound(Vec),
+EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
 fn from(e: std::io::Error) -> Self {
 DirstatePackError::CorruptedEntry(e.to_string())
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,3 +1,4 @@
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@
 }
 
 pub type CopyVec<'a> = Vec>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+Dirstate(DirstateVec),
+Manifest(Vec>),
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs 
b/rust/hg-core/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,354 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès 
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to count the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+type Target = HashMap<Vec<u8>, u32>;
+
+fn deref(&self) -> &Self::Target {
+&self.inner
+}
+}
+
+impl DirsMultiset {
+/// Initializes the multiset from a dirstate or a manifest.
+///
+/// If `skip_state` is provided, skips dirstate entries with equal state.
+pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
+let mut multiset = DirsMultiset {
+inner: HashMap::new(),
+};
+
+match iterable {
+DirsIterable::Dirstate(vec) => {
+for (filename, DirstateEntry { state, .. }) in vec {
+// This `if` is optimized out of the loop
+if let Some(skip) = skip_state {
+if skip != state {
+multiset.add_path(filename);
+}
+} else {
+multiset.add_path(filename);
+}
+}
+}
+DirsIterable::Manifest(vec) => {
+for filename in vec {
+multiset.add_path(filename);
+}
+}
+}
+
+multiset
+}
+
+/// Returns (maybe) a slice of path containing the next directory name
+/// without trailing slash, from right to left.
+fn find_dir(path: &[u8], mut pos: usize) -> &[u8] {
+loop {
+if let Some(new_pos) = pos.checked_sub(1) {
+if path[new_pos] == b'/' {
+break &path[..new_pos];
+}
+pos = new_pos;
+} else {
+break &[];
+}
+}
+}
+
+/// Increases the count of deepest directory contained in the path.
+///
+/// If the directory is not yet in the map, adds its parents.
+pub fn add_path(&mut self, path: Vec) {
+let mut pos = path.len();
+
+loop {
+let subpath = Self::find_dir(&path, pos);
+if let Some(val) = self.inner.get_mut(subpath) {
+*val += 1;
+break;
+}
+self.inner.insert(subpath.to_owned(), 1);
+
+pos = subpath.len();
+if pos == 0 {
+break;
+}
+}
+}
+
+/// Decreases the count of deepest directory contained in the path.
+///
+/// If it is the only reference, decreases all parents until one is
+/// removed.
+/// If the directory is not in the map, something horrible has happened.
+pub fn

D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-05 Thread Raphaël Gomès
Alphare added a comment.


  In https://phab.mercurial-scm.org/D6393#94007, @martinvonz wrote:
  
  > Does this need updating after https://phab.mercurial-scm.org/D6403? Based
  > on a cursory look at the patch, it has not been updated yet.
  
  
  Indeed it did. I've updated this changeset and rebased.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-06-03 Thread martinvonz (Martin von Zweigbergk)
martinvonz added a comment.


  Does this need updating after https://phab.mercurial-scm.org/D6403? Based on
  a cursory look at the patch, it has not been updated yet.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6393: rust-dirstate: add "dirs" Rust implementation

2019-05-17 Thread Raphaël Gomès
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Following the work done in
  https://phab.mercurial-scm.org/rHGd1786c1d34fa927d4048054ebe62c55fa14f9b1e
  and working towards the goal of a
  complete Rust implementation of the dirstate, this rewrites the `dirs` class.
  
  There is already a C implementation, which relies heavily on CPython hacks and
  protocol violations for performance, so I don't expect this to perform as well
  for now, as this is very straight-forward code.
  The immediate benefits are new high-level documentation and some unit tests.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -15,8 +15,10 @@
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+dirs_multiset::DirsMultiset,
 parsers::{pack_dirstate, parse_dirstate},
-CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+DirstateVec,
 };
 mod filepatterns;
 
@@ -72,6 +74,12 @@
 BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+PathNotFound(Vec),
+EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
 fn from(e: std::io::Error) -> Self {
 DirstatePackError::CorruptedEntry(e.to_string())
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,3 +1,4 @@
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@
 }
 
 pub type CopyVec<'a> = Vec>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+Dirstate(DirstateVec),
+Manifest(Vec>),
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs 
b/rust/hg-core/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,342 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès 
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to count the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+type Target = HashMap<Vec<u8>, u32>;
+
+fn deref(&self) -> &Self::Target {
+&self.inner
+}
+}
+
+impl DirsMultiset {
+/// Initializes the multiset from a dirstate or a manifest.
+///
+/// If `skip_state` is provided, skips dirstate entries with equal state.
+pub fn new(iterable: DirsIterable, skip_state: Option) -> Self {
+let mut multiset = DirsMultiset {
+inner: HashMap::new(),
+};
+
+match iterable {
+DirsIterable::Dirstate(vec) => {
+for (filename, DirstateEntry { state, .. }) in vec {
+// This `if` is optimized out of the loop
+if let Some(skip) = skip_state {
+if skip != state {
+multiset.add_path(filename);
+}
+} else {
+multiset.add_path(filename);
+}
+}
+}
+DirsIterable::Manifest(vec) => {
+for filename in vec {
+multiset.add_path(filename);
+}
+}
+}
+
+multiset
+}
+
+/// Returns (maybe) a slice of path containing the next directory name
+/// without trailing slash, from right to left.
+fn find_dir(path: &[u8], mut pos: usize) -> Option<&[u8]> {
+loop {
+if let Some(new_pos) = pos.checked_sub(1) {
+if path[new_pos] == b'/' {
+break Some(&path[..new_pos]);
+}
+pos = new_pos;
+} else {
+break None;
+}
+}
+}
+
+/// Increases the count of deepest directory contained in the path.
+///
+/// If the directory is not yet in the map, adds its parents.
+pub fn add_path(&mut self, path: Vec) {
+if path.is_empty() {
+return;
+}
+let mut pos = path.len();
+
+while let Some(sub