D7903: sha1dc: avoid including the nonexistent stdint.h with Visual Studio 2008

2020-01-15 Thread mharbison72 (Matt Harbison)
mharbison72 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7903

AFFECTED FILES
  mercurial/thirdparty/sha1dc/lib/ubc_check.c
  mercurial/thirdparty/sha1dc/lib/ubc_check.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/sha1dc/lib/ubc_check.h 
b/mercurial/thirdparty/sha1dc/lib/ubc_check.h
--- a/mercurial/thirdparty/sha1dc/lib/ubc_check.h
+++ b/mercurial/thirdparty/sha1dc/lib/ubc_check.h
@@ -28,7 +28,12 @@
 #endif
 
 #ifndef SHA1DC_NO_STANDARD_INCLUDES
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
 #include <stdint.h>
+#else
+/* prior to Visual Studio 2010 */
+typedef unsigned int uint32_t;
+#endif
 #endif
 
 #define DVMASKSIZE 1
diff --git a/mercurial/thirdparty/sha1dc/lib/ubc_check.c 
b/mercurial/thirdparty/sha1dc/lib/ubc_check.c
--- a/mercurial/thirdparty/sha1dc/lib/ubc_check.c
+++ b/mercurial/thirdparty/sha1dc/lib/ubc_check.c
@@ -25,8 +25,10 @@
 */
 
 #ifndef SHA1DC_NO_STANDARD_INCLUDES
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
 #include <stdint.h>
 #endif
+#endif
 #ifdef SHA1DC_CUSTOM_INCLUDE_UBC_CHECK_C
 #include SHA1DC_CUSTOM_INCLUDE_UBC_CHECK_C
 #endif



To: mharbison72, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7902: merge: introduce a clean_update() for that use-case

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a reviewer: durin42.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  In the same vein as the previous patch.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7902

AFFECTED FILES
  hgext/histedit.py
  hgext/rebase.py
  mercurial/hg.py
  mercurial/merge.py
  mercurial/shelve.py

CHANGE DETAILS

diff --git a/mercurial/shelve.py b/mercurial/shelve.py
--- a/mercurial/shelve.py
+++ b/mercurial/shelve.py
@@ -745,7 +745,7 @@
 try:
 checkparents(repo, state)
 
-merge.update(repo, state.pendingctx, branchmerge=False, force=True)
+merge.clean_update(state.pendingctx)
 if state.activebookmark and state.activebookmark in 
repo._bookmarks:
 bookmarks.activate(repo, state.activebookmark)
 mergefiles(ui, repo, state.wctx, state.pendingctx)
diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -2591,6 +2591,10 @@
 )
 
 
+def clean_update(ctx, wc=None):
+return update(ctx.repo(), ctx.rev(), branchmerge=False, force=True, wc=wc)
+
+
 def graft(
 repo, ctx, base, labels=None, keepparent=False, keepconflictparent=False
 ):
diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -1182,7 +1182,7 @@
 node = repo[b'.'].hex()
 
 repo.ui.status(_(b"aborting the merge, updating back to %s\n") % node[:12])
-stats = mergemod.update(repo, node, branchmerge=False, force=True)
+stats = mergemod.clean_update(repo[node])
 _showstats(repo, stats)
 return stats.unresolvedcount > 0
 
diff --git a/hgext/rebase.py b/hgext/rebase.py
--- a/hgext/rebase.py
+++ b/hgext/rebase.py
@@ -799,9 +799,7 @@
 
 # Update away from the rebase if necessary
 if shouldupdate:
-mergemod.update(
-repo, self.originalwd, branchmerge=False, force=True
-)
+mergemod.clean_update(repo[self.originalwd])
 
 # Strip from the first rebased revision
 if rebased:
@@ -1475,7 +1473,7 @@
 else:
 if repo[b'.'].rev() != p1:
 repo.ui.debug(b" update to %d:%s\n" % (p1, p1ctx))
-mergemod.update(repo, p1, branchmerge=False, force=True)
+mergemod.clean_update(p1ctx)
 else:
 repo.ui.debug(b" already in destination\n")
 # This is, alas, necessary to invalidate workingctx's manifest cache,
diff --git a/hgext/histedit.py b/hgext/histedit.py
--- a/hgext/histedit.py
+++ b/hgext/histedit.py
@@ -945,7 +945,7 @@
 class base(histeditaction):
 def run(self):
 if self.repo[b'.'].node() != self.node:
-mergemod.update(self.repo, self.node, branchmerge=False, 
force=True)
+mergemod.clean_update(self.repo[self.node])
 return self.continueclean()
 
 def continuedirty(self):



To: martinvonz, durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7901: merge: introduce a revert_to() for that use-case

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  I find it hard to understand what value to pass for all the arguments
  to `merge.update()`. I would like to introduce functions that are more
  specific to each use-case. We already have `graft()`. This patch
  introduces a `revert_to()` and uses it in some places to show that it
  works.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7901

AFFECTED FILES
  hgext/fix.py
  mercurial/cmdutil.py
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -2205,6 +2205,7 @@
 labels=None,
 matcher=None,
 mergeforce=False,
+updatedirstate=True,
 updatecheck=None,
 wc=None,
 ):
@@ -2504,7 +2505,7 @@
 ### apply phase
 if not branchmerge:  # just jump to the new rev
 fp1, fp2, xp1, xp2 = fp2, nullid, xp2, b''
-updatedirstate = always and not wc.isinmemory()
+updatedirstate = updatedirstate and always and not wc.isinmemory()
 if updatedirstate:
 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
 # note that we're in the middle of an update
@@ -2578,6 +2579,18 @@
 return stats
 
 
+def revert_to(ctx, matcher=None, wc=None):
+return update(
+ctx.repo(),
+ctx.rev(),
+branchmerge=False,
+force=True,
+updatedirstate=False,
+matcher=matcher,
+wc=wc,
+)
+
+
 def graft(
 repo, ctx, base, labels=None, keepparent=False, keepconflictparent=False
 ):
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -584,15 +584,8 @@
 [os.unlink(repo.wjoin(c)) for c in newlyaddedandmodifiedfiles]
 # 3a. apply filtered patch to clean repo  (clean)
 if backups:
-# Equivalent to hg.revert
 m = scmutil.matchfiles(repo, set(backups.keys()) | alsorestore)
-mergemod.update(
-repo,
-repo.dirstate.p1(),
-branchmerge=False,
-force=True,
-matcher=m,
-)
+mergemod.revert_to(repo[b'.'], matcher=m)
 
 # 3b. (apply)
 if dopatch:
diff --git a/hgext/fix.py b/hgext/fix.py
--- a/hgext/fix.py
+++ b/hgext/fix.py
@@ -735,15 +735,7 @@
 
 wctx = context.overlayworkingctx(repo)
 wctx.setbase(repo[newp1node])
-merge.update(
-repo,
-ctx.rev(),
-branchmerge=False,
-force=True,
-ancestor=p1rev,
-mergeancestor=False,
-wc=wctx,
-)
+merge.revert_to(ctx, wc=wctx)
 copies.graftcopies(wctx, ctx, ctx.p1())
 
 for path in filedata.keys():



To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7900: merge: avoid a negation in the definition of updatedirstate

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We only use `partial` in one place: the definition of
  `updatedirstate`. Let's simplify that a little.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7900

AFFECTED FILES
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -2291,10 +2291,7 @@
 # If we're doing a partial update, we need to skip updating
 # the dirstate, so make a note of any partial-ness to the
 # update here.
-if matcher is None or matcher.always():
-partial = False
-else:
-partial = True
+always = matcher is None or matcher.always()
 with repo.wlock():
 if wc is None:
 wc = repo[None]
@@ -2507,7 +2504,7 @@
 ### apply phase
 if not branchmerge:  # just jump to the new rev
 fp1, fp2, xp1, xp2 = fp2, nullid, xp2, b''
-updatedirstate = not partial and not wc.isinmemory()
+updatedirstate = always and not wc.isinmemory()
 if updatedirstate:
 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
 # note that we're in the middle of an update



To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7899: merge: define updatedirstate a little earlier and reuse it

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7899

AFFECTED FILES
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -2507,7 +2507,8 @@
 ### apply phase
 if not branchmerge:  # just jump to the new rev
 fp1, fp2, xp1, xp2 = fp2, nullid, xp2, b''
-if not partial and not wc.isinmemory():
+updatedirstate = not partial and not wc.isinmemory()
+if updatedirstate:
 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
 # note that we're in the middle of an update
 repo.vfs.write(b'updatestate', p2.hex())
@@ -2553,7 +2554,6 @@
 )
 )
 
-updatedirstate = not partial and not wc.isinmemory()
 wantfiledata = updatedirstate and not branchmerge
 stats, getfiledata = applyupdates(
 repo, actions, wc, p2, overwrite, wantfiledata, labels=labels



To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7897: rebase: fix bug where `--collapse` would apply diff on missing file

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Even though the file was missing, the rebase would succeed.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7897

AFFECTED FILES
  hgext/rebase.py
  tests/test-rebase-collapse.t

CHANGE DETAILS

diff --git a/tests/test-rebase-collapse.t b/tests/test-rebase-collapse.t
--- a/tests/test-rebase-collapse.t
+++ b/tests/test-rebase-collapse.t
@@ -288,6 +288,18 @@
 
   $ hg rebase -s F --dest I --collapse # root (F) is not a merge
   rebasing 6:c82b08f646f1 "F" (F)
+  file 'E' was deleted in local [dest] but was modified in other [source].
+  You can use (c)hanged version, leave (d)eleted, or leave (u)nresolved.
+  What do you want to do? u
+  unresolved conflicts (see hg resolve, then hg rebase --continue)
+  [1]
+
+  $ echo F > E
+  $ hg resolve -m
+  (no more unresolved files)
+  continue: hg rebase --continue
+  $ hg rebase -c
+  rebasing 6:c82b08f646f1 "F" (F)
   rebasing 7:a6db7fa104e1 "G" (G)
   rebasing 8:e1d201b72d91 "H" (H tip)
   saved backup bundle to 
$TESTTMP/external-parent/.hg/strip-backup/c82b08f646f1-f2721fbf-rebase.hg
diff --git a/hgext/rebase.py b/hgext/rebase.py
--- a/hgext/rebase.py
+++ b/hgext/rebase.py
@@ -1486,15 +1486,16 @@
 repo.ui.debug(b" merge against %d:%s\n" % (rev, ctx))
 if base is not None:
 repo.ui.debug(b"   detach base %d:%s\n" % (base, repo[base]))
-# When collapsing in-place, the parent is the common ancestor, we
-# have to allow merging with it.
+
+# See explanation in merge.graft()
+mergeancestor = repo.changelog.isancestor(p1ctx.node(), ctx.node())
 stats = mergemod.update(
 repo,
 rev,
 branchmerge=True,
 force=True,
 ancestor=base,
-mergeancestor=collapse,
+mergeancestor=mergeancestor,
 labels=[b'dest', b'source'],
 wc=wctx,
 )



To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7898: merge: don't call update hook when using in-memory context

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  I'm pretty sure many hook implementors will assume that they can
  inspect the working copy and/or dirstate parents when the hook is
  called, so I don't think we should call the hook when using an
  in-memory context. The new behavior matches that of the preupdate
  hook.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7898

AFFECTED FILES
  mercurial/merge.py

CHANGE DETAILS

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -2574,7 +2574,7 @@
 if not branchmerge:
 sparse.prunetemporaryincludes(repo)
 
-if not partial:
+if updatedirstate:
 repo.hook(
 b'update', parent1=xp1, parent2=xp2, error=stats.unresolvedcount
 )



To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7896: rebase: extract a variable for a repeated `repo[p1]`

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  I'll add another use site in the next patch.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7896

AFFECTED FILES
  hgext/rebase.py

CHANGE DETAILS

diff --git a/hgext/rebase.py b/hgext/rebase.py
--- a/hgext/rebase.py
+++ b/hgext/rebase.py
@@ -1469,11 +1469,12 @@
 """Rebase a single revision rev on top of p1 using base as merge 
ancestor"""
 # Merge phase
 # Update to destination and merge it with local
+p1ctx = repo[p1]
 if wctx.isinmemory():
-wctx.setbase(repo[p1])
+wctx.setbase(p1ctx)
 else:
 if repo[b'.'].rev() != p1:
-repo.ui.debug(b" update to %d:%s\n" % (p1, repo[p1]))
+repo.ui.debug(b" update to %d:%s\n" % (p1, p1ctx))
 mergemod.update(repo, p1, branchmerge=False, force=True)
 else:
 repo.ui.debug(b" already in destination\n")



To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7787: rust-nodemap: building blocks for nodetree structures

2020-01-15 Thread gracinet (Georges Racinet)
gracinet added inline comments.
gracinet marked 2 inline comments as done.

INLINE COMMENTS

> gracinet wrote in nodemap.rs:111
> Nice, thanks for the tip

So, that gives formatting with braces, hence for consistency I changed the 
`block!` macro, too.

I didn't keep the hexadecimal formatting, because it'd now lead to lots of `\"` 
making the tests less readable.

An upside of this is that it's now really consistent with `block!`. A downside 
is that someone using it for real debugging with input given in hexadecimal 
would presumably have to mentally convert hexadecimal nybbles to their decimal 
form.
It would have been a bit of a drag in the initial development effort, but I 
don't think
that'll be a problem in the future: either it'll be on small data or with 
different tools anyway.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7787/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7787

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7819: rust-nodemap: core implementation for shortest

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19331.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7819?vs=19139&id=19331

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7819/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7819

AFFECTED FILES
  rust/hg-core/src/revlog/node.rs
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -16,6 +16,7 @@
 node::get_nybble, node::NULL_NODE, Node, NodeError, NodePrefix,
 NodePrefixRef, Revision, RevlogIndex, NULL_REVISION,
 };
+use std::cmp::max;
 use std::fmt;
 use std::mem;
 use std::ops::Deref;
@@ -47,6 +48,20 @@
 prefix: NodePrefixRef<'a>,
 ) -> Result, NodeMapError>;
 
+/// Give the size of the shortest node prefix that determines
+/// the revision uniquely.
+///
+/// From a binary node prefix, if it is matched in the node map, this
+/// returns the number of hexadecimal digits that would had sufficed
+/// to find the revision uniquely.
+///
+/// Returns `None` if no `Revision` could be found for the prefix.
+fn shortest_bin<'a>(
+,
+idx:  RevlogIndex,
+node_prefix: NodePrefixRef<'a>,
+) -> Result, NodeMapError>;
+
 fn find_hex(
 ,
 idx:  RevlogIndex,
@@ -54,6 +69,16 @@
 ) -> Result, NodeMapError> {
 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
 }
+
+/// Same as `shortest_bin`, with the hexadecimal representation of the
+/// prefix as input.
+fn shortest_hex(
+,
+idx:  RevlogIndex,
+prefix: ,
+) -> Result, NodeMapError> {
+self.shortest_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
+}
 }
 
 pub trait MutableNodeMap: NodeMap {
@@ -215,20 +240,24 @@
 fn validate_candidate<'p>(
 idx:  RevlogIndex,
 prefix: NodePrefixRef<'p>,
-rev: Option,
-) -> Result, NodeMapError> {
-if prefix.is_prefix_of(_NODE) {
-// NULL_REVISION always matches a prefix made only of zeros
+cand: (Option, usize),
+) -> Result<(Option, usize), NodeMapError> {
+let (rev, steps) = cand;
+if let Some(nz_nybble) = prefix.first_different_nybble(_NODE) {
+rev.map_or(Ok((None, steps)), |r| {
+has_prefix_or_none(idx, prefix, r)
+.map(|opt| (opt, max(steps, nz_nybble + 1)))
+})
+} else {
+// the prefix is only made of zeros; NULL_REVISION always matches it
 // and any other *valid* result is an ambiguity
 match rev {
-None => Ok(Some(NULL_REVISION)),
+None => Ok((Some(NULL_REVISION), steps + 1)),
 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
-None => Ok(Some(NULL_REVISION)),
+None => Ok((Some(NULL_REVISION), steps + 1)),
 _ => Err(NodeMapError::MultipleResults),
 },
 }
-} else {
-rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
 }
 }
 
@@ -308,10 +337,10 @@
 fn lookup<'p>(
 ,
 prefix: NodePrefixRef<'p>,
-) -> Result, NodeMapError> {
-for (leaf, _, _, opt) in self.visit(prefix) {
+) -> Result<(Option, usize), NodeMapError> {
+for (i, (leaf, _, _, opt)) in self.visit(prefix).enumerate() {
 if leaf {
-return Ok(opt);
+return Ok((opt, i + 1));
 }
 }
 Err(NodeMapError::MultipleResults)
@@ -540,6 +569,16 @@
 prefix: NodePrefixRef<'a>,
 ) -> Result, NodeMapError> {
 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+.map(|(opt, _shortest)| opt)
+}
+
+fn shortest_bin<'a>(
+,
+idx:  RevlogIndex,
+prefix: NodePrefixRef<'a>,
+) -> Result, NodeMapError> {
+validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+.map(|(opt, shortest)| opt.map(|_rev| shortest))
 }
 }
 
@@ -665,6 +704,7 @@
 assert_eq!(nt.find_hex(, "01"), Ok(Some(9)));
 assert_eq!(nt.find_hex(, "00"), Err(MultipleResults));
 assert_eq!(nt.find_hex(, "00a"), Ok(Some(0)));
+assert_eq!(nt.shortest_hex(, "00a"), Ok(Some(3)));
 assert_eq!(nt.find_hex(, "000"), Ok(Some(NULL_REVISION)));
 }
 
@@ -684,8 +724,10 @@
 };
 assert_eq!(nt.find_hex(, "10")?, Some(1));
 assert_eq!(nt.find_hex(, "c")?, Some(2));
+assert_eq!(nt.shortest_hex(, "c")?, Some(1));
 assert_eq!(nt.find_hex(, "00"), Err(MultipleResults));
 assert_eq!(nt.find_hex(, "000")?, Some(NULL_REVISION));
+assert_eq!(nt.shortest_hex(, "000")?, Some(3));
 assert_eq!(nt.find_hex(, "01")?, Some(9));
 Ok(())
 }
@@ -721,6 +763,13 @@
 self.nt.find_hex(, prefix)
 }
 
+fn 

D7798: rust-nodemap: special case for prefixes of NULL_NODE

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19330.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7798?vs=19138&id=19330

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7798/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7798

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -13,8 +13,8 @@
 //! is used in a more abstract context.
 
 use super::{
-node::get_nybble, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
-RevlogIndex,
+node::get_nybble, node::NULL_NODE, Node, NodeError, NodePrefix,
+NodePrefixRef, Revision, RevlogIndex, NULL_REVISION,
 };
 use std::fmt;
 use std::mem;
@@ -207,6 +207,31 @@
 })
 }
 
+/// validate that the candidate's node starts indeed with given prefix,
+/// and treat ambiguities related to `NULL_REVISION`.
+///
+/// From the data in the NodeTree, one can only conclude that some
+/// revision is the only one for a *subprefix* of the one being looked up.
+fn validate_candidate<'p>(
+idx:  RevlogIndex,
+prefix: NodePrefixRef<'p>,
+rev: Option,
+) -> Result, NodeMapError> {
+if prefix.is_prefix_of(_NODE) {
+// NULL_REVISION always matches a prefix made only of zeros
+// and any other *valid* result is an ambiguity
+match rev {
+None => Ok(Some(NULL_REVISION)),
+Some(r) => match has_prefix_or_none(idx, prefix, r)? {
+None => Ok(Some(NULL_REVISION)),
+_ => Err(NodeMapError::MultipleResults),
+},
+}
+} else {
+rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
+}
+}
+
 impl NodeTree {
 /// Initiate a NodeTree from an immutable slice-like of `Block`
 ///
@@ -280,9 +305,6 @@
 }
 
 /// Main working method for `NodeTree` searches
-///
-/// This partial implementation lacks
-/// - special cases for NULL_REVISION
 fn lookup<'p>(
 ,
 prefix: NodePrefixRef<'p>,
@@ -517,9 +539,7 @@
 idx:  RevlogIndex,
 prefix: NodePrefixRef<'a>,
 ) -> Result, NodeMapError> {
-self.lookup(prefix.clone()).and_then(|opt| {
-opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev))
-})
+validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
 }
 }
 
@@ -643,8 +663,9 @@
 
 assert_eq!(nt.find_hex(, "0"), Err(MultipleResults));
 assert_eq!(nt.find_hex(, "01"), Ok(Some(9)));
-assert_eq!(nt.find_hex(, "00"), Ok(Some(0)));
+assert_eq!(nt.find_hex(, "00"), Err(MultipleResults));
 assert_eq!(nt.find_hex(, "00a"), Ok(Some(0)));
+assert_eq!(nt.find_hex(, "000"), Ok(Some(NULL_REVISION)));
 }
 
 #[test]
@@ -663,7 +684,8 @@
 };
 assert_eq!(nt.find_hex(, "10")?, Some(1));
 assert_eq!(nt.find_hex(, "c")?, Some(2));
-assert_eq!(nt.find_hex(, "00")?, Some(0));
+assert_eq!(nt.find_hex(, "00"), Err(MultipleResults));
+assert_eq!(nt.find_hex(, "000")?, Some(NULL_REVISION));
 assert_eq!(nt.find_hex(, "01")?, Some(9));
 Ok(())
 }



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7796: rust-nodemap: input/output primitives

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19329.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7796?vs=19137&id=19329

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7796/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7796

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -17,8 +17,10 @@
 RevlogIndex,
 };
 use std::fmt;
+use std::mem;
 use std::ops::Deref;
 use std::ops::Index;
+use std::slice;
 
 #[derive(Debug, PartialEq)]
 pub enum NodeMapError {
@@ -132,6 +134,8 @@
 #[derive(Clone, PartialEq)]
 pub struct Block([RawElement; 16]);
 
+pub const BLOCK_SIZE: usize = mem::size_of::();
+
 impl Block {
 fn new() -> Self {
 Block([-1; 16])
@@ -219,6 +223,57 @@
 }
 }
 
+/// Create from an opaque bunch of bytes
+///
+/// The created `NodeTreeBytes` is taken after the fixed `offset` from
+/// `buffer`, of which exactly `amount` bytes are used.
+///
+/// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
+/// - `offset` allows for the final file format to include fixed data
+///   (generation number, behavioural flags)
+/// - `amount` is expressed in bytes, and is not automatically derived from
+///   `bytes`, so that a caller that manages them atomically can perform
+///   temporary disk serializations and still rollback easily if needed.
+///   First use-case for this would be to support Mercurial shell hooks.
+///
+/// panics if `buffer` is smaller than `offset + amount`
+pub fn load_bytes(
+bytes: Box + Send>,
+offset: usize,
+amount: usize,
+) -> Self {
+NodeTree::new(Box::new(NodeTreeBytes::new(bytes, offset, amount)))
+}
+
+/// Retrieve added `Block` and the original immutable data
+pub fn into_readonly_and_added(
+self,
+) -> (Box + Send>, Vec) {
+let mut vec = self.growable;
+let readonly = self.readonly;
+if readonly.last() != Some() {
+vec.push(self.root);
+}
+(readonly, vec)
+}
+
+/// Retrieve added `Blocks` as bytes, ready to be written to persistent
+/// storage
+pub fn into_readonly_and_added_bytes(
+self,
+) -> (Box + Send>, Vec) {
+let (readonly, vec) = self.into_readonly_and_added();
+let bytes = unsafe {
+Vec::from_raw_parts(
+vec.as_ptr() as *mut u8,
+vec.len() * BLOCK_SIZE,
+vec.capacity() * BLOCK_SIZE,
+)
+};
+mem::forget(vec);
+(readonly, bytes)
+}
+
 /// Total number of blocks
 fn len() -> usize {
 self.readonly.len() + self.growable.len() + 1
@@ -364,6 +419,42 @@
 }
 }
 
+pub struct NodeTreeBytes {
+buffer: Box + Send>,
+offset: usize,
+len_in_blocks: usize,
+}
+
+impl NodeTreeBytes {
+fn new(
+buffer: Box + Send>,
+offset: usize,
+amount: usize,
+) -> Self {
+assert!(buffer.len() >= offset + amount);
+let len_in_blocks = amount / BLOCK_SIZE;
+NodeTreeBytes {
+buffer,
+offset,
+len_in_blocks,
+}
+}
+}
+
+impl Deref for NodeTreeBytes {
+type Target = [Block];
+
+fn deref() -> &[Block] {
+unsafe {
+slice::from_raw_parts(
+().as_ptr().offset(self.offset as isize)
+as *const Block,
+self.len_in_blocks,
+)
+}
+}
+}
+
 struct NodeTreeVisitor<'n, 'p> {
 nt: &'n NodeTree,
 prefix: NodePrefixRef<'p>,
@@ -708,4 +799,30 @@
 
 Ok(())
 }
+
+#[test]
+fn test_into_added_empty() {
+assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
+assert!(sample_nodetree()
+.into_readonly_and_added_bytes()
+.1
+.is_empty());
+}
+
+#[test]
+fn test_into_added_bytes() -> Result<(), NodeMapError> {
+let mut idx = TestNtIndex::new();
+idx.insert(0, "1234")?;
+let mut idx = idx.commit();
+idx.insert(4, "cafe")?;
+let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
+
+// only the root block has been changed
+assert_eq!(bytes.len(), BLOCK_SIZE);
+// big endian for -2
+assert_eq!([4..2 * 4], [255, 255, 255, 254]);
+// big endian for -6
+assert_eq!([12 * 4..13 * 4], [255, 255, 255, 250]);
+Ok(())
+}
 }



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7795: rust-nodemap: insert method

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19328.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7795?vs=19044&id=19328

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7795/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7795

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -12,7 +12,10 @@
 //! Following existing implicit conventions, the "nodemap" terminology
 //! is used in a more abstract context.
 
-use super::{NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex};
+use super::{
+node::get_nybble, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
+RevlogIndex,
+};
 use std::fmt;
 use std::ops::Deref;
 use std::ops::Index;
@@ -51,6 +54,15 @@
 }
 }
 
+pub trait MutableNodeMap: NodeMap {
+fn insert(
+ self,
+index: ,
+node: ,
+rev: Revision,
+) -> Result<(), NodeMapError>;
+}
+
 /// Low level NodeTree [`Blocks`] elements
 ///
 /// These are exactly as for instance on persistent storage.
@@ -240,6 +252,116 @@
 done: false,
 }
 }
+/// Return a mutable reference for `Block` at index `idx`.
+///
+/// If `idx` lies in the immutable area, then the reference is to
+/// a newly appended copy.
+///
+/// Returns (new_idx, glen, mut_ref) where
+///
+/// - `new_idx` is the index of the mutable `Block`
+/// - `mut_ref` is a mutable reference to the mutable Block.
+/// - `glen` is the new length of `self.growable`
+///
+/// Note: the caller wouldn't be allowed to query `self.growable.len()`
+/// itself because of the mutable borrow taken with the returned `Block`
+fn mutable_block( self, idx: usize) -> (usize,  Block, usize) {
+let ro_blocks = 
+let ro_len = ro_blocks.len();
+let glen = self.growable.len();
+if idx < ro_len {
+// TODO OPTIM I think this makes two copies
+self.growable.push(ro_blocks[idx].clone());
+(glen + ro_len,  self.growable[glen], glen + 1)
+} else if glen + ro_len == idx {
+(idx,  self.root, glen)
+} else {
+(idx,  self.growable[idx - ro_len], glen)
+}
+}
+
+/// Main insertion method
+///
+/// This will dive in the node tree to find the deepest `Block` for
+/// `node`, split it as much as needed and record `node` in there.
+/// The method then backtracks, updating references in all the visited
+/// blocks from the root.
+///
+/// All the mutated `Block` are copied first to the growable part if
+/// needed. That happens for those in the immutable part except the root.
+pub fn insert(
+ self,
+index: ,
+node: ,
+rev: Revision,
+) -> Result<(), NodeMapError> {
+let ro_len = ();
+
+let mut visit_steps: Vec<(usize, u8, Option)> = self
+.visit(node.into())
+.map(|(_leaf, visit, nybble, rev_opt)| (visit, nybble, rev_opt))
+.collect();
+let read_nybbles = visit_steps.len();
+// visit_steps cannot be empty, since we always visit the root block
+let (deepest_idx, mut nybble, rev_opt) = visit_steps.pop().unwrap();
+let (mut block_idx, mut block, mut glen) =
+self.mutable_block(deepest_idx);
+
+match rev_opt {
+None => {
+// Free slot in the deepest block: no splitting has to be done
+block.set(nybble, Element::Rev(rev));
+}
+Some(old_rev) => {
+let old_node = index.node(old_rev).ok_or_else(|| {
+NodeMapError::RevisionNotInIndex(old_rev)
+})?;
+if old_node == node {
+return Ok(()); // avoid creating lots of useless blocks
+}
+
+// Looping over the tail of nybbles in both nodes, creating
+// new blocks until we find the difference
+let mut new_block_idx = ro_len + glen;
+for nybble_pos in read_nybbles..40 {
+block.set(nybble, Element::Block(new_block_idx));
+
+let new_nybble = get_nybble(nybble_pos, node);
+let old_nybble = get_nybble(nybble_pos, old_node);
+
+if old_nybble == new_nybble {
+self.growable.push(Block::new());
+block =  self.growable[glen];
+glen += 1;
+new_block_idx += 1;
+nybble = new_nybble;
+} else {
+let mut new_block = Block::new();
+new_block.set(old_nybble, Element::Rev(old_rev));
+

D7788: rust-node: binary Node and conversion utilities

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19323.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7788?vs=19037&id=19323

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7788/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7788

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/matchers.rs
  rust/hg-core/src/revlog.rs
  rust/hg-core/src/revlog/node.rs
  rust/hg-core/src/utils.rs
  rust/hg-core/src/utils/hg_path.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/hg_path.rs 
b/rust/hg-core/src/utils/hg_path.rs
--- a/rust/hg-core/src/utils/hg_path.rs
+++ b/rust/hg-core/src/utils/hg_path.rs
@@ -157,7 +157,7 @@
 return Err(HgPathError::ContainsNullByte(
 bytes.to_vec(),
 index,
-))
+));
 }
 b'/' => {
 if previous_byte.is_some() && previous_byte == Some(b'/') {
diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -38,10 +38,7 @@
 /// use crate::hg::utils::replace_slice;
 /// let mut line = b"I hate writing tests!".to_vec();
 /// replace_slice( line, b"hate", b"love");
-/// assert_eq!(
-/// line,
-/// b"I love writing tests!".to_vec()
-/// );
+/// assert_eq!(line, b"I love writing tests!".to_vec());
 /// ```
 pub fn replace_slice(buf:  [T], from: &[T], to: &[T])
 where
@@ -86,18 +83,9 @@
 
 /// ```
 /// use hg::utils::SliceExt;
-/// assert_eq!(
-/// b"  to trim  ".trim(),
-/// b"to trim"
-/// );
-/// assert_eq!(
-/// b"to trim  ".trim(),
-/// b"to trim"
-/// );
-/// assert_eq!(
-/// b"  to trim".trim(),
-/// b"to trim"
-/// );
+/// assert_eq!(b"  to trim  ".trim(), b"to trim");
+/// assert_eq!(b"to trim  ".trim(), b"to trim");
+/// assert_eq!(b"  to trim".trim(), b"to trim");
 /// ```
 fn trim() -> &[u8] {
 self.trim_start().trim_end()
diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/revlog/node.rs
@@ -0,0 +1,91 @@
+// Copyright 2019-2020 Georges Racinet 
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Definitions and utilities for Revision nodes
+//!
+//! In Mercurial code base, it is customary to call "a node" the binary SHA
+//! of a revision.
+
+use std::num::ParseIntError;
+
+/// Binary revisions SHA
+pub type Node = [u8; 20];
+
+/// The node value for NULL_REVISION
+pub const NULL_NODE: Node = [0; 20];
+
+#[derive(Debug, PartialEq)]
+pub enum NodeError {
+ExactLengthRequired(String),
+NotHexadecimal,
+}
+
+pub fn node_from_hex(hex: ) -> Result {
+if hex.len() != 40 {
+return Err(NodeError::ExactLengthRequired(hex.to_string()));
+}
+let mut node = [0; 20];
+for i in 0..20 {
+node[i] = u8::from_str_radix([i * 2..i * 2 + 2], 16)?
+}
+Ok(node)
+}
+
+pub fn node_to_hex(n: ) -> String {
+let as_vec: Vec = n.iter().map(|b| format!("{:02x}", b)).collect();
+as_vec.join("")
+}
+
+/// Retrieve the `i`th half-byte from a bytes slice
+///
+/// This is also the `i`th hexadecimal digit in numeric form,
+/// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
+pub fn get_nybble(i: usize, s: &[u8]) -> u8 {
+if i % 2 == 0 {
+s[i / 2] >> 4
+} else {
+s[i / 2] & 0x0f
+}
+}
+
+impl From for NodeError {
+fn from(_: ParseIntError) -> Self {
+NodeError::NotHexadecimal
+}
+}
+
+#[cfg(test)]
+mod tests {
+use super::*;
+
+const SAMPLE_NODE_HEX:  = "0123456789abcdeffedcba9876543210deadbeef";
+
+#[test]
+fn test_node_from_hex() {
+assert_eq!(
+node_from_hex(SAMPLE_NODE_HEX),
+Ok([
+0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc,
+0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef
+])
+);
+let short = "0123456789abcdeffedcba9876543210";
+assert_eq!(
+node_from_hex(short),
+Err(NodeError::ExactLengthRequired(short.to_string())),
+);
+}
+
+#[test]
+fn test_node_to_hex() {
+assert_eq!(
+node_to_hex(&[
+0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc,
+0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef
+]),
+SAMPLE_NODE_HEX
+);
+}
+}
diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs
--- a/rust/hg-core/src/revlog.rs
+++ b/rust/hg-core/src/revlog.rs
@@ -5,7 +5,9 @@
 // GNU General Public License version 2 or any later version.
 //! Mercurial concepts for handling revision history
 
+pub mod 

D7794: rust-nodemap: generic NodeTreeVisitor

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19327.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7794?vs=19043&id=19327

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7794/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7794

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -220,17 +220,58 @@
 ,
 prefix: NodePrefixRef<'p>,
 ) -> Result, NodeMapError> {
-let mut visit = self.len() - 1;
-for i in 0..prefix.len() {
-let nybble = prefix.get_nybble(i);
-match self[visit].get(nybble) {
-Element::None => return Ok(None),
-Element::Rev(r) => return Ok(Some(r)),
-Element::Block(idx) => visit = idx,
+for (leaf, _, _, opt) in self.visit(prefix) {
+if leaf {
+return Ok(opt);
 }
 }
 Err(NodeMapError::MultipleResults)
 }
+
+fn visit<'n, 'p>(
+&'n self,
+prefix: NodePrefixRef<'p>,
+) -> NodeTreeVisitor<'n, 'p> {
+NodeTreeVisitor {
+nt: self,
+prefix: prefix,
+visit: self.len() - 1,
+nybble_idx: 0,
+done: false,
+}
+}
+}
+
+struct NodeTreeVisitor<'n, 'p> {
+nt: &'n NodeTree,
+prefix: NodePrefixRef<'p>,
+visit: usize,
+nybble_idx: usize,
+done: bool,
+}
+
+impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
+type Item = (bool, usize, u8, Option);
+
+fn next( self) -> Option {
+if self.done || self.nybble_idx >= self.prefix.len() {
+return None;
+}
+
+let nybble = self.prefix.get_nybble(self.nybble_idx);
+let visit = self.visit;
+let (leaf, opt) = match self.nt[visit].get(nybble) {
+Element::None => (true, None),
+Element::Rev(r) => (true, Some(r)),
+Element::Block(idx) => {
+self.visit = idx;
+(false, None)
+}
+};
+self.nybble_idx += 1;
+self.done = leaf;
+Some((leaf, visit, nybble, opt))
+}
 }
 
 impl From> for NodeTree {



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7793: rust-nodemap: mutable NodeTree data structure

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19326.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7793?vs=19136&id=19326

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7793/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7793

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -146,16 +146,31 @@
 }
 }
 
-/// A 16-radix tree with the root block at the end
+/// A mutable 16-radix tree with the root block logically at the end
+///
+/// Because of the append only nature of our node trees, we need to
+/// keep the original untouched and store new blocks separately.
+///
+/// The mutable root `Block` is kept apart so that we don't have to rebump
+/// it on each insertion.
 pub struct NodeTree {
 readonly: Box + Send>,
+growable: Vec,
+root: Block,
 }
 
 impl Index for NodeTree {
 type Output = Block;
 
 fn index(, i: usize) ->  {
-[i]
+let ro_len = self.readonly.len();
+if i < ro_len {
+[i]
+} else if i == ro_len + self.growable.len() {
+
+} else {
+[i - ro_len]
+}
 }
 }
 
@@ -177,8 +192,24 @@
 }
 
 impl NodeTree {
+/// Initiate a NodeTree from an immutable slice-like of `Block`
+///
+/// We keep `readonly` and clone its root block if it isn't empty.
+fn new(readonly: Box + Send>) -> Self {
+let root = readonly
+.last()
+.map(|b| b.clone())
+.unwrap_or_else(|| Block::new());
+NodeTree {
+readonly: readonly,
+growable: Vec::new(),
+root: root,
+}
+}
+
+/// Total number of blocks
 fn len() -> usize {
-self.readonly.len()
+self.readonly.len() + self.growable.len() + 1
 }
 
 /// Main working method for `NodeTree` searches
@@ -189,11 +220,7 @@
 ,
 prefix: NodePrefixRef<'p>,
 ) -> Result, NodeMapError> {
-let len = self.len();
-if len == 0 {
-return Ok(None);
-}
-let mut visit = len - 1;
+let mut visit = self.len() - 1;
 for i in 0..prefix.len() {
 let nybble = prefix.get_nybble(i);
 match self[visit].get(nybble) {
@@ -208,16 +235,18 @@
 
 impl From> for NodeTree {
 fn from(vec: Vec) -> Self {
-NodeTree {
-readonly: Box::new(vec),
-}
+Self::new(Box::new(vec))
 }
 }
 
 impl fmt::Debug for NodeTree {
 fn fmt(, f:  fmt::Formatter<'_>) -> fmt::Result {
-let blocks: &[Block] = &*self.readonly;
-write!(f, "readonly: {:?}", blocks)
+let readonly: &[Block] = &*self.readonly;
+write!(
+f,
+"readonly: {:?}, growable: {:?}, root: {:?}",
+readonly, self.growable, self.root
+)
 }
 }
 
@@ -318,7 +347,9 @@
 assert_eq!(
 format!("{:?}", nt),
 "readonly: \
- [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}]"
+ [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
+ growable: [], \
+ root: {0: Block(1), 1: Rev(1)}",
 );
 }
 
@@ -327,7 +358,7 @@
 let mut idx: TestIndex = HashMap::new();
 pad_insert( idx, 1, "1234deadcafe");
 
-let nt = NodeTree::from(vec![block![1: Rev(1)]]);
+let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
 assert_eq!(nt.find_hex(, "1")?, Some(1));
 assert_eq!(nt.find_hex(, "12")?, Some(1));
 assert_eq!(nt.find_hex(, "1234de")?, Some(1));
@@ -349,4 +380,25 @@
 assert_eq!(nt.find_hex(, "00"), Ok(Some(0)));
 assert_eq!(nt.find_hex(, "00a"), Ok(Some(0)));
 }
+
+#[test]
+fn test_mutated_find() -> Result<(), NodeMapError> {
+let mut idx = TestIndex::new();
+pad_insert( idx, 9, "012");
+pad_insert( idx, 0, "00a");
+pad_insert( idx, 2, "cafe");
+pad_insert( idx, 3, "15");
+pad_insert( idx, 1, "10");
+
+let nt = NodeTree {
+readonly: sample_nodetree().readonly,
+growable: vec![block![0: Rev(1), 5: Rev(3)]],
+root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
+};
+assert_eq!(nt.find_hex(, "10")?, Some(1));
+assert_eq!(nt.find_hex(, "c")?, Some(2));
+assert_eq!(nt.find_hex(, "00")?, Some(0));
+assert_eq!(nt.find_hex(, "01")?, Some(9));
+Ok(())
+}
 }



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7792: rust-nodemap: abstracting the indexing

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19325.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7792?vs=19135&id=19325

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7792/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7792

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -15,6 +15,7 @@
 use super::{NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex};
 use std::fmt;
 use std::ops::Deref;
+use std::ops::Index;
 
 #[derive(Debug, PartialEq)]
 pub enum NodeMapError {
@@ -150,6 +151,14 @@
 readonly: Box + Send>,
 }
 
+impl Index for NodeTree {
+type Output = Block;
+
+fn index(, i: usize) ->  {
+[i]
+}
+}
+
 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
 fn has_prefix_or_none<'p>(
 idx:  RevlogIndex,
@@ -168,6 +177,10 @@
 }
 
 impl NodeTree {
+fn len() -> usize {
+self.readonly.len()
+}
+
 /// Main working method for `NodeTree` searches
 ///
 /// This partial implementation lacks
@@ -176,14 +189,14 @@
 ,
 prefix: NodePrefixRef<'p>,
 ) -> Result, NodeMapError> {
-let blocks: &[Block] = &*self.readonly;
-if blocks.is_empty() {
+let len = self.len();
+if len == 0 {
 return Ok(None);
 }
-let mut visit = blocks.len() - 1;
+let mut visit = len - 1;
 for i in 0..prefix.len() {
 let nybble = prefix.get_nybble(i);
-match blocks[visit].get(nybble) {
+match self[visit].get(nybble) {
 Element::None => return Ok(None),
 Element::Rev(r) => return Ok(Some(r)),
 Element::Block(idx) => visit = idx,



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7791: rust-nodemap: NodeMap trait with simplest implementor

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19324.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7791?vs=19134&id=19324

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7791/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7791

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -12,8 +12,43 @@
 //! Following existing implicit conventions, the "nodemap" terminology
 //! is used in a more abstract context.
 
-use super::Revision;
+use super::{NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex};
 use std::fmt;
+use std::ops::Deref;
+
+#[derive(Debug, PartialEq)]
+pub enum NodeMapError {
+MultipleResults,
+InvalidNodePrefix(NodeError),
+/// A `Revision` stored in the nodemap could not be found in the index
+RevisionNotInIndex(Revision),
+}
+
+impl From for NodeMapError {
+fn from(err: NodeError) -> Self {
+NodeMapError::InvalidNodePrefix(err)
+}
+}
+
+/// Mapping system from Mercurial nodes to revision numbers.
+///
+/// Many methods in this trait work in conjunction with a `RevlogIndex`
+/// whose data should not be owned by the `NodeMap`.
+pub trait NodeMap {
+fn find_bin<'a>(
+,
+idx:  RevlogIndex,
+prefix: NodePrefixRef<'a>,
+) -> Result, NodeMapError>;
+
+fn find_hex(
+,
+idx:  RevlogIndex,
+prefix: ,
+) -> Result, NodeMapError> {
+self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
+}
+}
 
 /// Low level NodeTree [`Blocks`] elements
 ///
@@ -110,9 +145,87 @@
 }
 }
 
+/// A 16-radix tree with the root block at the end
+pub struct NodeTree {
+readonly: Box + Send>,
+}
+
+/// Return `None` unless the `Node` for `rev` has given prefix in `index`.
+fn has_prefix_or_none<'p>(
+idx:  RevlogIndex,
+prefix: NodePrefixRef<'p>,
+rev: Revision,
+) -> Result, NodeMapError> {
+idx.node(rev)
+.ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
+.map(|node| {
+if prefix.is_prefix_of(node) {
+Some(rev)
+} else {
+None
+}
+})
+}
+
+impl NodeTree {
+/// Main working method for `NodeTree` searches
+///
+/// This partial implementation lacks
+/// - special cases for NULL_REVISION
+fn lookup<'p>(
+,
+prefix: NodePrefixRef<'p>,
+) -> Result, NodeMapError> {
+let blocks: &[Block] = &*self.readonly;
+if blocks.is_empty() {
+return Ok(None);
+}
+let mut visit = blocks.len() - 1;
+for i in 0..prefix.len() {
+let nybble = prefix.get_nybble(i);
+match blocks[visit].get(nybble) {
+Element::None => return Ok(None),
+Element::Rev(r) => return Ok(Some(r)),
+Element::Block(idx) => visit = idx,
+}
+}
+Err(NodeMapError::MultipleResults)
+}
+}
+
+impl From> for NodeTree {
+fn from(vec: Vec) -> Self {
+NodeTree {
+readonly: Box::new(vec),
+}
+}
+}
+
+impl fmt::Debug for NodeTree {
+fn fmt(, f:  fmt::Formatter<'_>) -> fmt::Result {
+let blocks: &[Block] = &*self.readonly;
+write!(f, "readonly: {:?}", blocks)
+}
+}
+
+impl NodeMap for NodeTree {
+fn find_bin<'a>(
+,
+idx:  RevlogIndex,
+prefix: NodePrefixRef<'a>,
+) -> Result, NodeMapError> {
+self.lookup(prefix.clone()).and_then(|opt| {
+opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev))
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
+use super::NodeMapError::*;
 use super::*;
+use crate::revlog::node::{node_from_hex, Node};
+use std::collections::HashMap;
 
 /// Creates a `Block` using a syntax close to the `Debug` output
 macro_rules! block {
@@ -157,4 +270,70 @@
 assert_eq!(block.get(2), Element::Rev(0));
 assert_eq!(block.get(4), Element::Rev(1));
 }
+
+type TestIndex = HashMap;
+
+impl RevlogIndex for TestIndex {
+fn node(, rev: Revision) -> Option<> {
+self.get()
+}
+
+fn len() -> usize {
+self.len()
+}
+}
+
+/// Pad hexadecimal Node prefix with zeros on the right, then insert
+///
+/// This is just to avoid having to repeatedly write 40 hexadecimal
+/// digits for test data.
+fn pad_insert(idx:  TestIndex, rev: Revision, hex: ) {
+idx.insert(rev, node_from_hex(!("{:0<40}", hex)).unwrap());
+}
+
+fn sample_nodetree() -> NodeTree {
+NodeTree::from(vec![
+block![0: Rev(9)],
+block![0: Rev(0), 1: Rev(9)],
+block![0: Block(1), 1:Rev(1)],
+])
+}
+
+#[test]
+fn 

D7787: rust-nodemap: building blocks for nodetree structures

2020-01-15 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 19322.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7787?vs=19133&id=19322

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7787/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7787

AFFECTED FILES
  rust/hg-core/src/revlog.rs
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs 
b/rust/hg-core/src/revlog/nodemap.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -0,0 +1,160 @@
+// Copyright 2018-2020 Georges Racinet 
+//   and Mercurial contributors
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+//! Indexing facilities for fast retrieval of `Revision` from `Node`
+//!
+//! This provides a variation on the radix tree with valency 16 that is
+//! provided as "nodetree" in revlog.c, ready for append-only persistence
+//! on disk.
+//!
+//! Following existing implicit conventions, the "nodemap" terminology
+//! is used in a more abstract context.
+
+use super::Revision;
+use std::fmt;
+
+/// Low level NodeTree [`Blocks`] elements
+///
+/// These are exactly as for instance on persistent storage.
+type RawElement = i32;
+
+/// High level representation of values in NodeTree
+/// [`Blocks`](struct.Block.html)
+///
+/// This is the high level representation that most algorithms should
+/// use.
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum Element {
+Rev(Revision),
+Block(usize),
+None,
+}
+
+impl From for Element {
+/// Conversion from low level representation, after endianity conversion.
+///
+/// See [`Block`](struct.Block.html) for explanation about the encoding.
+fn from(raw: RawElement) -> Element {
+if raw >= 0 {
+Element::Block(raw as usize)
+} else if raw == -1 {
+Element::None
+} else {
+Element::Rev(-raw - 2)
+}
+}
+}
+
+impl From for RawElement {
+fn from(elt: Element) -> RawElement {
+match elt {
+Element::None => 0,
+Element::Block(i) => i as RawElement,
+Element::Rev(rev) => -rev - 2,
+}
+}
+}
+
+/// A logical block of the `NodeTree`, packed with a fixed size.
+///
+/// These are always used in container types implementing `Index`,
+/// such as ``
+///
+/// As an array of integers, its ith element encodes that the
+/// ith potential edge from the block, representing the ith hexadecimal digit
+/// (nybble) `i` is either:
+///
+/// - absent (value -1)
+/// - another `Block` in the same indexable container (value ≥ 0)
+///  - a `Revision` leaf (value ≤ -2)
+///
+/// Endianity has to be fixed for consistency on shared storage across
+/// different architectures.
+///
+/// A key difference with the C `nodetree` is that we need to be
+/// able to represent the [`Block`] at index 0, hence -1 is the empty marker
+/// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
+///
+/// Another related difference is that `NULL_REVISION` (-1) is not
+/// represented at all, because we want an immutable empty nodetree
+/// to be valid.
+
+#[derive(Clone, PartialEq)]
+pub struct Block([RawElement; 16]);
+
+impl Block {
+fn new() -> Self {
+Block([-1; 16])
+}
+
+fn get(, nybble: u8) -> Element {
+Element::from(RawElement::from_be(self.0[nybble as usize]))
+}
+
+fn set( self, nybble: u8, elt: Element) {
+self.0[nybble as usize] = RawElement::to_be(elt.into())
+}
+}
+
+impl fmt::Debug for Block {
+/// sparse representation for testing and debugging purposes
+fn fmt(, f:  fmt::Formatter<'_>) -> fmt::Result {
+f.debug_map()
+.entries((0..16).filter_map(|i| match self.get(i) {
+Element::None => None,
+elt => Some((i, elt)),
+}))
+.finish()
+}
+}
+
+#[cfg(test)]
+mod tests {
+use super::*;
+
+/// Creates a `Block` using a syntax close to the `Debug` output
+macro_rules! block {
+{$($nybble:tt : $variant:ident($val:tt)),*} => (
+{
+let mut block = Block::new();
+$(block.set($nybble, Element::$variant($val)));*;
+block
+}
+)
+}
+
+#[test]
+fn test_block_debug() {
+let mut block = Block::new();
+block.set(1, Element::Rev(3));
+block.set(10, Element::Block(0));
+assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
+}
+
+#[test]
+fn test_block_macro() {
+let block = block! {5: Block(2)};
+assert_eq!(format!("{:?}", block), "{5: Block(2)}");
+
+let block = block! {13: Rev(15), 5: Block(2)};
+assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
+}
+
+#[test]
+fn test_raw_block() {
+let mut raw 

D7871: rust-utils: add util for canonical path

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19321.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19263&id=19321

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
+use crate::utils::hg_path::{
+path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError,
+};
+use crate::utils::path_auditor::PathAuditor;
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use same_file::is_same_file;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -260,9 +264,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -403,4 +460,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,7 +17,8 @@
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No newline 

D7866: rust-pathauditor: add Rust implementation of the `pathauditor`

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19317.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7866?vs=19258&id=19317

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7866/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7866

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils.rs
  rust/hg-core/src/utils/files.rs
  rust/hg-core/src/utils/hg_path.rs
  rust/hg-core/src/utils/path_auditor.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/path_auditor.rs 
b/rust/hg-core/src/utils/path_auditor.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/utils/path_auditor.rs
@@ -0,0 +1,235 @@
+// path_auditor.rs
+//
+// Copyright 2020
+// Raphaël Gomès ,
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use crate::utils::{
+files::{lower_clean, split_drive},
+find_slice_in_slice,
+hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
+};
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+
+/// Ensures that a path is valid for use in the repository i.e. does not use
+/// any banned components, does not traverse a symlink, etc.
+#[derive(Debug, Default)]
+pub struct PathAuditor {
+audited: HashSet,
+audited_dirs: HashSet,
+root: PathBuf,
+}
+
+impl PathAuditor {
+pub fn new(root: impl AsRef) -> Self {
+Self {
+root: root.as_ref().to_owned(),
+..Default::default()
+}
+}
+pub fn audit_path(
+ self,
+path: impl AsRef,
+) -> Result<(), HgPathError> {
+// TODO windows "localpath" normalization
+let path = path.as_ref();
+if path.is_empty() {
+return Ok(());
+}
+// TODO case normalization
+if self.audited.contains(path) {
+return Ok(());
+}
+// AIX ignores "/" at end of path, others raise EISDIR.
+let last_byte = path.as_bytes()[path.len() - 1];
+if last_byte == b'/' || last_byte == b'\\' {
+return Err(HgPathError::EndsWithSlash(path.to_owned()));
+}
+let parts: Vec<_> = path
+.as_bytes()
+.split(|b| std::path::is_separator(*b as char))
+.collect();
+if !split_drive(path).0.is_empty()
+|| [".hg"[..], ".hg."[..], ""[..]]
+.contains(_clean(parts[0]).as_ref())
+|| parts.iter().any(|c| c == b"..")
+{
+return Err(HgPathError::ContainsIllegalComponent(
+path.to_owned(),
+));
+}
+
+// Windows shortname aliases
+for part in parts.iter() {
+if part.contains('~') {
+let mut split = part.splitn(1, |b| *b == b'~');
+let mut first = split.next().unwrap().to_owned();
+first.make_ascii_uppercase();
+let last = split.next().unwrap();
+if last.iter().all(|b| b.is_ascii_digit())
+&& ["HG"[..], "HG8B6C"[..]].contains(_ref())
+{
+return Err(HgPathError::ContainsIllegalComponent(
+path.to_owned(),
+));
+}
+}
+}
+if find_slice_in_slice(_clean(path.as_bytes()), b".hg").is_some()
+{
+let lower_parts: Vec<_> =
+parts.iter().map(|p| lower_clean(p)).collect();
+for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
+if lower_parts[1..].contains(pattern) {
+let pos = lower_parts
+.iter()
+.position(|part| part == pattern)
+.unwrap();
+let base = lower_parts[..pos]
+.iter()
+.fold(HgPathBuf::new(), |acc, p| {
+acc.join(HgPath::new(p))
+});
+return Err(HgPathError::IsInsideNestedRepo {
+path: path.to_owned(),
+nested_repo: base,
+});
+}
+}
+}
+
+let parts = [..parts.len().saturating_sub(1)];
+
+let mut prefixes = vec![];
+
+// It's important that we check the path parts starting from the root.
+// This means we won't accidentally traverse a symlink into some other
+// filesystem (which is potentially expensive to access).
+for index in 0..parts.len() {
+let prefix =
+[..index + 1].join(&(std::path::MAIN_SEPARATOR as u8));
+let prefix = HgPath::new(prefix);
+if self.audited_dirs.contains(prefix) {
+continue;
+}
+self.check_filesystem(, )?;
+

D7869: rust-dirs-multiset: add `DirsChildrenMultiset`

2020-01-15 Thread Raphaël Gomès
Alphare marked 2 inline comments as done.
Alphare updated this revision to Diff 19319.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7869?vs=19261&id=19319

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7869/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7869

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/utils/files.rs
  rust/hg-core/src/utils/hg_path.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/hg_path.rs 
b/rust/hg-core/src/utils/hg_path.rs
--- a/rust/hg-core/src/utils/hg_path.rs
+++ b/rust/hg-core/src/utils/hg_path.rs
@@ -180,6 +180,29 @@
 [..]
 })
 }
+/// Returns a tuple of slices `(base, suffix)` resulting from the split
+/// at the rightmost `/`, if any.
+///
+/// # Examples:
+///
+/// ```
+/// use hg::utils::hg_path::HgPath;
+///
+/// let path = HgPath::new(b"cool/hg/path").split_at_suffix();
+/// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
+///
+/// let path = HgPath::new(b"pathwithoutsep").split_at_suffix();
+/// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
+/// ```
+pub fn split_at_suffix() -> (, ) {
+match ().rposition(|c| *c == b'/') {
+None => (HgPath::new(""), ),
+Some(size) => (
+HgPath::new([..*size]),
+HgPath::new([*size + 1..]),
+),
+}
+}
 pub fn join>(, other: ) -> HgPathBuf {
 let mut inner = self.inner.to_owned();
 if inner.len() != 0 && inner.last() != Some('/') {
diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -10,11 +10,11 @@
 //! Functions for fiddling with files.
 
 use crate::utils::hg_path::{HgPath, HgPathBuf};
-use std::iter::FusedIterator;
 
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
+use std::iter::FusedIterator;
 use std::path::Path;
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
@@ -64,6 +64,28 @@
 
 impl<'a> FusedIterator for Ancestors<'a> {}
 
+/// An iterator over repository path yielding itself and its ancestors.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct AncestorsWithBase<'a> {
+next: Option<(&'a HgPath, &'a HgPath)>,
+}
+
+impl<'a> Iterator for AncestorsWithBase<'a> {
+type Item = (&'a HgPath, &'a HgPath);
+
+fn next( self) -> Option {
+let next = self.next;
+self.next = match self.next {
+Some((s, _)) if s.is_empty() => None,
+Some((s, _)) => Some(s.split_at_suffix()),
+None => None,
+};
+next
+}
+}
+
+impl<'a> FusedIterator for AncestorsWithBase<'a> {}
+
 /// Returns an iterator yielding ancestor directories of the given repository
 /// path.
 ///
@@ -79,6 +101,25 @@
 dirs
 }
 
+/// Returns an iterator yielding ancestor directories of the given repository
+/// path.
+///
+/// The path is separated by '/', and must not start with '/'.
+///
+/// The path itself isn't included unless it is b"" (meaning the root
+/// directory.)
+pub(crate) fn find_dirs_with_base<'a>(
+path: &'a HgPath,
+) -> AncestorsWithBase<'a> {
+let mut dirs = AncestorsWithBase {
+next: Some((path, HgPath::new(b""))),
+};
+if !path.is_empty() {
+dirs.next(); // skip itself
+}
+dirs
+}
+
 /// TODO more than ASCII?
 pub fn normalize_case(path: ) -> HgPathBuf {
 #[cfg(windows)] // NTFS compares via upper()
@@ -338,4 +379,28 @@
 )
 );
 }
+
+#[test]
+fn test_find_dirs_with_base_some() {
+let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
+assert_eq!(
+dirs.next(),
+Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
+);
+assert_eq!(
+dirs.next(),
+Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
+);
+assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo";
+assert_eq!(dirs.next(), None);
+assert_eq!(dirs.next(), None);
+}
+
+#[test]
+fn test_find_dirs_with_base_empty() {
+let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
+assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"";
+assert_eq!(dirs.next(), None);
+assert_eq!(dirs.next(), None);
+}
 }
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs 
b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -8,12 +8,15 @@
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
-use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::{
-dirstate::EntryState, utils::files, DirstateEntry, 

D7870: rust-utils: add `Escaped` trait

2020-01-15 Thread Raphaël Gomès
Alphare added a comment.


  In D7870#115935 , @kevincox 
wrote:
  
  > I'm not convinced PrettyPrint is the best name. I might call it something 
more to do with escaping instead of "pretty". However I can't think of anything 
great.
  
  Yeah, I am not convinced either. I've sent a new patch, maybe that's better?

INLINE COMMENTS

> kevincox wrote in utils.rs:146
>   write!(acc, "\\x{:x}", self).unwrap();

Oh nice!

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7870/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7870

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7870: rust-utils: add `Escaped` trait

2020-01-15 Thread Raphaël Gomès
Alphare retitled this revision from "rust-utils: add `PrettyPrint` trait" to 
"rust-utils: add `Escaped` trait".
Alphare updated this revision to Diff 19320.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7870?vs=19262=19320

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7870/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7870

AFFECTED FILES
  rust/hg-core/src/utils.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -7,6 +7,9 @@
 
 //! Contains useful functions, traits, structs, etc. for use in core.
 
+use crate::utils::hg_path::HgPath;
+use std::ops::Deref;
+
 pub mod files;
 pub mod hg_path;
 pub mod path_auditor;
@@ -112,3 +115,57 @@
 }
 }
 }
+
+pub trait Escaped {
+/// Return bytes escaped for display to the user
+fn escaped_bytes() -> Vec;
+}
+
+impl Escaped for u8 {
+fn escaped_bytes() -> Vec {
+let mut acc = vec![];
+match self {
+c @ b'\'' | c @ b'\\' => {
+acc.push(b'\\');
+acc.push(*c);
+}
+b'\t' => {
+acc.extend(br"\\t");
+}
+b'\n' => {
+acc.extend(br"\\n");
+}
+b'\r' => {
+acc.extend(br"\\r");
+}
+c if (*c < b' ' || *c >= 127) => {
+write!(acc, "\\x{:x}", self).unwrap();
+}
+c => {
+acc.push(*c);
+}
+}
+acc
+}
+}
+
+impl<'a, T: Escaped> Escaped for &'a [T] {
+fn escaped_bytes() -> Vec {
+self.iter().fold(vec![], |mut acc, item| {
+acc.extend(item.escaped_bytes());
+acc
+})
+}
+}
+
+impl Escaped for Vec {
+fn escaped_bytes() -> Vec {
+self.deref().escaped_bytes()
+}
+}
+
+impl<'a> Escaped for &'a HgPath {
+fn escaped_bytes() -> Vec {
+self.as_bytes().escaped_bytes()
+}
+}



To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7867: rust-hg-path: add useful methods to `HgPath`

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19318.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7867?vs=19259=19318

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7867/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7867

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/hg_path.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/hg_path.rs 
b/rust/hg-core/src/utils/hg_path.rs
--- a/rust/hg-core/src/utils/hg_path.rs
+++ b/rust/hg-core/src/utils/hg_path.rs
@@ -164,13 +164,23 @@
 pub fn as_bytes() -> &[u8] {
 
 }
+pub fn as_bytes_mut( self) ->  [u8] {
+ self.inner
+}
 pub fn contains(, other: u8) -> bool {
 self.inner.contains()
 }
-pub fn starts_with(, needle: impl AsRef) -> bool {
+pub fn starts_with(, needle: impl AsRef) -> bool {
 self.inner.starts_with(needle.as_ref().as_bytes())
 }
-pub fn join>(, other: ) -> HgPathBuf {
+pub fn trim_trailing_slash() ->  {
+Self::new(if self.inner.last() == Some('/') {
+[..self.inner.len() - 1]
+} else {
+[..]
+})
+}
+pub fn join>(, other: ) -> HgPathBuf {
 let mut inner = self.inner.to_owned();
 if inner.len() != 0 && inner.last() != Some('/') {
 inner.push(b'/');
@@ -178,17 +188,24 @@
 inner.extend(other.as_ref().bytes());
 HgPathBuf::from_bytes()
 }
+pub fn parent() ->  {
+let inner = self.trim_trailing_slash().as_bytes();
+HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
+Some(pos) => [..pos],
+None => &[],
+})
+}
 /// Given a base directory, returns the slice of `self` relative to the
 /// base directory. If `base` is not a directory (does not end with a
 /// `b'/'`), returns `None`.
-pub fn relative_to(, base: impl AsRef) -> Option<> {
+pub fn relative_to(, base: impl AsRef) -> Option<> {
 let base = base.as_ref();
 if base.is_empty() {
 return Some(self);
 }
 let is_dir = base.as_bytes().ends_with(b"/");
 if is_dir && self.starts_with(base) {
-Some(HgPath::new([base.len()..]))
+Some(Self::new([base.len()..]))
 } else {
 None
 }
@@ -403,6 +420,7 @@
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn test_path_states() {
@@ -534,4 +552,19 @@
 let base = HgPath::new(b"ends/");
 assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
 }
+
+#[test]
+fn test_parent() {
+let path = HgPath::new(b"");
+assert_eq!(path.parent(), path);
+
+let path = HgPath::new(b"a/");
+assert_eq!(path.parent(), HgPath::new(b""));
+
+let path = HgPath::new(b"a/b");
+assert_eq!(path.parent(), HgPath::new(b"a"));
+
+let path = HgPath::new(b"a/other/b");
+assert_eq!(path.parent(), HgPath::new(b"a/other"));
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -19,4 +19,5 @@
 twox-hash = "1.5.0"
 
 [dev-dependencies]
-tempfile = "3.1.0"
\ No newline at end of file
+tempfile = "3.1.0"
+pretty_assertions = "0.6.1"
\ No newline at end of file
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -9,6 +9,14 @@
 ]
 
 [[package]]
+name = "ansi_term"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+dependencies = [
+ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "arrayvec"
 version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index;
@@ -104,6 +112,20 @@
 ]
 
 [[package]]
+name = "ctor"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+dependencies = [
+ "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "syn 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "difference"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+
+[[package]]
 name = "either"
 version = "1.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index;
@@ -130,6 +152,7 @@
  "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "pretty_assertions 0.6.1 
(registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -200,11 +223,38 

D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19316.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7864?vs=19256=19316

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7864/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7864

AFFECTED FILES
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -85,6 +85,77 @@
 path.to_ascii_lowercase()
 }
 
+#[cfg(windows)]
+/// Copied from the Python stdlib's `os.path.splitdrive` implementation.
+///
+/// Split a pathname into drive/UNC sharepoint and relative path specifiers.
+/// Returns a 2-tuple (drive_or_unc, path); either part may be empty.
+///
+/// If you assign
+///  result = split_drive(p)
+/// It is always true that:
+///  result[0] + result[1] == p
+///
+/// If the path contained a drive letter, drive_or_unc will contain everything
+/// up to and including the colon.
+/// e.g. split_drive("c:/dir") returns ("c:", "/dir")
+///
+/// If the path contained a UNC path, the drive_or_unc will contain the host
+/// name and share up to but not including the fourth directory separator
+/// character.
+/// e.g. split_drive("//host/computer/dir") returns ("//host/computer", "/dir")
+///
+/// Paths cannot contain both a drive letter and a UNC path.
+pub fn split_drive<'a>(path: ) -> (, ) {
+let bytes = path.as_bytes();
+let is_sep = |b| std::path::is_separator(b as char);
+
+if path.len() < 2 {
+(HgPath::new(b""), path)
+} else if is_sep(bytes[0])
+&& is_sep(bytes[1])
+&& (path.len() == 2 || !is_sep(bytes[2]))
+{
+// Is a UNC path:
+//  drive letter or UNC path
+// \\machine\mountpoint\directory\etc\...
+//   directory ^^^
+
+let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
+let mountpoint_start_index = if let Some(i) = machine_end_index {
+i + 2
+} else {
+return (HgPath::new(b""), path);
+};
+
+match bytes[mountpoint_start_index + 1..]
+.iter()
+.position(|b| is_sep(*b))
+{
+// A UNC path can't have two slashes in a row
+// (after the initial two)
+Some(0) => (HgPath::new(b""), path),
+Some(i) => {
+let (a, b) = bytes.split_at(mountpoint_start_index + 1 + i);
+(HgPath::new(a), HgPath::new(b))
+}
+None => (path, HgPath::new(b"")),
+}
+} else if bytes[1] == b':' {
+// Drive path c:\directory
+let (a, b) = bytes.split_at(2);
+(HgPath::new(a), HgPath::new(b))
+} else {
+(HgPath::new(b""), path)
+}
+}
+
+#[cfg(unix)]
+/// Split a pathname into drive and path. On Posix, drive is always empty.
+pub fn split_drive(path: ) -> (, ) {
+(HgPath::new(b""), path)
+}
+
 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
 pub struct HgMetadata {
 pub st_dev: u64,
@@ -133,4 +204,101 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+#[cfg(unix)]
+fn test_split_drive() {
+// Taken from the Python stdlib's tests
+assert_eq!(
+split_drive(HgPath::new(br"/foo/bar")),
+(HgPath::new(b""), HgPath::new(br"/foo/bar"))
+);
+assert_eq!(
+split_drive(HgPath::new(br"foo:bar")),
+(HgPath::new(b""), HgPath::new(br"foo:bar"))
+);
+assert_eq!(
+split_drive(HgPath::new(br":foo:bar")),
+(HgPath::new(b""), HgPath::new(br":foo:bar"))
+);
+// Also try NT paths; should not split them
+assert_eq!(
+split_drive(HgPath::new(br"c:\foo\bar")),
+(HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
+);
+assert_eq!(
+split_drive(HgPath::new(b"c:/foo/bar")),
+(HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
+);
+assert_eq!(
+split_drive(HgPath::new(br"\\conky\mountpoint\foo\bar")),
+(
+HgPath::new(b""),
+HgPath::new(br"\\conky\mountpoint\foo\bar")
+)
+);
+}
+
+#[test]
+#[cfg(windows)]
+fn test_split_drive() {
+assert_eq!(
+split_drive(HgPath::new(br"c:\foo\bar")),
+(HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
+);
+assert_eq!(
+split_drive(HgPath::new(b"c:/foo/bar")),
+(HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
+);
+assert_eq!(
+split_drive(HgPath::new(br"\\conky\mountpoint\foo\bar")),
+(
+HgPath::new(br"\\conky\mountpoint"),
+HgPath::new(br"\foo\bar")
+)
+);
+  

D7863: rust-utils: add util to find a slice in another slice

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19315.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7863?vs=19274=19315

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7863/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7863

AFFECTED FILES
  rust/hg-core/src/utils.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -10,6 +10,26 @@
 pub mod files;
 pub mod hg_path;
 
+/// Useful until rust/issues/56345 is stable
+///
+/// # Examples
+///
+/// ```
+/// use crate::hg::utils::find_slice_in_slice;
+///
+/// let haystack = b"This is the haystack".to_vec();
+/// assert_eq!(find_slice_in_slice(, b"the"), Some(8));
+/// assert_eq!(find_slice_in_slice(, b"not here"), None);
+/// ```
+pub fn find_slice_in_slice(slice: &[T], needle: &[T]) -> Option
+where
+for<'a> &'a [T]: PartialEq,
+{
+slice
+.windows(needle.len())
+.position(|window| window == needle)
+}
+
 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
 ///
 /// # Examples



To: Alphare, #hg-reviewers, pulkit
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7862: dirstate: move rust fast-path calling code to its own method

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19314.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7862?vs=19273=19314

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7862/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7862

AFFECTED FILES
  mercurial/dirstate.py

CHANGE DETAILS

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -1083,6 +1083,48 @@
 results[next(iv)] = st
 return results
 
+def _rust_status(self, matcher, list_clean):
+# Force Rayon (Rust parallelism library) to respect the number of
+# workers. This is a temporary workaround until Rust code knows
+# how to read the config file.
+numcpus = self._ui.configint(b"worker", b"numcpus")
+if numcpus is not None:
+encoding.environ.setdefault(
+b'RAYON_NUM_THREADS', b'%d' % numcpus
+)
+
+workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
+if not workers_enabled:
+encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
+
+(
+lookup,
+modified,
+added,
+removed,
+deleted,
+unknown,
+clean,
+) = rustmod.status(
+self._map._rustmap,
+matcher,
+self._rootdir,
+bool(list_clean),
+self._lastnormaltime,
+self._checkexec,
+)
+
+status = scmutil.status(
+modified=modified,
+added=added,
+removed=removed,
+deleted=deleted,
+unknown=unknown,
+ignored=[],
+clean=clean,
+)
+return (lookup, status)
+
 def status(self, match, subrepos, ignored, clean, unknown):
 '''Determine the status of the working copy relative to the
 dirstate and return a pair of (unsure, status), where status is of type
@@ -1127,46 +1169,7 @@
 use_rust = False
 
 if use_rust:
-# Force Rayon (Rust parallelism library) to respect the number of
-# workers. This is a temporary workaround until Rust code knows
-# how to read the config file.
-numcpus = self._ui.configint(b"worker", b"numcpus")
-if numcpus is not None:
-encoding.environ.setdefault(
-b'RAYON_NUM_THREADS', b'%d' % numcpus
-)
-
-workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
-if not workers_enabled:
-encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
-
-(
-lookup,
-modified,
-added,
-removed,
-deleted,
-unknown,
-clean,
-) = rustmod.status(
-dmap._rustmap,
-match,
-self._rootdir,
-bool(listclean),
-self._lastnormaltime,
-self._checkexec,
-)
-
-status = scmutil.status(
-modified=modified,
-added=added,
-removed=removed,
-deleted=deleted,
-unknown=unknown,
-ignored=ignored,
-clean=clean,
-)
-return (lookup, status)
+return self._rust_status(match, listclean)
 
 def noop(f):
 pass



To: Alphare, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7866: rust-pathauditor: add Rust implementation of the `pathauditor`

2020-01-15 Thread Raphaël Gomès
Alphare added inline comments.
Alphare marked 2 inline comments as done.

INLINE COMMENTS

> kevincox wrote in path_auditor.rs:54
> It would be nice to have this in a helper function in path to get a component 
> iterator.

I think that's a good idea indeed, but I would prefer to do it in another patch.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7866/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7866

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7895: rust-core: fix typo in comment

2020-01-15 Thread gracinet (Georges Racinet)
gracinet added a comment.
gracinet accepted this revision.


  Ah yes, thanks

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7895/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7895

To: aayjaychan, #hg-reviewers, kevincox, gracinet
Cc: gracinet, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"

2020-01-15 Thread Raphaël Gomès
Alphare added inline comments.

INLINE COMMENTS

> kevincox wrote in files.rs:109
> I would recommend just converting to bytes at the top of the function then 
> converting the return value to a path at the exit. I feel when you are doing 
> manipulation like this it makes the most sense to treat it as plain bytes 
> within the function.
> 
> Alternatively I wouldn't mind putting an index operator but have a slight 
> preference for `path.as_bytes()[n]` to keep it explicit as most of the code 
> shouldn't be reaching into paths.

I agree. I'll send a follow-up for all your remarks in an hour.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7864/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7864

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"

2020-01-15 Thread kevincox (Kevin Cox)
kevincox added inline comments.

INLINE COMMENTS

> Alphare wrote in files.rs:109
> Indeed, this is much better. While trying to adapt this code to fit with 
> `HgPath`, I find myself needing to translate to and from bytes whenever 
> indexing or when using `split_at`. Should we give a `HgPath` a `split_at` 
> method or also all the `Index<>` ones? I remember that we decided against 
> that earlier.

I would recommend just converting to bytes at the top of the function then 
converting the return value to a path at the exit. I feel when you are doing 
manipulation like this it makes the most sense to treat it as plain bytes 
within the function.

Alternatively I wouldn't mind putting an index operator but have a slight 
preference for `path.as_bytes()[n]` to keep it explicit as most of the code 
shouldn't be reaching into paths.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7864/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7864

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"

2020-01-15 Thread Raphaël Gomès
Alphare added inline comments.

INLINE COMMENTS

> kevincox wrote in files.rs:109
> I think this can be simplified. See 
> https://play.rust-lang.org/?version=stable=debug=2018=78136ead96596afe6305e7542a881ca4.
>  I had to use &[u8] to avoid having to copy all of the HgPath stuff into the 
> playground, but the code should be easy to integrate.
> 
> Notable changes:
> 
> - Avoid creating `norm_bytes` vector (and allocation) by creating an is_sep 
> function.
>   - Note, we can probably use std::path::is_separator 
> 
> - Return references to the input since we guarantee that the output will be 
> parts of the input. The caller can always clone.
> - Use slice::split_at to make it obvious that we are returning the entire 
> path just split.
> - Use pattern matching rather than unwrapping.
> - Use fall-through returns to make it obvious we handle every path.

Indeed, this is much better. While trying to adapt this code to fit with 
`HgPath`, I find myself needing to translate to and from bytes whenever 
indexing or when using `split_at`. Should we give a `HgPath` a `split_at` 
method or also all the `Index<>` ones? I remember that we decided against that 
earlier.

> kevincox wrote in files.rs:230
> We should add an example of a UNC and Drive path here to ensure that they 
> don't get split.

Good idea

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7864/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7864

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: 5.4sprint location/date finalization

2020-01-15 Thread Pierre-Yves David

Gentle reminder, please update the sprint wiki page with availability.

On 1/6/20 10:14 PM, Augie Fackler wrote:

Howdy folks,

https://www.mercurial-scm.org/wiki/5.4Sprint has three choices of location: 
Finland, Brussels, and Paris. Given the tentative availability of Paris, and 
the relative proximity of Brussels and Paris means Brussels is probably the 
better choice?


To clarify: the Mozilla office is available as long as we have a
full-time Mozilla employee on site at all times. That employee can be
Connor, so confirming Connor's presence confirms the venue. The Mozilla
office moved, but they still have a large meeting room (and a better
outside view).


In the meantime, Logilab (also contacted in December) updated the wiki
page with their availability. For those who were here in 2010, this is
the same location, but the space has been remodeled and has an extra,
larger room.


--
Pierre-Yves David
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Nlnet funding for transitioning out of SHA-1

2020-01-15 Thread Raphaël Gomès

Hello all,

As you all know, we have to transition out of using SHA-1 for Mercurial 
(https://www.mercurial-scm.org/wiki/SHA1TransitionPlan). While a known 
mitigation has been introduced by a few of Augie's patches, we still 
have to act on that transition.


The NLnet foundation has a program (https://nlnet.nl/PET/) for
sponsoring privacy- and trust-enhancing technologies, a category into
which this aspect of Mercurial falls. Someone whose identity remains
unclear came to the #mercurial IRC channel to tell us to send a submission.


The latest "sha-mbles" attack is a stinging reminder that we need to
take care of this before it is too late. Getting explicit funding is a
great way to move forward and ensure Mercurial does not become a
security liability in the near future.


The deadline for submission is Feb 1st, so we have to move fast.

The NLnet process is fairly light. Here are the things that we need to
think about as a community for this submission:

    - Project abstract (1200 chars)
    - The requested amount ranging from 5k to 50k€ (with details on how 
it is going to be spent).
    - Comparison with other efforts (probably a comparison with what 
git did)

    - Explanation of the technical challenges. Probably a mix of:
        - Mercurial is a 15 year old code base with strong 
compatibility guarantees

        - A smooth but secure transition is going to be hard

The first step here is to sketch a high-level plan of the steps we need 
to take to transition out of SHA-1. The actual details (which algorithm, 
rehashing/compatibility, etc) can be dealt with while the work is 
actually being done.


Right now I can see the following high level steps

    - Update the core code to be able to deal with multiple hashing 
functions

    - Update the network protocol to deal with multiple hashing functions
    - Update the on-disk format to deal with larger hashes
    - How to deal with backwards and forwards compatibility with 
regards to both repositories and client/server (wire protocol changes, etc.)
    - How changing hashing functions impacts the user experience (from 
additional steps to UI getting broken)

    - Help extensions to migrate if need be
    - Actually select a new hash function

Am I missing anything? How do you all feel about this?

Thanks,
Raphaël

___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7895: rust-core: fix typo in comment

2020-01-15 Thread aayjaychan (Aay Jay Chan)
aayjaychan created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7895

AFFECTED FILES
  rust/hg-core/src/revlog.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs
--- a/rust/hg-core/src/revlog.rs
+++ b/rust/hg-core/src/revlog.rs
@@ -14,7 +14,7 @@
 /// Marker expressing the absence of a parent
 ///
 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
-/// to be smaller that all existing revisions.
+/// to be smaller than all existing revisions.
 pub const NULL_REVISION: Revision = -1;
 
 /// Same as `mercurial.node.wdirrev`



To: aayjaychan, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7730: rebase: make sure pruning does not confuse rebase (issue6180)

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz added inline comments.

INLINE COMMENTS

> rebase.py:600
> +p1 = self.wctx.p1().node()
> +repo.setparents(p1)
>  elif self.state[rev] == revtodo:

Actually, doesn't this need to be `wctx.setparents()` (which you can do now 
that D7822  has been queued) in order to 
work with in-memory rebase? Maybe time to add a test case with in-memory rebase?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7730/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7730

To: khanchi97, martinvonz, #hg-reviewers
Cc: pulkit, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7730: rebase: make sure pruning does not confuse rebase (issue6180)

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
martinvonz added inline comments.

INLINE COMMENTS

> khanchi97 wrote in rebase.py:597
> I will look into it.

No need, I've already sent D7827 

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7730/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7730

To: khanchi97, martinvonz, #hg-reviewers
Cc: pulkit, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7866: rust-pathauditor: add Rust implementation of the `pathauditor`

2020-01-15 Thread kevincox (Kevin Cox)
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> path_auditor.rs:53
> +.as_bytes()
> +.split(|c| *c as char == std::path::MAIN_SEPARATOR)
> +.collect();

Should this be `std::path::is_separator(*c as char)`?

If not please add a comment explaining why.

> path_auditor.rs:54
> +.split(|c| *c as char == std::path::MAIN_SEPARATOR)
> +.collect();
> +if !split_drive(path).0.is_empty()

It would be nice to have this in a helper function in path to get a component 
iterator.

> path_auditor.rs:72
> +let last = split.next().unwrap();
> +if last.iter().all(|b| (*b as char).is_digit(10))
> +&& ["HG"[..], "HG8B6C"[..]].contains(_ref())

You can just use 
https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7866/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7866

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7870: rust-utils: add `PrettyPrint` trait

2020-01-15 Thread kevincox (Kevin Cox)
kevincox added a comment.
kevincox accepted this revision.


  I'm not convinced PrettyPrint is the best name. I might call it something 
more to do with escaping instead of "pretty". However I can't think of anything 
great.

INLINE COMMENTS

> utils.rs:146
> +acc.push(HEX_DIGITS[((*c & 0xf0) >> 4) as usize]);
> +acc.push(HEX_DIGITS[(*c & 0xf) as usize]);
> +}

write!(acc, "\\x{:x}", self).unwrap();

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7870/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7870

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7869: rust-dirs-multiset: add `DirsChildrenMultiset`

2020-01-15 Thread kevincox (Kevin Cox)
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> dirs_multiset.rs:170
> +Some(i) => i.contains(),
> +} {
> +continue;

I would put this check into a helper function.

> files.rs:86
> +)),
> +},
> +None => None,

This should probably be a helper on `HgPath`? It would be much easier to 
understand what it is doing if it had a name.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7869/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7869

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7867: rust-hg-path: add useful methods to `HgPath`

2020-01-15 Thread kevincox (Kevin Cox)
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> hg_path.rs:189
> +[..]
> +};
> +HgPath::new(match inner.iter().rposition(|b| *b == b'/') {

It would be nice to have a `trim_trailing_slash` helper.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7867/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7867

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7894: nodemap: introduce an option to use mmap to read the nodemap mapping

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The performance and memory benefit is much greater if we don't have to copy 
all
  the data in memory for each information. So we introduce an option (on by
  default) to read the data using mmap.
  
  This changeset is the last one defining the API for index support of nodemap
  data (indexes have to be able to use the mmapped data).

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7894

AFFECTED FILES
  mercurial/configitems.py
  mercurial/debugcommands.py
  mercurial/localrepo.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -84,3 +84,37 @@
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
+
+Test code path without mmap
+---
+
+  $ echo bar > bar
+  $ hg add bar
+  $ hg ci -m 'bar' --config experimental.exp-persistent-nodemap.mmap=no
+
+  $ hg debugnodemap --check --config 
experimental.exp-persistent-nodemap.mmap=yes
+  revision in index:   5003
+  revision in nodemap: 5003
+  $ hg debugnodemap --check --config 
experimental.exp-persistent-nodemap.mmap=no
+  revision in index:   5003
+  revision in nodemap: 5003
+
+
+#if pure
+  $ hg debugnodemap --metadata
+  uid:  (glob)
+  tip-rev: 5002
+  data-length: 246656
+  data-unused: 768
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-.nd: size=246656, 
sha256=8221807a0860a7a65002d2d3e0d33512d28aa6db2433db966e56aa17dcf6329f (glob)
+
+#else
+  $ hg debugnodemap --metadata
+  uid:  (glob)
+  tip-rev: 5002
+  data-length: 245888
+  data-unused: 0
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-.nd: size=245888, 
sha256=42233b63e5567fd362fb7847fa7a9f4d40ad93c28f8571197b356a69fe8bd271 (glob)
+#endif
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -8,6 +8,7 @@
 
 from __future__ import absolute_import
 
+import errno
 import os
 import re
 import struct
@@ -46,10 +47,17 @@
 
 filename = _rawdata_filepath(revlog, docket)
 data = revlog.opener.tryread(filename)
+try:
+with revlog.opener(filename) as fd:
+if revlog.opener.options.get("exp-persistent-nodemap.mmap"):
+data = fd.read(data_length)
+else:
+data = util.buffer(util.mmapread(fd, data_length))
+except OSError as e:
+if e.errno != errno.ENOENT:
+raise
 if len(data) < data_length:
 return None
-elif len(data) > data_length:
-data = data[:data_length]
 return docket, data
 
 
@@ -81,6 +89,8 @@
 
 can_incremental = util.safehasattr(revlog.index, 
"nodemap_data_incremental")
 ondisk_docket = revlog._nodemap_docket
+feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
+use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")
 
 data = None
 # first attemp an incremental update of the data
@@ -97,12 +107,18 @@
 datafile = _rawdata_filepath(revlog, target_docket)
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
+new_length = target_docket.data_length + len(data)
 with revlog.opener(datafile, 'r+') as fd:
 fd.seek(target_docket.data_length)
 fd.write(data)
-fd.seek(0)
-new_data = fd.read(target_docket.data_length + len(data))
-target_docket.data_length += len(data)
+if feed_data:
+if use_mmap:
+fd.seek(0)
+new_data = fd.read(new_length)
+else:
+fd.flush()
+new_data = util.buffer(util.mmapread(fd, new_length))
+target_docket.data_length = new_length
 target_docket.data_unused += data_changed_count
 
 if data is None:
@@ -115,9 +131,14 @@
 data = persistent_data(revlog.index)
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
-new_data = data
-with revlog.opener(datafile, 'w') as fd:
+with revlog.opener(datafile, 'w+') as fd:
 fd.write(data)
+if feed_data:
+if use_mmap:
+new_data = data
+else:
+fd.flush()
+new_data = util.buffer(util.mmapread(fd, len(data)))
 target_docket.data_length = len(data)
 target_docket.tip_rev = revlog.tiprev()
 # EXP-TODO: if this is 

D7888: nodemap: track the maximum revision tracked in the nodemap

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We need a simple way to detect when the on-disk data contains fewer revisions
  than the index we read from disk. The docket file is meant for this; we just
  had to start tracking that data.
  
  We should also try to detect strip operations, but we will deal with this in
  later changesets. Right now we are focusing on defining the API for indexes
  supporting the persistent nodemap.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7888

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlog.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -14,8 +14,9 @@
   $ hg debugbuilddag .+5000
   $ hg debugnodemap --metadata
   uid:  (glob)
+  tip-rev: 5000
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=18
+  .hg/store/00changelog.n: size=26
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-.nd: 
sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -51,8 +52,9 @@
   $ hg ci -m 'foo'
   $ hg debugnodemap --metadata
   uid:  (glob)
+  tip-rev: 5001
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=18
+  .hg/store/00changelog.n: size=26
 
 (The pure code use the debug code that perform incremental update, the C code 
reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -36,9 +36,11 @@
 if version != ONDISK_VERSION:
 return None
 offset += S_VERSION.size
-(uid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
+headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
+uid_size, tip_rev = headers
 offset += S_HEADER.size
 docket = NodeMapDocket(pdata[offset : offset + uid_size])
+docket.tip_rev = tip_rev
 
 filename = _rawdata_filepath(revlog, docket)
 return docket, revlog.opener.tryread(filename)
@@ -94,6 +96,7 @@
 # store vfs
 with revlog.opener(datafile, 'w') as fd:
 fd.write(data)
+target_docket.tip_rev = revlog.tiprev()
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
 with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
@@ -142,7 +145,7 @@
 ONDISK_VERSION = 0
 
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">B")
+S_HEADER = struct.Struct(">BQ")
 
 ID_SIZE = 8
 
@@ -164,15 +167,19 @@
 if uid is None:
 uid = _make_uid()
 self.uid = uid
+self.tip_rev = None
 
 def copy(self):
-return NodeMapDocket(uid=self.uid)
+new = NodeMapDocket(uid=self.uid)
+new.tip_rev = self.tip_rev
+return new
 
 def serialize(self):
 """return serialized bytes for a docket using the passed uid"""
 data = []
 data.append(S_VERSION.pack(ONDISK_VERSION))
-data.append(S_HEADER.pack(len(self.uid)))
+headers = (len(self.uid), self.tip_rev)
+data.append(S_HEADER.pack(*headers))
 data.append(self.uid)
 return b''.join(data)
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -639,7 +639,7 @@
 if use_nodemap:
 nodemap_data = nodemaputil.persisted_data(self)
 if nodemap_data is not None:
-index.update_nodemap_data(nodemap_data[1])
+index.update_nodemap_data(*nodemap_data)
 except (ValueError, IndexError):
 raise error.RevlogError(
 _(b"index %s is corrupted") % self.indexfile
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -170,7 +170,7 @@
 self._nm_root = self._nm_max_idx = self._nm_rev = None
 return data
 
-def update_nodemap_data(self, nm_data):
+def update_nodemap_data(self, docket, nm_data):
 """provide full serialiazed data from a nodemap
 
 The data are expected to come from disk. See `nodemap_data_all` for a
@@ -178,7 +178,7 @@
 if nm_data is not None:
 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
 if self._nm_root:
-self._nm_rev = len(self) - 1
+self._nm_rev = docket.tip_rev
 else:
 self._nm_root = self._nm_max_idx = self._nm_rev = None
 
diff --git a/mercurial/debugcommands.py 

D7893: nodemap: update the index with the newly written data (when appropriate)

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  If we are to use mmap to read the nodemap data, and if the Python code is
  responsible for the IO, we need to refresh the mmap after each write and
  provide it back to the index.
  
  We start this dance without the mmap first.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7893

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -100,6 +100,8 @@
 with revlog.opener(datafile, 'r+') as fd:
 fd.seek(target_docket.data_length)
 fd.write(data)
+fd.seek(0)
+new_data = fd.read(target_docket.data_length + len(data))
 target_docket.data_length += len(data)
 target_docket.data_unused += data_changed_count
 
@@ -113,6 +115,7 @@
 data = persistent_data(revlog.index)
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
+new_data = data
 with revlog.opener(datafile, 'w') as fd:
 fd.write(data)
 target_docket.data_length = len(data)
@@ -122,6 +125,9 @@
 with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
 fp.write(target_docket.serialize())
 revlog._nodemap_docket = target_docket
+if util.safehasattr(revlog.index, "update_nodemap_data"):
+revlog.index.update_nodemap_data(target_docket, new_data)
+
 # EXP-TODO: if the transaction abort, we should remove the new data and
 # reinstall the old one.
 



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7892: nodemap: never read more than the expected data amount

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Since we are tracking this number, we can use it to detect a corrupted
  rawdata file and to only read the correct amount of data when possible.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7892

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -45,7 +45,12 @@
 docket.data_unused = data_unused
 
 filename = _rawdata_filepath(revlog, docket)
-return docket, revlog.opener.tryread(filename)
+data = revlog.opener.tryread(filename)
+if len(data) < data_length:
+return None
+elif len(data) > data_length:
+data = data[:data_length]
+return docket, data
 
 
 def setup_persistent_nodemap(tr, revlog):



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7889: nodemap: track the total and unused amount of data in the rawdata file

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We need to keep that information around:
  
  - total data will allow a transaction to start appending new information
    without confusing other readers.
  
  - unused data will allow us to detect when we should regenerate a new
    rawdata file.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid:  (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-.nd: 
sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid:  (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid:  (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code 
reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
 return None
 offset += S_VERSION.size
 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-uid_size, tip_rev = headers
+uid_size, tip_rev, data_length, data_unused = headers
 offset += S_HEADER.size
 docket = NodeMapDocket(pdata[offset : offset + uid_size])
 docket.tip_rev = tip_rev
+docket.data_length = data_length
+docket.data_unused = data_unused
 
 filename = _rawdata_filepath(revlog, docket)
 return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
 # first attemp an incremental update of the data
 if can_incremental and ondisk_docket is not None:
 target_docket = revlog._nodemap_docket.copy()
-data = revlog.index.nodemap_data_incremental()
+data_changed_count, data = revlog.index.nodemap_data_incremental()
 datafile = _rawdata_filepath(revlog, target_docket)
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
 with revlog.opener(datafile, 'a') as fd:
 fd.write(data)
+target_docket.data_length += len(data)
+target_docket.data_unused += data_changed_count
 else:
 # otherwise fallback to a full new export
 target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
 # store vfs
 with revlog.opener(datafile, 'w') as fd:
 fd.write(data)
+target_docket.data_length = len(data)
 target_docket.tip_rev = revlog.tiprev()
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
 uid = _make_uid()
 self.uid = uid
 self.tip_rev = None
+self.data_length = None
+self.data_unused = 0
 
 def copy(self):
 new = NodeMapDocket(uid=self.uid)
 new.tip_rev = self.tip_rev
+new.data_length = self.data_length
+new.data_unused = self.data_unused
 return new
 
 def serialize(self):
 """return serialized bytes for a docket using the passed uid"""
 data = []
 data.append(S_VERSION.pack(ONDISK_VERSION))
-headers = (len(self.uid), self.tip_rev)
+headers = (
+len(self.uid),
+self.tip_rev,
+self.data_length,
+self.data_unused,
+)
 data.append(S_HEADER.pack(*headers))
 data.append(self.uid)
 return b''.join(data)
@@ -236,8 +249,8 @@
 def update_persistent_data(index, root, max_idx, last_rev):
 """return the serialised data of a nodemap for a given index
 """
-trie = _update_trie(index, root, last_rev)
-return _dump_trie(trie, existing_idx=max_idx)
+changed_block, trie = _update_trie(index, root, last_rev)
+return changed_block * 

D7891: nodemap: write new data from the expected current data length

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  If the amount of data in the file exceeds the expected amount, we will
  overwrite the extra data. This is a simple way to be safer.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7891

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -92,7 +92,8 @@
 datafile = _rawdata_filepath(revlog, target_docket)
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
-with revlog.opener(datafile, 'a') as fd:
+with revlog.opener(datafile, 'r+') as fd:
+fd.seek(target_docket.data_length)
 fd.write(data)
 target_docket.data_length += len(data)
 target_docket.data_unused += data_changed_count



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7885: nodemap: keep track of the docket for loaded data

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  To perform incremental updates of the on-disk data, we need to keep track of
  some aspects of that data.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7885

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/revlog.py
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -41,7 +41,7 @@
 docket = NodeMapDocket(pdata[offset : offset + uid_size])
 
 filename = _rawdata_filepath(revlog, docket)
-return revlog.opener.tryread(filename)
+return docket, revlog.opener.tryread(filename)
 
 
 def setup_persistent_nodemap(tr, revlog):
@@ -93,6 +93,7 @@
 # store vfs
 with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
 fp.write(target_docket.serialize())
+revlog._nodemap_docket = target_docket
 # EXP-TODO: if the transaction abort, we should remove the new data and
 # reinstall the old one.
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -455,6 +455,7 @@
 self._maxchainlen = None
 self._deltabothparents = True
 self.index = None
+self._nodemap_docket = None
 # Mapping of partial identifiers to full nodes.
 self._pcache = {}
 # Mapping of revision integer to full node.
@@ -544,6 +545,9 @@
 indexdata = b''
 self._initempty = True
 try:
+nodemap_data = nodemaputil.persisted_data(self)
+if nodemap_data is not None:
+self._nodemap_docket = nodemap_data[0]
 with self._indexfp() as f:
 if (
 mmapindexthreshold is not None
@@ -635,7 +639,7 @@
 if use_nodemap:
 nodemap_data = nodemaputil.persisted_data(self)
 if nodemap_data is not None:
-index.update_nodemap_data(nodemap_data)
+index.update_nodemap_data(nodemap_data[1])
 except (ValueError, IndexError):
 raise error.RevlogError(
 _(b"index %s is corrupted") % self.indexfile
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2106,13 +2106,17 @@
 elif args['dump_disk']:
 unfi = repo.unfiltered()
 cl = unfi.changelog
-data = nodemap.persisted_data(cl)
-ui.write(data)
+nm_data = nodemap.persisted_data(cl)
+if nm_data is not None:
+docket, data = nm_data
+ui.write(data)
 elif args['check']:
 unfi = repo.unfiltered()
 cl = unfi.changelog
-data = nodemap.persisted_data(cl)
-return nodemap.check_data(ui, cl.index, data)
+nm_data = nodemap.persisted_data(cl)
+if nm_data is not None:
+docket, data = nm_data
+return nodemap.check_data(ui, cl.index, data)
 
 
 @command(



To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7890: nodemapdata: double check the source docket when doing incremental update

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  In theory, the index will have the information we expect it to have. However,
  to be on the safe side, it seems better to double check that the incremental
  data are generated from the data currently on disk.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7890

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -77,18 +77,27 @@
 can_incremental = util.safehasattr(revlog.index, 
"nodemap_data_incremental")
 ondisk_docket = revlog._nodemap_docket
 
+data = None
 # first attemp an incremental update of the data
 if can_incremental and ondisk_docket is not None:
 target_docket = revlog._nodemap_docket.copy()
-data_changed_count, data = revlog.index.nodemap_data_incremental()
-datafile = _rawdata_filepath(revlog, target_docket)
-# EXP-TODO: if this is a cache, this should use a cache vfs, not a
-# store vfs
-with revlog.opener(datafile, 'a') as fd:
-fd.write(data)
-target_docket.data_length += len(data)
-target_docket.data_unused += data_changed_count
-else:
+(
+src_docket,
+data_changed_count,
+data,
+) = revlog.index.nodemap_data_incremental()
+if src_docket != target_docket:
+data = None
+else:
+datafile = _rawdata_filepath(revlog, target_docket)
+# EXP-TODO: if this is a cache, this should use a cache vfs, not a
+# store vfs
+with revlog.opener(datafile, 'a') as fd:
+fd.write(data)
+target_docket.data_length += len(data)
+target_docket.data_unused += data_changed_count
+
+if data is None:
 # otherwise fallback to a full new export
 target_docket = NodeMapDocket()
 datafile = _rawdata_filepath(revlog, target_docket)
@@ -182,6 +191,17 @@
 new.data_unused = self.data_unused
 return new
 
+def __cmp__(self, other):
+if self.uid < other.uid:
+return -1
+if self.uid > other.uid:
+return 1
+elif self.data_length < other.data_length:
+return -1
+elif self.data_length > other.data_length:
+return 1
+return 0
+
 def serialize(self):
 """return serialized bytes for a docket using the passed uid"""
 data = []
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,13 @@
 """
 if self._nm_root is None:
 return None
+docket = self._nm_docket
 changed, data = nodemaputil.update_persistent_data(
-self, self._nm_root, self._nm_max_idx, self._nm_rev
+self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
 )
-self._nm_root = self._nm_max_idx = self._nm_rev = None
-return changed, data
+
+self._nm_root = self._nm_max_idx = self._nm_docket = None
+return docket, changed, data
 
 def update_nodemap_data(self, docket, nm_data):
 """provide full serialiazed data from a nodemap
@@ -178,9 +180,9 @@
 if nm_data is not None:
 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
 if self._nm_root:
-self._nm_rev = docket.tip_rev
+self._nm_docket = docket
 else:
-self._nm_root = self._nm_max_idx = self._nm_rev = None
+self._nm_root = self._nm_max_idx = self._nm_docket = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7887: nodemap: add a flag to dump the details of the docket

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We are about to add more information to the docket. We first introduce a way
  to debug its content.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7887

AFFECTED FILES
  mercurial/debugcommands.py
  tests/test-completion.t
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -12,6 +12,8 @@
   > persistent-nodemap=yes
   > EOF
   $ hg debugbuilddag .+5000
+  $ hg debugnodemap --metadata
+  uid:  (glob)
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
   $ f --sha256 .hg/store/00changelog-*.nd
@@ -47,6 +49,8 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+  $ hg debugnodemap --metadata
+  uid:  (glob)
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
 
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -290,7 +290,7 @@
   debugmanifestfulltextcache: clear, add
   debugmergestate: 
   debugnamecomplete: 
-  debugnodemap: dump-new, dump-disk, check
+  debugnodemap: dump-new, dump-disk, check, metadata
   debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, 
user, template
   debugp1copies: rev
   debugp2copies: rev
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2093,6 +2093,12 @@
 False,
 _(b'check that the data on disk data are correct.'),
 ),
+(
+'',
+b'metadata',
+False,
+_(b'display the on disk meta data for the nodemap'),
+),
 ],
 )
 def debugnodemap(ui, repo, **args):
@@ -2117,6 +2123,13 @@
 if nm_data is not None:
 docket, data = nm_data
 return nodemap.check_data(ui, cl.index, data)
+elif args['metadata']:
+unfi = repo.unfiltered()
+cl = unfi.changelog
+nm_data = nodemap.persisted_data(cl)
+if nm_data is not None:
+docket, data = nm_data
+ui.write((b"uid: %s\n") % docket.uid)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"

2020-01-15 Thread kevincox (Kevin Cox)
This revision now requires changes to proceed.
kevincox added inline comments.
kevincox requested changes to this revision.

INLINE COMMENTS

> files.rs:109
> +/// Paths cannot contain both a drive letter and a UNC path.
> +pub fn split_drive(path: impl AsRef) -> (HgPathBuf, HgPathBuf) {
> +let path = path.as_ref();

I think this can be simplified. See
https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=78136ead96596afe6305e7542a881ca4.
I had to use &[u8] to avoid having to copy all of the HgPath stuff into the
playground, but the code should be easy to integrate.

Notable changes:

- Avoid creating the `norm_bytes` vector (and its allocation) by creating an
  `is_sep` function.
  - Note: we can probably use `std::path::is_separator`.
- Return references to the input, since we guarantee that the output will be
  parts of the input. The caller can always clone.
- Use `slice::split_at` to make it obvious that we are returning the entire
  path, just split.
- Use pattern matching rather than unwrapping.
- Use fall-through returns to make it obvious we handle every path.

> files.rs:230
> +);
> +}
> +

We should add an example of a UNC and Drive path here to ensure that they don't 
get split.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7864/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7864

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7845: nodemap: add basic checking of the on disk nodemap content

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19299.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7845?vs=19182&id=19299

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7845/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7845

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/revlogutils/nodemap.py
  tests/test-completion.t
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -36,6 +36,9 @@
   00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ||
   00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ||
   00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ||
+  $ hg debugnodemap --check
+  revision in index:   5001
+  revision in nodemap: 5001
 
 add a new commit
 
@@ -48,3 +51,6 @@
   .hg/store/00changelog.n: size=18
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-.nd: size=245760, 
sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
+  $ hg debugnodemap --check
+  revision in index:   5002
+  revision in nodemap: 5002
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -290,7 +290,7 @@
   debugmanifestfulltextcache: clear, add
   debugmergestate: 
   debugnamecomplete: 
-  debugnodemap: dump-new, dump-disk
+  debugnodemap: dump-new, dump-disk, check
   debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, 
user, template
   debugp1copies: rev
   debugp2copies: rev
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -341,3 +341,37 @@
 else:
 block[idx] = _transform_rev(v)
 return block
+
+
+# debug utility
+
+
+def check_data(ui, index, data):
+"""verify that the provided nodemap data are valid for the given idex"""
+ret = 0
+ui.status((b"revision in index:   %d\n") % len(index))
+root = parse_data(data)
+all_revs = set(_all_revisions(root))
+ui.status((b"revision in nodemap: %d\n") % len(all_revs))
+for r in range(len(index)):
+if r not in all_revs:
+msg = b"  revision missing from nodemap: %d\n" % r
+ui.write_err(msg)
+ret = 1
+else:
+all_revs.remove(r)
+if all_revs:
+for r in sorted(all_revs):
+msg = b"  extra revision in  nodemap: %d\n" % r
+ui.write_err(msg)
+ret = 1
+return ret
+
+
+def _all_revisions(root):
+"""return all revisions stored in a Trie"""
+for block in _walk_trie(root):
+for v in block:
+if v is None or isinstance(v, Block):
+continue
+yield v
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2087,6 +2087,12 @@
 _(b'write a (binary) serialised (new) nodemap on stdin'),
 ),
 ('', b'dump-disk', False, _(b'dump on-disk data on stdin')),
+(
+'',
+b'check',
+False,
+_(b'check that the data on disk data are correct.'),
+),
 ],
 )
 def debugnodemap(ui, repo, **args):
@@ -2102,6 +2108,11 @@
 cl = unfi.changelog
 data = nodemap.persisted_data(cl)
 ui.write(data)
+elif args['check']:
+unfi = repo.unfiltered()
+cl = unfi.changelog
+data = nodemap.persisted_data(cl)
+return nodemap.check_data(ui, cl.index, data)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7886: nodemap: introduce append-only incremental update of the persisten data

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Rewriting the full nodemap for each transaction has a cost we would like to
  avoid. We introduce a new way to write persistent nodemap data by adding new
  information at the end of the file. Any new or updated block is added at the
  end of the file. The last block is the new root node.
  
  With this method, some of the blocks already on disk get "dereferenced" and
  become dead data. In later changesets, we'll start tracking the amount of dead
  data to eventually regenerate a full nodemap.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code 
reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-.nd: size=246144, 
sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-.nd: size=245760, 
sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
 if revlog.nodemap_file is None:
 msg = "calling persist nodemap on a revlog without the feature enableb"
 raise error.ProgrammingError(msg)
-if util.safehasattr(revlog.index, "nodemap_data_all"):
-data = revlog.index.nodemap_data_all()
+
+can_incremental = util.safehasattr(revlog.index, 
"nodemap_data_incremental")
+ondisk_docket = revlog._nodemap_docket
+
+# first attemp an incremental update of the data
+if can_incremental and ondisk_docket is not None:
+target_docket = revlog._nodemap_docket.copy()
+data = revlog.index.nodemap_data_incremental()
+datafile = _rawdata_filepath(revlog, target_docket)
+# EXP-TODO: if this is a cache, this should use a cache vfs, not a
+# store vfs
+with revlog.opener(datafile, 'a') as fd:
+fd.write(data)
 else:
-data = persistent_data(revlog.index)
-target_docket = NodeMapDocket()
-datafile = _rawdata_filepath(revlog, target_docket)
+# otherwise fallback to a full new export
+target_docket = NodeMapDocket()
+datafile = _rawdata_filepath(revlog, target_docket)
+if util.safehasattr(revlog.index, "nodemap_data_all"):
+data = revlog.index.nodemap_data_all()
+else:
+data = persistent_data(revlog.index)
+# EXP-TODO: if this is a cache, this should use a cache vfs, not a
+# store vfs
+with revlog.opener(datafile, 'w') as fd:
+fd.write(data)
+# EXP-TODO: if this is a cache, this should use a cache vfs, not a
+# store vfs
+with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
+fp.write(target_docket.serialize())
+revlog._nodemap_docket = target_docket
+# EXP-TODO: if the transaction abort, we should remove the new data and
+# reinstall the old one.
+
+# search for old index file in all cases, some older process might have
+# left one behind.
 olds = _other_rawdata_filepath(revlog, target_docket)
 if olds:
 realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
 tr.addpostclose(callback_id, cleanup)
-# EXP-TODO: if this is a cache, this should use a cache vfs, not a
-# store vfs
-with revlog.opener(datafile, 'w') as fd:
-fd.write(data)
-# EXP-TODO: if this is a cache, this should use a cache vfs, not a
-# store vfs
-with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
-fp.write(target_docket.serialize())
-revlog._nodemap_docket = target_docket
-# EXP-TODO: if the transaction abort, we should remove the new data and
-# reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
 return _dump_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+"""return the serialised data of a nodemap for a given index
+"""
+trie = _update_trie(index, root, last_rev)
+return _dump_trie(trie, existing_idx=max_idx)
+
+
 

D7884: nodemap: introduce an explicit class/object for the docket

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We are about to add more information to this docket; having a clear location
  to store it in memory will help.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7884

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -36,11 +36,11 @@
 if version != ONDISK_VERSION:
 return None
 offset += S_VERSION.size
-(uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
+(uid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
 offset += S_HEADER.size
-uid = pdata[offset : offset + uuid_size]
+docket = NodeMapDocket(pdata[offset : offset + uid_size])
 
-filename = _rawdata_filepath(revlog, uid)
+filename = _rawdata_filepath(revlog, docket)
 return revlog.opener.tryread(filename)
 
 
@@ -73,9 +73,9 @@
 data = revlog.index.nodemap_data_all()
 else:
 data = persistent_data(revlog.index)
-uid = _make_uid()
-datafile = _rawdata_filepath(revlog, uid)
-olds = _other_rawdata_filepath(revlog, uid)
+target_docket = NodeMapDocket()
+datafile = _rawdata_filepath(revlog, target_docket)
+olds = _other_rawdata_filepath(revlog, target_docket)
 if olds:
 realvfs = getattr(revlog, '_realopener', revlog.opener)
 
@@ -92,7 +92,7 @@
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
 with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
-fp.write(_serialize_docket(uid))
+fp.write(target_docket.serialize())
 # EXP-TODO: if the transaction abort, we should remove the new data and
 # reinstall the old one.
 
@@ -135,25 +135,39 @@
 return nodemod.hex(os.urandom(ID_SIZE))
 
 
-def _serialize_docket(uid):
-"""return serialized bytes for a docket using the passed uid"""
-data = []
-data.append(S_VERSION.pack(ONDISK_VERSION))
-data.append(S_HEADER.pack(len(uid)))
-data.append(uid)
-return b''.join(data)
+class NodeMapDocket(object):
+"""metadata associated with persistent nodemap data
+
+The persistent data may come from disk or be on their way to disk.
+"""
+
+def __init__(self, uid=None):
+if uid is None:
+uid = _make_uid()
+self.uid = uid
+
+def copy(self):
+return NodeMapDocket(uid=self.uid)
+
+def serialize(self):
+"""return serialized bytes for a docket using the passed uid"""
+data = []
+data.append(S_VERSION.pack(ONDISK_VERSION))
+data.append(S_HEADER.pack(len(self.uid)))
+data.append(self.uid)
+return b''.join(data)
 
 
-def _rawdata_filepath(revlog, uid):
+def _rawdata_filepath(revlog, docket):
 """The (vfs relative) nodemap's rawdata file for a given uid"""
 prefix = revlog.nodemap_file[:-2]
-return b"%s-%s.nd" % (prefix, uid)
+return b"%s-%s.nd" % (prefix, docket.uid)
 
 
-def _other_rawdata_filepath(revlog, uid):
+def _other_rawdata_filepath(revlog, docket):
 prefix = revlog.nodemap_file[:-2]
 pattern = re.compile(b"(^|/)%s-[0-9a-f]+.nd$" % prefix)
-new_file_path = _rawdata_filepath(revlog, uid)
+new_file_path = _rawdata_filepath(revlog, docket)
 new_file_name = revlog.opener.basename(new_file_path)
 dirpath = revlog.opener.dirname(new_file_path)
 others = []



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7883: nodemap: keep track of the ondisk id of nodemap blocks

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  If we are to incrementally update the files, we need to keep some details
  about the data we read.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7883

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -220,6 +220,11 @@
 
 contains up to 16 entry indexed from 0 to 15"""
 
+def __init__(self):
+super(Block, self).__init__()
+# If this block exist on disk, here is its ID
+self.ondisk_id = None
+
 def __iter__(self):
 return iter(self.get(i) for i in range(16))
 
@@ -330,8 +335,8 @@
 """Parse the serialization of an individual block
 """
 block = Block()
-ondisk_id = len(block_map)
-block_map[ondisk_id] = block
+block.ondisk_id = len(block_map)
+block_map[block.ondisk_id] = block
 values = S_BLOCK.unpack(block_data)
 for idx, v in enumerate(values):
 if v == NO_ENTRY:



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7846: nodemap: also check that revision and nodes match in the nodemap

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19300.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7846?vs=19183=19300

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7846/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7846

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -360,6 +360,19 @@
 ret = 1
 else:
 all_revs.remove(r)
+nm_rev = _find_node(root, nodemod.hex(index[r][7]))
+if nm_rev is None:
+msg = b"  revision node does not match any entries: %d\n" % r
+ui.write_err(msg)
+ret = 1
+elif nm_rev != r:
+msg = (
+b"  revision node does not match the expected revision: "
+b"%d != %d\n" % (r, nm_rev)
+)
+ui.write_err(msg)
+ret = 1
+
 if all_revs:
 for r in sorted(all_revs):
 msg = b"  extra revision in  nodemap: %d\n" % r
@@ -375,3 +388,11 @@
 if v is None or isinstance(v, Block):
 continue
 yield v
+
+
+def _find_node(block, node):
+"""find the revision associated with a given node"""
+entry = block.get(_to_int(node[0]))
+if isinstance(entry, dict):
+return _find_node(entry, node[1:])
+return entry



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7844: nodemap: code to parse a serialized nodemap

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19298.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7844?vs=19181=19298

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7844/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7844

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -311,3 +311,33 @@
 return block_map[id(item)]
 else:
 return _transform_rev(item)
+
+
+def parse_data(data):
+"""parse serialized nodemap data into a nodemap Trie"""
+if (len(data) % S_BLOCK.size) != 0:
+msg = "nodemap data size is not a multiple of block size (%d): %d"
+raise error.Abort(msg % (S_BLOCK.size, len(data)))
+if not data:
+return Block()
+block_map = {}
+for i in range(0, len(data), S_BLOCK.size):
+block = _parse_block(block_map, data[i : i + S_BLOCK.size])
+return block
+
+
+def _parse_block(block_map, block_data):
+"""Parse the serialization of an individual block
+"""
+block = Block()
+ondisk_id = len(block_map)
+block_map[ondisk_id] = block
+values = S_BLOCK.unpack(block_data)
+for idx, v in enumerate(values):
+if v == NO_ENTRY:
+continue
+elif v >= 0:
+block[idx] = block_map[v]
+else:
+block[idx] = _transform_rev(v)
+return block



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7843: nodemap: move the iteration inside the Block object

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19297.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7843?vs=19180=19297

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7843/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7843

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -220,7 +220,8 @@
 
 contains up to 16 entry indexed from 0 to 15"""
 
-pass
+def __iter__(self):
+return iter(self.get(i) for i in range(16))
 
 
 def _build_trie(index):
@@ -298,7 +299,7 @@
 Children block are assumed to be already serialized and present in
 block_map.
 """
-data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
+data = tuple(_to_value(v, block_map) for v in block_node)
 return S_BLOCK.pack(*data)
 
 



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7842: nodemap: use an explicit "Block" object in the reference implementation

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19296.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7842?vs=19179=19296

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7842/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7842

AFFECTED FILES
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -215,6 +215,14 @@
 return int(hex_digit, 16)
 
 
+class Block(dict):
+"""represent a block of the Trie
+
+contains up to 16 entry indexed from 0 to 15"""
+
+pass
+
+
 def _build_trie(index):
 """build a nodemap trie
 
@@ -223,7 +231,7 @@
 Each block is a dictionnary with key in `[0, 15]`. Value are either
 another block or a revision number.
 """
-root = {}
+root = Block()
 for rev in range(len(index)):
 hex = nodemod.hex(index[rev][7])
 _insert_into_block(index, 0, root, rev, hex)
@@ -252,7 +260,7 @@
 other_hex = nodemod.hex(index[entry][7])
 other_rev = entry
 while current_hex[level] == other_hex[level]:
-new = {}
+new = Block()
 block[_to_int(current_hex[level])] = new
 block = new
 level += 1



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7841: nodemap: add an optional `nodemap_add_full` method on indexes

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19295.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7841?vs=19178=19295

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7841/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7841

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py

CHANGE DETAILS

diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -15,6 +15,7 @@
 from .. import (
 error,
 node as nodemod,
+util,
 )
 
 
@@ -68,7 +69,10 @@
 if revlog.nodemap_file is None:
 msg = "calling persist nodemap on a revlog without the feature enableb"
 raise error.ProgrammingError(msg)
-data = persistent_data(revlog.index)
+if util.safehasattr(revlog.index, "nodemap_data_all"):
+data = revlog.index.nodemap_data_all()
+else:
+data = persistent_data(revlog.index)
 uid = _make_uid()
 datafile = _rawdata_filepath(revlog, uid)
 olds = _other_rawdata_filepath(revlog, uid)
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -149,6 +149,13 @@
 through the dedicated `devel.persistent-nodemap` config.
 """
 
+def nodemap_data_all(self):
+"""Return bytes containing a full serialization of a nodemap
+
+The nodemap should be valid for the full set of revisions in the
+index."""
+return nodemaputil.persistent_data(self)
+
 
 class InlinedIndexObject(BaseIndexObject):
 def __init__(self, data, inline=0):



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7839: nodemap: delete older raw data files when creating new ones

2020-01-15 Thread marmoute (Pierre-Yves David)
marmoute updated this revision to Diff 19294.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7839?vs=19176=19294

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7839/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7839

AFFECTED FILES
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -12,6 +12,8 @@
   $ hg debugbuilddag .+5000
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+  $ f --sha256 .hg/store/00changelog-*.nd
+  .hg/store/00changelog-.nd: 
sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
   size=245760, 
sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c
   $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
@@ -32,3 +34,15 @@
   00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ||
   00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ||
   00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ||
+
+add a new commit
+
+  $ hg up
+  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo foo > foo
+  $ hg add foo
+  $ hg ci -m 'foo'
+  $ f --size .hg/store/00changelog.n
+  .hg/store/00changelog.n: size=18
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-.nd: size=245760, 
sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -9,6 +9,7 @@
 from __future__ import absolute_import
 
 import os
+import re
 import struct
 
 from .. import (
@@ -70,6 +71,16 @@
 data = persistent_data(revlog.index)
 uid = _make_uid()
 datafile = _rawdata_filepath(revlog, uid)
+olds = _other_rawdata_filepath(revlog, uid)
+if olds:
+realvfs = getattr(revlog, '_realopener', revlog.opener)
+
+def cleanup(tr):
+for oldfile in olds:
+realvfs.tryunlink(oldfile)
+
+callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
+tr.addpostclose(callback_id, cleanup)
 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
 # store vfs
 with revlog.opener(datafile, 'w') as fd:
@@ -135,6 +146,19 @@
 return b"%s-%s.nd" % (prefix, uid)
 
 
+def _other_rawdata_filepath(revlog, uid):
+prefix = revlog.nodemap_file[:-2]
+pattern = re.compile(b"(^|/)%s-[0-9a-f]+.nd$" % prefix)
+new_file_path = _rawdata_filepath(revlog, uid)
+new_file_name = revlog.opener.basename(new_file_path)
+dirpath = revlog.opener.dirname(new_file_path)
+others = []
+for f in revlog.opener.listdir(dirpath):
+if pattern.match(f) and f != new_file_name:
+others.append(f)
+return others
+
+
 ### Nodemap Trie
 #
 # This is a simple reference implementation to compute and serialise a nodemap



To: marmoute, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7787: rust-nodemap: building blocks for nodetree structures

2020-01-15 Thread gracinet (Georges Racinet)
gracinet added a comment.


  @kevincox thanks for the review!

INLINE COMMENTS

> kevincox wrote in nodemap.rs:92
> I would call these `get` and `set`.

Yes, I suppose `read` and `write` feel like I/O. Will do.

> kevincox wrote in nodemap.rs:111
> You can use this helper: 
> https://doc.rust-lang.org/std/fmt/struct.Formatter.html#method.debug_map

Nice, thanks for the tip

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7787/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7787

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7862: dirstate: move rust fast-path calling code to its own method

2020-01-15 Thread Raphaël Gomès
Alphare added a comment.


  In D7862#115784 , @pulkit wrote:
  
  > Absorbing the following diff to make `test-check-format.t` happy:
  
  Sorry about that.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7862/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7862

To: Alphare, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7787: rust-nodemap: building blocks for nodetree structures

2020-01-15 Thread kevincox (Kevin Cox)
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> nodemap.rs:92
> +
> +fn read(, nybble: u8) -> Element {
> +Element::from(RawElement::from_be(self.0[nybble as usize]))

I would call these `get` and `set`.

> nodemap.rs:111
> +}
> +write!(f, "[{}]", inner.join(", "))
> +}

You can use this helper: 
https://doc.rust-lang.org/std/fmt/struct.Formatter.html#method.debug_map

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7787/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7787

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7879: sha1dc: use buffer protocol when parsing arguments

2020-01-15 Thread indygreg (Gregory Szorc)
Closed by commit rHGdc9b53482689: sha1dc: use buffer protocol when parsing 
arguments (authored by indygreg).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs 
Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7879?vs=19278=19293

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7879/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7879

AFFECTED FILES
  mercurial/thirdparty/sha1dc/cext.c
  tests/test-hashutil.py

CHANGE DETAILS

diff --git a/tests/test-hashutil.py b/tests/test-hashutil.py
--- a/tests/test-hashutil.py
+++ b/tests/test-hashutil.py
@@ -45,6 +45,26 @@
 h.digest(),
 )
 
+def test_bytes_like_types(self):
+h = self.hasher()
+h.update(bytearray(b'foo'))
+h.update(memoryview(b'baz'))
+self.assertEqual(
+'21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
+)
+
+h = self.hasher(bytearray(b'foo'))
+h.update(b'baz')
+self.assertEqual(
+'21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
+)
+
+h = self.hasher(memoryview(b'foo'))
+h.update(b'baz')
+self.assertEqual(
+'21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
+)
+
 
 class hashlibtests(unittest.TestCase, hashertestsbase):
 hasher = hashlib.sha1
diff --git a/mercurial/thirdparty/sha1dc/cext.c 
b/mercurial/thirdparty/sha1dc/cext.c
--- a/mercurial/thirdparty/sha1dc/cext.c
+++ b/mercurial/thirdparty/sha1dc/cext.c
@@ -25,8 +25,8 @@
 
 static int pysha1ctx_init(pysha1ctx *self, PyObject *args)
 {
-   const char *data = NULL;
-   Py_ssize_t len;
+   Py_buffer data;
+   data.obj = NULL;
 
SHA1DCInit(&(self->ctx));
/* We don't want "safe" sha1s, wherein sha1dc can give you a
@@ -34,11 +34,19 @@
   collision. We just want to detect collisions.
 */
SHA1DCSetSafeHash(&(self->ctx), 0);
-   if (!PyArg_ParseTuple(args, PY23("|s#", "|y#"), , )) {
+   if (!PyArg_ParseTuple(args, PY23("|s*", "|y*"), )) {
return -1;
}
-   if (data) {
-   SHA1DCUpdate(&(self->ctx), data, len);
+   if (data.obj) {
+   if (!PyBuffer_IsContiguous(, 'C') || data.ndim > 1) {
+   PyErr_SetString(PyExc_BufferError,
+   "buffer must be contiguous and single 
dimension");
+   PyBuffer_Release();
+   return -1;
+   }
+
+   SHA1DCUpdate(&(self->ctx), data.buf, data.len);
+   PyBuffer_Release();
}
return 0;
 }
@@ -50,12 +58,18 @@
 
 static PyObject *pysha1ctx_update(pysha1ctx *self, PyObject *args)
 {
-   const char *data;
-   Py_ssize_t len;
-   if (!PyArg_ParseTuple(args, PY23("s#", "y#"), , )) {
+   Py_buffer data;
+   if (!PyArg_ParseTuple(args, PY23("s*", "y*"), )) {
return NULL;
}
-   SHA1DCUpdate(&(self->ctx), data, len);
+   if (!PyBuffer_IsContiguous(, 'C') || data.ndim > 1) {
+   PyErr_SetString(PyExc_BufferError,
+   "buffer must be contiguous and single 
dimension");
+   PyBuffer_Release();
+   return NULL;
+   }
+   SHA1DCUpdate(&(self->ctx), data.buf, data.len);
+   PyBuffer_Release();
Py_RETURN_NONE;
 }
 



To: indygreg, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7881: lfs: check content length after downloading content

2020-01-15 Thread mharbison72 (Matt Harbison)
Closed by commit rHG0ee0a3f6a990: lfs: check content length after downloading 
content (authored by mharbison72).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7881?vs=19280=19291

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7881/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7881

AFFECTED FILES
  hgext/lfs/blobstore.py
  hgext/lfs/wireprotolfsserver.py
  tests/test-lfs-serve-access.t

CHANGE DETAILS

diff --git a/tests/test-lfs-serve-access.t b/tests/test-lfs-serve-access.t
--- a/tests/test-lfs-serve-access.t
+++ b/tests/test-lfs-serve-access.t
@@ -210,7 +210,7 @@
   > 
   > store = repo.svfs.lfslocalblobstore
   > class badstore(store.__class__):
-  > def download(self, oid, src):
+  > def download(self, oid, src, contentlength):
   > '''Called in the server to handle reading from the client in a
   > PUT request.'''
   > origread = src.read
@@ -218,7 +218,7 @@
   > # Simulate bad data/checksum failure from the client
   > return b'0' * len(origread(nbytes))
   > src.read = _badread
-  > super(badstore, self).download(oid, src)
+  > super(badstore, self).download(oid, src, contentlength)
   > 
   > def _read(self, vfs, oid, verify):
   > '''Called in the server to read data for a GET request, and 
then
@@ -351,8 +351,8 @@
   $LOCALIP - - [$ERRDATE$] HG error:   (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  Exception happened while processing 
request 
'/.hg/lfs/objects/b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c':
 (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  Traceback (most recent call last): (glob)
-  $LOCALIP - - [$ERRDATE$] HG error:  localstore.download(oid, req.bodyfh) 
(glob)
-  $LOCALIP - - [$ERRDATE$] HG error:  super(badstore, self).download(oid, 
src) (glob)
+  $LOCALIP - - [$ERRDATE$] HG error:  localstore.download(oid, req.bodyfh, 
req.headers[b'Content-Length'])
+  $LOCALIP - - [$ERRDATE$] HG error:  super(badstore, self).download(oid, 
src, contentlength)
   $LOCALIP - - [$ERRDATE$] HG error:  _(b'corrupt remote lfs object: %s') 
% oid (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  LfsCorruptionError: corrupt remote lfs 
object: b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c 
(no-py3 !)
   $LOCALIP - - [$ERRDATE$] HG error:  hgext.lfs.blobstore.LfsCorruptionError: 
corrupt remote lfs object: 
b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c (py3 !)
diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py
+++ b/hgext/lfs/wireprotolfsserver.py
@@ -327,7 +327,7 @@
 
 statusmessage = hgwebcommon.statusmessage
 try:
-localstore.download(oid, req.bodyfh)
+localstore.download(oid, req.bodyfh, 
req.headers[b'Content-Length'])
 res.status = statusmessage(HTTP_OK if existed else HTTP_CREATED)
 except blobstore.LfsCorruptionError:
 _logexception(req)
diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -155,15 +155,29 @@
 
 return self.vfs(oid, b'rb')
 
-def download(self, oid, src):
+def download(self, oid, src, content_length):
 """Read the blob from the remote source in chunks, verify the content,
 and write to this local blobstore."""
 sha256 = hashlib.sha256()
+size = 0
 
 with self.vfs(oid, b'wb', atomictemp=True) as fp:
 for chunk in util.filechunkiter(src, size=1048576):
 fp.write(chunk)
 sha256.update(chunk)
+size += len(chunk)
+
+# If the server advertised a length longer than what we actually
+# received, then we should expect that the server crashed while
+# producing the response (but the server has no way of telling us
+# that), and we really don't need to try to write the response to
+# the localstore, because it's not going to match the expected.
+if content_length is not None and int(content_length) != size:
+msg = (
+b"Response length (%s) does not match Content-Length "
+b"header (%d): likely server-side crash"
+)
+raise LfsRemoteError(_(msg) % (size, int(content_length)))
 
 realoid = node.hex(sha256.digest())
 if realoid != oid:
@@ -492,6 +506,7 @@
 response = b''
 try:
 with contextlib.closing(self.urlopener.open(request)) as res:
+contentlength = res.info().get(b"content-length")
 ui = self.ui  # Shorten debug lines
 if self.ui.debugflag:
  

D7882: lfs: avoid quadratic performance in processing server responses

2020-01-15 Thread mharbison72 (Matt Harbison)
Closed by commit rHGffac09da7a19: lfs: avoid quadratic performance in 
processing server responses (authored by mharbison72).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7882?vs=19281=19292

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7882/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7882

AFFECTED FILES
  hgext/lfs/blobstore.py

CHANGE DETAILS

diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -503,7 +503,6 @@
 for k, v in headers:
 request.add_header(pycompat.strurl(k), pycompat.strurl(v))
 
-response = b''
 try:
 with contextlib.closing(self.urlopener.open(request)) as res:
 contentlength = res.info().get(b"content-length")
@@ -520,11 +519,14 @@
 # blobstore
 localstore.download(oid, res, contentlength)
 else:
+blocks = []
 while True:
 data = res.read(1048576)
 if not data:
 break
-response += data
+blocks.append(data)
+
+response = b"".join(blocks)
 if response:
 ui.debug(b'lfs %s response: %s' % (action, response))
 except util.urlerr.httperror as ex:



To: mharbison72, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7880: lfs: rename a variable to clarify its use

2020-01-15 Thread mharbison72 (Matt Harbison)
Closed by commit rHG84f2becbd106: lfs: rename a variable to clarify its use 
(authored by mharbison72).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7880?vs=19279=19290

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7880/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7880

AFFECTED FILES
  hgext/lfs/blobstore.py

CHANGE DETAILS

diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -491,22 +491,22 @@
 
 response = b''
 try:
-with contextlib.closing(self.urlopener.open(request)) as req:
+with contextlib.closing(self.urlopener.open(request)) as res:
 ui = self.ui  # Shorten debug lines
 if self.ui.debugflag:
-ui.debug(b'Status: %d\n' % req.status)
+ui.debug(b'Status: %d\n' % res.status)
 # lfs-test-server and hg serve return headers in different
 # order
-headers = pycompat.bytestr(req.info()).strip()
+headers = pycompat.bytestr(res.info()).strip()
 ui.debug(b'%s\n' % 
b'\n'.join(sorted(headers.splitlines())))
 
 if action == b'download':
 # If downloading blobs, store downloaded data to local
 # blobstore
-localstore.download(oid, req)
+localstore.download(oid, res)
 else:
 while True:
-data = req.read(1048576)
+data = res.read(1048576)
 if not data:
 break
 response += data



To: mharbison72, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7878: sha1dc: use proper string functions on Python 2/3

2020-01-15 Thread indygreg (Gregory Szorc)
Closed by commit rHG29a110e2776e: sha1dc: use proper string functions on Python 
2/3 (authored by indygreg).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7878?vs=19277&id=19289

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7878/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7878

AFFECTED FILES
  mercurial/thirdparty/sha1dc/cext.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/sha1dc/cext.c 
b/mercurial/thirdparty/sha1dc/cext.c
--- a/mercurial/thirdparty/sha1dc/cext.c
+++ b/mercurial/thirdparty/sha1dc/cext.c
@@ -95,7 +95,7 @@
hexhash[i * 2] = hexdigit[hash[i] >> 4];
hexhash[i * 2 + 1] = hexdigit[hash[i] & 15];
}
-   return PyString_FromStringAndSize(hexhash, 40);
+   return PY23(PyString_FromStringAndSize, 
PyUnicode_FromStringAndSize)(hexhash, 40);
 }
 
 static PyTypeObject sha1ctxType;



To: indygreg, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7875: packaging: leverage os.path.relpath() in setup.py

2020-01-15 Thread martinvonz (Martin von Zweigbergk)
Closed by commit rHG4e05272dd681: packaging: leverage os.path.relpath() in 
setup.py (authored by martinvonz).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7875?vs=19268&id=19286

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7875/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7875

AFFECTED FILES
  setup.py

CHANGE DETAILS

diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -1064,11 +1064,7 @@
 # absolute path instead
 libdir = self.install_lib
 else:
-common = os.path.commonprefix((self.install_dir, self.install_lib))
-rest = self.install_dir[len(common) :]
-uplevel = len([n for n in os.path.split(rest) if n])
-
-libdir = uplevel * ('..' + os.sep) + self.install_lib[len(common) 
:]
+libdir = os.path.relpath(self.install_lib, self.install_dir)
 
 for outfile in self.outfiles:
 with open(outfile, 'rb') as fp:



To: martinvonz, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7876: sha1dc: manually define integer types on msvc 2008

2020-01-15 Thread indygreg (Gregory Szorc)
Closed by commit rHGef36156eac9f: sha1dc: manually define integer types on msvc 
2008 (authored by indygreg).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7876?vs=19275&id=19287

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7876/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7876

AFFECTED FILES
  mercurial/thirdparty/sha1dc/lib/sha1.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/sha1dc/lib/sha1.h 
b/mercurial/thirdparty/sha1dc/lib/sha1.h
--- a/mercurial/thirdparty/sha1dc/lib/sha1.h
+++ b/mercurial/thirdparty/sha1dc/lib/sha1.h
@@ -13,7 +13,14 @@
 #endif
 
 #ifndef SHA1DC_NO_STANDARD_INCLUDES
+/* PY27 this can be changed to a straight #include once Python 2.7 is
+   dropped, since this is for MSVC 2008 support. */
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
 #include <stdint.h>
+#else
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#endif
 #endif
 
 /* sha-1 compression function that takes an already expanded message, and 
additionally store intermediate states */



To: indygreg, #hg-reviewers, pulkit
Cc: durin42, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7877: sha1dc: declare all variables at begininng of block

2020-01-15 Thread indygreg (Gregory Szorc)
Closed by commit rHGc3f741bb2f33: sha1dc: declare all variables at begininng of 
block (authored by indygreg).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7877?vs=19276&id=19288

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7877/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7877

AFFECTED FILES
  mercurial/thirdparty/sha1dc/cext.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/sha1dc/cext.c 
b/mercurial/thirdparty/sha1dc/cext.c
--- a/mercurial/thirdparty/sha1dc/cext.c
+++ b/mercurial/thirdparty/sha1dc/cext.c
@@ -84,13 +84,14 @@
 
 static PyObject *pysha1ctx_hexdigest(pysha1ctx *self)
 {
+   static const char hexdigit[] = "0123456789abcdef";
unsigned char hash[20];
+   char hexhash[40];
+   int i;
if (!finalize(self->ctx, hash)) {
return NULL;
}
-   char hexhash[40];
-   static const char hexdigit[] = "0123456789abcdef";
-   for (int i = 0; i < 20; ++i) {
+   for (i = 0; i < 20; ++i) {
hexhash[i * 2] = hexdigit[hash[i] >> 4];
hexhash[i * 2 + 1] = hexdigit[hash[i] & 15];
}



To: indygreg, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7862: dirstate: move rust fast-path calling code to its own method

2020-01-15 Thread pulkit (Pulkit Goyal)
pulkit added a comment.


  Absorbing the following diff to make `test-check-format.t` happy:
  
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -1089,9 +1089,7 @@ class dirstate(object):
 # how to read the config file.
 numcpus = self._ui.configint(b"worker", b"numcpus")
 if numcpus is not None:
-encoding.environ.setdefault(
-b'RAYON_NUM_THREADS', b'%d' % numcpus
-)
+encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % 
numcpus)
 
 workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
 if not workers_enabled:

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7862/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7862

To: Alphare, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7730: rebase: make sure pruning does not confuse rebase (issue6180)

2020-01-15 Thread khanchi97 (Sushil khanchi)
khanchi97 added inline comments.

INLINE COMMENTS

> martinvonz wrote in rebase.py:597
> I think it's incorrect that rebase sets two parents while the merge is being 
> resolved, but that's out of scope for this patch.

I will look into it.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7730/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7730

To: khanchi97, martinvonz, #hg-reviewers
Cc: pulkit, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7730: rebase: make sure pruning does not confuse rebase (issue6180)

2020-01-15 Thread khanchi97 (Sushil khanchi)
khanchi97 added a comment.


  In D7730#114953, @pulkit wrote:
  
  > Unrelated to the fix, we need better way to skip commits during rebasing. 
Pruning manually is not a good option, IIRC git rebase have a `--skip` flag.
  
  Yeah, that's a good idea. We should also have --skip flag to skip the commit 
on which rebase got interrupted.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7730/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7730

To: khanchi97, martinvonz, #hg-reviewers
Cc: pulkit, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7730: rebase: make sure pruning does not confuse rebase (issue6180)

2020-01-15 Thread khanchi97 (Sushil khanchi)
khanchi97 updated this revision to Diff 19283.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7730?vs=18939&id=19283

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7730/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7730

AFFECTED FILES
  hgext/rebase.py
  tests/test-rebase-conflicts.t

CHANGE DETAILS

diff --git a/tests/test-rebase-conflicts.t b/tests/test-rebase-conflicts.t
--- a/tests/test-rebase-conflicts.t
+++ b/tests/test-rebase-conflicts.t
@@ -476,14 +476,13 @@
   $ hg resolve -m
   (no more unresolved files)
   continue: hg rebase --continue
-XXX: it should have rebased revision 3 since it made changes unrelated to
-destination, so no reason to say "its destination already has all its changes"
   $ hg rebase -c
   note: not rebasing 2:06a50ac6b5ab "conflict in a", it has no successor
   rebasing 3:aea370672fd7 "add b" (tip)
-  note: not rebasing 3:aea370672fd7 "add b" (tip), its destination already has 
all its changes
   $ hg tglog
-  @  1:draft 'edit a'
+  @  4:draft 'add b'
+  |
+  o  1:draft 'edit a'
   |
   o  0:draft 'add a'
   
diff --git a/hgext/rebase.py b/hgext/rebase.py
--- a/hgext/rebase.py
+++ b/hgext/rebase.py
@@ -594,6 +594,10 @@
 adjustdest(repo, rev, self.destmap, self.state, self.skipped)
 )
 self.state[rev] = dest
+# since we are done, make sure wdir has one parent (issue6180)
+if len(self.wctx.parents()) == 2:
+p1 = self.wctx.p1().node()
+repo.setparents(p1)
 elif self.state[rev] == revtodo:
 ui.status(_(b'rebasing %s\n') % desc)
 progressfn(ctx)



To: khanchi97, martinvonz, #hg-reviewers
Cc: pulkit, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7728: rebase: add test to demonstrate issue6180

2020-01-15 Thread khanchi97 (Sushil khanchi)
khanchi97 updated this revision to Diff 19282.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7728?vs=18937&id=19282

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7728/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7728

AFFECTED FILES
  tests/test-rebase-conflicts.t

CHANGE DETAILS

diff --git a/tests/test-rebase-conflicts.t b/tests/test-rebase-conflicts.t
--- a/tests/test-rebase-conflicts.t
+++ b/tests/test-rebase-conflicts.t
@@ -429,3 +429,61 @@
   |/
   o  0:draft 'A'
   
+Test to make sure that pruning while rebasing doesn't confuse rebase 
(issue6180)
+  $ cat >> $HGRCPATH << EOF
+  > [experimental]
+  > evolution.createmarkers=True
+  > evolution.allowunstable=True
+  > EOF
+
+The test case below starts a rebase of 2 commits and runs into conflicts on
+the first one. We then decide we don't want the changes from that commit, so
+we revert the changes and also prune the original commit.
+Now what we expect from `hg rebase --continue` is that it skip rebasing first
+one (since it's pruned) but don't skip second one.
+
+  $ hg init issue6180
+  $ cd issue6180
+  $ echo 0 > a
+  $ hg ci -Aqm 'add a'
+  $ echo 1 > a
+  $ hg ci -m 'edit a'
+  $ hg co -q 0
+  $ echo 2 > a
+  $ hg ci -qm 'conflict in a'
+  $ echo 3 > b
+  $ hg ci -Aqm 'add b'
+  $ hg tglog
+  @  3:draft 'add b'
+  |
+  o  2:draft 'conflict in a'
+  |
+  | o  1:draft 'edit a'
+  |/
+  o  0:draft 'add a'
+  
+  $ hg rebase -d 1
+  rebasing 2:06a50ac6b5ab "conflict in a"
+  merging a
+  warning: conflicts while merging a! (edit, then use 'hg resolve --mark')
+  unresolved conflicts (see hg resolve, then hg rebase --continue)
+  [1]
+  $ echo 1 > a
+  $ hg debugobsolete $(hg id -ir 2 --debug)
+  1 new obsolescence markers
+  obsoleted 1 changesets
+  1 new orphan changesets
+  $ hg resolve -m
+  (no more unresolved files)
+  continue: hg rebase --continue
+XXX: it should have rebased revision 3 since it made changes unrelated to
+destination, so no reason to say "its destination already has all its changes"
+  $ hg rebase -c
+  note: not rebasing 2:06a50ac6b5ab "conflict in a", it has no successor
+  rebasing 3:aea370672fd7 "add b" (tip)
+  note: not rebasing 3:aea370672fd7 "add b" (tip), its destination already has 
all its changes
+  $ hg tglog
+  @  1:draft 'edit a'
+  |
+  o  0:draft 'add a'
+  



To: khanchi97, #hg-reviewers
Cc: pulkit, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel