D4075: changegroup: capture revision delta in a data structure

2018-08-06 Thread indygreg (Gregory Szorc)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG23d582caae30: changegroup: capture revision delta in a data 
structure (authored by indygreg, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4075?vs=9837=9951

REVISION DETAIL
  https://phab.mercurial-scm.org/D4075

AFFECTED FILES
  mercurial/changegroup.py

CHANGE DETAILS

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -19,6 +19,10 @@
 short,
 )
 
+from .thirdparty import (
+attr,
+)
+
 from . import (
 dagutil,
 error,
@@ -494,6 +498,26 @@
 return d
 return readexactly(self._fh, n)
 
+@attr.s(slots=True, frozen=True)
+class revisiondelta(object):
+"""Describes a delta entry in a changegroup.
+
+Captured data is sufficient to serialize the delta into multiple
+formats.
+"""
+# 20 byte node of this revision.
+node = attr.ib()
+# 20 byte nodes of parent revisions.
+p1node = attr.ib()
+p2node = attr.ib()
+# 20 byte node of node this delta is against.
+basenode = attr.ib()
+# 20 byte node of changeset revision this delta is associated with.
+linknode = attr.ib()
+# 2 bytes of flags to apply to revision data.
+flags = attr.ib()
+# Iterable of chunks holding raw delta data.
+deltachunks = attr.ib()
 
 class cg1packer(object):
 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
@@ -899,13 +923,25 @@
 
 def revchunk(self, store, rev, prev, linknode):
 if util.safehasattr(self, 'full_nodes'):
-fn = self._revchunknarrow
+fn = self._revisiondeltanarrow
 else:
-fn = self._revchunknormal
+fn = self._revisiondeltanormal
+
+delta = fn(store, rev, prev, linknode)
+if not delta:
+return
 
-return fn(store, rev, prev, linknode)
+meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
+ delta.basenode, delta.linknode,
+ delta.flags)
+l = len(meta) + sum(len(x) for x in delta.deltachunks)
 
-def _revchunknormal(self, store, rev, prev, linknode):
+yield chunkheader(l)
+yield meta
+for x in delta.deltachunks:
+yield x
+
+def _revisiondeltanormal(self, store, rev, prev, linknode):
 node = store.node(rev)
 p1, p2 = store.parentrevs(rev)
 base = self.deltaparent(store, rev, p1, p2, prev)
@@ -927,16 +963,18 @@
 else:
 delta = store.revdiff(base, rev)
 p1n, p2n = store.parents(node)
-basenode = store.node(base)
-flags = store.flags(rev)
-meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
-meta += prefix
-l = len(meta) + len(delta)
-yield chunkheader(l)
-yield meta
-yield delta
 
-def _revchunknarrow(self, store, rev, prev, linknode):
+return revisiondelta(
+node=node,
+p1node=p1n,
+p2node=p2n,
+basenode=store.node(base),
+linknode=linknode,
+flags=store.flags(rev),
+deltachunks=(prefix, delta),
+)
+
+def _revisiondeltanarrow(self, store, rev, prev, linknode):
 # build up some mapping information that's useful later. See
 # the local() nested function below.
 if not self.changelog_done:
@@ -950,9 +988,7 @@
 # This is a node to send in full, because the changeset it
 # corresponds to was a full changeset.
 if linknode in self.full_nodes:
-for x in self._revchunknormal(store, rev, prev, linknode):
-yield x
-return
+return self._revisiondeltanormal(store, rev, prev, linknode)
 
 # At this point, a node can either be one we should skip or an
 # ellipsis. If it's not an ellipsis, bail immediately.
@@ -1043,16 +1079,20 @@
 p1n, p2n = store.node(p1), store.node(p2)
 flags = store.flags(rev)
 flags |= revlog.REVIDX_ELLIPSIS
-meta = self.builddeltaheader(
-n, p1n, p2n, nullid, linknode, flags)
+
 # TODO: try and actually send deltas for ellipsis data blocks
 data = store.revision(n)
 diffheader = mdiff.trivialdiffheader(len(data))
-l = len(meta) + len(diffheader) + len(data)
-yield ''.join((chunkheader(l),
-   meta,
-   diffheader,
-   data))
+
+return revisiondelta(
+node=n,
+p1node=p1n,
+p2node=p2n,
+basenode=nullid,
+linknode=linknode,
+flags=flags,
+deltachunks=(diffheader, data),
+)
 
 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
 # 

D4075: changegroup: capture revision delta in a data structure

2018-08-03 Thread indygreg (Gregory Szorc)
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The current changegroup generation code is tightly coupled to
  the revlog API. This tight coupling makes it difficult to implement
  alternate storage backends without requiring a large surface area
  of the revlog API to be exposed. This is not desirable.
  
  In order to support changegroup generation with non-revlog storage,
  we'll need to abstract the concept of delta generation.
  
  This commit is the first step down that road. We introduce a
  data structure for representing a delta in a changegroup.
  
  The API still leaves a lot to be desired. But at least we now
  have separation between data and actions performed on it.
  
  As part of this, we tweak behavior slightly: we no longer
  concatenate the delta prefix with the metadata header. Instead,
  we track and emit the prefix as a separate chunk. This shouldn't
  have any meaningful impact since all the chunks just get sent to
  the wire, the compressor, etc.
  
  Because we're introducing a new object, this does add some
  overhead to changegroup execution. `hg perfchangegroupchangelog`
  on my clone of the Mercurial repo (~40,000 visible revisions in
  the changelog) slows down a bit:
  
  ! wall 1.268600 comb 1.27 user 1.27 sys 0.00 (best of 8)
  ! wall 1.419479 comb 1.41 user 1.41 sys 0.00 (best of 8)
  
  With for `hg bundle -t none-v2 -a /dev/null`:
  
  before: real 6.610 secs (user 6.460+0.000 sys 0.140+0.000)
  after:  real 7.210 secs (user 7.060+0.000 sys 0.140+0.000)
  
  I plan to claw back this regression in future commits. And I may
  even do away with this data structure once the refactor is complete.
  For now, it makes things easier to comprehend.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4075

AFFECTED FILES
  mercurial/changegroup.py

CHANGE DETAILS

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -19,6 +19,10 @@
 short,
 )
 
+from .thirdparty import (
+attr,
+)
+
 from . import (
 dagutil,
 error,
@@ -497,6 +501,26 @@
 return d
 return readexactly(self._fh, n)
 
+@attr.s(slots=True, frozen=True)
+class revisiondelta(object):
+"""Describes a delta entry in a changegroup.
+
+Captured data is sufficient to serialize the delta into multiple
+formats.
+"""
+# 20 byte node of this revision.
+node = attr.ib()
+# 20 byte nodes of parent revisions.
+p1node = attr.ib()
+p2node = attr.ib()
+# 20 byte node of node this delta is against.
+basenode = attr.ib()
+# 20 byte node of changeset revision this delta is associated with.
+linknode = attr.ib()
+# 2 bytes of flags to apply to revision data.
+flags = attr.ib()
+# Iterable of chunks holding raw delta data.
+deltachunks = attr.ib()
 
 class cg1packer(object):
 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
@@ -902,13 +926,25 @@
 
 def revchunk(self, store, rev, prev, linknode):
 if util.safehasattr(self, 'full_nodes'):
-fn = self._revchunknarrow
+fn = self._revisiondeltanarrow
 else:
-fn = self._revchunknormal
+fn = self._revisiondeltanormal
+
+delta = fn(store, rev, prev, linknode)
+if not delta:
+return
 
-return fn(store, rev, prev, linknode)
+meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
+ delta.basenode, delta.linknode,
+ delta.flags)
+l = len(meta) + sum(len(x) for x in delta.deltachunks)
 
-def _revchunknormal(self, store, rev, prev, linknode):
+yield chunkheader(l)
+yield meta
+for x in delta.deltachunks:
+yield x
+
+def _revisiondeltanormal(self, store, rev, prev, linknode):
 node = store.node(rev)
 p1, p2 = store.parentrevs(rev)
 base = self.deltaparent(store, rev, p1, p2, prev)
@@ -930,16 +966,18 @@
 else:
 delta = store.revdiff(base, rev)
 p1n, p2n = store.parents(node)
-basenode = store.node(base)
-flags = store.flags(rev)
-meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
-meta += prefix
-l = len(meta) + len(delta)
-yield chunkheader(l)
-yield meta
-yield delta
 
-def _revchunknarrow(self, store, rev, prev, linknode):
+return revisiondelta(
+node=node,
+p1node=p1n,
+p2node=p2n,
+basenode=store.node(base),
+linknode=linknode,
+flags=store.flags(rev),
+deltachunks=(prefix, delta),
+)
+
+def _revisiondeltanarrow(self, store, rev, prev, linknode):
 # build up some mapping information that's