D4075: changegroup: capture revision delta in a data structure
This revision was automatically updated to reflect the committed changes. Closed by commit rHG23d582caae30: changegroup: capture revision delta in a data structure (authored by indygreg, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D4075?vs=9837=9951 REVISION DETAIL https://phab.mercurial-scm.org/D4075 AFFECTED FILES mercurial/changegroup.py CHANGE DETAILS diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -19,6 +19,10 @@ short, ) +from .thirdparty import ( +attr, +) + from . import ( dagutil, error, @@ -494,6 +498,26 @@ return d return readexactly(self._fh, n) +@attr.s(slots=True, frozen=True) +class revisiondelta(object): +"""Describes a delta entry in a changegroup. + +Captured data is sufficient to serialize the delta into multiple +formats. +""" +# 20 byte node of this revision. +node = attr.ib() +# 20 byte nodes of parent revisions. +p1node = attr.ib() +p2node = attr.ib() +# 20 byte node of node this delta is against. +basenode = attr.ib() +# 20 byte node of changeset revision this delta is associated with. +linknode = attr.ib() +# 2 bytes of flags to apply to revision data. +flags = attr.ib() +# Iterable of chunks holding raw delta data. +deltachunks = attr.ib() class cg1packer(object): deltaheader = _CHANGEGROUPV1_DELTA_HEADER @@ -899,13 +923,25 @@ def revchunk(self, store, rev, prev, linknode): if util.safehasattr(self, 'full_nodes'): -fn = self._revchunknarrow +fn = self._revisiondeltanarrow else: -fn = self._revchunknormal +fn = self._revisiondeltanormal + +delta = fn(store, rev, prev, linknode) +if not delta: +return -return fn(store, rev, prev, linknode) +meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node, + delta.basenode, delta.linknode, + delta.flags) +l = len(meta) + sum(len(x) for x in delta.deltachunks) -def _revchunknormal(self, store, rev, prev, linknode): +yield chunkheader(l) +yield meta +for x in delta.deltachunks: +yield x + +def _revisiondeltanormal(self, store, rev, prev, linknode): node = store.node(rev) p1, p2 = store.parentrevs(rev) base = self.deltaparent(store, rev, p1, p2, prev) @@ -927,16 +963,18 @@ else: delta = store.revdiff(base, rev) p1n, p2n = store.parents(node) -basenode = store.node(base) -flags = store.flags(rev) -meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags) -meta += prefix -l = len(meta) + len(delta) -yield chunkheader(l) -yield meta -yield delta -def _revchunknarrow(self, store, rev, prev, linknode): +return revisiondelta( +node=node, +p1node=p1n, +p2node=p2n, +basenode=store.node(base), +linknode=linknode, +flags=store.flags(rev), +deltachunks=(prefix, delta), +) + +def _revisiondeltanarrow(self, store, rev, prev, linknode): # build up some mapping information that's useful later. See # the local() nested function below. if not self.changelog_done: @@ -950,9 +988,7 @@ # This is a node to send in full, because the changeset it # corresponds to was a full changeset. if linknode in self.full_nodes: -for x in self._revchunknormal(store, rev, prev, linknode): -yield x -return +return self._revisiondeltanormal(store, rev, prev, linknode) # At this point, a node can either be one we should skip or an # ellipsis. If it's not an ellipsis, bail immediately. @@ -1043,16 +1079,20 @@ p1n, p2n = store.node(p1), store.node(p2) flags = store.flags(rev) flags |= revlog.REVIDX_ELLIPSIS -meta = self.builddeltaheader( -n, p1n, p2n, nullid, linknode, flags) + # TODO: try and actually send deltas for ellipsis data blocks data = store.revision(n) diffheader = mdiff.trivialdiffheader(len(data)) -l = len(meta) + len(diffheader) + len(data) -yield ''.join((chunkheader(l), - meta, - diffheader, - data)) + +return revisiondelta( +node=n, +p1node=p1n, +p2node=p2n, +basenode=nullid, +linknode=linknode, +flags=flags, +deltachunks=(diffheader, data), +) def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): #
D4075: changegroup: capture revision delta in a data structure
indygreg created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY The current changegroup generation code is tightly coupled to the revlog API. This tight coupling makes it difficult to implement alternate storage backends without requiring a large surface area of the revlog API to be exposed. This is not desirable. In order to support changegroup generation with non-revlog storage, we'll need to abstract the concept of delta generation. This commit is the first step down that road. We introduce a data structure for representing a delta in a changegroup. The API still leaves a lot to be desired. But at least we now have separation between data and actions performed on it. As part of this, we tweak behavior slightly: we no longer concatenate the delta prefix with the metadata header. Instead, we track and emit the prefix as a separate chunk. This shouldn't have any meaningful impact since all the chunks just get sent to the wire, the compressor, etc. Because we're introducing a new object, this does add some overhead to changegroup execution. `hg perfchangegroupchangelog` on my clone of the Mercurial repo (~40,000 visible revisions in the changelog) slows down a bit: ! wall 1.268600 comb 1.27 user 1.27 sys 0.00 (best of 8) ! wall 1.419479 comb 1.41 user 1.41 sys 0.00 (best of 8) With for `hg bundle -t none-v2 -a /dev/null`: before: real 6.610 secs (user 6.460+0.000 sys 0.140+0.000) after: real 7.210 secs (user 7.060+0.000 sys 0.140+0.000) I plan to claw back this regression in future commits. And I may even do away with this data structure once the refactor is complete. For now, it makes things easier to comprehend. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D4075 AFFECTED FILES mercurial/changegroup.py CHANGE DETAILS diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -19,6 +19,10 @@ short, ) +from .thirdparty import ( +attr, +) + from . import ( dagutil, error, @@ -497,6 +501,26 @@ return d return readexactly(self._fh, n) +@attr.s(slots=True, frozen=True) +class revisiondelta(object): +"""Describes a delta entry in a changegroup. + +Captured data is sufficient to serialize the delta into multiple +formats. +""" +# 20 byte node of this revision. +node = attr.ib() +# 20 byte nodes of parent revisions. +p1node = attr.ib() +p2node = attr.ib() +# 20 byte node of node this delta is against. +basenode = attr.ib() +# 20 byte node of changeset revision this delta is associated with. +linknode = attr.ib() +# 2 bytes of flags to apply to revision data. +flags = attr.ib() +# Iterable of chunks holding raw delta data. +deltachunks = attr.ib() class cg1packer(object): deltaheader = _CHANGEGROUPV1_DELTA_HEADER @@ -902,13 +926,25 @@ def revchunk(self, store, rev, prev, linknode): if util.safehasattr(self, 'full_nodes'): -fn = self._revchunknarrow +fn = self._revisiondeltanarrow else: -fn = self._revchunknormal +fn = self._revisiondeltanormal + +delta = fn(store, rev, prev, linknode) +if not delta: +return -return fn(store, rev, prev, linknode) +meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node, + delta.basenode, delta.linknode, + delta.flags) +l = len(meta) + sum(len(x) for x in delta.deltachunks) -def _revchunknormal(self, store, rev, prev, linknode): +yield chunkheader(l) +yield meta +for x in delta.deltachunks: +yield x + +def _revisiondeltanormal(self, store, rev, prev, linknode): node = store.node(rev) p1, p2 = store.parentrevs(rev) base = self.deltaparent(store, rev, p1, p2, prev) @@ -930,16 +966,18 @@ else: delta = store.revdiff(base, rev) p1n, p2n = store.parents(node) -basenode = store.node(base) -flags = store.flags(rev) -meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags) -meta += prefix -l = len(meta) + len(delta) -yield chunkheader(l) -yield meta -yield delta -def _revchunknarrow(self, store, rev, prev, linknode): +return revisiondelta( +node=node, +p1node=p1n, +p2node=p2n, +basenode=store.node(base), +linknode=linknode, +flags=store.flags(rev), +deltachunks=(prefix, delta), +) + +def _revisiondeltanarrow(self, store, rev, prev, linknode): # build up some mapping information that's