[PATCH] setup: explain to distutils how we write rc versions
# HG changeset patch # User "Paul Morelle https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 5f86fca43d2a setup: explain to distutils how we write rc versions When we use a rc version number (e.g. 4.8rc0), bdist_msi is using distutils.StrictVersion to parse it into a tuple of numbers. By default, StrictVersion.version_re only recognizes [ab] for alpha/beta, where mercurial may use '-rc' or 'rc'. This change makes StrictVersion parse correctly our version numbers, so that bdist_msi doesn't fail on rc versions. diff -r 5e5c8f2a1eb5 -r 5f86fca43d2a setup.py --- a/setup.py Tue Oct 23 21:11:13 2018 +0900 +++ b/setup.py Wed Oct 31 20:32:42 2018 +0100 @@ -168,6 +168,9 @@ from distutils.sysconfig import get_python_inc, get_config_var from distutils.version import StrictVersion +# Explain to distutils.StrictVersion how our release candidates are versionned +StrictVersion.version_re = re.compile(r'^(\d+)\.(\d+)(\.(\d+))?-?(rc(\d+))?$') + def write_if_changed(path, content): """Write content to a file iff the content hasn't changed.""" if os.path.exists(path): ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 4] revlog: refactor out the rev-oriented part of commonancestorheads
# HG changeset patch # User Boris Feld # Date 1529621811 -3600 # Thu Jun 21 23:56:51 2018 +0100 # Node ID 494f5f95311e3b36a01cca745e52f536c3977a5c # Parent c6a8430582d584770c873a3b6234750482b9b65e # EXP-Topic descendant # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 494f5f95311e revlog: refactor out the rev-oriented part of commonancestorheads We plan to use this in a function taking revs as argument. Round trips to nodes seem silly. diff -r c6a8430582d5 -r 494f5f95311e mercurial/revlog.py --- a/mercurial/revlog.py Thu Jun 21 23:53:43 2018 +0100 +++ b/mercurial/revlog.py Thu Jun 21 23:56:51 2018 +0100 @@ -1390,11 +1390,16 @@ def commonancestorsheads(self, a, b): """calculate all the heads of the common ancestors of nodes a and b""" a, b = self.rev(a), self.rev(b) +ancs = self._commonancestorsheads(a, b) +return pycompat.maplist(self.node, ancs) + +def _commonancestorsheads(self, *revs): +"""calculate all the heads of the common ancestors of revs""" try: -ancs = self.index.commonancestorsheads(a, b) +ancs = self.index.commonancestorsheads(*revs) except (AttributeError, OverflowError): # C implementation failed -ancs = ancestor.commonancestorsheads(self.parentrevs, a, b) -return pycompat.maplist(self.node, ancs) +ancs = ancestor.commonancestorsheads(self.parentrevs, *revs) +return ancs def isancestor(self, a, b): """return True if node a is an ancestor of node b ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 4 of 4] revlog: reuse 'descendant' implementation in 'isancestor'
# HG changeset patch # User Boris Feld # Date 1529622442 -3600 # Fri Jun 22 00:07:22 2018 +0100 # Node ID 6bfe8fc36b4e20fcdf6cc49fe9ddb6e79bcf213f # Parent 5ea9c5d20ecc1aac2aecdd4c0902b3cd470b04d5 # EXP-Topic descendant # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 6bfe8fc36b4e revlog: reuse 'descendant' implementation in 'isancestor' The two functions do the same thing, but one takes nodes while the other takes revs. Using one to implement the other makes sense. We should probably clean up the API at some point to avoid having so many similar functions. However, we focus on an efficient implementation for now. diff -r 5ea9c5d20ecc -r 6bfe8fc36b4e mercurial/revlog.py --- a/mercurial/revlog.py Fri Jun 22 00:05:20 2018 +0100 +++ b/mercurial/revlog.py Fri Jun 22 00:07:22 2018 +0100 @@ -1404,7 +1404,8 @@ The implementation of this is trivial but the use of commonancestorsheads is not.""" -return a in self.commonancestorsheads(a, b) +a, b = self.rev(a), self.rev(b) +return self.descendant(a, b) def ancestor(self, a, b): """calculate the "best" common ancestor of nodes a and b""" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 4] revlog: efficient implementation of 'descendant'
# HG changeset patch # User Boris Feld # Date 1529622320 -3600 # Fri Jun 22 00:05:20 2018 +0100 # Node ID 5ea9c5d20ecc1aac2aecdd4c0902b3cd470b04d5 # Parent 494f5f95311e3b36a01cca745e52f536c3977a5c # EXP-Topic descendant # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 5ea9c5d20ecc revlog: efficient implementation of 'descendant' Iterating over descendants is costly, because there are no "parent -> children" pointers. Walking the other way around is much more efficient, especially on large repositories, where descendant walks can cost seconds. On the other hand, common ancestors code follows links in the right direction and has a compiled implementation. In real life usage, this saved up to 80s during some pull operations, where descendant test happens in extension code. diff -r 494f5f95311e -r 5ea9c5d20ecc mercurial/revlog.py --- a/mercurial/revlog.py Thu Jun 21 23:56:51 2018 +0100 +++ b/mercurial/revlog.py Fri Jun 22 00:05:20 2018 +0100 @@ -1376,16 +1376,14 @@ return c def descendant(self, start, end): +"""True if revision 'end' is an descendant of revision 'start' + +A revision is considered as a descendant of itself.""" if start == nullrev: return True elif start == end: return True -for i in self.descendants([start]): -if i == end: -return True -elif i > end: -break -return False +return start in self._commonancestorsheads(start, end) def commonancestorsheads(self, a, b): """calculate all the heads of the common ancestors of nodes a and b""" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 4] revlog: do inclusive descendant testing (API)
# HG changeset patch # User Boris Feld # Date 1529621623 -3600 # Thu Jun 21 23:53:43 2018 +0100 # Node ID c6a8430582d584770c873a3b6234750482b9b65e # Parent a0e185f104541858a0b049e1fb67c4d113930a9a # EXP-Topic descendant # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r c6a8430582d5 revlog: do inclusive descendant testing (API) In many other places, a revision is considered a descendant of itself. We update the behavior of `revlog.descendant()` to match this. (for example. `revlog.isancestor` does inclusive testing). No tests break, so it seems safe to do so. This will make it easier to use a more efficient implementation in a later changeset. diff -r a0e185f10454 -r c6a8430582d5 mercurial/revlog.py --- a/mercurial/revlog.py Fri Feb 02 14:21:04 2018 -0800 +++ b/mercurial/revlog.py Thu Jun 21 23:53:43 2018 +0100 @@ -1378,6 +1378,8 @@ def descendant(self, start, end): if start == nullrev: return True +elif start == end: +return True for i in self.descendants([start]): if i == end: return True ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] debugdeltachain: avoid division by zero when a chain is empty
# HG changeset patch # User Paul Morelle # Date 1529597997 -7200 # Thu Jun 21 18:19:57 2018 +0200 # Node ID 9b9cb7abec13ed745c14c3a1357ee2c2dd55c4b5 # Parent 5d88fd1bc2af0af02129f0ad2b267d778349d95a # EXP-Topic debugdeltachain-divbyzero # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9b9cb7abec13 debugdeltachain: avoid division by zero when a chain is empty The two ratios chainratio and extraratio are computed using dividers that may be zero when the file is empty. As the denominators are integers, the limit of the ratio "just before zero" is the numerator value itself. If the numerator itself is zero, the ratio value is still meaningful: in both cases, a "good" value is a low ratio, and a size of zero is the optimal case. diff -r 5d88fd1bc2af -r 9b9cb7abec13 mercurial/debugcommands.py --- a/mercurial/debugcommands.pySat Jun 16 23:26:40 2018 +0900 +++ b/mercurial/debugcommands.pyThu Jun 21 18:19:57 2018 +0200 @@ -678,8 +678,15 @@ except IndexError: prevrev = -1 -chainratio = float(chainsize) / float(uncomp) -extraratio = float(extradist) / float(chainsize) +if uncomp != 0: +chainratio = float(chainsize) / float(uncomp) +else: +chainratio = chainsize + +if chainsize != 0: +extraratio = float(extradist) / float(chainsize) +else: +extraratio = extradist fm.startitem() fm.write('rev chainid chainlen prevrev deltatype compsize ' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 3 of 7] dispatch: rework the serve --stdio safe argument checks
On 22/06/18 13:15, Yuya Nishihara wrote: > On Thu, 21 Jun 2018 19:46:24 +0200, Paul Morelle wrote: >> On 21/06/18 13:53, Yuya Nishihara wrote: >>> On Wed, 20 Jun 2018 18:36:24 +0200, Paul Morelle wrote: >>>>> # HG changeset patch >>>> # User Boris Feld >>>> # Date 1529489906 -7200 >>>> # Wed Jun 20 12:18:26 2018 +0200 >>>> # Node ID 81edf3431b95d57257c690f7fe125c6676a78e18 >>>> # Parent b7051e4bf783c844f705473a2396458acecc59dc >>>> # EXP-Topic remote-debug >>>> # Available At https://bitbucket.org/octobus/mercurial-devel/ >>>> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r >>>> 81edf3431b95 >>>> dispatch: rework the serve --stdio safe argument checks >>>> >>>> We prepare the code to check for arguments after the mandatory ones. >>>> >>>> We want to relax this check to allow for wider argument passing in certain >>>> conditions (example --debug). We make preliminary refactoring in different >>>> changesets for clarity. >>>> >>>> diff -r b7051e4bf783 -r 81edf3431b95 mercurial/dispatch.py >>>> --- a/mercurial/dispatch.pyWed Jun 20 12:16:48 2018 +0200 >>>> +++ b/mercurial/dispatch.pyWed Jun 20 12:18:26 2018 +0200 >>>> @@ -285,12 +285,15 @@ >>>> def unsafe(): >>>> msg = _('potentially unsafe serve --stdio invocation: %s') >>>> raise error.Abort(msg % (stringutil.pprint(req.args),)) >>>> -if (len(req.args) != 4 or >>>> +if (len(req.args) < 4 or >>>> req.args[0] != '-R' or >>>> req.args[1].startswith('--') or >>>> req.args[2] != 'serve' or >>>> req.args[3] != '--stdio'): >>>> unsafe() >>>> +other_args = req.args[4:] >>>> +if other_args: >>>> +unsafe() >>> It's a bit scary to extend this just for debugging aids because argument >>> parsing at this phase has to be ad-hoc. Can't you instead use the >>> ssh/authorized_keys file to redirect a server session to 'hg debugserve'? >>> >>> Alternatively, we could add a wrapper script like hg-ssh. 
>> If I have correctly understood, your proposition is to keep the >> client-side version of this, and move the permission management to the >> sshkey/ssh-server level. Is this right? > Yes. > >> Something we could do in this area is to replace the call to sshpeer by >> `hg debugserve …` when we need the remote-debugging feature. >> However, exposing a "debug" command at the wireprotocol level seems bad >> ; maybe we could introduce a `--remote-argument` flag that would lift >> the check with a permission handling similar to what we have today (in >> patch 4). >> >> However, having all checks disabled (debugserve) is quite different than >> what is proposed in this series, which only allows for more information >> to be retrieved, and that's it. >> Fully bypassing the argument check would allow the client do a full >> range of actions (including arbitrary code execution). This is a much >> different access level, and in my current use case it would be a problem. > Instead of using debugserve, the wrapper script can set ui.debug/traceback > flag just like contrib/hg-ssh does for hooks. Doing this at the authorized_keys level is not an option for us. In one of our target environments for this kind of debugging, any change to the ssh script requires validation by admin people that can take from a couple of days to a couple of weeks, making it impossible to turn debugging on for a couple of commands, or easily compare various settings. The ability to control the used debug options from the client is very valuable in this case. We have already gathered a lot of useful information from it! I agree that ad-hoc argument parsing is less than optimal. Handling three extra flags (--debug, --profile and --traceback) is not too awful, but then we need to pass some associated configuration option (e.g. profiling.time-track=real) which will make things even more verbose. 
Maybe we could leverage the contents of `mercurial/configitems.py` by adding a "serversafe" attribute to the configuration items? Such config item could be specified on the client side (if the user is allowed to). What do you think about this? >> I don't think that moving the permission handling outside of Mercurial >> would be a good move : implementing similar featur
Re: [PATCH 2 of 2] revlog: do inclusive descendant testing (API)
On 21/06/18 18:30, Martin von Zweigbergk wrote: > On Thu, Jun 21, 2018 at 7:24 AM Paul Morelle <mailto:paul.more...@octobus.net>> wrote: > > # HG changeset patch > # User Boris Feld <mailto:boris.f...@octobus.net>> > # Date 1529585081 -3600 > # Thu Jun 21 13:44:41 2018 +0100 > # Node ID 59fea52e54e014722486f7c049e192fa505032d8 > # Parent 8d20b1b4b6a0297e7f9640d285b15a5d6647369e > # EXP-Topic descendant > # Available At https://bitbucket.org/octobus/mercurial-devel/ > # hg pull > https://bitbucket.org/octobus/mercurial-devel/ -r 59fea52e54e0 > revlog: do inclusive descendant testing (API) > > In many other places, a revision is considered a descendant of itself. > We update the behavior of `revlog.descendant()` to match this. > > No test breaks, so it seems safe to do so. > > diff -r 8d20b1b4b6a0 -r 59fea52e54e0 mercurial/revlog.py > --- a/mercurial/revlog.py Thu Jun 21 13:32:07 2018 +0100 > +++ b/mercurial/revlog.py Thu Jun 21 13:44:41 2018 +0100 > @@ -1378,7 +1378,7 @@ > def descendant(self, start, end): > if start == nullrev: > return True > - return start in self.ancestors([end]) > + return start in self.ancestors([end], inclusive=True) > > > Is this now equivalent to self.isancestor(start, end)? That method > relies on commonancestorsheads instead of lazyancestors. What are the > performance trade-offs? Equivalent both when there are many ancestors > and when there are many descendants? 
Hello Martin, Interestingly, it turns out that we have the following flock of functions: * ancestor: commonancestorsheads(parent_func, *revs) o uses revnum o any number of arguments o written in Python * cext/revlog.c: revlog.index.commonancestorsheads(*revs) o uses revnum o any number of arguments o written in C * revlog: revlog.commonancestorsheads(node-a, node-b) o uses nodes o takes exactly two nodes o Calls either self.index.c…a…heads or ancestor.c…a…heads * revlog: revlog.isancestor(anc, desc) o uses nodes o calls revlog.commonancestorsheads * revlog: revlog.descendant(rev-a, rev-b) o uses revs o has its own very slow code * revlog: revlog.descendant(rev-a, rev-b) o uses revs o has its own very slow code o non-inclusive * context: context.descendant(other) o uses contexts o calls revlog.descendant o non-inclusive At the algorithm level, `anc in ancestors(desc)` will be faster when anc is not an ancestor of desc (or there are many gca), since it will finish sooner. However, given that `commonancestorsheads` benefits from a C implementation, it is currently the fastest option. In the short term, I think the following actions would make sense: 1. Extract a lower level `revlog._commonancestorsheads(*revs)` from `revlog.commonancestorsheads` 2. Use it in `revlog.descendant` 3. Make `revlog.isancestor` use `revlog.descendant` Does this seem sensible to you? -- Paul Morelle ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 3 of 7] dispatch: rework the serve --stdio safe argument checks
On 21/06/18 13:53, Yuya Nishihara wrote: > On Wed, 20 Jun 2018 18:36:24 +0200, Paul Morelle wrote: >>> # HG changeset patch >> # User Boris Feld >> # Date 1529489906 -7200 >> # Wed Jun 20 12:18:26 2018 +0200 >> # Node ID 81edf3431b95d57257c690f7fe125c6676a78e18 >> # Parent b7051e4bf783c844f705473a2396458acecc59dc >> # EXP-Topic remote-debug >> # Available At https://bitbucket.org/octobus/mercurial-devel/ >> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r >> 81edf3431b95 >> dispatch: rework the serve --stdio safe argument checks >> >> We prepare the code to check for arguments after the mandatory ones. >> >> We want to relax this check to allow for wider argument passing in certain >> conditions (example --debug). We make preliminary refactoring in different >> changesets for clarity. >> >> diff -r b7051e4bf783 -r 81edf3431b95 mercurial/dispatch.py >> --- a/mercurial/dispatch.py Wed Jun 20 12:16:48 2018 +0200 >> +++ b/mercurial/dispatch.py Wed Jun 20 12:18:26 2018 +0200 >> @@ -285,12 +285,15 @@ >> def unsafe(): >> msg = _('potentially unsafe serve --stdio invocation: %s') >> raise error.Abort(msg % (stringutil.pprint(req.args),)) >> -if (len(req.args) != 4 or >> +if (len(req.args) < 4 or >> req.args[0] != '-R' or >> req.args[1].startswith('--') or >> req.args[2] != 'serve' or >> req.args[3] != '--stdio'): >> unsafe() >> +other_args = req.args[4:] >> +if other_args: >> +unsafe() > It's a bit scary to extend this just for debugging aids because argument > parsing at this phase has to be ad-hoc. Can't you instead use the > ssh/authorized_keys file to redirect a server session to 'hg debugserve'? > > Alternatively, we could add a wrapper script like hg-ssh. Hello Yuya, If I have correctly understood, your proposition is to keep the client-side version of this, and move the permission management to the sshkey/ssh-server level. Is this right? 
Something we could do in this area is to replace the call to sshpeer by `hg debugserve …` when we need the remote-debugging feature. However, exposing a "debug" command at the wireprotocol level seems bad ; maybe we could introduce a `--remote-argument` flag that would lift the check with a permission handling similar to what we have today (in patch 4). However, having all checks disabled (debugserve) is quite different than what is proposed in this series, which only allows for more information to be retrieved, and that's it. Fully bypassing the argument check would allow the client do a full range of actions (including arbitrary code execution). This is a much different access level, and in my current use case it would be a problem. I don't think that moving the permission handling outside of Mercurial would be a good move : implementing similar features for HTTP would still require some Mercurial-side restrictions and permissions handling anyway. It seems more consistent to implement it on the Mercurial side for all protocols. Cheers, Paul Morelle ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 2] revlog: efficient implementation of 'descendant'
# HG changeset patch # User Boris Feld # Date 1529584327 -3600 # Thu Jun 21 13:32:07 2018 +0100 # Node ID 8d20b1b4b6a0297e7f9640d285b15a5d6647369e # Parent a0e185f104541858a0b049e1fb67c4d113930a9a # EXP-Topic descendant # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8d20b1b4b6a0 revlog: efficient implementation of 'descendant' Iterating over descendants is costly, because there are no "parent -> children" pointers. Walking the other way around is much more efficient, especially on large repositories, where descendant walks can cost seconds, while it is quite instantaneous to walk ancestors. As self.ancestors returns a lazyancestors instance, calling __contains__ still considers the other bound as a ceiling limit for the research. In real life usage, this saved up to 80s during some pull operations, where descendant test happens in extension code. diff -r a0e185f10454 -r 8d20b1b4b6a0 mercurial/revlog.py --- a/mercurial/revlog.py Fri Feb 02 14:21:04 2018 -0800 +++ b/mercurial/revlog.py Thu Jun 21 13:32:07 2018 +0100 @@ -1378,12 +1378,7 @@ def descendant(self, start, end): if start == nullrev: return True -for i in self.descendants([start]): -if i == end: -return True -elif i > end: -break -return False +return start in self.ancestors([end]) def commonancestorsheads(self, a, b): """calculate all the heads of the common ancestors of nodes a and b""" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2] revlog: do inclusive descendant testing (API)
# HG changeset patch # User Boris Feld # Date 1529585081 -3600 # Thu Jun 21 13:44:41 2018 +0100 # Node ID 59fea52e54e014722486f7c049e192fa505032d8 # Parent 8d20b1b4b6a0297e7f9640d285b15a5d6647369e # EXP-Topic descendant # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 59fea52e54e0 revlog: do inclusive descendant testing (API) In many other places, a revision is considered a descendant of itself. We update the behavior of `revlog.descendant()` to match this. No test breaks, so it seems safe to do so. diff -r 8d20b1b4b6a0 -r 59fea52e54e0 mercurial/revlog.py --- a/mercurial/revlog.py Thu Jun 21 13:32:07 2018 +0100 +++ b/mercurial/revlog.py Thu Jun 21 13:44:41 2018 +0100 @@ -1378,7 +1378,7 @@ def descendant(self, start, end): if start == nullrev: return True -return start in self.ancestors([end]) +return start in self.ancestors([end], inclusive=True) def commonancestorsheads(self, a, b): """calculate all the heads of the common ancestors of nodes a and b""" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 6 of 7] remotedebug: allow to request --profile to the server
# HG changeset patch # User Boris Feld # Date 1529492441 -7200 # Wed Jun 20 13:00:41 2018 +0200 # Node ID f663c257cf788d3f3d00398dbf64d9b572536377 # Parent fd3f8738e2ca4308a8a8f9e21cc05bb97a7200db # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r f663c257cf78 remotedebug: allow to request --profile to the server It is now possible to request --profile output of a server from the client. Same as for `remote.debug` the server needs to explicitly allow the user to do so. diff -r fd3f8738e2ca -r f663c257cf78 mercurial/configitems.py --- a/mercurial/configitems.py Tue Jun 19 19:53:21 2018 +0200 +++ b/mercurial/configitems.py Wed Jun 20 13:00:41 2018 +0200 @@ -342,6 +342,9 @@ coreconfigitem('devel', 'remote.debug', default=False, ) +coreconfigitem('devel', 'remote.profile', +default=False, +) coreconfigitem('devel', 'servercafile', default='', ) diff -r fd3f8738e2ca -r f663c257cf78 mercurial/dispatch.py --- a/mercurial/dispatch.py Tue Jun 19 19:53:21 2018 +0200 +++ b/mercurial/dispatch.py Wed Jun 20 13:00:41 2018 +0200 @@ -297,7 +297,7 @@ if not ('*' in rd_access or user in rd_access) and other_args: unsafe() while other_args: -if other_args[0] == '--debug': +if other_args[0] in ('--debug', '--profile'): other_args.pop(0) else: unsafe() diff -r fd3f8738e2ca -r f663c257cf78 mercurial/sshpeer.py --- a/mercurial/sshpeer.py Tue Jun 19 19:53:21 2018 +0200 +++ b/mercurial/sshpeer.py Wed Jun 20 13:00:41 2018 +0200 @@ -152,6 +152,8 @@ serverspawn %= (_serverquote(remotecmd), _serverquote(path)) if ui.configbool('devel', 'remote.debug'): serverspawn += ' --debug' +if ui.configbool('devel', 'remote.profile'): +serverspawn += ' --profile' cmd = '%s %s %s' % (sshcmd, args, procutil.shellquote(serverspawn)) ui.debug('running %s\n' % cmd) cmd = procutil.quotecommand(cmd) diff -r fd3f8738e2ca -r f663c257cf78 tests/test-remote-debugging.t --- a/tests/test-remote-debugging.t Tue Jun 19 
19:53:21 2018 +0200 +++ b/tests/test-remote-debugging.t Wed Jun 20 13:00:41 2018 +0200 @@ -94,3 +94,29 @@ remote: bundle2-output-bundle: "HG20", 2 parts total remote: bundle2-output-part: "listkeys" (params: 1 mandatory) empty payload remote: bundle2-output-part: "phase-heads" 24 bytes payload + +Test basic remote profile output + + + $ hg pull --config "devel.remote.profile=yes" | grep -v --extended-regexp 'line +[0-9]+:' + pulling from ssh://user@dummy/remote/ + searching for changes + no changes found + remote: --- + remote: Sample count: * (glob) + remote: Total time: * seconds (* wall) (glob) + +Test basic combined remote debug option +--- + + $ hg pull --config "devel.remote.debug=yes" --config "devel.remote.profile=yes" | grep -v --extended-regexp 'line +[0-9]+:' + pulling from ssh://user@dummy/remote/ + searching for changes + no changes found + remote: listing keys for "bookmarks" + remote: bundle2-output-bundle: "HG20", 2 parts total + remote: bundle2-output-part: "listkeys" (params: 1 mandatory) empty payload + remote: bundle2-output-part: "phase-heads" 24 bytes payload + remote: --- + remote: Sample count: * (glob) + remote: Total time: * seconds (* wall) (glob) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 7] dispatch: rework the serve --stdio safe argument checks
# HG changeset patch # User Boris Feld # Date 1529489906 -7200 # Wed Jun 20 12:18:26 2018 +0200 # Node ID 81edf3431b95d57257c690f7fe125c6676a78e18 # Parent b7051e4bf783c844f705473a2396458acecc59dc # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 81edf3431b95 dispatch: rework the serve --stdio safe argument checks We prepare the code to check for arguments after the mandatory ones. We want to relax this check to allow for wider argument passing in certain conditions (example --debug). We make preliminary refactoring in different changesets for clarity. diff -r b7051e4bf783 -r 81edf3431b95 mercurial/dispatch.py --- a/mercurial/dispatch.py Wed Jun 20 12:16:48 2018 +0200 +++ b/mercurial/dispatch.py Wed Jun 20 12:18:26 2018 +0200 @@ -285,12 +285,15 @@ def unsafe(): msg = _('potentially unsafe serve --stdio invocation: %s') raise error.Abort(msg % (stringutil.pprint(req.args),)) -if (len(req.args) != 4 or +if (len(req.args) < 4 or req.args[0] != '-R' or req.args[1].startswith('--') or req.args[2] != 'serve' or req.args[3] != '--stdio'): unsafe() +other_args = req.args[4:] +if other_args: +unsafe() try: debugger = 'pdb' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 7 of 7] remotedebug: allow to request --traceback from the remote server
# HG changeset patch # User Boris Feld # Date 1529494410 -7200 # Wed Jun 20 13:33:30 2018 +0200 # Node ID aa4b26d92e20d95323929a306b3d63c44ceb3d69 # Parent f663c257cf788d3f3d00398dbf64d9b572536377 # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r aa4b26d92e20 remotedebug: allow to request --traceback from the remote server Getting a full traceback is useful in the case of a crash. This changeset makes it possible for an authorized user to receive it from the client side. diff -r f663c257cf78 -r aa4b26d92e20 mercurial/configitems.py --- a/mercurial/configitems.py Wed Jun 20 13:00:41 2018 +0200 +++ b/mercurial/configitems.py Wed Jun 20 13:33:30 2018 +0200 @@ -345,6 +345,9 @@ coreconfigitem('devel', 'remote.profile', default=False, ) +coreconfigitem('devel', 'remote.traceback', +default=False, +) coreconfigitem('devel', 'servercafile', default='', ) diff -r f663c257cf78 -r aa4b26d92e20 mercurial/dispatch.py --- a/mercurial/dispatch.py Wed Jun 20 13:00:41 2018 +0200 +++ b/mercurial/dispatch.py Wed Jun 20 13:33:30 2018 +0200 @@ -297,7 +297,7 @@ if not ('*' in rd_access or user in rd_access) and other_args: unsafe() while other_args: -if other_args[0] in ('--debug', '--profile'): +if other_args[0] in ('--debug', '--profile', '--traceback'): other_args.pop(0) else: unsafe() diff -r f663c257cf78 -r aa4b26d92e20 mercurial/sshpeer.py --- a/mercurial/sshpeer.py Wed Jun 20 13:00:41 2018 +0200 +++ b/mercurial/sshpeer.py Wed Jun 20 13:33:30 2018 +0200 @@ -154,6 +154,8 @@ serverspawn += ' --debug' if ui.configbool('devel', 'remote.profile'): serverspawn += ' --profile' +if ui.configbool('devel', 'remote.traceback'): +serverspawn += ' --traceback' cmd = '%s %s %s' % (sshcmd, args, procutil.shellquote(serverspawn)) ui.debug('running %s\n' % cmd) cmd = procutil.quotecommand(cmd) diff -r f663c257cf78 -r aa4b26d92e20 tests/test-remote-debugging.t --- a/tests/test-remote-debugging.t Wed Jun 
20 13:00:41 2018 +0200 +++ b/tests/test-remote-debugging.t Wed Jun 20 13:33:30 2018 +0200 @@ -106,11 +106,71 @@ remote: Sample count: * (glob) remote: Total time: * seconds (* wall) (glob) +Test basic remote traceback output +-- + + $ echo 'raise Exception("This extensions is broken")' > $TESTTMP/badext.py + $ cat << EOF >> $TESTTMP/remote/.hg/hgrc + > [extensions] + > bad=$TESTTMP/badext.py + > EOF + + $ hg pull --config "devel.remote.traceback=yes" + pulling from ssh://user@dummy/remote/ + remote: *** failed to import extension bad from $TESTTMP/badext.py: This extensions is broken + remote: Traceback (most recent call last): + remote: File "*/mercurial/extensions.py", line *, in loadall (glob) + remote: load(ui, name, path) + remote: File "*/mercurial/extensions.py", line *, in load (glob) + remote: mod = _importext(name, path, bind(_reportimporterror, ui)) + remote: File "*/mercurial/extensions.py", line *, in _importext (glob) + remote: mod = loadpath(path, 'hgext.%s' % name) + remote: File "*/mercurial/extensions.py", line *, in loadpath (glob) + remote: return imp.load_source(module_name, path) + remote: File "$TESTTMP/badext.py", line 1, in (glob) + remote: raise Exception("This extensions is broken") + remote: Exception: This extensions is broken + searching for changes + no changes found + remote: Traceback (most recent call last): + remote: File "*/mercurial/scmutil.py", line *, in callcatch (glob) + remote: return func() + remote: File "*/mercurial/dispatch.py", line *, in _runcatchfunc (glob) + remote: return _dispatch(req) + remote: File "*/mercurial/dispatch.py", line *, in _dispatch (glob) + remote: cmdpats, cmdoptions) + remote: File "*/mercurial/dispatch.py", line *, in runcommand (glob) + remote: ret = _runcommand(ui, options, cmd, d) + remote: File "*/mercurial/dispatch.py", line *, in _runcommand (glob) + remote: return cmdfunc() + remote: File "*/mercurial/dispatch.py", line *, in (glob) + remote: d = lambda: util.checksignature(func)(ui, 
*args, **strcmdopt) + remote: File "*/mercurial/util.py", line 1556, in check (glob) + remote: return func(*args, **kwargs) + remote: File "*/mercurial/commands.py", line *, in serve (glob) + remote: s.serve_forever() + remote: File "*/mercurial/wireprotoserver.py", line *, in serve_forever (glob) + remote: sys.exit(0) + remote: SystemExit: 0 + Test basic combined remote debug option --- - $ hg pull --config "devel.remote.debug=yes" --config "devel.remote.profile=yes" | grep -v --extended-regexp 'line +[0-9]+:' + $ hg pull --config
[PATCH 1 of 7] configitem: reorder items in the 'server' section
# HG changeset patch # User Boris Feld # Date 1529425297 -7200 # Tue Jun 19 18:21:37 2018 +0200 # Node ID a7944f21dbfc9e912606b82dc1001e4640598600 # Parent a0e185f104541858a0b049e1fb67c4d113930a9a # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a7944f21dbfc configitem: reorder items in the 'server' section Keeping things alphabetically sorted. diff -r a0e185f10454 -r a7944f21dbfc mercurial/configitems.py --- a/mercurial/configitems.py Fri Feb 02 14:21:04 2018 -0800 +++ b/mercurial/configitems.py Tue Jun 19 18:21:37 2018 +0200 @@ -936,16 +936,16 @@ coreconfigitem('server', 'disablefullbundle', default=False, ) -coreconfigitem('server', 'streamunbundle', -default=False, +coreconfigitem('server', 'maxhttpheaderlen', +default=1024, ) coreconfigitem('server', 'pullbundle', default=False, ) -coreconfigitem('server', 'maxhttpheaderlen', -default=1024, +coreconfigitem('server', 'preferuncompressed', +default=False, ) -coreconfigitem('server', 'preferuncompressed', +coreconfigitem('server', 'streamunbundle', default=False, ) coreconfigitem('server', 'uncompressed', ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 4 of 7] remotedebug: add the ability to request remote debug output
# HG changeset patch # User Boris Feld # Date 1529488678 -7200 # Wed Jun 20 11:57:58 2018 +0200 # Node ID 1130b3e74a6e618dfca6b1991c004a304091 # Parent 81edf3431b95d57257c690f7fe125c6676a78e18 # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 1130b3e74a6e remotedebug: add the ability to request remote debug output For now, this only works for ssh. As enabling debug output for all clients might be a bit scary, we will introduce restrictions in the next changesets. Support for other debug flags (--profile, --traceback) will be introduced later. Making it work for HTTP is possible but quite different. There is no process living during the whole exchange, and many wire protocol commands are lacking the appropriate side channel to transmit the debug output. For now we focus on ironing out the feature in the case of ssh. diff -r 81edf3431b95 -r 1130b3e74a6e mercurial/configitems.py --- a/mercurial/configitems.py Wed Jun 20 12:18:26 2018 +0200 +++ b/mercurial/configitems.py Wed Jun 20 11:57:58 2018 +0200 @@ -339,6 +339,9 @@ coreconfigitem('devel', 'legacy.exchange', default=list, ) +coreconfigitem('devel', 'remote.debug', +default=False, +) coreconfigitem('devel', 'servercafile', default='', ) diff -r 81edf3431b95 -r 1130b3e74a6e mercurial/dispatch.py --- a/mercurial/dispatch.py Wed Jun 20 12:18:26 2018 +0200 +++ b/mercurial/dispatch.py Wed Jun 20 11:57:58 2018 +0200 @@ -292,8 +292,12 @@ req.args[3] != '--stdio'): unsafe() other_args = req.args[4:] -if other_args: -unsafe() +while other_args: +if other_args[0] == '--debug': +# TODO: introduce user restriction +other_args.pop(0) +else: +unsafe() try: debugger = 'pdb' diff -r 81edf3431b95 -r 1130b3e74a6e mercurial/sshpeer.py --- a/mercurial/sshpeer.py Wed Jun 20 12:18:26 2018 +0200 +++ b/mercurial/sshpeer.py Wed Jun 20 11:57:58 2018 +0200 @@ -148,12 +148,11 @@ Returns a tuple of (process, stdin, stdout, stderr) for the spawned 
process. """ -cmd = '%s %s %s' % ( -sshcmd, -args, -procutil.shellquote('%s -R %s serve --stdio' % ( -_serverquote(remotecmd), _serverquote(path - +serverspawn = '%s -R %s serve --stdio' +serverspawn %= (_serverquote(remotecmd), _serverquote(path)) +if ui.configbool('devel', 'remote.debug'): +serverspawn += ' --debug' +cmd = '%s %s %s' % (sshcmd, args, procutil.shellquote(serverspawn)) ui.debug('running %s\n' % cmd) cmd = procutil.quotecommand(cmd) diff -r 81edf3431b95 -r 1130b3e74a6e tests/test-remote-debugging.t --- /dev/null Thu Jan 01 00:00:00 1970 + +++ b/tests/test-remote-debugging.t Wed Jun 20 11:57:58 2018 +0200 @@ -0,0 +1,68 @@ +Test remote debugging capabilities +== + +Setup + + $ cat >> $HGRCPATH << EOF + > [ui] + > ssh="$PYTHON" "$RUNTESTDIR/dummyssh" + > EOF + + $ hg init remote + $ cd remote + $ echo this > foo + $ echo this > fooO + $ hg ci -A -m "init" foo fooO + $ hg clone ssh://user@dummy/remote/ local --debug + running "*" "*/tests/dummyssh" 'user@dummy' 'hg -R remote/ serve --stdio' (glob) + sending hello command + sending between command + remote: 413 + remote: capabilities: batch branchmap $USUAL_BUNDLE2_CAPS_SERVER$ changegroupsubset getbundle known lookup protocaps pushkey streamreqs=generaldelta,revlogv1 unbundle=HG10GZ,HG10BZ,HG10UN unbundlehash + remote: 1 + sending protocaps command + query 1; heads + sending batch command + requesting all changes + sending getbundle command + bundle2-input-bundle: with-transaction + bundle2-input-part: "changegroup" (params: 1 mandatory 1 advisory) supported + adding changesets + add changeset 1160648e36ce + adding manifests + adding file changes + adding foo revisions + adding fooO revisions + added 1 changesets with 2 changes to 2 files + bundle2-input-part: total payload size 664 + bundle2-input-part: "listkeys" (params: 1 mandatory) supported + bundle2-input-part: "phase-heads" supported + bundle2-input-part: total payload size 24 + bundle2-input-part: "cache:rev-branch-cache" (advisory) supported + 
bundle2-input-part: total payload size 39 + bundle2-input-bundle: 3 parts total + checking for updated bookmarks + updating the branch cache + new changesets 1160648e36ce + updating to branch default + resolving manifests + branchmerge: False, force: False, partial: False + ancestor: , local: +, remote: 1160648e36ce + foo: remote created -> g + getting foo + fooO: remote created -> g + getting fooO + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ cd local + +Test basic remote debug
[PATCH 2 of 7] dispatch: factor out the exception raised on unsafe `serve --stdio`
# HG changeset patch # User Boris Feld # Date 1529489808 -7200 # Wed Jun 20 12:16:48 2018 +0200 # Node ID b7051e4bf783c844f705473a2396458acecc59dc # Parent a7944f21dbfc9e912606b82dc1001e4640598600 # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r b7051e4bf783 dispatch: factor out the exception raised on unsafe `serve --stdio` We want to relax this check to allow for wider argument passing in certain conditions (e.g. --debug). We make preliminary refactoring in different changesets for clarity. diff -r a7944f21dbfc -r b7051e4bf783 mercurial/dispatch.py --- a/mercurial/dispatch.py Tue Jun 19 18:21:37 2018 +0200 +++ b/mercurial/dispatch.py Wed Jun 20 12:16:48 2018 +0200 @@ -282,14 +282,15 @@ # shenanigans wherein a user does something like pass # --debugger or --config=ui.debugger=1 as a repo # name. This used to actually run the debugger. +def unsafe(): +msg = _('potentially unsafe serve --stdio invocation: %s') +raise error.Abort(msg % (stringutil.pprint(req.args),)) if (len(req.args) != 4 or req.args[0] != '-R' or req.args[1].startswith('--') or req.args[2] != 'serve' or req.args[3] != '--stdio'): -raise error.Abort( -_('potentially unsafe serve --stdio invocation: %s') % -(stringutil.pprint(req.args),)) +unsafe() try: debugger = 'pdb' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 5 of 7] remotedebug: introduce config to control who can use the debug capabilities
# HG changeset patch # User Boris Feld # Date 1529430801 -7200 # Tue Jun 19 19:53:21 2018 +0200 # Node ID fd3f8738e2ca4308a8a8f9e21cc05bb97a7200db # Parent 1130b3e74a6e618dfca6b1991c004a304091 # EXP-Topic remote-debug # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r fd3f8738e2ca remotedebug: introduce config to control who can use the debug capabilities Providing config output to anyone might expose unwanted information. We now require users to be explicitly white-listed. The '*' value allows anyone to use the feature. This is similar to the 'http.allow_push' config. diff -r 1130b3e74a6e -r fd3f8738e2ca mercurial/configitems.py --- a/mercurial/configitems.py Wed Jun 20 11:57:58 2018 +0200 +++ b/mercurial/configitems.py Tue Jun 19 19:53:21 2018 +0200 @@ -948,6 +948,9 @@ coreconfigitem('server', 'preferuncompressed', default=False, ) +coreconfigitem('server', 'allow-remote-debug', +default=list, +) coreconfigitem('server', 'streamunbundle', default=False, ) diff -r 1130b3e74a6e -r fd3f8738e2ca mercurial/dispatch.py --- a/mercurial/dispatch.py Wed Jun 20 11:57:58 2018 +0200 +++ b/mercurial/dispatch.py Tue Jun 19 19:53:21 2018 +0200 @@ -292,9 +292,12 @@ req.args[3] != '--stdio'): unsafe() other_args = req.args[4:] +rd_access = req.ui.configlist('server', 'allow-remote-debug') +user = util.username() +if not ('*' in rd_access or user in rd_access) and other_args: +unsafe() while other_args: if other_args[0] == '--debug': -# TODO: introduce user restriction other_args.pop(0) else: unsafe() diff -r 1130b3e74a6e -r fd3f8738e2ca mercurial/help/config.txt --- a/mercurial/help/config.txt Wed Jun 20 11:57:58 2018 +0200 +++ b/mercurial/help/config.txt Tue Jun 19 19:53:21 2018 +0200 @@ -1750,6 +1750,14 @@ Controls generic server settings. +``allow-remote-debug`` +List of Users allowed to display extra debug information when talking to the +server. 
If the special value ``*`` is used, all users will be allowed to do +so. + +Note: the config set at the repository level will be ignored. It needs to be +set at user or system level. + ``bookmarks-pushkey-compat`` Trigger pushkey hook when being pushed bookmark updates. This config exist for compatibility purpose (default to True) diff -r 1130b3e74a6e -r fd3f8738e2ca tests/test-remote-debugging.t --- a/tests/test-remote-debugging.t Wed Jun 20 11:57:58 2018 +0200 +++ b/tests/test-remote-debugging.t Tue Jun 19 19:53:21 2018 +0200 @@ -58,6 +58,34 @@ Test basic remote debug output -- +Without the config allowing it +`` + + $ hg pull --config "devel.remote.debug=yes" + pulling from ssh://user@dummy/remote/ + remote: abort: potentially unsafe serve --stdio invocation: ['-R', 'remote/', 'serve', '--stdio', '--debug'] + abort: no suitable response from remote hg! + [255] + + $ cat >> $HGRCPATH << EOF + > [server] + > allow-remote-debug=nonexistant-user + > EOF + + $ hg pull --config "devel.remote.debug=yes" + pulling from ssh://user@dummy/remote/ + remote: abort: potentially unsafe serve --stdio invocation: ['-R', 'remote/', 'serve', '--stdio', '--debug'] + abort: no suitable response from remote hg! + [255] + +With the config allowing it +`` + + $ cat >> $HGRCPATH << EOF + > [server] + > allow-remote-debug=* + > EOF + $ hg pull --config "devel.remote.debug=yes" pulling from ssh://user@dummy/remote/ searching for changes ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 1 of 6] revlog: in _getcandidaterevs, shorten revlog._generaldelta to gdelta
On 22/05/18 01:12, Gregory Szorc wrote: > On Mon, May 21, 2018 at 2:47 PM, Paul Morelle > <paul.more...@octobus.net <mailto:paul.more...@octobus.net>> wrote: > > # HG changeset patch > # User Paul Morelle <paul.more...@octobus.net > <mailto:paul.more...@octobus.net>> > # Date 1525438855 -7200 > # Fri May 04 15:00:55 2018 +0200 > # Node ID 054469518b3480201e7f8ada16957027009e9f64 > # Parent 514605777244de61b68c7e1503c4f106773913f4 > # EXP-Topic semi-snapshots > # Available At https://bitbucket.org/octobus/mercurial-devel/ > <https://bitbucket.org/octobus/mercurial-devel/> > # hg pull > https://bitbucket.org/octobus/mercurial-devel/ > <https://bitbucket.org/octobus/mercurial-devel/> -r 054469518b34 > revlog: in _getcandidaterevs, shorten revlog._generaldelta to gdelta > > > Queued parts 1-4. Thanks. > > Part 5 failed to apply cleanly. Could you please rebase and resend? Sure! However, I am not certain that I have done this properly, as [PATCH 5 of 6] and [PATCH 6 of 6] were sent as [PATCH 1 of 2 V2] and [PATCH 2 of 2 V2]. Is it correct even if the numbers don't match? Thanks, Paul > > > > diff -r 514605777244 -r 054469518b34 mercurial/revlog.py > --- a/mercurial/revlog.py Fri May 11 23:28:02 2018 -0700 > +++ b/mercurial/revlog.py Fri May 04 15:00:55 2018 +0200 > @@ -305,6 +305,7 @@ > grouped by level of easiness. > """ > revlog = self.revlog > + gdelta = revlog._generaldelta > curr = len(revlog) > prev = curr - 1 > p1r, p2r = revlog.rev(p1), revlog.rev(p2) > @@ -316,13 +317,13 @@ > # changegroup data into a generaldelta repo. The only > time it > # isn't true is if this is the first revision in a > delta chain > # or if ``format.generaldelta=true`` disabled > ``lazydeltabase``. 
> - if cachedelta and revlog._generaldelta and > revlog._lazydeltabase: > + if cachedelta and gdelta and revlog._lazydeltabase: > # Assume what we received from the server is a > good choice > # build delta will reuse the cache > yield (cachedelta[0],) > tested.add(cachedelta[0]) > > - if revlog._generaldelta: > + if gdelta: > # exclude already lazy tested base if any > parents = [p for p in (p1r, p2r) > if p != nullrev and p not in tested] > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > <mailto:Mercurial-devel@mercurial-scm.org> > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel > <https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel> > > ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V2] revlog: make chainbase cache its result for the correct revision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1524324477 -7200 # Sat Apr 21 17:27:57 2018 +0200 # Node ID 787f2b0636c6d6f44235dcd8436f6d8fc5f1a6cb # Parent c0ac96176ebefaf19cd2d368281c180c575c31f2 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 787f2b0636c6 revlog: make chainbase cache its result for the correct revision Previously, as 'rev' was our iterator, we were always caching the chain base for the second revision of the chain, or for the base itself. diff -r c0ac96176ebe -r 787f2b0636c6 mercurial/revlog.py --- a/mercurial/revlog.py Wed Mar 07 11:10:22 2018 +0100 +++ b/mercurial/revlog.py Sat Apr 21 17:27:57 2018 +0200 @@ -877,10 +877,11 @@ return base index = self.index -base = index[rev][3] -while base != rev: -rev = base -base = index[rev][3] +iterrev = rev +base = index[iterrev][3] +while base != iterrev: +iterrev = base +base = index[iterrev][3] self._chainbasecache[rev] = base return base ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 2 V2] revlog: make getcandidaterevs more consistent about updating tested revs set
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1520417422 -3600 # Wed Mar 07 11:10:22 2018 +0100 # Node ID c0ac96176ebefaf19cd2d368281c180c575c31f2 # Parent 90e02bd8c4473fec03639f26f3d1b2d30d9861d3 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r c0ac96176ebe revlog: make getcandidaterevs more consistent about updating tested revs set Like in previous cases, update the set of tested revisions after yielding diff -r 90e02bd8c447 -r c0ac96176ebe mercurial/revlog.py --- a/mercurial/revlog.py Wed Mar 07 12:00:58 2018 +0100 +++ b/mercurial/revlog.py Wed Mar 07 11:10:22 2018 +0100 @@ -345,6 +345,7 @@ # other approach failed try against prev to hopefully save us a # fulltext. yield (prev,) +tested.add(prev) def buildtext(self, revinfo, fh): """Builds a fulltext version of a revision ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 6] revlog: make variable name 'd' more explicit in _isgooddeltainfo
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1520420407 -3600 # Wed Mar 07 12:00:07 2018 +0100 # Node ID d9a8f10cfaa662c5dc6f64e2726da7d763e7e717 # Parent 3eb11b8fe014f5698a3c857ee670a7892f06fc56 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r d9a8f10cfaa6 revlog: make variable name 'd' more explicit in _isgooddeltainfo d -> deltainfo diff -r 3eb11b8fe014 -r d9a8f10cfaa6 mercurial/revlog.py --- a/mercurial/revlog.py Thu Apr 19 07:57:06 2018 +0200 +++ b/mercurial/revlog.py Wed Mar 07 12:00:07 2018 +0100 @@ -2086,26 +2086,27 @@ return compressor.decompress(data) -def _isgooddeltainfo(self, d, textlen): +def _isgooddeltainfo(self, deltainfo, textlen): """Returns True if the given delta is good. Good means that it is within the disk span, disk size, and chain length bounds that we know to be performant.""" -if d is None: +if deltainfo is None: return False -# - 'd.distance' is the distance from the base revision -- bounding it -# limits the amount of I/O we need to do. -# - 'd.compresseddeltalen' is the sum of the total size of deltas we -# need to apply -- bounding it limits the amount of CPU we consume. +# - 'deltainfo.distance' is the distance from the base revision -- +# bounding it limits the amount of I/O we need to do. +# - 'deltainfo.compresseddeltalen' is the sum of the total size of +# deltas we need to apply -- bounding it limits the amount of CPU +# we consume. 
defaultmax = textlen * 4 maxdist = self._maxdeltachainspan if not maxdist: -maxdist = d.distance # ensure the conditional pass +maxdist = deltainfo.distance # ensure the conditional pass maxdist = max(maxdist, defaultmax) -if (d.distance > maxdist or d.deltalen > textlen or -d.compresseddeltalen > textlen * 2 or -(self._maxchainlen and d.chainlen > self._maxchainlen)): +if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or +deltainfo.compresseddeltalen > textlen * 2 or +(self._maxchainlen and deltainfo.chainlen > self._maxchainlen)): return False return True ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 6] debugdeltachain: r.start and r.length can be retrieved outside the loop
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1524117426 -7200 # Thu Apr 19 07:57:06 2018 +0200 # Node ID 3eb11b8fe014f5698a3c857ee670a7892f06fc56 # Parent 054469518b3480201e7f8ada16957027009e9f64 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 3eb11b8fe014 debugdeltachain: r.start and r.length can be retrieved outside the loop diff -r 054469518b34 -r 3eb11b8fe014 mercurial/debugcommands.py --- a/mercurial/debugcommands.py Fri May 04 15:00:55 2018 +0200 +++ b/mercurial/debugcommands.py Thu Apr 19 07:57:06 2018 +0200 @@ -629,6 +629,8 @@ opts = pycompat.byteskwargs(opts) r = cmdutil.openrevlog(repo, 'debugdeltachain', file_, opts) index = r.index +start = r.start +length = r.length generaldelta = r.version & revlog.FLAG_GENERALDELTA withsparseread = getattr(r, '_withsparseread', False) @@ -676,8 +678,6 @@ comp, uncomp, deltatype, chain, chainsize = revinfo(rev) chainbase = chain[0] chainid = chainbases.setdefault(chainbase, len(chainbases) + 1) -start = r.start -length = r.length basestart = start(chainbase) revstart = start(rev) lineardist = revstart + comp - basestart ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 5 of 6] revlog: make getcandidaterevs more consistent about updating tested revs set
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1520417422 -3600 # Wed Mar 07 11:10:22 2018 +0100 # Node ID fbb85c011ea4c6ba474dbe616f63543cffb5f909 # Parent 3270177368354d88aba86d5b00b96b7fecadd65b # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r fbb85c011ea4 revlog: make getcandidaterevs more consistent about updating tested revs set Like in previous cases, update the set of tested revisions after yielding diff -r 327017736835 -r fbb85c011ea4 mercurial/revlog.py --- a/mercurial/revlog.py Wed Mar 07 12:00:58 2018 +0100 +++ b/mercurial/revlog.py Wed Mar 07 11:10:22 2018 +0100 @@ -331,13 +331,14 @@ # Pick whichever parent is closer to us (to minimize the # chance of having to build a fulltext). parents = [max(parents)] +yield parents tested.update(parents) -yield parents if prev not in tested: # other approach failed try against prev to hopefully save us a # fulltext. yield (prev,) +tested.add(prev) def buildtext(self, revinfo, fh): """Builds a fulltext version of a revision ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 6 of 6] revlog: make chainbase cache its result for the correct revision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1524324477 -7200 # Sat Apr 21 17:27:57 2018 +0200 # Node ID f7f55d6f2966181f243d69b1afa598c12f6c4220 # Parent fbb85c011ea4c6ba474dbe616f63543cffb5f909 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r f7f55d6f2966 revlog: make chainbase cache its result for the correct revision Previously, as 'rev' was our iterator, we were always caching the chain base for the second revision of the chain, or for the base itself. diff -r fbb85c011ea4 -r f7f55d6f2966 mercurial/revlog.py --- a/mercurial/revlog.py Wed Mar 07 11:10:22 2018 +0100 +++ b/mercurial/revlog.py Sat Apr 21 17:27:57 2018 +0200 @@ -870,10 +870,11 @@ return base index = self.index -base = index[rev][3] -while base != rev: -rev = base -base = index[rev][3] +iterrev = rev +base = index[iterrev][3] +while base != iterrev: +iterrev = base +base = index[iterrev][3] self._chainbasecache[rev] = base return base ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 4 of 6] revlog: isgooddeltainfo takes the whole revinfo object
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1520420458 -3600 # Wed Mar 07 12:00:58 2018 +0100 # Node ID 3270177368354d88aba86d5b00b96b7fecadd65b # Parent d9a8f10cfaa662c5dc6f64e2726da7d763e7e717 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 327017736835 revlog: isgooddeltainfo takes the whole revinfo object Future changes will need other information about the revision. diff -r d9a8f10cfaa6 -r 327017736835 mercurial/revlog.py --- a/mercurial/revlog.py Wed Mar 07 12:00:07 2018 +0100 +++ b/mercurial/revlog.py Wed Mar 07 12:00:58 2018 +0100 @@ -442,7 +442,7 @@ if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS: continue candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) -if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen): +if revlog._isgooddeltainfo(candidatedelta, revinfo): nominateddeltas.append(candidatedelta) if nominateddeltas: deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) @@ -2086,7 +2086,7 @@ return compressor.decompress(data) -def _isgooddeltainfo(self, deltainfo, textlen): +def _isgooddeltainfo(self, deltainfo, revinfo): """Returns True if the given delta is good. Good means that it is within the disk span, disk size, and chain length bounds that we know to be performant.""" @@ -2099,6 +2099,7 @@ # deltas we need to apply -- bounding it limits the amount of CPU # we consume. +textlen = revinfo.textlen defaultmax = textlen * 4 maxdist = self._maxdeltachainspan if not maxdist: ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 6] revlog: in _getcandidaterevs, shorten revlog._generaldelta to gdelta
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1525438855 -7200 # Fri May 04 15:00:55 2018 +0200 # Node ID 054469518b3480201e7f8ada16957027009e9f64 # Parent 514605777244de61b68c7e1503c4f106773913f4 # EXP-Topic semi-snapshots # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 054469518b34 revlog: in _getcandidaterevs, shorten revlog._generaldelta to gdelta diff -r 514605777244 -r 054469518b34 mercurial/revlog.py --- a/mercurial/revlog.py Fri May 11 23:28:02 2018 -0700 +++ b/mercurial/revlog.py Fri May 04 15:00:55 2018 +0200 @@ -305,6 +305,7 @@ grouped by level of easiness. """ revlog = self.revlog +gdelta = revlog._generaldelta curr = len(revlog) prev = curr - 1 p1r, p2r = revlog.rev(p1), revlog.rev(p2) @@ -316,13 +317,13 @@ # changegroup data into a generaldelta repo. The only time it # isn't true is if this is the first revision in a delta chain # or if ``format.generaldelta=true`` disabled ``lazydeltabase``. -if cachedelta and revlog._generaldelta and revlog._lazydeltabase: +if cachedelta and gdelta and revlog._lazydeltabase: # Assume what we received from the server is a good choice # build delta will reuse the cache yield (cachedelta[0],) tested.add(cachedelta[0]) -if revlog._generaldelta: +if gdelta: # exclude already lazy tested base if any parents = [p for p in (p1r, p2r) if p != nullrev and p not in tested] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] revlog: for a delta, when a parent was refused, suggest other parent (issue5481)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1526295914 -7200 # Mon May 14 13:05:14 2018 +0200 # Node ID 7f059b2d62b30e1fd7ca86d00819395178c6a50b # Parent 8ba0344f9fb145f5b9b909f1211defc9e0793f68 # EXP-Topic fallback-to-other-parent # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 7f059b2d62b3 revlog: for a delta, when a parent was refused, suggest other parent (issue5481) Without aggressivemergedeltas, ensure that when we decline the closest parent (by revision number), the other parent is examined too. diff -r 8ba0344f9fb1 -r 7f059b2d62b3 mercurial/revlog.py --- a/mercurial/revlog.py Fri May 11 22:07:43 2018 -0400 +++ b/mercurial/revlog.py Mon May 14 13:05:14 2018 +0200 @@ -326,12 +326,19 @@ # exclude already lazy tested base if any parents = [p for p in (p1r, p2r) if p != nullrev and p not in tested] -if parents and not revlog._aggressivemergedeltas: -# Pick whichever parent is closer to us (to minimize the -# chance of having to build a fulltext). -parents = [max(parents)] -tested.update(parents) -yield parents + +if not revlog._aggressivemergedeltas and len(parents) == 2: +parents.sort() +# To minimize the chance of having to build a fulltext, +# pick first whichever parent is closest to us (max rev) +yield (parents[1],) +# then the other one (min rev) if the first did not fit +yield (parents[0],) +tested.update(parents) +elif len(parents) > 0: +# Test all parents (1 or 2), and keep the best candidate +yield parents +tested.update(parents) if prev not in tested: # other approach failed try against prev to hopefully save us a ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH V2] revlog: suggest other parent when a parent was refused for a delta (issue5481)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1526295914 -7200 # Mon May 14 13:05:14 2018 +0200 # Node ID 12fd4a2e154a679a601b8facbb5f15956d427e9e # Parent 8ba0344f9fb145f5b9b909f1211defc9e0793f68 # EXP-Topic fallback-to-other-parent # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 12fd4a2e154a revlog: suggest other parent when a parent was refused for a delta (issue5481) Without aggressivemergedeltas, ensure that when we decline the closest parent (by revision number), the other parent is examined too. diff -r 8ba0344f9fb1 -r 12fd4a2e154a mercurial/revlog.py --- a/mercurial/revlog.py Fri May 11 22:07:43 2018 -0400 +++ b/mercurial/revlog.py Mon May 14 13:05:14 2018 +0200 @@ -326,12 +326,19 @@ # exclude already lazy tested base if any parents = [p for p in (p1r, p2r) if p != nullrev and p not in tested] -if parents and not revlog._aggressivemergedeltas: -# Pick whichever parent is closer to us (to minimize the -# chance of having to build a fulltext). -parents = [max(parents)] -tested.update(parents) -yield parents + +if not revlog._aggressivemergedeltas and len(parents) == 2: +parents.sort() +# To minimize the chance of having to build a fulltext, +# pick first whichever parent is closest to us (max rev) +yield (parents[1],) +# then the other one (min rev) if the first did not fit +yield (parents[0],) +tested.update(parents) +elif len(parents) > 0: +# Test all parents (1 or 2), and keep the best candidate +yield parents +tested.update(parents) if prev not in tested: # other approach failed try against prev to hopefully save us a ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V2] revlog: group delta computation methods under _deltacomputer object
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515961692 -3600 # Sun Jan 14 21:28:12 2018 +0100 # Node ID 82018742fcabfd0e26aa6f34bf773b6f25d27985 # Parent 32bc4595737c2211dfcbf53cb499a366b9986dfd # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 82018742fcab revlog: group delta computation methods under _deltacomputer object Extracting these methods from revlog will allow changing the implementation of the deltacomputer, by providing this interface: __init__(self, revlog) - constructor that initializes the object from a given revlog buildtext(self, revinfo, fh) - builds the fulltext version of a revision from a _revisioninfo object and the file handle to the .d (or .i for inline mode) file. finddeltainfo(self, revinfo, fh) - finds a revision in the revlog against which it is acceptable to build a delta, and builds the corresponding _deltainfo. It should now be easier to write an experimental feature that would replace _deltacomputer with another object, for example one that would know how to parallelize the delta computation in order to quicken the storage of multiple revisions. diff -r 32bc4595737c -r 82018742fcab mercurial/revlog.py --- a/mercurial/revlog.py Sun Jan 14 14:36:22 2018 +0100 +++ b/mercurial/revlog.py Sun Jan 14 21:28:12 2018 +0100 @@ -264,6 +264,155 @@ chainlen = attr.ib() compresseddeltalen = attr.ib() +class _deltacomputer(object): +def __init__(self, revlog): +self.revlog = revlog + +def _getcandidaterevs(self, p1, p2, cachedelta): +""" +Provides revisions that present an interest to be diffed against, +grouped by level of easiness. +""" +revlog = self.revlog +curr = len(revlog) +prev = curr - 1 +p1r, p2r = revlog.rev(p1), revlog.rev(p2) + +# should we try to build a delta? 
+if prev != nullrev and revlog.storedeltachains: +tested = set() +# This condition is true most of the time when processing +# changegroup data into a generaldelta repo. The only time it +# isn't true is if this is the first revision in a delta chain +# or if ``format.generaldelta=true`` disabled ``lazydeltabase``. +if cachedelta and revlog._generaldelta and revlog._lazydeltabase: +# Assume what we received from the server is a good choice +# build delta will reuse the cache +yield (cachedelta[0],) +tested.add(cachedelta[0]) + +if revlog._generaldelta: +# exclude already lazy tested base if any +parents = [p for p in (p1r, p2r) + if p != nullrev and p not in tested] +if parents and not revlog._aggressivemergedeltas: +# Pick whichever parent is closer to us (to minimize the +# chance of having to build a fulltext). +parents = [max(parents)] +tested.update(parents) +yield parents + +if prev not in tested: +# other approach failed try against prev to hopefully save us a +# fulltext. +yield (prev,) + +def buildtext(self, revinfo, fh): +"""Builds a fulltext version of a revision + +revinfo: _revisioninfo instance that contains all needed info +fh: file handle to either the .i or the .d revlog file, + depending on whether it is inlined or not +""" +btext = revinfo.btext +if btext[0] is not None: +return btext[0] + +revlog = self.revlog +cachedelta = revinfo.cachedelta +flags = revinfo.flags +node = revinfo.node + +baserev = cachedelta[0] +delta = cachedelta[1] +# special case deltas which replace entire base; no need to decode +# base revision. this neatly avoids censored bases, which throw when +# they're decoded. 
+hlen = struct.calcsize(">lll") +if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev), + len(delta) - hlen): +btext[0] = delta[hlen:] +else: +basetext = revlog.revision(baserev, _df=fh, raw=True) +btext[0] = mdiff.patch(basetext, delta) + +try: +res = revlog._processflags(btext[0], flags, 'read', raw=True) +btext[0], validatehash = res +if validatehash: +revlog.checkhash(btext[0]
[PATCH 1 of 2 V2] revlog: refactor out _finddeltainfo from _addrevision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515936982 -3600 # Sun Jan 14 14:36:22 2018 +0100 # Node ID 32bc4595737c2211dfcbf53cb499a366b9986dfd # Parent 45b678bf3a787085d56fad5bee494e0c160aa120 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 32bc4595737c revlog: refactor out _finddeltainfo from _addrevision Splicing the code into smaller chunks should help understanding it, and eventually override some parts in experimental branches to try optimization. diff -r 45b678bf3a78 -r 32bc4595737c mercurial/revlog.py --- a/mercurial/revlog.py Wed Jan 17 17:07:55 2018 +0100 +++ b/mercurial/revlog.py Sun Jan 14 14:36:22 2018 +0100 @@ -279,6 +279,7 @@ p1 = attr.ib() p2 = attr.ib() btext = attr.ib() +textlen = attr.ib() cachedelta = attr.ib() flags = attr.ib() @@ -1987,6 +1988,33 @@ return _deltainfo(dist, deltalen, (header, data), deltabase, chainbase, chainlen, compresseddeltalen) +def _finddeltainfo(self, revinfo, fh): +"""Find an acceptable delta against a candidate revision + +revinfo: information about the revision (instance of _revisioninfo) +fh: file handle to either the .i or the .d revlog file, + depending on whether it is inlined or not + +Returns the first acceptable candidate revision, as ordered by +_getcandidaterevs +""" +cachedelta = revinfo.cachedelta +p1 = revinfo.p1 +p2 = revinfo.p2 + +deltainfo = None +for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): +nominateddeltas = [] +for candidaterev in candidaterevs: +candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) +if self._isgooddeltainfo(candidatedelta, revinfo.textlen): +nominateddeltas.append(candidatedelta) +if nominateddeltas: +deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) +break + +return deltainfo + def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, cachedelta, ifh, dfh, alwayscache=False): """internal function 
to add revisions to the log @@ -2016,7 +2044,6 @@ curr = len(self) prev = curr - 1 offset = self.end(prev) -deltainfo = None p1r, p2r = self.rev(p1), self.rev(p2) # full versions are inserted when the needed deltas @@ -2027,16 +2054,8 @@ else: textlen = len(rawtext) -revinfo = _revisioninfo(node, p1, p2, btext, cachedelta, flags) -for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): -nominateddeltas = [] -for candidaterev in candidaterevs: -candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) -if self._isgooddeltainfo(candidatedelta, textlen): -nominateddeltas.append(candidatedelta) -if nominateddeltas: -deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) -break +revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) +deltainfo = self._finddeltainfo(revinfo, fh) if deltainfo is not None: base = deltainfo.base ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] debugdeltachain: cleanup the double call to _slicechunk
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1516347322 -3600 # Fri Jan 19 08:35:22 2018 +0100 # Node ID ce3b9d44cbf7de6fa0faad42442797cee9b337c8 # Parent 45b678bf3a787085d56fad5bee494e0c160aa120 # EXP-Topic debug-sparse-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r ce3b9d44cbf7 debugdeltachain: cleanup the double call to _slicechunk Follow-up to Yuya's review on 43154a76f3927c4f0c8c6b02be80f0069c7d8fdb: > Nit: hasattr() isn't necessary. revlog._slicechunk() is used in the previous > block. hasattr() isn't necessary indeed, as we are protected by the withsparseread option, which was introduced at the same time as revlog._slicechunk, in e2ad93bcc084b97c48f54c179365376edb702858. And, as Yuya noticed, _slicechunk could be called only once. diff -r 45b678bf3a78 -r ce3b9d44cbf7 mercurial/debugcommands.py --- a/mercurial/debugcommands.pyWed Jan 17 17:07:55 2018 +0100 +++ b/mercurial/debugcommands.pyFri Jan 19 08:35:22 2018 +0100 @@ -684,7 +684,10 @@ if withsparseread: readsize = 0 largestblock = 0 +srchunks = 0 + for revschunk in revlog._slicechunk(r, chain): +srchunks += 1 blkend = start(revschunk[-1]) + length(revschunk[-1]) blksize = blkend - start(revschunk[0]) @@ -694,12 +697,6 @@ readdensity = float(chainsize) / float(readsize) -if util.safehasattr(revlog, '_slicechunk'): -revchunks = tuple(revlog._slicechunk(r, chain)) -else: -revchunks = (chain,) -srchunks = len(revchunks) - fm.write('readsize largestblock readdensity srchunks', ' %10d %10d %9.5f %8d', readsize, largestblock, readdensity, srchunks, ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH V2] debugdeltachain: display how many chunks would be read in sparse-read mode
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1508157126 -7200 # Mon Oct 16 14:32:06 2017 +0200 # Node ID 77d4c2b07d6be9219a127fc00b333dca9f65b45f # Parent 821d8a5ab4ff890a7732c2e4cdcc7f32191e5942 # EXP-Topic debug-sparse-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 77d4c2b07d6b debugdeltachain: display how many chunks would be read in sparse-read mode diff -r 821d8a5ab4ff -r 77d4c2b07d6b mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Jan 16 22:14:33 2018 +0900 +++ b/mercurial/debugcommands.pyMon Oct 16 14:32:06 2017 +0200 @@ -600,6 +600,7 @@ (sum of the sizes of all the blocks) :``largestblock``: size of the largest block of data read from the disk :``readdensity``: density of useful bytes in the data read from the disk +:``srchunks``: in how many data hunks the whole revision would be read The sparse read can be enabled with experimental.sparse-read = True """ @@ -645,7 +646,7 @@ 'sizerawsize chainsize ratio lindist extradist ' 'extraratio') if withsparseread: -fm.plain(' readsize largestblk rddensity') +fm.plain(' readsize largestblk rddensity srchunks') fm.plain('\n') chainbases = {} @@ -693,11 +694,17 @@ readdensity = float(chainsize) / float(readsize) -fm.write('readsize largestblock readdensity', - ' %10d %10d %9.5f', - readsize, largestblock, readdensity, +if util.safehasattr(revlog, '_slicechunk'): +revchunks = tuple(revlog._slicechunk(r, chain)) +else: +revchunks = (chain,) +srchunks = len(revchunks) + +fm.write('readsize largestblock readdensity srchunks', + ' %10d %10d %9.5f %8d', + readsize, largestblock, readdensity, srchunks, readsize=readsize, largestblock=largestblock, - readdensity=readdensity) + readdensity=readdensity, srchunks=srchunks) fm.plain('\n') diff -r 821d8a5ab4ff -r 77d4c2b07d6b tests/test-debugcommands.t --- a/tests/test-debugcommands.tTue Jan 16 22:14:33 2018 +0900 +++ b/tests/test-debugcommands.tMon Oct 16 
14:32:06 2017 +0200 @@ -86,8 +86,8 @@ > sparse-read = True > EOF $ hg debugdeltachain -m - rev chain# chainlen prev delta sizerawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity -0 11 -1base 44 43 44 1.0232644 00.0 44 44 1.0 + rev chain# chainlen prev delta sizerawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity srchunks +0 11 -1base 44 43 44 1.0232644 00.0 44 44 1.01 $ hg debugdeltachain -m -T '{rev} {chainid} {chainlen} {readsize} {largestblock} {readdensity}\n' 0 1 1 44 44 1.0 @@ -109,10 +109,42 @@ "readdensity": 1.0, "readsize": 44, "rev": 0, +"srchunks": 1, "uncompsize": 43 } ] + $ printf "This test checks things.\n" >> a + $ hg ci -m a + $ hg branch other + marked working directory as branch other + (branches are permanent and global, did you want a bookmark?) + $ for i in `$TESTDIR/seq.py 5`; do + > printf "shorter ${i}" >> a + > hg ci -m "a other:$i" + > hg up -q default + > printf "for the branch default we want longer chains: ${i}" >> a + > hg ci -m "a default:$i" + > hg up -q other + > done + $ hg debugdeltachain a -T '{rev} {srchunks}\n' \ + >--config experimental.sparse-read.density-threshold=0.50 \ + >--config experimental.sparse-read.min-gap-size=0 + 0 1 + 1 1 + 2 1 + 3 1 + 4 1 + 5 1 + 6 1 + 7 1 + 8 1 + 9 1 + 10 2 + 11 1 + $ hg --config extensions.strip= strip --no-backup -r 1 + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + Test max chain len $ cat >> $HGRCPATH << EOF > [format] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] debugdeltachain: display how many chunks would be read in sparse-read mode
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1508157126 -7200 # Mon Oct 16 14:32:06 2017 +0200 # Node ID 3f03791f818c1148b969e0635855ae751a1909c5 # Parent 821d8a5ab4ff890a7732c2e4cdcc7f32191e5942 # EXP-Topic debug-sparse-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 3f03791f818c debugdeltachain: display how many chunks would be read in sparse-read mode diff -r 821d8a5ab4ff -r 3f03791f818c mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Jan 16 22:14:33 2018 +0900 +++ b/mercurial/debugcommands.pyMon Oct 16 14:32:06 2017 +0200 @@ -600,6 +600,7 @@ (sum of the sizes of all the blocks) :``largestblock``: size of the largest block of data read from the disk :``readdensity``: density of useful bytes in the data read from the disk +:``srchunks``: in how many data hunks the whole revision would be read The sparse read can be enabled with experimental.sparse-read = True """ @@ -645,7 +646,7 @@ 'sizerawsize chainsize ratio lindist extradist ' 'extraratio') if withsparseread: -fm.plain(' readsize largestblk rddensity') +fm.plain(' readsize largestblk rddensity srchunks') fm.plain('\n') chainbases = {} @@ -693,11 +694,17 @@ readdensity = float(chainsize) / float(readsize) -fm.write('readsize largestblock readdensity', - ' %10d %10d %9.5f', - readsize, largestblock, readdensity, +if util.safehasattr(revlog, '_slicechunk'): +revchunks = tuple(revlog._slicechunk(r, chain)) +else: +revchunks = (chain,) +srchunks = len(revchunks) + +fm.write('readsize largestblock readdensity srchunks', + ' %10d %10d %9.5f %8d', + readsize, largestblock, readdensity, srchunks, readsize=readsize, largestblock=largestblock, - readdensity=readdensity) + readdensity=readdensity, srchunks=srchunks) fm.plain('\n') diff -r 821d8a5ab4ff -r 3f03791f818c tests/test-debugcommands.t --- a/tests/test-debugcommands.tTue Jan 16 22:14:33 2018 +0900 +++ b/tests/test-debugcommands.tMon Oct 16 
14:32:06 2017 +0200 @@ -86,8 +86,8 @@ > sparse-read = True > EOF $ hg debugdeltachain -m - rev chain# chainlen prev delta sizerawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity -0 11 -1base 44 43 44 1.0232644 00.0 44 44 1.0 + rev chain# chainlen prev delta sizerawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity srchunks +0 11 -1base 44 43 44 1.0232644 00.0 44 44 1.01 $ hg debugdeltachain -m -T '{rev} {chainid} {chainlen} {readsize} {largestblock} {readdensity}\n' 0 1 1 44 44 1.0 @@ -109,10 +109,42 @@ "readdensity": 1.0, "readsize": 44, "rev": 0, +"srchunks": 1, "uncompsize": 43 } ] + $ printf "This test checks things.\n" >> a + $ hg ci -m a + $ hg branch other + marked working directory as branch other + (branches are permanent and global, did you want a bookmark?) + $ for i in `$TESTDIR/seq.py 5`; do + > printf "shorter ${i}" >> a + > hg ci -m "a other:$i" + > hg up -q default + > printf "for the branch default we want longer chains: ${i}" >> a + > hg ci -m "a default:$i" + > hg up -q other + > done + $ hg debugdeltachain a -T '{rev} {srchunks}\n' \ + >--config experimental.sparse-read.density-threshold=0.50 \ + >--config experimental.sparse-read.min-gap-size=0 + 0 1 + 1 1 + 2 1 + 3 1 + 4 1 + 5 1 + 6 1 + 7 1 + 8 1 + 9 1 + 10 2 + 11 1 + $ hg --config extensions.strip= strip --no-backup -r 1 + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + Test max chain len $ cat >> $HGRCPATH << EOF > [format] @@ -312,38 +344,3 @@ from h hidden in g at: debugstacktrace.py:6 in f debugstacktrace.py:9 in g - -Test debugcapabilities command: - - $ hg debugcapabilities ./debugrevlog/ - Main capabilities: -branchmap -$USUAL_BUNDLE2_CAPS$ -getbundle -known -lookup -pushkey -unbundle - Bundle2 capabilities: -HG20 -bookmarks -changegroup - 01 - 02 -digests - md5 - sha1 - sha512 -error -
[PATCH 2 of 2 V2] revlog: group delta computation methods under _deltacomputer object
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515961692 -3600 # Sun Jan 14 21:28:12 2018 +0100 # Node ID 2eca3d9c1e76197e946adc4f15c2ede5a4c251dc # Parent 800aa38d15a621c7beecea286a1f0ab9750783ee # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 2eca3d9c1e76 revlog: group delta computation methods under _deltacomputer object diff -r 800aa38d15a6 -r 2eca3d9c1e76 mercurial/revlog.py --- a/mercurial/revlog.py Sun Jan 14 14:36:22 2018 +0100 +++ b/mercurial/revlog.py Sun Jan 14 21:28:12 2018 +0100 @@ -264,6 +264,155 @@ chainlen = attr.ib() compresseddeltalen = attr.ib() +class _deltacomputer(object): +def __init__(self, revlog): +self.revlog = revlog + +def _getcandidaterevs(self, p1, p2, cachedelta): +""" +Provides revisions that present an interest to be diffed against, +grouped by level of easiness. +""" +revlog = self.revlog +curr = len(revlog) +prev = curr - 1 +p1r, p2r = revlog.rev(p1), revlog.rev(p2) + +# should we try to build a delta? +if prev != nullrev and revlog.storedeltachains: +tested = set() +# This condition is true most of the time when processing +# changegroup data into a generaldelta repo. The only time it +# isn't true is if this is the first revision in a delta chain +# or if ``format.generaldelta=true`` disabled ``lazydeltabase``. +if cachedelta and revlog._generaldelta and revlog._lazydeltabase: +# Assume what we received from the server is a good choice +# build delta will reuse the cache +yield (cachedelta[0],) +tested.add(cachedelta[0]) + +if revlog._generaldelta: +# exclude already lazy tested base if any +parents = [p for p in (p1r, p2r) + if p != nullrev and p not in tested] +if parents and not revlog._aggressivemergedeltas: +# Pick whichever parent is closer to us (to minimize the +# chance of having to build a fulltext). 
+parents = [max(parents)] +tested.update(parents) +yield parents + +if prev not in tested: +# other approach failed try against prev to hopefully save us a +# fulltext. +yield (prev,) + +def buildtext(self, revinfo, fh): +"""Builds a fulltext version of a revision + +revinfo: _revisioninfo instance that contains all needed info +fh: file handle to either the .i or the .d revlog file, + depending on whether it is inlined or not +""" +btext = revinfo.btext +if btext[0] is not None: +return btext[0] + +revlog = self.revlog +cachedelta = revinfo.cachedelta +flags = revinfo.flags +node = revinfo.node + +baserev = cachedelta[0] +delta = cachedelta[1] +# special case deltas which replace entire base; no need to decode +# base revision. this neatly avoids censored bases, which throw when +# they're decoded. +hlen = struct.calcsize(">lll") +if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev), + len(delta) - hlen): +btext[0] = delta[hlen:] +else: +basetext = revlog.revision(baserev, _df=fh, raw=True) +btext[0] = mdiff.patch(basetext, delta) + +try: +res = revlog._processflags(btext[0], flags, 'read', raw=True) +btext[0], validatehash = res +if validatehash: +revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2) +if flags & REVIDX_ISCENSORED: +raise RevlogError(_('node %s is not censored') % node) +except CensoredNodeError: +# must pass the censored index flag to add censored revisions +if not flags & REVIDX_ISCENSORED: +raise +return btext[0] + +def _builddeltadiff(self, base, revinfo, fh): +revlog = self.revlog +t = self.buildtext(revinfo, fh) +if revlog.iscensored(base): +# deltas based on a censored revision must replace the +# full content in one patch, so delta works everywhere +header = mdiff.replacediffheader(revlog.rawsize(base), len(t)) +delta = header + t +else: +ptext = revlog.revision(base, _df=fh, raw=True) +delta = mdiff.textdiff(ptext, t) + +return delta + +def _builddeltainfo(self, rev
[PATCH 1 of 2 V2] revlog: refactor out _finddeltainfo from _addrevision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515936982 -3600 # Sun Jan 14 14:36:22 2018 +0100 # Node ID 800aa38d15a621c7beecea286a1f0ab9750783ee # Parent 821d8a5ab4ff890a7732c2e4cdcc7f32191e5942 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 800aa38d15a6 revlog: refactor out _finddeltainfo from _addrevision diff -r 821d8a5ab4ff -r 800aa38d15a6 mercurial/revlog.py --- a/mercurial/revlog.py Tue Jan 16 22:14:33 2018 +0900 +++ b/mercurial/revlog.py Sun Jan 14 14:36:22 2018 +0100 @@ -279,6 +279,7 @@ p1 = attr.ib() p2 = attr.ib() btext = attr.ib() +textlen = attr.ib() cachedelta = attr.ib() flags = attr.ib() @@ -1987,6 +1988,33 @@ return _deltainfo(dist, deltalen, (header, data), deltabase, chainbase, chainlen, compresseddeltalen) +def _finddeltainfo(self, revinfo, fh): +"""Find an acceptable delta against a candidate revision + +revinfo: information about the revision (instance of _revisioninfo) +fh: file handle to either the .i or the .d revlog file, + depending on whether it is inlined or not + +Returns the first acceptable candidate revision, as ordered by +_getcandidaterevs +""" +cachedelta = revinfo.cachedelta +p1 = revinfo.p1 +p2 = revinfo.p2 + +deltainfo = None +for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): +nominateddeltas = [] +for candidaterev in candidaterevs: +candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) +if self._isgooddeltainfo(candidatedelta, revinfo.textlen): +nominateddeltas.append(candidatedelta) +if nominateddeltas: +deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) +break + +return deltainfo + def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, cachedelta, ifh, dfh, alwayscache=False): """internal function to add revisions to the log @@ -2016,7 +2044,6 @@ curr = len(self) prev = curr - 1 offset = self.end(prev) -deltainfo = None p1r, p2r = 
self.rev(p1), self.rev(p2) # full versions are inserted when the needed deltas @@ -2027,16 +2054,8 @@ else: textlen = len(rawtext) -revinfo = _revisioninfo(node, p1, p2, btext, cachedelta, flags) -for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): -nominateddeltas = [] -for candidaterev in candidaterevs: -candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) -if self._isgooddeltainfo(candidatedelta, textlen): -nominateddeltas.append(candidatedelta) -if nominateddeltas: -deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) -break +revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) +deltainfo = self._finddeltainfo(revinfo, fh) if deltainfo is not None: base = deltainfo.base ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] revlog: refactor out _finddeltainfo from _addrevision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515936982 -3600 # Sun Jan 14 14:36:22 2018 +0100 # Node ID 800aa38d15a621c7beecea286a1f0ab9750783ee # Parent 821d8a5ab4ff890a7732c2e4cdcc7f32191e5942 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 800aa38d15a6 revlog: refactor out _finddeltainfo from _addrevision diff -r 821d8a5ab4ff -r 800aa38d15a6 mercurial/revlog.py --- a/mercurial/revlog.py Tue Jan 16 22:14:33 2018 +0900 +++ b/mercurial/revlog.py Sun Jan 14 14:36:22 2018 +0100 @@ -279,6 +279,7 @@ p1 = attr.ib() p2 = attr.ib() btext = attr.ib() +textlen = attr.ib() cachedelta = attr.ib() flags = attr.ib() @@ -1987,6 +1988,33 @@ return _deltainfo(dist, deltalen, (header, data), deltabase, chainbase, chainlen, compresseddeltalen) +def _finddeltainfo(self, revinfo, fh): +"""Find an acceptable delta against a candidate revision + +revinfo: information about the revision (instance of _revisioninfo) +fh: file handle to either the .i or the .d revlog file, + depending on whether it is inlined or not + +Returns the first acceptable candidate revision, as ordered by +_getcandidaterevs +""" +cachedelta = revinfo.cachedelta +p1 = revinfo.p1 +p2 = revinfo.p2 + +deltainfo = None +for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): +nominateddeltas = [] +for candidaterev in candidaterevs: +candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) +if self._isgooddeltainfo(candidatedelta, revinfo.textlen): +nominateddeltas.append(candidatedelta) +if nominateddeltas: +deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) +break + +return deltainfo + def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, cachedelta, ifh, dfh, alwayscache=False): """internal function to add revisions to the log @@ -2016,7 +2044,6 @@ curr = len(self) prev = curr - 1 offset = self.end(prev) -deltainfo = None p1r, p2r = 
self.rev(p1), self.rev(p2) # full versions are inserted when the needed deltas @@ -2027,16 +2054,8 @@ else: textlen = len(rawtext) -revinfo = _revisioninfo(node, p1, p2, btext, cachedelta, flags) -for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): -nominateddeltas = [] -for candidaterev in candidaterevs: -candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) -if self._isgooddeltainfo(candidatedelta, textlen): -nominateddeltas.append(candidatedelta) -if nominateddeltas: -deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) -break +revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) +deltainfo = self._finddeltainfo(revinfo, fh) if deltainfo is not None: base = deltainfo.base ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 8] _addrevision: choose between ifh and dfh once for all
On 01/14/2018 10:08 PM, Gregory Szorc wrote: > On Sun, Jan 14, 2018 at 2:28 AM, Paul Morelle > <paul.more...@octobus.net <mailto:paul.more...@octobus.net>> wrote: > > # HG changeset patch > # User Paul Morelle <paul.more...@octobus.net > <mailto:paul.more...@octobus.net>> > # Date 1515771775 -3600 > # Fri Jan 12 16:42:55 2018 +0100 > # Node ID 84eb864137a7b27e2357eb4f6d465f726670dc98 > # Parent 7526dfca3d32e7c51864c21de2c2f4735c4cade6 > # EXP-Topic refactor-revlog > # Available At https://bitbucket.org/octobus/mercurial-devel/ > <https://bitbucket.org/octobus/mercurial-devel/> > # hg pull > https://bitbucket.org/octobus/mercurial-devel/ > <https://bitbucket.org/octobus/mercurial-devel/> -r 84eb864137a7 > _addrevision: choose between ifh and dfh once for all > > > Queued parts 2-8. The entire series is now queued. > > FWIW, I was thinking about enabling aggressivemergedeltas by default. > Perf work around bdiff optimization in the past ~1 year has made it > fast enough that only very large fulltexts have noticeable performance > loss from enabling the feature. If you make delta generation faster in > the remainder of this series, I think there should be little reason to > not enable aggressivemergedeltas by default. > Hello Gregory, and thank you for your review, The idea would indeed be to increase delta generation speed. In order to do so, it would be parallelized into threads (see af25237be091 for the implementation in perfbdiff), which would store the deltas in a buffer ready to be used by _addrevision and co. Once a revision is done, the buffer would be notified, and the corresponding slot would be freed for another revision's deltas. This series is a first step to reorganize the code a bit in order to be able to separate things more easily between the threads and the main process; however, I am not sure I will reach the goal in time for the freeze.
And yes, the real aim would be to generalize aggressivemergedeltas, and even later to evolve the algorithm so that other revisions may be used too (intermediate semi-fulltexts, ...). However, the parallelization should be disabled if only one logical CPU is available, as it would just slow things down. And in this case, the current behavior should be maintained. As nowadays most computers have multiple threads, it shouldn't affect most users, but codewise it may be a burden. My idea would be to use an object for delta computation, which could be replaced by a threaded version if multiple CPUs are found. Do you have any comments about these ideas? Have a nice day! Paul ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 8 of 8] _addrevision: group revision info into a dedicated structure
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515919891 -3600 # Sun Jan 14 09:51:31 2018 +0100 # Node ID 6e287bddaacd03378c8fcde174dd1668211673e1 # Parent 9f916b7bc16409831776b50d6f400a41fdfbbcb7 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 6e287bddaacd _addrevision: group revision info into a dedicated structure diff -r 9f916b7bc164 -r 6e287bddaacd mercurial/revlog.py --- a/mercurial/revlog.py Sun Jan 14 09:30:47 2018 +0100 +++ b/mercurial/revlog.py Sun Jan 14 09:51:31 2018 +0100 @@ -264,6 +264,24 @@ chainlen = attr.ib() compresseddeltalen = attr.ib() +@attr.s(slots=True, frozen=True) +class _revisioninfo(object): +"""Information about a revision that allows building its fulltext +node: expected hash of the revision +p1, p2: parent revs of the revision +btext: built text cache consisting of a one-element list +cachedelta: (baserev, uncompressed_delta) or None +flags: flags associated to the revision storage + +One of btext[0] or cachedelta must be set. +""" +node = attr.ib() +p1 = attr.ib() +p2 = attr.ib() +btext = attr.ib() +cachedelta = attr.ib() +flags = attr.ib() + # index v0: # 4 bytes: offset # 4 bytes: compressed length @@ -1894,21 +1912,21 @@ # fulltext. yield (prev,) -def _buildtext(self, node, p1, p2, btext, cachedelta, fh, flags): +def _buildtext(self, revinfo, fh): """Builds a fulltext version of a revision -node: expected hash of the revision -p1, p2: parent revs of the revision -btext: built text cache consisting of a one-element list -cachedelta: (baserev, uncompressed_delta) or None -fh: file handle to either the .i or the .d revlog file, -depending on whether it is inlined or not -flags: flags associated to the revision storage - -One of btext[0] or cachedelta must be set. 
+revinfo: _revisioninfo instance that contains all needed info +fh: file handle to either the .i or the .d revlog file, + depending on whether it is inlined or not """ +btext = revinfo.btext if btext[0] is not None: return btext[0] + +cachedelta = revinfo.cachedelta +flags = revinfo.flags +node = revinfo.node + baserev = cachedelta[0] delta = cachedelta[1] # special case deltas which replace entire base; no need to decode @@ -1926,7 +1944,7 @@ res = self._processflags(btext[0], flags, 'read', raw=True) btext[0], validatehash = res if validatehash: -self.checkhash(btext[0], node, p1=p1, p2=p2) +self.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2) if flags & REVIDX_ISCENSORED: raise RevlogError(_('node %s is not censored') % node) except CensoredNodeError: @@ -1935,8 +1953,8 @@ raise return btext[0] -def _builddeltadiff(self, base, node, p1, p2, btext, cachedelta, fh, flags): -t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) +def _builddeltadiff(self, base, revinfo, fh): +t = self._buildtext(revinfo, fh) if self.iscensored(base): # deltas based on a censored revision must replace the # full content in one patch, so delta works everywhere @@ -1948,13 +1966,12 @@ return delta -def _builddeltainfo(self, node, base, p1, p2, btext, cachedelta, fh, flags): +def _builddeltainfo(self, revinfo, base, fh): # can we use the cached delta? 
-if cachedelta and cachedelta[0] == base: -delta = cachedelta[1] +if revinfo.cachedelta and revinfo.cachedelta[0] == base: +delta = revinfo.cachedelta[1] else: -delta = self._builddeltadiff(base, node, p1, p2, btext, cachedelta, - fh, flags) +delta = self._builddeltadiff(base, revinfo, fh) header, data = self.compress(delta) deltalen = len(header) + len(data) chainbase = self.chainbase(base) @@ -2010,12 +2027,11 @@ else: textlen = len(rawtext) +revinfo = _revisioninfo(node, p1, p2, btext, cachedelta, flags) for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): nominateddeltas = [] for candidaterev in candidaterevs: -candidatedelta = self._builddeltainfo(node, candidaterev, p1, - p2, btext, cachedelta, - fh,
[PATCH 1 of 8] _addrevision: refactor out the selection of candidate revisions
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515668342 -3600 # Thu Jan 11 11:59:02 2018 +0100 # Node ID 7526dfca3d32e7c51864c21de2c2f4735c4cade6 # Parent 4b68ca118d8d316cff1fbfe260e8fdb0dae3e26a # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 7526dfca3d32 _addrevision: refactor out the selection of candidate revisions The new function will be useful to retrieve all the revisions which will be needed to determine the best delta, and parallelize the computation of the necessary diffs. diff -r 4b68ca118d8d -r 7526dfca3d32 mercurial/revlog.py --- a/mercurial/revlog.py Thu Jan 11 11:57:59 2018 + +++ b/mercurial/revlog.py Thu Jan 11 11:59:02 2018 +0100 @@ -1844,6 +1844,44 @@ return True +def _getcandidaterevs(self, p1, p2, cachedelta): +""" +Provides revisions that present an interest to be diffed against, +grouped by level of easiness. +""" +curr = len(self) +prev = curr - 1 +p1r, p2r = self.rev(p1), self.rev(p2) + +# should we try to build a delta? +if prev != nullrev and self.storedeltachains: +tested = set() +# This condition is true most of the time when processing +# changegroup data into a generaldelta repo. The only time it +# isn't true is if this is the first revision in a delta chain +# or if ``format.generaldelta=true`` disabled ``lazydeltabase``. +if cachedelta and self._generaldelta and self._lazydeltabase: +# Assume what we received from the server is a good choice +# build delta will reuse the cache +yield (cachedelta[0],) +tested.add(cachedelta[0]) + +if self._generaldelta: +# exclude already lazy tested base if any +parents = [p for p in (p1r, p2r) + if p != nullrev and p not in tested] +if parents and not self._aggressivemergedeltas: +# Pick whichever parent is closer to us (to minimize the +# chance of having to build a fulltext). 
+parents = [max(parents)] +tested.update(parents) +yield parents + +if prev not in tested: +# other approach failed try against prev to hopefully save us a +# fulltext. +yield (prev,) + def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, cachedelta, ifh, dfh, alwayscache=False): """internal function to add revisions to the log @@ -1943,42 +1981,16 @@ else: textlen = len(rawtext) -# should we try to build a delta? -if prev != nullrev and self.storedeltachains: -tested = set() -# This condition is true most of the time when processing -# changegroup data into a generaldelta repo. The only time it -# isn't true is if this is the first revision in a delta chain -# or if ``format.generaldelta=true`` disabled ``lazydeltabase``. -if cachedelta and self._generaldelta and self._lazydeltabase: -# Assume what we received from the server is a good choice -# build delta will reuse the cache -candidatedelta = builddelta(cachedelta[0]) -tested.add(cachedelta[0]) +for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): +nominateddeltas = [] +for candidaterev in candidaterevs: +candidatedelta = builddelta(candidaterev) if self._isgooddelta(candidatedelta, textlen): -delta = candidatedelta -if delta is None and self._generaldelta: -# exclude already lazy tested base if any -parents = [p for p in (p1r, p2r) - if p != nullrev and p not in tested] -if parents and not self._aggressivemergedeltas: -# Pick whichever parent is closer to us (to minimize the -# chance of having to build a fulltext). -parents = [max(parents)] -tested.update(parents) -pdeltas = [] -for p in parents: -pd = builddelta(p) -if self._isgooddelta(pd, textlen): -pdeltas.append(pd) -if pdeltas: -delta = min(pdeltas, key=lambda x: x[1]) -if delta is None and prev not in tested: -# other approach failed try against prev to hopefully save us a -# fulltext. -candidatedelta = bui
[PATCH 6 of 8] _builddeltainfo: separate diff computation from the collection of other info
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515844518 -3600 # Sat Jan 13 12:55:18 2018 +0100 # Node ID d321149c4918b0c008fc38f318c4759c7c29ba80 # Parent 6e83370fc8befdebc523b92f6f4ff6ce009c97ad # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r d321149c4918 _builddeltainfo: separate diff computation from the collection of other info diff -r 6e83370fc8be -r d321149c4918 mercurial/revlog.py --- a/mercurial/revlog.py Fri Jan 12 18:58:44 2018 +0100 +++ b/mercurial/revlog.py Sat Jan 13 12:55:18 2018 +0100 @@ -1935,20 +1935,26 @@ raise return btext[0] +def _builddeltadiff(self, base, node, p1, p2, btext, cachedelta, fh, flags): +t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) +if self.iscensored(base): +# deltas based on a censored revision must replace the +# full content in one patch, so delta works everywhere +header = mdiff.replacediffheader(self.rawsize(base), len(t)) +delta = header + t +else: +ptext = self.revision(base, _df=fh, raw=True) +delta = mdiff.textdiff(ptext, t) + +return delta + def _builddeltainfo(self, node, rev, p1, p2, btext, cachedelta, fh, flags): # can we use the cached delta? 
if cachedelta and cachedelta[0] == rev: delta = cachedelta[1] else: -t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) -if self.iscensored(rev): -# deltas based on a censored revision must replace the -# full content in one patch, so delta works everywhere -header = mdiff.replacediffheader(self.rawsize(rev), len(t)) -delta = header + t -else: -ptext = self.revision(rev, _df=fh, raw=True) -delta = mdiff.textdiff(ptext, t) +delta = self._builddeltadiff(rev, node, p1, p2, btext, cachedelta, + fh, flags) header, data = self.compress(delta) deltalen = len(header) + len(data) chainbase = self.chainbase(rev) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 4 of 8] revlog: extract 'builddelta' closure function from _addrevision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515777003 -3600 # Fri Jan 12 18:10:03 2018 +0100 # Node ID c9069bebf72b906229e740bf8fe4beee37570dc9 # Parent 2f39856d4feee57695b05c9298a3bf1789edf173 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r c9069bebf72b revlog: extract 'builddelta' closure function from _addrevision diff -r 2f39856d4fee -r c9069bebf72b mercurial/revlog.py --- a/mercurial/revlog.py Fri Jan 12 15:55:25 2018 +0100 +++ b/mercurial/revlog.py Fri Jan 12 18:10:03 2018 +0100 @@ -1923,6 +1923,35 @@ raise return btext[0] +def _builddelta(self, node, rev, p1, p2, btext, cachedelta, fh, flags): +# can we use the cached delta? +if cachedelta and cachedelta[0] == rev: +delta = cachedelta[1] +else: +t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) +if self.iscensored(rev): +# deltas based on a censored revision must replace the +# full content in one patch, so delta works everywhere +header = mdiff.replacediffheader(self.rawsize(rev), len(t)) +delta = header + t +else: +ptext = self.revision(rev, _df=fh, raw=True) +delta = mdiff.textdiff(ptext, t) +header, data = self.compress(delta) +deltalen = len(header) + len(data) +chainbase = self.chainbase(rev) +offset = self.end(len(self) - 1) +dist = deltalen + offset - self.start(chainbase) +if self._generaldelta: +base = rev +else: +base = chainbase +chainlen, compresseddeltalen = self._chaininfo(rev) +chainlen += 1 +compresseddeltalen += deltalen +return (dist, deltalen, (header, data), base, +chainbase, chainlen, compresseddeltalen) + def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, cachedelta, ifh, dfh, alwayscache=False): """internal function to add revisions to the log @@ -1949,34 +1978,6 @@ btext = [rawtext] -def builddelta(rev): -# can we use the cached delta? 
-if cachedelta and cachedelta[0] == rev: -delta = cachedelta[1] -else: -t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) -if self.iscensored(rev): -# deltas based on a censored revision must replace the -# full content in one patch, so delta works everywhere -header = mdiff.replacediffheader(self.rawsize(rev), len(t)) -delta = header + t -else: -ptext = self.revision(rev, _df=fh, raw=True) -delta = mdiff.textdiff(ptext, t) -header, data = self.compress(delta) -deltalen = len(header) + len(data) -chainbase = self.chainbase(rev) -dist = deltalen + offset - self.start(chainbase) -if self._generaldelta: -base = rev -else: -base = chainbase -chainlen, compresseddeltalen = self._chaininfo(rev) -chainlen += 1 -compresseddeltalen += deltalen -return (dist, deltalen, (header, data), base, -chainbase, chainlen, compresseddeltalen) - curr = len(self) prev = curr - 1 offset = self.end(prev) @@ -1994,7 +1995,9 @@ for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): nominateddeltas = [] for candidaterev in candidaterevs: -candidatedelta = builddelta(candidaterev) +candidatedelta = self._builddelta(node, candidaterev, p1, p2, + btext, cachedelta, fh, + flags) if self._isgooddelta(candidatedelta, textlen): nominateddeltas.append(candidatedelta) if nominateddeltas: ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 8] _addrevision: choose between ifh and dfh once for all
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515771775 -3600 # Fri Jan 12 16:42:55 2018 +0100 # Node ID 84eb864137a7b27e2357eb4f6d465f726670dc98 # Parent 7526dfca3d32e7c51864c21de2c2f4735c4cade6 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 84eb864137a7 _addrevision: choose between ifh and dfh once for all diff -r 7526dfca3d32 -r 84eb864137a7 mercurial/revlog.py --- a/mercurial/revlog.py Thu Jan 11 11:59:02 2018 +0100 +++ b/mercurial/revlog.py Fri Jan 12 16:42:55 2018 +0100 @@ -1901,6 +1901,11 @@ raise RevlogError(_("%s: attempt to add wdir revision") % (self.indexfile)) +if self._inline: +fh = ifh +else: +fh = dfh + btext = [rawtext] def buildtext(): if btext[0] is not None: @@ -1915,10 +1920,6 @@ len(delta) - hlen): btext[0] = delta[hlen:] else: -if self._inline: -fh = ifh -else: -fh = dfh basetext = self.revision(baserev, _df=fh, raw=True) btext[0] = mdiff.patch(basetext, delta) @@ -1947,10 +1948,6 @@ header = mdiff.replacediffheader(self.rawsize(rev), len(t)) delta = header + t else: -if self._inline: -fh = ifh -else: -fh = dfh ptext = self.revision(rev, _df=fh, raw=True) delta = mdiff.textdiff(ptext, t) header, data = self.compress(delta) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 8] revlog: extract 'buildtext' closure function from _addrevision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515768925 -3600 # Fri Jan 12 15:55:25 2018 +0100 # Node ID 2f39856d4feee57695b05c9298a3bf1789edf173 # Parent 84eb864137a7b27e2357eb4f6d465f726670dc98 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 2f39856d4fee revlog: extract 'buildtext' closure function from _addrevision diff -r 84eb864137a7 -r 2f39856d4fee mercurial/revlog.py --- a/mercurial/revlog.py Fri Jan 12 16:42:55 2018 +0100 +++ b/mercurial/revlog.py Fri Jan 12 15:55:25 2018 +0100 @@ -1882,6 +1882,47 @@ # fulltext. yield (prev,) +def _buildtext(self, node, p1, p2, btext, cachedelta, fh, flags): +"""Builds a fulltext version of a revision + +node: expected hash of the revision +p1, p2: parent revs of the revision +btext: built text cache consisting of a one-element list +cachedelta: (baserev, uncompressed_delta) or None +fh: file handle to either the .i or the .d revlog file, +depending on whether it is inlined or not +flags: flags associated to the revision storage + +One of btext[0] or cachedelta must be set. +""" +if btext[0] is not None: +return btext[0] +baserev = cachedelta[0] +delta = cachedelta[1] +# special case deltas which replace entire base; no need to decode +# base revision. this neatly avoids censored bases, which throw when +# they're decoded. 
+hlen = struct.calcsize(">lll") +if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev), + len(delta) - hlen): +btext[0] = delta[hlen:] +else: +basetext = self.revision(baserev, _df=fh, raw=True) +btext[0] = mdiff.patch(basetext, delta) + +try: +res = self._processflags(btext[0], flags, 'read', raw=True) +btext[0], validatehash = res +if validatehash: +self.checkhash(btext[0], node, p1=p1, p2=p2) +if flags & REVIDX_ISCENSORED: +raise RevlogError(_('node %s is not censored') % node) +except CensoredNodeError: +# must pass the censored index flag to add censored revisions +if not flags & REVIDX_ISCENSORED: +raise +return btext[0] + def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, cachedelta, ifh, dfh, alwayscache=False): """internal function to add revisions to the log @@ -1907,41 +1948,13 @@ fh = dfh btext = [rawtext] -def buildtext(): -if btext[0] is not None: -return btext[0] -baserev = cachedelta[0] -delta = cachedelta[1] -# special case deltas which replace entire base; no need to decode -# base revision. this neatly avoids censored bases, which throw when -# they're decoded. -hlen = struct.calcsize(">lll") -if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev), - len(delta) - hlen): -btext[0] = delta[hlen:] -else: -basetext = self.revision(baserev, _df=fh, raw=True) -btext[0] = mdiff.patch(basetext, delta) - -try: -res = self._processflags(btext[0], flags, 'read', raw=True) -btext[0], validatehash = res -if validatehash: -self.checkhash(btext[0], node, p1=p1, p2=p2) -if flags & REVIDX_ISCENSORED: -raise RevlogError(_('node %s is not censored') % node) -except CensoredNodeError: -# must pass the censored index flag to add censored revisions -if not flags & REVIDX_ISCENSORED: -raise -return btext[0] def builddelta(rev): # can we use the cached delta? 
if cachedelta and cachedelta[0] == rev: delta = cachedelta[1] else: -t = buildtext() +t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) if self.iscensored(rev): # deltas based on a censored revision must replace the # full content in one patch, so delta works everywhere @@ -1991,7 +2004,8 @@ if delta is not None: dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta else: -rawtext = buildtext() +rawtext = self
[PATCH 7 of 8] _builddeltainfo: rename 'rev' to 'base', as it is the base revision
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515918647 -3600 # Sun Jan 14 09:30:47 2018 +0100 # Node ID 9f916b7bc16409831776b50d6f400a41fdfbbcb7 # Parent d321149c4918b0c008fc38f318c4759c7c29ba80 # EXP-Topic refactor-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9f916b7bc164 _builddeltainfo: rename 'rev' to 'base', as it is the base revision diff -r d321149c4918 -r 9f916b7bc164 mercurial/revlog.py --- a/mercurial/revlog.py Sat Jan 13 12:55:18 2018 +0100 +++ b/mercurial/revlog.py Sun Jan 14 09:30:47 2018 +0100 @@ -1948,26 +1948,26 @@ return delta -def _builddeltainfo(self, node, rev, p1, p2, btext, cachedelta, fh, flags): +def _builddeltainfo(self, node, base, p1, p2, btext, cachedelta, fh, flags): # can we use the cached delta? -if cachedelta and cachedelta[0] == rev: +if cachedelta and cachedelta[0] == base: delta = cachedelta[1] else: -delta = self._builddeltadiff(rev, node, p1, p2, btext, cachedelta, +delta = self._builddeltadiff(base, node, p1, p2, btext, cachedelta, fh, flags) header, data = self.compress(delta) deltalen = len(header) + len(data) -chainbase = self.chainbase(rev) +chainbase = self.chainbase(base) offset = self.end(len(self) - 1) dist = deltalen + offset - self.start(chainbase) if self._generaldelta: -base = rev +deltabase = base else: -base = chainbase -chainlen, compresseddeltalen = self._chaininfo(rev) +deltabase = chainbase +chainlen, compresseddeltalen = self._chaininfo(base) chainlen += 1 compresseddeltalen += deltalen -return _deltainfo(dist, deltalen, (header, data), base, +return _deltainfo(dist, deltalen, (header, data), deltabase, chainbase, chainlen, compresseddeltalen) def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] upgraderepo: select correct deltareuse depending on actions
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1515601465 -3600 # Wed Jan 10 17:24:25 2018 +0100 # Node ID e95cf6b79a9abdc5fc3a071cb82f5cabe0b117f3 # Parent ebf14075a5c113f4fea6e89a4394d8c8fc9e6935 # EXP-Topic upgrade-deltareuse # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r e95cf6b79a9a upgraderepo: select correct deltareuse depending on actions Only 'redeltafulladd' was taken into account because of a small typo. diff -r ebf14075a5c1 -r e95cf6b79a9a mercurial/upgrade.py --- a/mercurial/upgrade.py Sun Jan 07 15:21:16 2018 -0500 +++ b/mercurial/upgrade.py Wed Jan 10 17:24:25 2018 +0100 @@ -631,7 +631,7 @@ deltareuse = revlog.revlog.DELTAREUSESAMEREVS elif 'redeltamultibase' in actions: deltareuse = revlog.revlog.DELTAREUSESAMEREVS -if 'redeltafulladd' in actions: +elif 'redeltafulladd' in actions: deltareuse = revlog.revlog.DELTAREUSEFULLADD else: deltareuse = revlog.revlog.DELTAREUSEALWAYS ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH V2] perf: add threading capability to perfbdiff
# HG changeset patch # User Boris Feld <boris.f...@octobus.net> # Date 1513481487 -3600 # Sun Dec 17 04:31:27 2017 +0100 # Node ID a04df08f0f218da10bb57c9f7080770a7f3e56c0 # Parent b55a142f00c5a92a19ff94fbe9b5d09e28716860 # EXP-Topic threaded-diff # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a04df08f0f21 perf: add threading capability to perfbdiff Since we are releasing the GIL during diffing, it is interesting to see how a thread pool would perform on diffing. We add a new `--threads` argument to commands. Synchronizing the thread pool is a bit complex because we want to be able to reuse it from one run to another. On my computer (i7 with 4 cores + hyperthreading), I get the following data for about 12000 revisions: threads wall comb wall gain comb overhead none 31.596715 31.59 0.00% 0.00% 1 31.621228 31.62 -0.08% 0.09% 2 16.406202 32.84 48.08% 3.83% 3 11.598334 34.76 63.29% 10.03% 4 9.205421 36.77 70.87% 16.40% 5 8.517604 42.51 73.04% 34.57% 6 7.94645 47.58 74.85% 50.62% 7 7.434972 51.92 76.47% 64.36% 8 7.070638 55.34 77.62% 75.18% Compared to the feature disabled (threads=0), the overhead is negligible with the threading code (threads=1), and the gain is already 48% with two threads. 
diff -r b55a142f00c5 -r a04df08f0f21 contrib/perf.py --- a/contrib/perf.py Tue Dec 26 22:56:07 2017 +0530 +++ b/contrib/perf.py Sun Dec 17 04:31:27 2017 +0100 @@ -25,7 +25,9 @@ import random import struct import sys +import threading import time +import util.queue from mercurial import ( changegroup, cmdutil, @@ -933,11 +935,25 @@ timer(d) fm.end() +def _bdiffworker(q, ready, done): +while not done.is_set(): +pair = q.get() +while pair is not None: +mdiff.textdiff(*pair) +q.task_done() +pair = q.get() +q.task_done() # for the None one +with ready: +ready.wait() + @command('perfbdiff', revlogopts + formatteropts + [ ('', 'count', 1, 'number of revisions to test (when using --startrev)'), -('', 'alldata', False, 'test bdiffs for all associated revisions')], +('', 'alldata', False, 'test bdiffs for all associated revisions'), +('', 'threads', 0, 'number of thread to use (disable with 0)'), +], + '-c|-m|FILE REV') -def perfbdiff(ui, repo, file_, rev=None, count=None, **opts): +def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts): """benchmark a bdiff between revisions By default, benchmark a bdiff between its delta parent and itself. 
@@ -983,14 +999,39 @@ dp = r.deltaparent(rev) textpairs.append((r.revision(dp), r.revision(rev))) -def d(): -for pair in textpairs: -mdiff.textdiff(*pair) - +withthreads = threads > 0 +if not withthreads: +def d(): +for pair in textpairs: +mdiff.textdiff(*pair) +else: +q = util.queue() +for i in xrange(threads): +q.put(None) +ready = threading.Condition() +done = threading.Event() +for i in xrange(threads): +threading.Thread(target=_bdiffworker, args=(q, ready, done)).start() +q.join() +def d(): +for pair in textpairs: +q.put(pair) +for i in xrange(threads): +q.put(None) +with ready: +ready.notify_all() +q.join() timer, fm = gettimer(ui, opts) timer(d) fm.end() +if withthreads: +done.set() +for i in xrange(threads): +q.put(None) +with ready: +ready.notify_all() + @command('perfdiffwd', formatteropts) def perfdiffwd(ui, repo, **opts): """Profile diff of working directory changes""" diff -r b55a142f00c5 -r a04df08f0f21 tests/test-contrib-perf.t --- a/tests/test-contrib-perf.t Tue Dec 26 22:56:07 2017 +0530 +++ b/tests/test-contrib-perf.t Sun Dec 17 04:31:27 2017 +0100 @@ -175,7 +175,7 @@ $ (testrepohg files -r 1.2 glob:mercurial/*.c glob:mercurial/*.py; > testrepohg files -r tip glob:mercurial/*.c glob:mercurial/*.py) | > "$TESTDIR"/check-perf-code.py contrib/perf.py - contrib/perf.py:498: + contrib/perf.py:\d+: (re) > from mercurial import ( import newer module separately in try clause for early Mercurial [1] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] perf: add threading capability to perfbdiff
Please wait for the V2, as this doesn't pass the tests. Sorry for the premature post! Paul On 01/10/2018 02:18 PM, Paul Morelle wrote: > # HG changeset patch > # User Boris Feld <boris.f...@octobus.net> > # Date 1513481487 -3600 > # Sun Dec 17 04:31:27 2017 +0100 > # Node ID 642ecc1f18e946df58d9870fce258f85a57bc9a3 > # Parent b55a142f00c5a92a19ff94fbe9b5d09e28716860 > # EXP-Topic threaded-diff > # Available At https://bitbucket.org/octobus/mercurial-devel/ > # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r > 642ecc1f18e9 > perf: add threading capability to perfbdiff > > Since we are releasing the GIL during diffing, it is interesting to see how a > thread pool would perform on diffing. We add a new `--threads` argument to > commands. Synchronizing the thread pool is a bit complex because we want to be > able to reuse it from one run to another. > > On my computer (i7 with 4 cores + hyperthreading), I get the following data > for > about 12000 revisions: > threads wall comb wall gain comb overhead > none 31.596715 31.59 0.00% 0.00% > 1 31.621228 31.62 -0.08% 0.09% > 2 16.406202 32.84 48.08% 3.83% > 3 11.598334 34.76 63.29% 10.03% > 4 9.205421 36.77 70.87% 16.40% > 5 8.517604 42.51 73.04% 34.57% > 6 7.94645 47.58 74.85% 50.62% > 7 7.434972 51.92 76.47% 64.36% > 8 7.070638 55.34 77.62% 75.18% > > Compared to the feature disabled (threads=0), the overhead is negligible with > the threading code (threads=1), and the gain is already 48% with two threads. 
> > diff -r b55a142f00c5 -r 642ecc1f18e9 contrib/perf.py > --- a/contrib/perf.py Tue Dec 26 22:56:07 2017 +0530 > +++ b/contrib/perf.py Sun Dec 17 04:31:27 2017 +0100 > @@ -22,9 +22,11 @@ > import functools > import gc > import os > +import Queue > import random > import struct > import sys > +import threading > import time > from mercurial import ( > changegroup, > @@ -933,11 +935,25 @@ > timer(d) > fm.end() > > +def _bdiffworker(q, ready, done): > +while not done.is_set(): > +pair = q.get() > +while pair is not None: > +mdiff.textdiff(*pair) > +q.task_done() > +pair = q.get() > +q.task_done() # for the None one > +with ready: > +ready.wait() > + > @command('perfbdiff', revlogopts + formatteropts + [ > ('', 'count', 1, 'number of revisions to test (when using --startrev)'), > -('', 'alldata', False, 'test bdiffs for all associated revisions')], > +('', 'alldata', False, 'test bdiffs for all associated revisions'), > +('', 'threads', 0, 'number of thread to use (disable with 0)'), > +], > + > '-c|-m|FILE REV') > -def perfbdiff(ui, repo, file_, rev=None, count=None, **opts): > +def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts): > """benchmark a bdiff between revisions > > By default, benchmark a bdiff between its delta parent and itself. 
> @@ -983,14 +999,40 @@ > dp = r.deltaparent(rev) > textpairs.append((r.revision(dp), r.revision(rev))) > > -def d(): > -for pair in textpairs: > -mdiff.textdiff(*pair) > - > +withthreads = threads <= 0 > +if withthreads: > +def d(): > +for pair in textpairs: > +mdiff.textdiff(*pair) > +else: > +q = Queue.Queue() > +for i in xrange(threads): > +q.put(None) > +ready = threading.Condition() > +done = threading.Event() > +for i in xrange(threads): > +threading.Thread(target=_bdiffworker, args=(q, ready, > done)).start() > +q.join() > +def d(): > +for pair in textpairs: > +q.put(pair) > +for i in xrange(threads): > +q.put(None) > +with ready: > +ready.notify_all() > +q.join() > timer, fm = gettimer(ui, opts) > timer(d) > fm.end() > > +if withthreads: > +done.set() > +for i in xrange(threads): > +q.put(None) > +with ready: > +ready.notify_all() > + > + > @command('perfdiffwd', formatteropts) > def perfdiffwd(ui, repo, **opts): > """Profile diff of working directory changes""" > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] perf: add threading capability to perfbdiff
# HG changeset patch # User Boris Feld <boris.f...@octobus.net> # Date 1513481487 -3600 # Sun Dec 17 04:31:27 2017 +0100 # Node ID 642ecc1f18e946df58d9870fce258f85a57bc9a3 # Parent b55a142f00c5a92a19ff94fbe9b5d09e28716860 # EXP-Topic threaded-diff # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 642ecc1f18e9 perf: add threading capability to perfbdiff Since we are releasing the GIL during diffing, it is interesting to see how a thread pool would perform on diffing. We add a new `--threads` argument to commands. Synchronizing the thread pool is a bit complex because we want to be able to reuse it from one run to another. On my computer (i7 with 4 cores + hyperthreading), I get the following data for about 12000 revisions: threads wall comb wall gain comb overhead none 31.596715 31.59 0.00% 0.00% 1 31.621228 31.62 -0.08% 0.09% 2 16.406202 32.84 48.08% 3.83% 3 11.598334 34.76 63.29% 10.03% 4 9.205421 36.77 70.87% 16.40% 5 8.517604 42.51 73.04% 34.57% 6 7.94645 47.58 74.85% 50.62% 7 7.434972 51.92 76.47% 64.36% 8 7.070638 55.34 77.62% 75.18% Compared to the feature disabled (threads=0), the overhead is negligible with the threading code (threads=1), and the gain is already 48% with two threads. 
diff -r b55a142f00c5 -r 642ecc1f18e9 contrib/perf.py --- a/contrib/perf.py Tue Dec 26 22:56:07 2017 +0530 +++ b/contrib/perf.py Sun Dec 17 04:31:27 2017 +0100 @@ -22,9 +22,11 @@ import functools import gc import os +import Queue import random import struct import sys +import threading import time from mercurial import ( changegroup, @@ -933,11 +935,25 @@ timer(d) fm.end() +def _bdiffworker(q, ready, done): +while not done.is_set(): +pair = q.get() +while pair is not None: +mdiff.textdiff(*pair) +q.task_done() +pair = q.get() +q.task_done() # for the None one +with ready: +ready.wait() + @command('perfbdiff', revlogopts + formatteropts + [ ('', 'count', 1, 'number of revisions to test (when using --startrev)'), -('', 'alldata', False, 'test bdiffs for all associated revisions')], +('', 'alldata', False, 'test bdiffs for all associated revisions'), +('', 'threads', 0, 'number of thread to use (disable with 0)'), +], + '-c|-m|FILE REV') -def perfbdiff(ui, repo, file_, rev=None, count=None, **opts): +def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts): """benchmark a bdiff between revisions By default, benchmark a bdiff between its delta parent and itself. 
@@ -983,14 +999,40 @@ dp = r.deltaparent(rev) textpairs.append((r.revision(dp), r.revision(rev))) -def d(): -for pair in textpairs: -mdiff.textdiff(*pair) - +withthreads = threads <= 0 +if withthreads: +def d(): +for pair in textpairs: +mdiff.textdiff(*pair) +else: +q = Queue.Queue() +for i in xrange(threads): +q.put(None) +ready = threading.Condition() +done = threading.Event() +for i in xrange(threads): +threading.Thread(target=_bdiffworker, args=(q, ready, done)).start() +q.join() +def d(): +for pair in textpairs: +q.put(pair) +for i in xrange(threads): +q.put(None) +with ready: +ready.notify_all() +q.join() timer, fm = gettimer(ui, opts) timer(d) fm.end() +if withthreads: +done.set() +for i in xrange(threads): +q.put(None) +with ready: +ready.notify_all() + + @command('perfdiffwd', formatteropts) def perfdiffwd(ui, repo, **opts): """Profile diff of working directory changes""" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V4] debuglocks: allow setting a lock
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510497286 -3600 # Sun Nov 12 15:34:46 2017 +0100 # Node ID 8bcccbbeafba2bc80ed9e427945e11a4728802e8 # Parent 9beb49c91570014c034c3eaad9ce0a7a37e4c931 # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8bcccbbeafba debuglocks: allow setting a lock diff -r 9beb49c91570 -r 8bcccbbeafba mercurial/debugcommands.py --- a/mercurial/debugcommands.pySun Nov 12 15:34:19 2017 +0100 +++ b/mercurial/debugcommands.pySun Nov 12 15:34:46 2017 +0100 @@ -77,6 +77,8 @@ command = registrar.command() +_confirmlockremovalmsg = _("ready to release the lock (Y)? $$ ") + @command('debugancestor', [], _('[INDEX] REV1 REV2'), optionalrepo=True) def debugancestor(ui, repo, *args): """find the ancestor revision of two revisions in a given index""" @@ -1275,7 +1277,10 @@ @command('debuglocks', [('L', 'force-lock', None, _('free the store lock (DANGEROUS)')), ('W', 'force-wlock', None, - _('free the working state lock (DANGEROUS)'))], + _('free the working state lock (DANGEROUS)')), + ('s', 'set-lock', None, _('set the store lock until stopped')), + ('S', 'set-wlock', None, + _('set the working state lock until stopped'))], _('[OPTION]...')) def debuglocks(ui, repo, **opts): """show or modify state of locks @@ -1294,6 +1299,10 @@ instance, on a shared filesystem). Removing locks may also be blocked by filesystem permissions. +Setting a lock will prevent other commands from changing the data. +The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs. +The set locks are removed when the command exits. + Returns 0 if no locks are held. 
""" @@ -1305,6 +1314,24 @@ if opts.get(r'force_lock') or opts.get(r'force_wlock'): return 0 +locks = [] +try: +if opts.get(r'set_wlock'): +try: +locks.append(repo.wlock(False)) +except error.LockHeld: +raise error.Abort(_('wlock is already held')) +if opts.get(r'set_lock'): +try: +locks.append(repo.lock(False)) +except error.LockHeld: +raise error.Abort(_('lock is already held')) +if len(locks): +ui.promptchoice(_confirmlockremovalmsg) +return 0 +finally: +release(*locks) + now = time.time() held = 0 diff -r 9beb49c91570 -r 8bcccbbeafba tests/test-completion.t --- a/tests/test-completion.t Sun Nov 12 15:34:19 2017 +0100 +++ b/tests/test-completion.t Sun Nov 12 15:34:46 2017 +0100 @@ -274,7 +274,7 @@ debuginstall: template debugknown: debuglabelcomplete: - debuglocks: force-lock, force-wlock + debuglocks: force-lock, force-wlock, set-lock, set-wlock debugmergestate: debugnamecomplete: debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template diff -r 9beb49c91570 -r 8bcccbbeafba tests/test-debugcommands.t --- a/tests/test-debugcommands.tSun Nov 12 15:34:19 2017 +0100 +++ b/tests/test-debugcommands.tSun Nov 12 15:34:46 2017 +0100 @@ -1,4 +1,6 @@ $ cat << EOF >> $HGRCPATH + > [ui] + > interactive=yes > [format] > usegeneraldelta=yes > EOF @@ -157,7 +159,7 @@ amount of time, displays error message and returns 1 $ waitlock() { > start=`date +%s` - > timeout=1 + > timeout=5 > while [ \( ! -f $1 \) -a \( ! 
-L $1 \) ]; do > now=`date +%s` > if [ "`expr $now - $start`" -gt $timeout ]; then @@ -167,26 +169,16 @@ > sleep 0.1 > done > } -dolock [wlock] [lock] will set the locks until interrupted $ dolock() { - > declare -A options - > options=([${1:-nolock}]=1 [${2:-nowlock}]=1) - > python < from mercurial import hg, ui as uimod - > import os - > import time - > - > repo = hg.repository(uimod.ui.load(), path='.') - > `[ -n "${options["wlock"]}" ] && echo "with repo.wlock(False):" || echo "if True:"` - > `[ -n "${options["lock"]}" ] && echo "with repo.lock(False):" || echo "if True:"` - > while not os.path.exists('.hg/unlock'): - > time.sleep(0.1) - > os.unlink('.hg/unlock') - > EOF + > { + > waitlock .hg/unlock + > rm -f .hg/unlock + > echo y + > } | hg debuglocks "$@" > } - - $ dolock lock & + $ dolock -s & $ waitlock .hg/store/lock + ready to release the lock (Y)? (no-eol) $ hg
[PATCH 1 of 2 V4] debuglocks: add tests (and fix typo in early return)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510497259 -3600 # Sun Nov 12 15:34:19 2017 +0100 # Node ID 9beb49c91570014c034c3eaad9ce0a7a37e4c931 # Parent 37d923cdeba923f4964befcc3d0789362a433a35 # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9beb49c91570 debuglocks: add tests (and fix typo in early return) diff -r 37d923cdeba9 -r 9beb49c91570 mercurial/debugcommands.py --- a/mercurial/debugcommands.pySun Dec 10 22:50:57 2017 -0500 +++ b/mercurial/debugcommands.pySun Nov 12 15:34:19 2017 +0100 @@ -1302,7 +1302,7 @@ repo.svfs.unlink('lock') if opts.get(r'force_wlock'): repo.vfs.unlink('wlock') -if opts.get(r'force_lock') or opts.get(r'force_lock'): +if opts.get(r'force_lock') or opts.get(r'force_wlock'): return 0 now = time.time() diff -r 37d923cdeba9 -r 9beb49c91570 tests/test-debugcommands.t --- a/tests/test-debugcommands.tSun Dec 10 22:50:57 2017 -0500 +++ b/tests/test-debugcommands.tSun Nov 12 15:34:19 2017 +0100 @@ -145,6 +145,122 @@ 7 6-1 ??? ?????? ??? ???0 ??? ? 12 (glob) 8 7-1 ??? ?????? ??? ???0 ??? ? 13 (glob) +Test debuglocks command: + + $ hg debuglocks + lock: free + wlock: free + +* Test setting the lock + +waitlock will wait for file to be created. If it isn't in a reasonable +amount of time, displays error message and returns 1 + $ waitlock() { + > start=`date +%s` + > timeout=1 + > while [ \( ! -f $1 \) -a \( ! 
-L $1 \) ]; do + > now=`date +%s` + > if [ "`expr $now - $start`" -gt $timeout ]; then + > echo "timeout: $1 was not created in $timeout seconds" + > return 1 + > fi + > sleep 0.1 + > done + > } +dolock [wlock] [lock] will set the locks until interrupted + $ dolock() { + > declare -A options + > options=([${1:-nolock}]=1 [${2:-nowlock}]=1) + > python < from mercurial import hg, ui as uimod + > import os + > import time + > + > repo = hg.repository(uimod.ui.load(), path='.') + > `[ -n "${options["wlock"]}" ] && echo "with repo.wlock(False):" || echo "if True:"` + > `[ -n "${options["lock"]}" ] && echo "with repo.lock(False):" || echo "if True:"` + > while not os.path.exists('.hg/unlock'): + > time.sleep(0.1) + > os.unlink('.hg/unlock') + > EOF + > } + + $ dolock lock & + $ waitlock .hg/store/lock + + $ hg debuglocks + lock: user *, process * (*s) (glob) + wlock: free + [1] + $ touch .hg/unlock + $ wait + +* Test setting the wlock + + $ dolock wlock & + $ waitlock .hg/wlock + + $ hg debuglocks + lock: free + wlock: user *, process * (*s) (glob) + [1] + $ touch .hg/unlock + $ wait + +* Test setting both locks + + $ dolock wlock lock & + $ waitlock .hg/wlock && waitlock .hg/store/lock + + $ hg debuglocks + lock: user *, process * (*s) (glob) + wlock: user *, process * (*s) (glob) + [2] + $ touch .hg/unlock + $ wait + + $ hg debuglocks + lock: free + wlock: free + +* Test forcing the lock + + $ dolock lock & + $ waitlock .hg/store/lock + + $ hg debuglocks + lock: user *, process * (*s) (glob) + wlock: free + [1] + + $ hg debuglocks -L + + $ hg debuglocks + lock: free + wlock: free + + $ touch .hg/unlock + $ wait + +* Test forcing the wlock + + $ dolock wlock & + $ waitlock .hg/wlock + + $ hg debuglocks + lock: free + wlock: user *, process * (*s) (glob) + [1] + + $ hg debuglocks -W + + $ hg debuglocks + lock: free + wlock: free + + $ touch .hg/unlock + $ wait + Test WdirUnsupported exception $ hg debugdata -c ___ Mercurial-devel mailing list 
Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V3] debuglocks: allow setting a lock
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510497286 -3600 # Sun Nov 12 15:34:46 2017 +0100 # Node ID e9f4ce1f42464b7670618986f0762f016e33e284 # Parent 9b602162e10949a80e90d9732bb654b430e0b22a # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r e9f4ce1f4246 debuglocks: allow setting a lock diff -r 9b602162e109 -r e9f4ce1f4246 mercurial/debugcommands.py --- a/mercurial/debugcommands.pySun Nov 12 15:34:19 2017 +0100 +++ b/mercurial/debugcommands.pySun Nov 12 15:34:46 2017 +0100 @@ -77,6 +77,8 @@ command = registrar.command() +_confirmlockremovalmsg = _("ready to release the lock (Y)? $$ ") + @command('debugancestor', [], _('[INDEX] REV1 REV2'), optionalrepo=True) def debugancestor(ui, repo, *args): """find the ancestor revision of two revisions in a given index""" @@ -1173,7 +1175,10 @@ @command('debuglocks', [('L', 'force-lock', None, _('free the store lock (DANGEROUS)')), ('W', 'force-wlock', None, - _('free the working state lock (DANGEROUS)'))], + _('free the working state lock (DANGEROUS)')), + ('s', 'set-lock', None, _('set the store lock until stopped')), + ('S', 'set-wlock', None, + _('set the working state lock until stopped'))], _('[OPTION]...')) def debuglocks(ui, repo, **opts): """show or modify state of locks @@ -1192,6 +1197,10 @@ instance, on a shared filesystem). Removing locks may also be blocked by filesystem permissions. +Setting a lock will prevent other commands from changing the data. +The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs. +The set locks are removed when the command exits. + Returns 0 if no locks are held. 
""" @@ -1203,6 +1212,24 @@ if opts.get(r'force_lock') or opts.get(r'force_wlock'): return 0 +locks = [] +try: +if opts.get(r'set_wlock'): +try: +locks.append(repo.wlock(False)) +except error.LockHeld: +raise error.Abort(_('wlock is already held')) +if opts.get(r'set_lock'): +try: +locks.append(repo.lock(False)) +except error.LockHeld: +raise error.Abort(_('lock is already held')) +if len(locks): +ui.promptchoice(_confirmlockremovalmsg) +return 0 +finally: +release(*locks) + now = time.time() held = 0 diff -r 9b602162e109 -r e9f4ce1f4246 tests/test-completion.t --- a/tests/test-completion.t Sun Nov 12 15:34:19 2017 +0100 +++ b/tests/test-completion.t Sun Nov 12 15:34:46 2017 +0100 @@ -272,7 +272,7 @@ debuginstall: template debugknown: debuglabelcomplete: - debuglocks: force-lock, force-wlock + debuglocks: force-lock, force-wlock, set-lock, set-wlock debugmergestate: debugnamecomplete: debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template diff -r 9b602162e109 -r e9f4ce1f4246 tests/test-debugcommands.t --- a/tests/test-debugcommands.tSun Nov 12 15:34:19 2017 +0100 +++ b/tests/test-debugcommands.tSun Nov 12 15:34:46 2017 +0100 @@ -1,4 +1,6 @@ $ cat << EOF >> $HGRCPATH + > [ui] + > interactive=yes > [format] > usegeneraldelta=yes > EOF @@ -133,26 +135,16 @@ > sleep 0.1 > done > } -dolock [wlock] [lock] will set the locks until interrupted $ dolock() { - > declare -A options - > options=([${1:-nolock}]=1 [${2:-nowlock}]=1) - > python < from mercurial import hg, ui as uimod - > import os - > import time - > - > repo = hg.repository(uimod.ui.load(), path='.') - > `[ -n "${options["wlock"]}" ] && echo "with repo.wlock(False):" || echo "if True:"` - > `[ -n "${options["lock"]}" ] && echo "with repo.lock(False):" || echo "if True:"` - > while not os.path.exists('.hg/unlock'): - > time.sleep(0.1) - > os.unlink('.hg/unlock') - > EOF + > { + > waitlock .hg/unlock + > rm -f .hg/unlock + > echo y + > } | hg debuglocks "$@" > } - - $ dolock lock 
& + $ dolock -s & $ waitlock .hg/store/lock + ready to release the lock (Y)? (no-eol) $ hg debuglocks lock: user *, process * (*s) (glob) @@ -160,11 +152,16 @@ [1] $ touch .hg/unlock $ wait + y + $ ls -l .hg/store/lock + ls: cannot access '.hg/store/lock': No such file or directory + [2] * Test setting the wlock - $ dolock wlock & + $ dolock -S & $ waitlock .hg/wlock + ready to
[PATCH 1 of 2 V3] debuglocks: add tests (and fix typo in early return)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510497259 -3600 # Sun Nov 12 15:34:19 2017 +0100 # Node ID 9b602162e10949a80e90d9732bb654b430e0b22a # Parent 602c168c0207c443ac61f7a7c727b31cfb0b86ad # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9b602162e109 debuglocks: add tests (and fix typo in early return) diff -r 602c168c0207 -r 9b602162e109 mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Nov 07 13:18:49 2017 -0500 +++ b/mercurial/debugcommands.pySun Nov 12 15:34:19 2017 +0100 @@ -1200,7 +1200,7 @@ repo.svfs.unlink('lock') if opts.get(r'force_wlock'): repo.vfs.unlink('wlock') -if opts.get(r'force_lock') or opts.get(r'force_lock'): +if opts.get(r'force_lock') or opts.get(r'force_wlock'): return 0 now = time.time() diff -r 602c168c0207 -r 9b602162e109 tests/test-debugcommands.t --- a/tests/test-debugcommands.tTue Nov 07 13:18:49 2017 -0500 +++ b/tests/test-debugcommands.tSun Nov 12 15:34:19 2017 +0100 @@ -111,6 +111,122 @@ 7 6-1 ??? ?????? ??? ???0 ??? ? 12 (glob) 8 7-1 ??? ?????? ??? ???0 ??? ? 13 (glob) +Test debuglocks command: + + $ hg debuglocks + lock: free + wlock: free + +* Test setting the lock + +waitlock will wait for file to be created. If it isn't in a reasonable +amount of time, displays error message and returns 1 + $ waitlock() { + > start=`date +%s` + > timeout=1 + > while [ \( ! -f $1 \) -a \( ! 
-L $1 \) ]; do + > now=`date +%s` + > if [ "`expr $now - $start`" -gt $timeout ]; then + > echo "timeout: $1 was not created in $timeout seconds" + > return 1 + > fi + > sleep 0.1 + > done + > } +dolock [wlock] [lock] will set the locks until interrupted + $ dolock() { + > declare -A options + > options=([${1:-nolock}]=1 [${2:-nowlock}]=1) + > python <<EOF + > from mercurial import hg, ui as uimod + > import os + > import time + > + > repo = hg.repository(uimod.ui.load(), path='.') + > `[ -n "${options["wlock"]}" ] && echo "with repo.wlock(False):" || echo "if True:"` + > `[ -n "${options["lock"]}" ] && echo "with repo.lock(False):" || echo "if True:"` + > while not os.path.exists('.hg/unlock'): + > time.sleep(0.1) + > os.unlink('.hg/unlock') + > EOF + > } + + $ dolock lock & + $ waitlock .hg/store/lock + + $ hg debuglocks + lock: user *, process * (*s) (glob) + wlock: free + [1] + $ touch .hg/unlock + $ wait + +* Test setting the wlock + + $ dolock wlock & + $ waitlock .hg/wlock + + $ hg debuglocks + lock: free + wlock: user *, process * (*s) (glob) + [1] + $ touch .hg/unlock + $ wait + +* Test setting both locks + + $ dolock wlock lock & + $ waitlock .hg/wlock && waitlock .hg/store/lock + + $ hg debuglocks + lock: user *, process * (*s) (glob) + wlock: user *, process * (*s) (glob) + [2] + $ touch .hg/unlock + $ wait + + $ hg debuglocks + lock: free + wlock: free + +* Test forcing the lock + + $ dolock lock & + $ waitlock .hg/store/lock + + $ hg debuglocks + lock: user *, process * (*s) (glob) + wlock: free + [1] + + $ hg debuglocks -L + + $ hg debuglocks + lock: free + wlock: free + + $ touch .hg/unlock + $ wait + +* Test forcing the wlock + + $ dolock wlock & + $ waitlock .hg/wlock + + $ hg debuglocks + lock: free + wlock: user *, process * (*s) (glob) + [1] + + $ hg debuglocks -W + + $ hg debuglocks + lock: free + wlock: free + + $ touch .hg/unlock + $ wait + Test WdirUnsupported exception $ hg debugdata -c ___ Mercurial-devel mailing list 
Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH V2] debugdeltachain: output information about sparse read if enabled
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1509002829 -7200 # Thu Oct 26 09:27:09 2017 +0200 # Node ID 404cf035ae72a77af35e049b5de53f8dbbc6cd79 # Parent 602c168c0207c443ac61f7a7c727b31cfb0b86ad # EXP-Topic debugdeltachain # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 404cf035ae72 debugdeltachain: output information about sparse read if enabled diff -r 602c168c0207 -r 404cf035ae72 mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Nov 07 13:18:49 2017 -0500 +++ b/mercurial/debugcommands.pyThu Oct 26 09:27:09 2017 +0200 @@ -587,11 +587,22 @@ the delta chain for this revision :``extraratio``: extradist divided by chainsize; another representation of how much unrelated data is needed to load this delta chain + +If the repository is configured to use the sparse read, additional keywords +are available: + +:``readsize``: total size of data read from the disk for a revision + (sum of the sizes of all the blocks) +:``largestblock``: size of the largest block of data read from the disk +:``readdensity``: density of useful bytes in the data read from the disk + +The sparse read can be enabled with experimental.sparse-read = True """ opts = pycompat.byteskwargs(opts) r = cmdutil.openrevlog(repo, 'debugdeltachain', file_, opts) index = r.index generaldelta = r.version & revlog.FLAG_GENERALDELTA +withsparseread = getattr(r, '_withsparseread', False) def revinfo(rev): e = index[rev] @@ -627,15 +638,20 @@ fm.plain('rev chain# chainlen prev delta ' 'sizerawsize chainsize ratio lindist extradist ' - 'extraratio\n') + 'extraratio') +if withsparseread: +fm.plain(' readsize largestblk rddensity') +fm.plain('\n') chainbases = {} for rev in r: comp, uncomp, deltatype, chain, chainsize = revinfo(rev) chainbase = chain[0] chainid = chainbases.setdefault(chainbase, len(chainbases) + 1) -basestart = r.start(chainbase) -revstart = r.start(rev) +start = r.start +length = 
r.length +basestart = start(chainbase) +revstart = start(rev) lineardist = revstart + comp - basestart extradist = lineardist - chainsize try: @@ -650,7 +666,7 @@ fm.write('rev chainid chainlen prevrev deltatype compsize ' 'uncompsize chainsize chainratio lindist extradist ' 'extraratio', - '%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f\n', + '%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f', rev, chainid, len(chain), prevrev, deltatype, comp, uncomp, chainsize, chainratio, lineardist, extradist, extraratio, @@ -659,6 +675,26 @@ uncompsize=uncomp, chainsize=chainsize, chainratio=chainratio, lindist=lineardist, extradist=extradist, extraratio=extraratio) +if withsparseread: +readsize = 0 +largestblock = 0 +for revschunk in revlog._slicechunk(r, chain): +blkend = start(revschunk[-1]) + length(revschunk[-1]) +blksize = blkend - start(revschunk[0]) + +readsize += blksize +if largestblock < blksize: +largestblock = blksize + +readdensity = float(chainsize) / float(readsize) + +fm.write('readsize largestblock readdensity', + ' %10d %10d %9.5f', + readsize, largestblock, readdensity, + readsize=readsize, largestblock=largestblock, + readdensity=readdensity) + +fm.write('', '\n') fm.end() diff -r 602c168c0207 -r 404cf035ae72 tests/test-debugcommands.t --- a/tests/test-debugcommands.tTue Nov 07 13:18:49 2017 -0500 +++ b/tests/test-debugcommands.tThu Oct 26 09:27:09 2017 +0200 @@ -77,6 +77,40 @@ } ] +debugdelta chain with sparse read enabled + + $ cat >> $HGRCPATH <<EOF + > [experimental] + > sparse-read = True + > EOF + $ hg debugdeltachain -m + rev chain# chainlen prev delta sizerawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity +0 11 -1base 44 43 44 1.0232644 00.0 44 44 1.0 + + $ hg debugdeltachain -m -T '{rev} {chainid} {chainlen} {readsize} {largestblock} {readdensity}\n' + 0 1 1 44 44 1.0 + + $ hg debugdeltachain -m -Tjson + [ + { +"chainid": 1, +"chainlen": 1, +&q
[PATCH 1 of 2 V2] debuglocks: add tests
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510497259 -3600 # Sun Nov 12 15:34:19 2017 +0100 # Node ID b26aa142783235cddc7c90f2425aac53fbb612da # Parent 602c168c0207c443ac61f7a7c727b31cfb0b86ad # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r b26aa1427832 debuglocks: add tests diff -r 602c168c0207 -r b26aa1427832 mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Nov 07 13:18:49 2017 -0500 +++ b/mercurial/debugcommands.pySun Nov 12 15:34:19 2017 +0100 @@ -1200,7 +1200,7 @@ repo.svfs.unlink('lock') if opts.get(r'force_wlock'): repo.vfs.unlink('wlock') -if opts.get(r'force_lock') or opts.get(r'force_lock'): +if opts.get(r'force_lock') or opts.get(r'force_wlock'): return 0 now = time.time() diff -r 602c168c0207 -r b26aa1427832 tests/test-debugcommands.t --- a/tests/test-debugcommands.tTue Nov 07 13:18:49 2017 -0500 +++ b/tests/test-debugcommands.tSun Nov 12 15:34:19 2017 +0100 @@ -111,6 +111,126 @@ 7 6-1 ??? ?????? ??? ???0 ??? ? 12 (glob) 8 7-1 ??? ?????? ??? ???0 ??? ? 
13 (glob) +Test debuglocks command: + + $ hg debuglocks + lock: free + wlock: free + +* Test setting the lock + + $ python < from mercurial import hg, ui as uimod + > import time + > + > repo = hg.repository(uimod.ui.load(), path='$(pwd)') + > with repo.lock(False): + > time.sleep(1) + > EOF + $ sleep 0.5 + + $ hg debuglocks + lock: user *, process *, host * (*s) (glob) + wlock: free + [1] + $ kill -INT %1 + $ wait + +* Test setting the wlock + + $ python < from mercurial import hg, ui as uimod + > import time + > + > repo = hg.repository(uimod.ui.load(), path='$(pwd)') + > with repo.wlock(False): + > time.sleep(1) + > EOF + $ sleep 0.5 + + $ hg debuglocks + lock: free + wlock: user *, process *, host * (*s) (glob) + [1] + $ kill -INT %1 + $ wait + +* Test setting both locks + + $ python < from mercurial import hg, ui as uimod + > import time + > + > repo = hg.repository(uimod.ui.load(), path='$(pwd)') + > with repo.wlock(False): + > with repo.lock(False): + > time.sleep(1) + > EOF + $ sleep 0.5 + + $ hg debuglocks + lock: user *, process *, host * (*s) (glob) + wlock: user *, process *, host * (*s) (glob) + [2] + $ kill -INT %1 + $ wait + + $ hg debuglocks + lock: free + wlock: free + +* Test forcing the lock + + $ python < from mercurial import hg, ui as uimod + > import time + > + > repo = hg.repository(uimod.ui.load(), path='$(pwd)') + > with repo.lock(False): + > time.sleep(1) + > EOF + $ sleep 0.5 + + $ hg debuglocks + lock: user *, process *, host * (*s) (glob) + wlock: free + [1] + + $ hg debuglocks -L + + $ hg debuglocks + lock: free + wlock: free + + $ kill -INT %1 + $ wait + +* Test forcing the wlock + + $ python < from mercurial import hg, ui as uimod + > import time + > + > repo = hg.repository(uimod.ui.load(), path='$(pwd)') + > with repo.wlock(False): + > time.sleep(1) + > EOF + $ sleep 0.5 + + $ hg debuglocks + lock: free + wlock: user *, process *, host * (*s) (glob) + [1] + + $ hg debuglocks -W + + $ hg debuglocks + lock: free + wlock: free + + $ 
kill -INT %1 + $ wait + Test WdirUnsupported exception $ hg debugdata -c ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V2] debuglocks: allow setting a lock
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510497286 -3600 # Sun Nov 12 15:34:46 2017 +0100 # Node ID 61cf7ffd84b89889cbf6bbc2af13cdd67ba2d9f9 # Parent b26aa142783235cddc7c90f2425aac53fbb612da # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 61cf7ffd84b8 debuglocks: allow setting a lock diff -r b26aa1427832 -r 61cf7ffd84b8 mercurial/debugcommands.py --- a/mercurial/debugcommands.pySun Nov 12 15:34:19 2017 +0100 +++ b/mercurial/debugcommands.pySun Nov 12 15:34:46 2017 +0100 @@ -14,6 +14,7 @@ import operator import os import random +import signal import socket import ssl import string @@ -1173,7 +1174,10 @@ @command('debuglocks', [('L', 'force-lock', None, _('free the store lock (DANGEROUS)')), ('W', 'force-wlock', None, - _('free the working state lock (DANGEROUS)'))], + _('free the working state lock (DANGEROUS)')), + ('s', 'set-lock', None, _('set the store lock until stopped')), + ('S', 'set-wlock', None, + _('set the working state lock until stopped'))], _('[OPTION]...')) def debuglocks(ui, repo, **opts): """show or modify state of locks @@ -1192,6 +1196,10 @@ instance, on a shared filesystem). Removing locks may also be blocked by filesystem permissions. +Setting a lock will prevent other commands from changing the data. +The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs. +The set locks are removed when the command exits. + Returns 0 if no locks are held. 
""" @@ -1203,6 +1211,29 @@ if opts.get(r'force_lock') or opts.get(r'force_wlock'): return 0 +# Ensure that SIGINT wouldn't be ignored +# see https://unix.stackexchange.com/a/372580 +if not pycompat.iswindows: +signal.signal(signal.SIGINT, signal.SIG_DFL) + +locks = [] +try: +if opts.get(r'set_wlock'): +try: +locks.append(repo.wlock(False)) +except error.LockHeld: +raise error.Abort(_('wlock is already held')) +if opts.get(r'set_lock'): +try: +locks.append(repo.lock(False)) +except error.LockHeld: +raise error.Abort(_('lock is already held')) +if len(locks): +while True: +time.sleep(60) +finally: +release(*locks) + now = time.time() held = 0 diff -r b26aa1427832 -r 61cf7ffd84b8 tests/test-completion.t --- a/tests/test-completion.t Sun Nov 12 15:34:19 2017 +0100 +++ b/tests/test-completion.t Sun Nov 12 15:34:46 2017 +0100 @@ -272,7 +272,7 @@ debuginstall: template debugknown: debuglabelcomplete: - debuglocks: force-lock, force-wlock + debuglocks: force-lock, force-wlock, set-lock, set-wlock debugmergestate: debugnamecomplete: debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template diff -r b26aa1427832 -r 61cf7ffd84b8 tests/test-debugcommands.t --- a/tests/test-debugcommands.tSun Nov 12 15:34:19 2017 +0100 +++ b/tests/test-debugcommands.tSun Nov 12 15:34:46 2017 +0100 @@ -119,14 +119,7 @@ * Test setting the lock - $ python < from mercurial import hg, ui as uimod - > import time - > - > repo = hg.repository(uimod.ui.load(), path='$(pwd)') - > with repo.lock(False): - > time.sleep(1) - > EOF + $ hg debuglocks -s & $ sleep 0.5 $ hg debuglocks @@ -138,14 +131,7 @@ * Test setting the wlock - $ python < from mercurial import hg, ui as uimod - > import time - > - > repo = hg.repository(uimod.ui.load(), path='$(pwd)') - > with repo.wlock(False): - > time.sleep(1) - > EOF + $ hg debuglocks -S & $ sleep 0.5 $ hg debuglocks @@ -157,21 +143,24 @@ * Test setting both locks - $ python < from mercurial import hg, ui as uimod - > import time - > - > 
repo = hg.repository(uimod.ui.load(), path='$(pwd)') - > with repo.wlock(False): - > with repo.lock(False): - > time.sleep(1) - > EOF + $ hg debuglocks -Ss & $ sleep 0.5 $ hg debuglocks lock: user *, process *, host * (*s) (glob) wlock: user *, process *, host * (*s) (glob) [2] + +* Test failing to set a lock + + $ hg debuglocks -s + abort: lock is already held + [255] + + $ hg debuglocks -S + abort: wlock is already held + [255] + $ kill -INT %1 $ wait @@ -181,14 +170,7 @@ * Test forcing the lock - $ python < from mercurial import hg, ui as uimod - > import time - > - > repo = hg.repository(uimod.ui.load(), path='$(pwd)') - > with repo.lock(False): - > time.sleep(1) - > EOF + $ hg debuglocks -s & $ sleep 0.5 $ hg debuglo
[PATCH] debuglocks: allow setting a lock
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1510071568 -3600 # Tue Nov 07 17:19:28 2017 +0100 # Node ID 5300b33397d0651eb2457502204969585d492cc5 # Parent 602c168c0207c443ac61f7a7c727b31cfb0b86ad # EXP-Topic debugsetlocks # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 5300b33397d0 debuglocks: allow setting a lock diff -r 602c168c0207 -r 5300b33397d0 mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Nov 07 13:18:49 2017 -0500 +++ b/mercurial/debugcommands.pyTue Nov 07 17:19:28 2017 +0100 @@ -1173,7 +1173,10 @@ @command('debuglocks', [('L', 'force-lock', None, _('free the store lock (DANGEROUS)')), ('W', 'force-wlock', None, - _('free the working state lock (DANGEROUS)'))], + _('free the working state lock (DANGEROUS)')), + ('s', 'set-lock', None, _('set the store lock until stopped')), + ('S', 'set-wlock', None, + _('set the working state lock until stopped'))], _('[OPTION]...')) def debuglocks(ui, repo, **opts): """show or modify state of locks @@ -1192,6 +1195,10 @@ instance, on a shared filesystem). Removing locks may also be blocked by filesystem permissions. +Setting a lock will prevent other commands from changing the data. +The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs. +The set lock(s) is (are) removed when the command exits. + Returns 0 if no locks are held. 
""" @@ -1203,6 +1210,26 @@ if opts.get(r'force_lock') or opts.get(r'force_lock'): return 0 +locks = [] +if opts.get(r'set_wlock'): +try: +locks.append(repo.wlock(False)) +except error.LockHeld: +raise error.Abort(_('wlock is already held')) +if opts.get(r'set_lock'): +try: +locks.append(repo.lock(False)) +except error.LockHeld: +raise error.Abort(_('lock is already held')) +if len(locks): +try: +while True: +time.sleep(60) +except: +for lock in locks: +lock.release() +raise + now = time.time() held = 0 ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] debugdeltachain: output information about sparse read if enabled
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1509002829 -7200 # Thu Oct 26 09:27:09 2017 +0200 # Node ID 13a6c881be35e7651a12f8c3442abfade2b77c88 # Parent 602c168c0207c443ac61f7a7c727b31cfb0b86ad # EXP-Topic debugdeltachain # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 13a6c881be35 debugdeltachain: output information about sparse read if enabled diff -r 602c168c0207 -r 13a6c881be35 mercurial/debugcommands.py --- a/mercurial/debugcommands.pyTue Nov 07 13:18:49 2017 -0500 +++ b/mercurial/debugcommands.pyThu Oct 26 09:27:09 2017 +0200 @@ -587,11 +587,22 @@ the delta chain for this revision :``extraratio``: extradist divided by chainsize; another representation of how much unrelated data is needed to load this delta chain + +If the repository is configured to use the sparse read, additional keywords +are available: + +:``readsize``: total size of data read from the disk for a revision + (sum of the sizes of all the blocks) +:``largestblock``: size of the largest block of data read from the disk +:``readdensity``: density of useful bytes in the data read from the disk + +The sparse read can be enabled with experimental.sparse-read = True """ opts = pycompat.byteskwargs(opts) r = cmdutil.openrevlog(repo, 'debugdeltachain', file_, opts) index = r.index generaldelta = r.version & revlog.FLAG_GENERALDELTA +withsparseread = getattr(r, '_withsparseread', False) def revinfo(rev): e = index[rev] @@ -625,17 +636,29 @@ fm = ui.formatter('debugdeltachain', opts) -fm.plain('rev chain# chainlen prev delta ' - 'sizerawsize chainsize ratio lindist extradist ' - 'extraratio\n') +header = ('rev chain# chainlen prev delta ' + 'sizerawsize chainsize ratio lindist extradist ' + 'extraratio') +fmfields = ('rev chainid chainlen prevrev deltatype compsize ' +'uncompsize chainsize chainratio lindist extradist ' +'extraratio') +fmformats = '%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d 
%10.5f' +if withsparseread: +header += ' readsize largestblk rddensity' +fmfields += ' readsize largestblock readdensity' +fmformats += ' %10d %10d %9.5f' + +fm.plain(header + '\n') chainbases = {} for rev in r: comp, uncomp, deltatype, chain, chainsize = revinfo(rev) chainbase = chain[0] chainid = chainbases.setdefault(chainbase, len(chainbases) + 1) -basestart = r.start(chainbase) -revstart = r.start(rev) +start = r.start +length = r.length +basestart = start(chainbase) +revstart = start(rev) lineardist = revstart + comp - basestart extradist = lineardist - chainsize try: @@ -646,19 +669,33 @@ chainratio = float(chainsize) / float(uncomp) extraratio = float(extradist) / float(chainsize) +fmargs = (rev, chainid, len(chain), prevrev, deltatype, comp, + uncomp, chainsize, chainratio, lineardist, extradist, + extraratio) +fmkwargs = dict(rev=rev, chainid=chainid, chainlen=len(chain), +prevrev=prevrev, deltatype=deltatype, compsize=comp, +uncompsize=uncomp, chainsize=chainsize, +chainratio=chainratio, lindist=lineardist, +extradist=extradist, extraratio=extraratio) +if withsparseread: +readsize = 0 +largestblock = 0 +for revschunk in revlog._slicechunk(r, chain): +blkend = start(revschunk[-1]) + length(revschunk[-1]) +blksize = blkend - start(revschunk[0]) + +readsize += blksize +if largestblock < blksize: +largestblock = blksize + +readdensity = float(chainsize) / float(readsize) + +fmargs += (readsize, largestblock, readdensity) +fmkwargs.update(readsize=readsize, largestblock=largestblock, +readdensity=readdensity) + fm.startitem() -fm.write('rev chainid chainlen prevrev deltatype compsize ' - 'uncompsize chainsize chainratio lindist extradist ' - 'extraratio', - '%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f\n', - rev, chainid, len(chain), prevrev, deltatype, comp, - uncomp, chainsize, chainratio, lineardist, extradist, - extraratio, - rev=rev, chainid=chainid, chainlen=len(chain), - prevrev=prevrev, deltat
[PATCH V2] sparse-read: ignore trailing empty revs in each read chunk
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1508333299 -7200 # Wed Oct 18 15:28:19 2017 +0200 # Node ID ef3d9978b7daf5c2152f624b10fffb13425b06db # Parent fb2574bd73a9c0d9a7a88407b20fdabc9213bc20 # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r ef3d9978b7da sparse-read: ignore trailing empty revs in each read chunk An empty entry in the revlog may happen for two reasons: - when the file is empty, and the revlog stores a snapshot; - when there is a merge and both parents were identical. `hg debugindex -m | awk '$3=="0"{print}' | wc -l` gives 1917 such entries in my clone of pypy, and 113 in my clone of mercurial. These empty revisions may be located at the end of a sparse chain, and in some special cases may lead to reading relatively large amounts of data for nothing. diff -r fb2574bd73a9 -r ef3d9978b7da mercurial/revlog.py --- a/mercurial/revlog.py Wed Oct 18 09:07:48 2017 +0200 +++ b/mercurial/revlog.py Wed Oct 18 15:28:19 2017 +0200 @@ -162,6 +162,20 @@ s.update(text) return s.digest() +def _trimchunk(revlog, revs, startidx, endidx=None): +"""returns revs[startidx:endidx] without empty trailing revs +""" +length = revlog.length + +if endidx is None: +endidx = len(revs) + +# Trim empty revs at the end, but never the very first revision of a chain +while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0: +endidx -= 1 + +return revs[startidx:endidx] + def _slicechunk(revlog, revs): """slice revs to reduce the amount of unrelated data to be read from disk. 
@@ -194,6 +208,10 @@ revstart = start(rev) revlen = length(rev) +# Skip empty revisions to form larger holes +if revlen == 0: +continue + if prevend is not None: gapsize = revstart - prevend # only consider holes that are large enough @@ -222,9 +240,16 @@ previdx = 0 while indicesheap: idx = heapq.heappop(indicesheap) -yield revs[previdx:idx] + +chunk = _trimchunk(revlog, revs, previdx, idx) +if chunk: +yield chunk + previdx = idx -yield revs[previdx:] + +chunk = _trimchunk(revlog, revs, previdx) +if chunk: +yield chunk # index v0: # 4 bytes: offset ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 3] sparse-read: ignore trailing empty revs in each read chunk
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1508333299 -7200 # Wed Oct 18 15:28:19 2017 +0200 # Node ID 243f3a5bee46f9473bf2233041a8705e38194c13 # Parent 1c47a1306c856a240d9191e0f928b07493078fa7 # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 243f3a5bee46 sparse-read: ignore trailing empty revs in each read chunk diff -r 1c47a1306c85 -r 243f3a5bee46 mercurial/revlog.py --- a/mercurial/revlog.py Wed Oct 18 09:07:48 2017 +0200 +++ b/mercurial/revlog.py Wed Oct 18 15:28:19 2017 +0200 @@ -162,6 +162,20 @@ s.update(text) return s.digest() +def _trimchunk(revlog, revs, startidx, endidx=None): +"""returns revs[startidx:endidx] without empty trailing revs +""" +length = revlog.length + +if endidx is None: +endidx = len(revs) + +# Trim empty revs at the end +while endidx > startidx and length(revs[endidx - 1]) == 0: +endidx -= 1 + +return revs[startidx:endidx] + def _slicechunk(revlog, revs): """slice revs to reduce the amount of unrelated data to be read from disk. @@ -194,6 +208,10 @@ revstart = start(rev) revlen = length(rev) +# Skip empty revisions to form larger holes +if revlen == 0: +continue + if prevend is not None: gapsize = revstart - prevend # only consider holes that are large enough @@ -222,9 +240,16 @@ previdx = 0 while indicesheap: idx = heapq.heappop(indicesheap) -yield revs[previdx:idx] + +chunk = _trimchunk(revlog, revs, previdx, idx) +if chunk: +yield chunk + previdx = idx -yield revs[previdx:] + +chunk = _trimchunk(revlog, revs, previdx) +if chunk: +yield chunk # index v0: # 4 bytes: offset ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 3] sparse-read: move from a recursive-based approach to a heap-based one
# HG changeset patch # User Boris Feld # Date 1508323980 -7200 # Wed Oct 18 12:53:00 2017 +0200 # Node ID 495cbf44112b9872f2803ccd836de3ffae30b28c # Parent 537de0b14030868e3e850ae388b08f88cabc88e8 # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 495cbf44112b sparse-read: move from a recursive-based approach to a heap-based one The previous recursive approach was trying to optimize each read slice to have a good density. It had the tendency to over-optimize smaller slices while leaving larger holes in others. The new approach focuses on improving the combined density of all the reads, instead of the individual slices. It slices at the largest gaps first, as they reduce the total amount of read data the most efficiently. Another benefit of this approach is that we iterate over the delta chain only once, reducing the overhead of slicing long delta chains. On the repository we use for tests, the new approach shows similar or faster performance than the current default linear full read. The repository contains about 450,000 revisions with many concurrent topological branches. Tests have been run on two versions of the repository: one built with the current delta constraint, and the other with an unlimited delta span (using 'experimental.maxdeltachainspan=0'). Below are timings for building 1% of all the revisions in the manifest log using 'hg perfrevlogrevisions -m'. Times are given in seconds. They include the couple of follow-up changesets in this series. 
delta-span standardunlimited linear-read 922s 632s sparse-read 814s 566s diff -r 537de0b14030 -r 495cbf44112b mercurial/revlog.py --- a/mercurial/revlog.py Wed Sep 20 19:38:06 2017 +0200 +++ b/mercurial/revlog.py Wed Oct 18 12:53:00 2017 +0200 @@ -17,6 +17,7 @@ import collections import errno import hashlib +import heapq import os import struct import zlib @@ -170,49 +171,59 @@ start = revlog.start length = revlog.length -chunkqueue = collections.deque() -chunkqueue.append((revs, 0)) +if len(revs) <= 1: +yield revs +return -while chunkqueue: -revs, depth = chunkqueue.popleft() +startbyte = start(revs[0]) +endbyte = start(revs[-1]) + length(revs[-1]) +readdata = deltachainspan = endbyte - startbyte + +chainpayload = sum(length(r) for r in revs) -startbyte = start(revs[0]) -endbyte = start(revs[-1]) + length(revs[-1]) -deltachainspan = endbyte - startbyte +if deltachainspan: +density = chainpayload / float(deltachainspan) +else: +density = 1.0 -if deltachainspan <= revlog._srminblocksize or len(revs) <= 1: -yield revs -continue +# Store the gaps in a heap to have them sorted by decreasing size +gapsheap = [] +heapq.heapify(gapsheap) +prevend = None +for i, rev in enumerate(revs): +revstart = start(rev) +revlen = length(rev) -# Find where is the largest hole (this is where we would split) and -# sum up the lengths of useful data to compute the density of the span -textlen = 0 -prevend = None -largesthole = 0 -idxlargesthole = -1 -for i, rev in enumerate(revs): -revstart = start(rev) -revlen = length(rev) +if prevend is not None: +gapsize = revstart - prevend +if gapsize: +heapq.heappush(gapsheap, (-gapsize, i)) + +prevend = revstart + revlen + +# Collect the indices of the largest holes until the density is acceptable +indicesheap = [] +heapq.heapify(indicesheap) +while gapsheap and density < revlog._srdensitythreshold: +oppgapsize, gapidx = heapq.heappop(gapsheap) + +heapq.heappush(indicesheap, gapidx) -if prevend is not None: -hole = revstart - prevend -if hole > 
largesthole: -largesthole = hole -idxlargesthole = i - -textlen += revlen -prevend = revstart + revlen +# the gap sizes are stored as negatives to be sorted decreasingly +# by the heap +readdata -= (-oppgapsize) +if readdata > 0: +density = chainpayload / float(readdata) +else: +density = 1.0 -density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0 - -if density > revlog._srdensitythreshold: -yield revs -continue - -# Add the left and right parts so that they will be sliced -# recursively too -chunkqueue.append((revs[:idxlargesthole], depth + 1)) -chunkqueue.append((revs[idxlargesthole:], depth + 1)) +# Cut the revs at collected indices +previdx = 0 +while indicesheap: +
[PATCH 2 of 3] sparse-read: skip gaps too small to be worth splitting
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1508310468 -7200 # Wed Oct 18 09:07:48 2017 +0200 # Node ID 1c47a1306c856a240d9191e0f928b07493078fa7 # Parent 495cbf44112b9872f2803ccd836de3ffae30b28c # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 1c47a1306c85 sparse-read: skip gaps too small to be worth splitting Splitting at too small gaps might not be worthwhile. With this changeset, we stop considering splitting on too small gaps. The threshold is configurable. We arbitrarily pick 256K as a default value because it seems "okay". Further testing on various repositories and setups will be needed to tune it. The option name is 'experimental.sparse-read.min-gap-size`, and replaces `experimental.sparse-read.min-block-size` which is not used any more. diff -r 495cbf44112b -r 1c47a1306c85 mercurial/configitems.py --- a/mercurial/configitems.py Wed Oct 18 12:53:00 2017 +0200 +++ b/mercurial/configitems.py Wed Oct 18 09:07:48 2017 +0200 @@ -420,7 +420,7 @@ coreconfigitem('experimental', 'sparse-read.density-threshold', default=0.25, ) -coreconfigitem('experimental', 'sparse-read.min-block-size', +coreconfigitem('experimental', 'sparse-read.min-gap-size', default='256K', ) coreconfigitem('experimental', 'treemanifest', diff -r 495cbf44112b -r 1c47a1306c85 mercurial/localrepo.py --- a/mercurial/localrepo.pyWed Oct 18 12:53:00 2017 +0200 +++ b/mercurial/localrepo.pyWed Oct 18 09:07:48 2017 +0200 @@ -611,11 +611,11 @@ withsparseread = self.ui.configbool('experimental', 'sparse-read') srdensitythres = float(self.ui.config('experimental', 'sparse-read.density-threshold')) -srminblocksize = self.ui.configbytes('experimental', - 'sparse-read.min-block-size') +srmingapsize = self.ui.configbytes('experimental', + 'sparse-read.min-gap-size') self.svfs.options['with-sparse-read'] = withsparseread self.svfs.options['sparse-read-density-threshold'] = 
srdensitythres -self.svfs.options['sparse-read-min-block-size'] = srminblocksize +self.svfs.options['sparse-read-min-gap-size'] = srmingapsize for r in self.requirements: if r.startswith('exp-compression-'): diff -r 495cbf44112b -r 1c47a1306c85 mercurial/revlog.py --- a/mercurial/revlog.py Wed Oct 18 12:53:00 2017 +0200 +++ b/mercurial/revlog.py Wed Oct 18 09:07:48 2017 +0200 @@ -196,7 +196,8 @@ if prevend is not None: gapsize = revstart - prevend -if gapsize: +# only consider holes that are large enough +if gapsize > revlog._srmingapsize: heapq.heappush(gapsheap, (-gapsize, i)) prevend = revstart + revlen @@ -371,7 +372,7 @@ self._maxdeltachainspan = -1 self._withsparseread = False self._srdensitythreshold = 0.25 -self._srminblocksize = 262144 +self._srmingapsize = 262144 mmapindexthreshold = None v = REVLOG_DEFAULT_VERSION @@ -401,8 +402,8 @@ self._withsparseread = bool(opts.get('with-sparse-read', False)) if 'sparse-read-density-threshold' in opts: self._srdensitythreshold = opts['sparse-read-density-threshold'] -if 'sparse-read-min-block-size' in opts: -self._srminblocksize = opts['sparse-read-min-block-size'] +if 'sparse-read-min-gap-size' in opts: +self._srmingapsize = opts['sparse-read-min-gap-size'] if self._chunkcachesize <= 0: raise RevlogError(_('revlog chunk cache size %r is not greater ' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 3] revlog-sparse-read: add a lower-threshold for read block size
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507993541 -7200 # Sat Oct 14 17:05:41 2017 +0200 # Node ID cac7666538643b087500c09c757a1b5f28421fbb # Parent fd6ea10467600ccdfc9f3491ad95da5cdb5b840d # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r cac766653864 revlog-sparse-read: add a lower-threshold for read block size The option experimental.sparse-read.min-block-size specifies the minimal size of a deltachain span, under which it won't be split by _slicechunk. diff -r fd6ea1046760 -r cac766653864 mercurial/configitems.py --- a/mercurial/configitems.py Tue Oct 10 17:50:27 2017 +0200 +++ b/mercurial/configitems.py Sat Oct 14 17:05:41 2017 +0200 @@ -415,6 +415,9 @@ coreconfigitem('experimental', 'sparse-read.density-threshold', default=0.25, ) +coreconfigitem('experimental', 'sparse-read.min-block-size', +default='256K', +) coreconfigitem('experimental', 'treemanifest', default=False, ) diff -r fd6ea1046760 -r cac766653864 mercurial/localrepo.py --- a/mercurial/localrepo.pyTue Oct 10 17:50:27 2017 +0200 +++ b/mercurial/localrepo.pySat Oct 14 17:05:41 2017 +0200 @@ -611,8 +611,11 @@ withsparseread = self.ui.configbool('experimental', 'sparse-read') srdensitythres = float(self.ui.config('experimental', 'sparse-read.density-threshold')) +srminblocksize = self.ui.configbytes('experimental', + 'sparse-read.min-block-size') self.svfs.options['with-sparse-read'] = withsparseread self.svfs.options['sparse-read-density-threshold'] = srdensitythres +self.svfs.options['sparse-read-min-block-size'] = srminblocksize for r in self.requirements: if r.startswith('exp-compression-'): diff -r fd6ea1046760 -r cac766653864 mercurial/revlog.py --- a/mercurial/revlog.py Tue Oct 10 17:50:27 2017 +0200 +++ b/mercurial/revlog.py Sat Oct 14 17:05:41 2017 +0200 @@ -180,7 +180,7 @@ endbyte = start(revs[-1]) + length(revs[-1]) deltachainspan = endbyte - startbyte -if 
len(revs) <= 1: +if deltachainspan <= revlog._srminblocksize or len(revs) <= 1: yield revs continue @@ -359,6 +359,7 @@ self._maxdeltachainspan = -1 self._withsparseread = False self._srdensitythreshold = 0.25 +self._srminblocksize = 262144 mmapindexthreshold = None v = REVLOG_DEFAULT_VERSION @@ -388,6 +389,8 @@ self._withsparseread = bool(opts.get('with-sparse-read', False)) if 'sparse-read-density-threshold' in opts: self._srdensitythreshold = opts['sparse-read-density-threshold'] +if 'sparse-read-min-block-size' in opts: +self._srminblocksize = opts['sparse-read-min-block-size'] if self._chunkcachesize <= 0: raise RevlogError(_('revlog chunk cache size %r is not greater ' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 3] revlog: ignore empty trailing chunks when reading segments
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507554821 -7200 # Mon Oct 09 15:13:41 2017 +0200 # Node ID ac3901a97e195627a2ed4e65040912326ce5d943 # Parent a652b7763f669683eb5540c6d4b77ee18e55bc80 # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r ac3901a97e19 revlog: ignore empty trailing chunks when reading segments When a merge commit creates an empty diff in the revlog, its offset may still be quite far from the end of the previous chunk. Skipping these empty chunks may reduce read size significantly. In most cases, there is no gain, and in some cases, little gain. On my clone of pypy, `hg manifest` reads 65% less bytes (96140 i/o 275943) for revision 4260 by ignoring the only empty trailing diff. For revision 2229, 35% (34557 i/o 53435) Sadly, this is difficult to reproduce, as hg clone can make its own different structure every time. diff -r a652b7763f66 -r ac3901a97e19 mercurial/revlog.py --- a/mercurial/revlog.py Sat Oct 14 12:03:42 2017 -0400 +++ b/mercurial/revlog.py Mon Oct 09 15:13:41 2017 +0200 @@ -1327,8 +1327,14 @@ l = [] ladd = l.append +firstrev = revs[0] +# Skip trailing revisions with empty diff +for lastrev in revs[::-1]: +if length(lastrev) != 0: +break + try: -offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df) +offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df) except OverflowError: # issue4215 - we can't cache a run of chunks greater than # 2G on Windows ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 3] revlog: introduce an experimental flag to slice chunks reads when too sparse
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507650627 -7200 # Tue Oct 10 17:50:27 2017 +0200 # Node ID fd6ea10467600ccdfc9f3491ad95da5cdb5b840d # Parent ac3901a97e195627a2ed4e65040912326ce5d943 # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r fd6ea1046760 revlog: introduce an experimental flag to slice chunks reads when too sparse Delta chains can become quite sparse if there is a lot of unrelated data between relevant pieces. Right now, revlog always reads all the necessary data for the delta chain in one single read. This can lead to a lot of unrelated data being read (see issue5482 for more details). One can use the `experimental.maxdeltachainspan` option with a large value (or -1) to easily produce a very sparse delta chain. This change introduces the ability to slice the chunks retrieval into multiple reads, skipping large sections of unrelated data. Preliminary testing shows interesting results. For example the peak memory consumption to read a manifest on a large repository is reduced from 600MB to 250MB (200MB without maxdeltachainspan). However, the slicing itself and the multiple reads can have a negative impact on performance. This is why the new feature is hidden behind an experimental flag. Future changesets will add various parameters to control the slicing heuristics. We hope to experiment with a wide variety of repositories during 4.4 and hopefully turn the feature on by default in 4.5. As a first try, the algorithm itself is subject to deep changes. However, we wish to define APIs and have a baseline to work on. 
diff -r ac3901a97e19 -r fd6ea1046760 mercurial/configitems.py --- a/mercurial/configitems.py Mon Oct 09 15:13:41 2017 +0200 +++ b/mercurial/configitems.py Tue Oct 10 17:50:27 2017 +0200 @@ -409,6 +409,12 @@ coreconfigitem('experimental', 'spacemovesdown', default=False, ) +coreconfigitem('experimental', 'sparse-read', +default=False, +) +coreconfigitem('experimental', 'sparse-read.density-threshold', +default=0.25, +) coreconfigitem('experimental', 'treemanifest', default=False, ) diff -r ac3901a97e19 -r fd6ea1046760 mercurial/localrepo.py --- a/mercurial/localrepo.pyMon Oct 09 15:13:41 2017 +0200 +++ b/mercurial/localrepo.pyTue Oct 10 17:50:27 2017 +0200 @@ -608,6 +608,11 @@ 'mmapindexthreshold') if mmapindexthreshold is not None: self.svfs.options['mmapindexthreshold'] = mmapindexthreshold +withsparseread = self.ui.configbool('experimental', 'sparse-read') +srdensitythres = float(self.ui.config('experimental', + 'sparse-read.density-threshold')) +self.svfs.options['with-sparse-read'] = withsparseread +self.svfs.options['sparse-read-density-threshold'] = srdensitythres for r in self.requirements: if r.startswith('exp-compression-'): diff -r ac3901a97e19 -r fd6ea1046760 mercurial/revlog.py --- a/mercurial/revlog.py Mon Oct 09 15:13:41 2017 +0200 +++ b/mercurial/revlog.py Tue Oct 10 17:50:27 2017 +0200 @@ -161,6 +161,58 @@ s.update(text) return s.digest() +def _slicechunk(revlog, revs): +"""slice revs to reduce the amount of unrelated data to be read from disk. + +``revs`` is sliced into groups that should be read in one time. +Assume that revs are sorted. 
+""" +start = revlog.start +length = revlog.length + +chunkqueue = collections.deque() +chunkqueue.append((revs, 0)) + +while chunkqueue: +revs, depth = chunkqueue.popleft() + +startbyte = start(revs[0]) +endbyte = start(revs[-1]) + length(revs[-1]) +deltachainspan = endbyte - startbyte + +if len(revs) <= 1: +yield revs +continue + +# Find where is the largest hole (this is where we would split) and +# sum up the lengths of useful data to compute the density of the span +textlen = 0 +prevend = None +largesthole = 0 +idxlargesthole = -1 +for i, rev in enumerate(revs): +revstart = start(rev) +revlen = length(rev) + +if prevend is not None: +hole = revstart - prevend +if hole > largesthole: +largesthole = hole +idxlargesthole = i + +textlen += revlen +prevend = revstart + revlen + +density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0 + +if density > revlog._srdensitythreshold: +yield revs +continue + +# Add the left and right parts so that they will be sliced recursively too +chunkqueue.append((revs[:idxlargesthole], depth + 1)) +chunkqueue.appen
[PATCH V4] strip: take branch into account when selecting update target (issue5540)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507212785 -7200 # Thu Oct 05 16:13:05 2017 +0200 # Node ID a9047aa04485e29fbad8c7af5cc83d64e7b3df2c # Parent 05c2a9f37a1dde8df024876cca0f76108c8e6f42 # EXP-Topic issue-5540 # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a9047aa04485 strip: take branch into account when selecting update target (issue5540) Test contributed by Matt Harbison Keep the same behavior in most cases (i.e. first parent of the first root of stripped changsets), but if the branch differs from wdir's, try to find another parent of stripped commits that is on the same branch. diff -r 05c2a9f37a1d -r a9047aa04485 hgext/strip.py --- a/hgext/strip.pyThu Oct 05 15:11:34 2017 +0200 +++ b/hgext/strip.pyThu Oct 05 16:13:05 2017 +0200 @@ -60,10 +60,19 @@ def _findupdatetarget(repo, nodes): unode, p2 = repo.changelog.parents(nodes[0]) +currentbranch = repo[None].branch() if (util.safehasattr(repo, 'mq') and p2 != nullid and p2 in [x.node for x in repo.mq.applied]): unode = p2 +elif currentbranch != repo[unode].branch(): +pwdir = 'parents(wdir())' +revset = 'max(((parents(%ln::%r) + %r) - %ln::%r) and branch(%s))' +branchtarget = repo.revs(revset, nodes, pwdir, pwdir, nodes, pwdir, + currentbranch) +if branchtarget: +cl = repo.changelog +unode = cl.node(branchtarget.first()) return unode diff -r 05c2a9f37a1d -r a9047aa04485 tests/test-strip.t --- a/tests/test-strip.tThu Oct 05 15:11:34 2017 +0200 +++ b/tests/test-strip.tThu Oct 05 16:13:05 2017 +0200 @@ -941,6 +941,214 @@ abort: boom [255] +test stripping a working directory parent doesn't switch named branches + + $ hg log -G + @ changeset: 1:eca11cf91c71 + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg branch new-branch + marked working directory as branch 
new-branch + (branches are permanent and global, did you want a bookmark?) + $ hg ci -m "start new branch" + $ echo 'foo' > foo.txt + $ hg ci -Aqm foo + $ hg up default + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ echo 'bar' > bar.txt + $ hg ci -Aqm bar + $ hg up new-branch + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg merge default + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg log -G + @ changeset: 4:35358f982181 + | tag: tip + | parent: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: bar + | + | @ changeset: 3:f62c6c09b707 + | | branch: new-branch + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: foo + | | + | o changeset: 2:b1d33a8cadd9 + |/ branch: new-branch + |user:test + |date:Thu Jan 01 00:00:00 1970 + + |summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg strip --force -r 35358f982181 + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + saved backup bundle to $TESTTMP/issue4736/.hg/strip-backup/35358f982181-50d992d4-backup.hg (glob) + $ hg log -G + @ changeset: 3:f62c6c09b707 + | branch: new-branch + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: foo + | + o changeset: 2:b1d33a8cadd9 + | branch: new-branch + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg up default + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ echo 'bar' > bar.txt + $ hg ci -Aqm bar + $ hg up new-branch + 1 
files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg merge default + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m merge + $ hg log -G + @changeset: 5:4cf5e92caec2
Re: [PATCH 2 of 2 V3] strip: take branch into account when selecting update target (issue5540)
On 10/11/2017 03:01 PM, Yuya Nishihara wrote: > On Tue, 10 Oct 2017 11:44:43 +0200, Paul Morelle wrote: >> if (util.safehasattr(repo, 'mq') and p2 != nullid >> and p2 in [x.node for x in repo.mq.applied]): >> unode = p2 >> +elif current_branch != repo[unode].branch(): >> +pwdir = 'parents(wdir())' >> +revset = ('max(((parents(%ln::{0}) + {0}) - %ln::{0})' >> + ' and branch(%s))' >> + ).format(pwdir) > bytes.format() isn't available on Python 3. Instead, you can use %r to embed > a revset expression. Oh, something new to know about Mercurial and Python3. Today I have learned that all the strings are transformed into bytes in mercurial in Python3. I will push a V4 including your remarks. Thank you very much! ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V3] strip: take branch into account when selecting update target (issue5540)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507212785 -7200 # Thu Oct 05 16:13:05 2017 +0200 # Node ID 7681cb8ad2b5bca779551e84676cd70d67366cdf # Parent adaf1c0e81c0d4f1f9dcf5c98de4410e21d76966 # EXP-Topic issue-5540 # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 7681cb8ad2b5 strip: take branch into account when selecting update target (issue5540) Test contributed by Matt Harbison Keep the same behavior in most cases (i.e. first parent of the first root of stripped changsets), but if the branch differs from wdir's, try to find another parent of stripped commits that is on the same branch. diff -r adaf1c0e81c0 -r 7681cb8ad2b5 hgext/strip.py --- a/hgext/strip.pyThu Oct 05 15:11:34 2017 +0200 +++ b/hgext/strip.pyThu Oct 05 16:13:05 2017 +0200 @@ -60,10 +60,20 @@ def _findupdatetarget(repo, nodes): unode, p2 = repo.changelog.parents(nodes[0]) +current_branch = repo[None].branch() if (util.safehasattr(repo, 'mq') and p2 != nullid and p2 in [x.node for x in repo.mq.applied]): unode = p2 +elif current_branch != repo[unode].branch(): +pwdir = 'parents(wdir())' +revset = ('max(((parents(%ln::{0}) + {0}) - %ln::{0})' + ' and branch(%s))' + ).format(pwdir) +branch_target = repo.revs(revset, nodes, nodes, current_branch) +if branch_target: +cl = repo.changelog +unode = cl.node(branch_target.first()) return unode diff -r adaf1c0e81c0 -r 7681cb8ad2b5 tests/test-strip.t --- a/tests/test-strip.tThu Oct 05 15:11:34 2017 +0200 +++ b/tests/test-strip.tThu Oct 05 16:13:05 2017 +0200 @@ -941,6 +941,214 @@ abort: boom [255] +test stripping a working directory parent doesn't switch named branches + + $ hg log -G + @ changeset: 1:eca11cf91c71 + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg branch new-branch + marked working directory as 
branch new-branch + (branches are permanent and global, did you want a bookmark?) + $ hg ci -m "start new branch" + $ echo 'foo' > foo.txt + $ hg ci -Aqm foo + $ hg up default + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ echo 'bar' > bar.txt + $ hg ci -Aqm bar + $ hg up new-branch + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg merge default + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg log -G + @ changeset: 4:35358f982181 + | tag: tip + | parent: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: bar + | + | @ changeset: 3:f62c6c09b707 + | | branch: new-branch + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: foo + | | + | o changeset: 2:b1d33a8cadd9 + |/ branch: new-branch + |user:test + |date:Thu Jan 01 00:00:00 1970 + + |summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg strip --force -r 35358f982181 + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + saved backup bundle to $TESTTMP/issue4736/.hg/strip-backup/35358f982181-50d992d4-backup.hg (glob) + $ hg log -G + @ changeset: 3:f62c6c09b707 + | branch: new-branch + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: foo + | + o changeset: 2:b1d33a8cadd9 + | branch: new-branch + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg up default + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ echo 'bar' > bar.txt + $ hg ci -Aqm bar + $ hg up 
new-branch + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg merge default + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m merge + $ hg log -G + @changeset: 5:4c
[PATCH 1 of 2 V3] strip: factor out update target selection
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507209094 -7200 # Thu Oct 05 15:11:34 2017 +0200 # Node ID adaf1c0e81c0d4f1f9dcf5c98de4410e21d76966 # Parent 8cef8f7d51d0f1e99889779ec1320d5c9c3b91de # EXP-Topic issue-5540 # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r adaf1c0e81c0 strip: factor out update target selection The same algorithm was used in two places: one to find out which commit shall become the parent of wdir, and the other to prepare the wdir when keeping changes. Factoring it out prevents inconsistent changes in either occurrence. diff -r 8cef8f7d51d0 -r adaf1c0e81c0 hgext/strip.py --- a/hgext/strip.pyThu Oct 05 20:41:50 2017 -0700 +++ b/hgext/strip.pyThu Oct 05 15:11:34 2017 +0200 @@ -58,16 +58,21 @@ raise error.Abort(_("local changed subrepos found" + excsuffix)) return s +def _findupdatetarget(repo, nodes): +unode, p2 = repo.changelog.parents(nodes[0]) + +if (util.safehasattr(repo, 'mq') and p2 != nullid +and p2 in [x.node for x in repo.mq.applied]): +unode = p2 + +return unode + def strip(ui, repo, revs, update=True, backup=True, force=None, bookmarks=None): with repo.wlock(), repo.lock(): if update: checklocalchanges(repo, force=force) -urev, p2 = repo.changelog.parents(revs[0]) -if (util.safehasattr(repo, 'mq') and -p2 != nullid -and p2 in [x.node for x in repo.mq.applied]): -urev = p2 +urev = _findupdatetarget(repo, revs) hg.clean(repo, urev) repo.dirstate.write(repo.currenttransaction()) @@ -196,10 +201,7 @@ revs = sorted(rootnodes) if update and opts.get('keep'): -urev, p2 = repo.changelog.parents(revs[0]) -if (util.safehasattr(repo, 'mq') and p2 != nullid -and p2 in [x.node for x in repo.mq.applied]): -urev = p2 +urev = _findupdatetarget(repo, revs) uctx = repo[urev] # only reset the dirstate for files that would actually change ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org 
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2 V2] strip: take branch into account when selecting update target (issue5540)
On 10/10/2017 04:15 AM, Matt Harbison wrote: > After thinking about this more, I'm not sure what the right behavior > is. It sounds like it would be useful to address this and prune at > the same time in a follow up, without side tracking this. But I think > the branch change with an uncommitted merge needs to be fixed here. Hello Matt, Thank you for thinking about this again. Yes, prune will have to be patched too, using the same strategy, and I will do that once strip is fixed. I have added a test for the branch merge case, and the tests are running right now. I will push a V3 when it's done. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2 V2] strip: take branch into account when selecting update target (issue5540)
On 10/08/2017 08:02 AM, Yuya Nishihara wrote: >>> +current_branch = repo[None].branch() >>> if (util.safehasattr(repo, 'mq') and p2 != nullid >>> and p2 in [x.node for x in repo.mq.applied]): >>> urev = p2 >>> +elif current_branch != repo[urev].branch(): >>> +revset = "(parents(%ln::parents(wdir())) - >>> %ln::parents(wdir()))" \ >>> + + " and branch(%s)" >>> +branch_targets = repo.revs(revset, revs, revs, current_branch) >>> +if branch_targets: >>> +cl = repo.changelog >>> +urev = min(cl.node(r) for r in branch_targets) >> Should this be max() instead of min(), to get the node closest to wdir? > or just 'max(parents(%ln::...)'. Sorting sha1 hash doesn't make sense. I agree with the fact that sorting on sha1 hashes doesn't really make sense. However, this reproduces the same (mis-)behavior as the replaced code https://www.mercurial-scm.org/repo/hg/file/tip/hgext/strip.py#l199, which takes the first item of the sorted array, hence the min. > Perhaps, destutil.destupdate() will give more hints about tricky cases. I > don't know how "hg prune" resolve the update destination, but I think that > would be similar to what "hg strip" should do. destutil.destupdate() is about updating to descendants while here we are updating to ancestors, so we can't reuse it. "hg prune" does nothing smart about branches, and has the same issue as the current strip code (and we should probably reuse the new strip code for prune too). Since using the highest revision number seems more appropriate, does everyone agree to use a max in a V3? >> Updating way back might be surprising. I didn't try making a more >> elaborate test, so maybe not. I'm thinking, for example, developer A >> keeps pulling and merging from developer B, both using the same named >> branch. Then strip the first merge. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V2] strip: take branch into account when selecting update target (issue5540)
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507212785 -7200 # Thu Oct 05 16:13:05 2017 +0200 # Node ID 18c8fa9b75ba1c7b0dfd984cf78d35d0a467ab24 # Parent ae82f66cd58f85264e756f7a718ae9fbae5f17db # EXP-Topic issue-5540 # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 18c8fa9b75ba strip: take branch into account when selecting update target (issue5540) Test contributed by Matt Harbison Keep the same behavior in most cases (i.e. first parent of the first root of stripped changesets), but if the branch differs from wdir's, try to find another parent of stripped commits that is on the same branch. diff -r ae82f66cd58f -r 18c8fa9b75ba hgext/strip.py --- a/hgext/strip.py Thu Oct 05 15:11:34 2017 +0200 +++ b/hgext/strip.py Thu Oct 05 16:13:05 2017 +0200 @@ -60,10 +60,18 @@ def _find_update_target(repo, revs): urev, p2 = repo.changelog.parents(revs[0]) +current_branch = repo[None].branch() if (util.safehasattr(repo, 'mq') and p2 != nullid and p2 in [x.node for x in repo.mq.applied]): urev = p2 +elif current_branch != repo[urev].branch(): +revset = "(parents(%ln::parents(wdir())) - %ln::parents(wdir()))" \ + + " and branch(%s)" +branch_targets = repo.revs(revset, revs, revs, current_branch) +if branch_targets: +cl = repo.changelog +urev = min(cl.node(r) for r in branch_targets) return urev diff -r ae82f66cd58f -r 18c8fa9b75ba tests/test-strip.t --- a/tests/test-strip.t Thu Oct 05 15:11:34 2017 +0200 +++ b/tests/test-strip.t Thu Oct 05 16:13:05 2017 +0200 @@ -941,6 +941,146 @@ abort: boom [255] +test stripping a working directory parent doesn't switch named branches + + $ hg log -G + @ changeset: 1:eca11cf91c71 + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg branch new-branch + marked working directory as branch new-branch + (branches 
are permanent and global, did you want a bookmark?) + $ hg ci -m "start new branch" + $ echo 'foo' > foo.txt + $ hg ci -Aqm foo + $ hg up default + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ echo 'bar' > bar.txt + $ hg ci -Aqm bar + $ hg up new-branch + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg merge default + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m merge + $ hg log -G + @changeset: 5:4cf5e92caec2 + |\ branch: new-branch + | | tag: tip + | | parent: 3:f62c6c09b707 + | | parent: 4:35358f982181 + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: merge + | | + | o changeset: 4:35358f982181 + | | parent: 1:eca11cf91c71 + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: bar + | | + o | changeset: 3:f62c6c09b707 + | | branch: new-branch + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: foo + | | + o | changeset: 2:b1d33a8cadd9 + |/ branch: new-branch (glob) + |user:test + |date:Thu Jan 01 00:00:00 1970 + + |summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg strip -r 35358f982181 + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + saved backup bundle to $TESTTMP/issue4736/.hg/strip-backup/35358f982181-a6f020aa-backup.hg (glob) + $ hg log -G + @ changeset: 3:f62c6c09b707 + | branch: new-branch + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: foo + | + o changeset: 2:b1d33a8cadd9 + | branch: new-branch + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 
01 00:00:00 1970 + + summary: commitA + + + $ hg pull -u $TESTTMP/issue4736/.hg/strip-backup/35358f982181-a6f020aa-backup.hg + pulling from $TESTTMP/issue4736/.hg/strip-backup/35358f982181-a6f020aa-backup.hg (glob) +
[PATCH 1 of 2 V2] strip: factor out update target selection
# HG changeset patch # User Paul Morelle <paul.more...@octobus.net> # Date 1507209094 -7200 # Thu Oct 05 15:11:34 2017 +0200 # Node ID ae82f66cd58f85264e756f7a718ae9fbae5f17db # Parent a57c938e7ac8f391a62de6c7c4d5cf0e81b2dcf4 # EXP-Topic issue-5540 # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r ae82f66cd58f strip: factor out update target selection The same algorithm was used in two places: one to find out which commit shall become the parent of wdir, and the other to prepare the wdir when keeping changes. Factoring it out prevents inconsistent changes in either occurrence. diff -r a57c938e7ac8 -r ae82f66cd58f hgext/strip.py --- a/hgext/strip.py Fri Sep 29 15:48:34 2017 +0000 +++ b/hgext/strip.py Thu Oct 05 15:11:34 2017 +0200 @@ -58,16 +58,21 @@ raise error.Abort(_("local changed subrepos found" + excsuffix)) return s +def _find_update_target(repo, revs): +urev, p2 = repo.changelog.parents(revs[0]) + +if (util.safehasattr(repo, 'mq') and p2 != nullid +and p2 in [x.node for x in repo.mq.applied]): +urev = p2 + +return urev + def strip(ui, repo, revs, update=True, backup=True, force=None, bookmarks=None): with repo.wlock(), repo.lock(): if update: checklocalchanges(repo, force=force) -urev, p2 = repo.changelog.parents(revs[0]) -if (util.safehasattr(repo, 'mq') and -p2 != nullid -and p2 in [x.node for x in repo.mq.applied]): -urev = p2 +urev = _find_update_target(repo, revs) hg.clean(repo, urev) repo.dirstate.write(repo.currenttransaction()) @@ -196,10 +201,7 @@ revs = sorted(rootnodes) if update and opts.get('keep'): -urev, p2 = repo.changelog.parents(revs[0]) -if (util.safehasattr(repo, 'mq') and p2 != nullid -and p2 in [x.node for x in repo.mq.applied]): -urev = p2 +urev = _find_update_target(repo, revs) uctx = repo[urev] # only reset the dirstate for files that would actually change ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org 
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2] strip: take branch into account when selecting update target (issue5540)
# HG changeset patch # User Paul Morelle <madp...@htkc.org> # Date 1507212785 -7200 # Thu Oct 05 16:13:05 2017 +0200 # Node ID 8a8497fdb6dbeaedddbde21539af46a829036fa2 # Parent bbba17e8f85bdae96c769c7b4c506e9631165b66 # EXP-Topic issue-5540 strip: take branch into account when selecting update target (issue5540) Test contributed by Matt Harbison Keep the same behavior in most cases (i.e. first parent of the first root of stripped changesets), but if the branch differs from wdir's, try to find another parent of stripped commits that is on the same branch. diff -r bbba17e8f85b -r 8a8497fdb6db hgext/strip.py --- a/hgext/strip.py Thu Oct 05 15:11:34 2017 +0200 +++ b/hgext/strip.py Thu Oct 05 16:13:05 2017 +0200 @@ -60,10 +60,18 @@ def _find_update_target(repo, revs): urev, p2 = repo.changelog.parents(revs[0]) +current_branch = repo[None].branch() if (util.safehasattr(repo, 'mq') and p2 != nullid and p2 in [x.node for x in repo.mq.applied]): urev = p2 +elif current_branch != repo[urev].branch(): +revset = "(parents(%ln::parents(wdir())) - %ln::parents(wdir()))" \ + + " and branch(%s)" +branch_targets = repo.revs(revset, revs, revs, current_branch) +if branch_targets: +cl = repo.changelog +urev = min(cl.node(r) for r in branch_targets) return urev diff -r bbba17e8f85b -r 8a8497fdb6db tests/test-strip.t --- a/tests/test-strip.t Thu Oct 05 15:11:34 2017 +0200 +++ b/tests/test-strip.t Thu Oct 05 16:13:05 2017 +0200 @@ -941,6 +941,146 @@ abort: boom [255] +test stripping a working directory parent doesn't switch named branches + + $ hg log -G + @ changeset: 1:eca11cf91c71 + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg branch new-branch + marked working directory as branch new-branch + (branches are permanent and global, did you want a bookmark?) 
+ $ hg ci -m "start new branch" + $ echo 'foo' > foo.txt + $ hg ci -Aqm foo + $ hg up default + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ echo 'bar' > bar.txt + $ hg ci -Aqm bar + $ hg up new-branch + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg merge default + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m merge + $ hg log -G + @changeset: 5:4cf5e92caec2 + |\ branch: new-branch + | | tag: tip + | | parent: 3:f62c6c09b707 + | | parent: 4:35358f982181 + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: merge + | | + | o changeset: 4:35358f982181 + | | parent: 1:eca11cf91c71 + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: bar + | | + o | changeset: 3:f62c6c09b707 + | | branch: new-branch + | | user:test + | | date:Thu Jan 01 00:00:00 1970 + + | | summary: foo + | | + o | changeset: 2:b1d33a8cadd9 + |/ branch: new-branch (glob) + |user:test + |date:Thu Jan 01 00:00:00 1970 + + |summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg strip -r 35358f982181 + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + saved backup bundle to $TESTTMP/issue4736/.hg/strip-backup/35358f982181-a6f020aa-backup.hg (glob) + $ hg log -G + @ changeset: 3:f62c6c09b707 + | branch: new-branch + | tag: tip + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: foo + | + o changeset: 2:b1d33a8cadd9 + | branch: new-branch + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: start new branch + | + o changeset: 1:eca11cf91c71 + | user:test + | date:Thu Jan 01 00:00:00 1970 + + | summary: commitB + | + o changeset: 0:105141ef12d0 + user:test + date:Thu Jan 01 00:00:00 1970 + + summary: commitA + + + $ hg 
pull -u $TESTTMP/issue4736/.hg/strip-backup/35358f982181-a6f020aa-backup.hg + pulling from $TESTTMP/issue4736/.hg/strip-backup/35358f982181-a6f020aa-backup.hg (glob) + searching for changes + adding changesets + adding manifests + adding file changes + added 2 changesets with 1 changes to 1 files + 1 files updated
[PATCH 1 of 2] strip: factor out update target selection
# HG changeset patch # User Paul Morelle <madp...@htkc.org> # Date 1507209094 -7200 # Thu Oct 05 15:11:34 2017 +0200 # Node ID bbba17e8f85bdae96c769c7b4c506e9631165b66 # Parent c67db5dc131d0facdfdadc8c3344a8f3e689867d # EXP-Topic issue-5540 strip: factor out update target selection The same algorithm was used in two places: one to find out which commit shall become the parent of wdir, and the other to prepare the wdir when keeping changes. Factoring it out prevents inconsistent changes in either occurrence. diff -r c67db5dc131d -r bbba17e8f85b hgext/strip.py --- a/hgext/strip.py Sun Oct 01 12:12:56 2017 +0100 +++ b/hgext/strip.py Thu Oct 05 15:11:34 2017 +0200 @@ -58,16 +58,21 @@ raise error.Abort(_("local changed subrepos found" + excsuffix)) return s +def _find_update_target(repo, revs): +urev, p2 = repo.changelog.parents(revs[0]) + +if (util.safehasattr(repo, 'mq') and p2 != nullid +and p2 in [x.node for x in repo.mq.applied]): +urev = p2 + +return urev + def strip(ui, repo, revs, update=True, backup=True, force=None, bookmarks=None): with repo.wlock(), repo.lock(): if update: checklocalchanges(repo, force=force) -urev, p2 = repo.changelog.parents(revs[0]) -if (util.safehasattr(repo, 'mq') and -p2 != nullid -and p2 in [x.node for x in repo.mq.applied]): -urev = p2 +urev = _find_update_target(repo, revs) hg.clean(repo, urev) repo.dirstate.write(repo.currenttransaction()) @@ -196,10 +201,7 @@ revs = sorted(rootnodes) if update and opts.get('keep'): -urev, p2 = repo.changelog.parents(revs[0]) -if (util.safehasattr(repo, 'mq') and p2 != nullid -and p2 in [x.node for x in repo.mq.applied]): -urev = p2 +urev = _find_update_target(repo, revs) uctx = repo[urev] # only reset the dirstate for files that would actually change ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel