[PATCH 1 of 2] perf: add command for measuring revlog chunk operations
# HG changeset patch # User Gregory Szorc# Date 1479442671 28800 # Thu Nov 17 20:17:51 2016 -0800 # Node ID 3a1a4b0f3fd8445b166608e86829e048770ffa92 # Parent 41a8106789cae9716c39d8381fa5da1d3ea0d74b perf: add command for measuring revlog chunk operations Upcoming commits will teach revlogs to leverage the new compression engine API so that new compression formats can more easily be leveraged in revlogs. We want to be sure this refactoring doesn't regress performance. So this commit introduces "perfrevchunks" to explicitly test performance of reading, decompressing, and recompressing revlog chunks. Here is output when run on the mozilla-unified repo: $ hg perfrevlogchunks -c ! read ! wall 0.346603 comb 0.35 user 0.34 sys 0.01 (best of 28) ! read w/ reused fd ! wall 0.337707 comb 0.34 user 0.32 sys 0.02 (best of 30) ! read batch ! wall 0.013206 comb 0.02 user 0.00 sys 0.02 (best of 221) ! read batch w/ reused fd ! wall 0.013259 comb 0.03 user 0.01 sys 0.02 (best of 222) ! chunk ! wall 1.909939 comb 1.91 user 1.90 sys 0.01 (best of 6) ! chunk batch ! wall 1.750677 comb 1.76 user 1.74 sys 0.02 (best of 6) ! compress ! wall 5.668004 comb 5.67 user 5.67 sys 0.00 (best of 3) $ hg perfrevlogchunks -m ! read ! wall 0.365834 comb 0.37 user 0.35 sys 0.02 (best of 26) ! read w/ reused fd ! wall 0.350160 comb 0.35 user 0.32 sys 0.03 (best of 28) ! read batch ! wall 0.024777 comb 0.02 user 0.00 sys 0.02 (best of 119) ! read batch w/ reused fd ! wall 0.024895 comb 0.03 user 0.00 sys 0.03 (best of 118) ! chunk ! wall 2.514061 comb 2.52 user 2.48 sys 0.04 (best of 4) ! chunk batch ! wall 2.380788 comb 2.38 user 2.36 sys 0.02 (best of 5) ! compress ! wall 9.815297 comb 9.82 user 9.82 sys 0.00 (best of 3) We already see some interesting data, such as how much slower non-batched chunk reading is and that zlib compression appears to be >2x slower than decompression. I didn't have the data when I wrote this commit message, but I ran this on Mozilla's NFS-based Mercurial server and the time for reading with a reused file descriptor was faster. So I think it is worth testing both with and without file descriptor reuse so we can make informed decisions about recycling file descriptors. diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -859,6 +859,84 @@ def perfrevlog(ui, repo, file_=None, sta timer(d) fm.end() +@command('perfrevlogchunks', revlogopts + formatteropts + + [('s', 'startrev', 0, 'revision to start at')], + '-c|-m|FILE') +def perfrevlogchunks(ui, repo, file_=None, startrev=0, **opts): +"""Benchmark operations on revlog chunks. + +Logically, each revlog is a collection of fulltext revisions. However, +stored within each revlog are "chunks" of possibly compressed data. This +data needs to be read and decompressed or compressed and written. + +This command measures the time it takes to read+decompress and recompress +chunks in a revlog. It effectively isolates I/O and compression performance. +For measurements of higher-level operations like resolving revisions, +see ``perfrevlog`` and ``perfrevlogrevision``. +""" +rl = cmdutil.openrevlog(repo, 'perfrevlogchunks', file_, opts) +revs = list(rl.revs(startrev, len(rl) - 1)) + +def rlfh(rl): +if rl._inline: +return getsvfs(repo)(rl.indexfile) +else: +return getsvfs(repo)(rl.datafile) + +def doread(): +rl.clearcaches() +for rev in revs: +rl._chunkraw(rev, rev) + +def doreadcachedfh(): +rl.clearcaches() +fh = rlfh(rl) +for rev in revs: +rl._chunkraw(rev, rev, df=fh) + +def doreadbatch(): +rl.clearcaches() +rl._chunkraw(revs[0], revs[-1]) + +def doreadbatchcachedfh(): +rl.clearcaches() +fh = rlfh(rl) +rl._chunkraw(revs[0], revs[-1], df=fh) + +def dochunk(): +rl.clearcaches() +fh = rlfh(rl) +for rev in revs: +rl._chunk(rev, df=fh) + +chunks = [None] + +def dochunkbatch(): +rl.clearcaches() +fh = rlfh(rl) +# Save chunks as a side-effect. +chunks[0] = rl._chunks(revs, df=fh) + +def docompress(): +rl.clearcaches() +for chunk in chunks[0]: +rl.compress(chunk) + +benches = [ +(lambda: doread(), 'read'), +(lambda: doreadcachedfh(), 'read w/ reused fd'), +(lambda: doreadbatch(), 'read batch'), +(lambda: doreadbatchcachedfh(), 'read batch w/ reused fd'), +(lambda: dochunk(), 'chunk'), +(lambda: dochunkbatch(), 'chunk batch'), +(lambda: docompress(), 'compress'), +] + +for fn, title in benches: +timer, fm = gettimer(ui,
[PATCH 2 of 2] commands: print chunk type in debugrevlog
# HG changeset patch # User Gregory Szorc# Date 1479443400 28800 # Thu Nov 17 20:30:00 2016 -0800 # Node ID 4f92246570138fcf43913a287619706940e33e92 # Parent 3a1a4b0f3fd8445b166608e86829e048770ffa92 commands: print chunk type in debugrevlog Each data entry ("chunk") in a revlog has a type based on the first byte of the data. This type indicates how to interpret the data. This seems like a useful thing to be able to query through a debug command. So let's add that to `hg debugrevlog`. This does make `hg debugrevlog` slightly slower, as it has to read more than just the index. However, even on the mozilla-unified manifest (which is ~200MB spread over ~350K revisions), this takes <400ms. diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -15,6 +15,7 @@ import random import re import shlex import socket +import string import sys import tempfile import time @@ -3194,6 +3195,8 @@ def debugrevlog(ui, repo, file_=None, ** datasize = [None, 0, 0] fullsize = [None, 0, 0] deltasize = [None, 0, 0] +chunktypecounts = {} +chunktypesizes = {} def addsize(size, l): if l[0] is None or size < l[0]: @@ -3231,6 +3234,20 @@ def debugrevlog(ui, repo, file_=None, ** elif delta != nullrev: numother += 1 +# Obtain data on the raw chunks in the revlog. +chunk = r._chunkraw(rev, rev)[1] +if chunk: +chunktype = chunk[0] +else: +chunktype = 'empty' + +if chunktype not in chunktypecounts: +chunktypecounts[chunktype] = 0 +chunktypesizes[chunktype] = 0 + +chunktypecounts[chunktype] += 1 +chunktypesizes[chunktype] += size + # Adjust size min value for empty cases for size in (datasize, fullsize, deltasize): if size[0] is None: @@ -3282,6 +3299,24 @@ def debugrevlog(ui, repo, file_=None, ** ui.write(('full : ') + fmt % pcfmt(fulltotal, totalsize)) ui.write(('deltas: ') + fmt % pcfmt(deltatotal, totalsize)) +def fmtchunktype(chunktype): +if chunktype == 'empty': +return '%s : ' % chunktype +elif chunktype in string.ascii_letters: +return '0x%s (%s) : ' % (hex(chunktype), chunktype) +else: +return '0x%s : ' % hex(chunktype) + +ui.write('\n') +ui.write(('chunks: ') + fmt2 % numrevs) +for chunktype in sorted(chunktypecounts): +ui.write(fmtchunktype(chunktype)) +ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs)) +ui.write(('chunks size : ') + fmt2 % totalsize) +for chunktype in sorted(chunktypecounts): +ui.write(fmtchunktype(chunktype)) +ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize)) + ui.write('\n') fmt = dfmtstr(max(avgchainlen, compratio)) ui.write(('avg chain length : ') + fmt % avgchainlen) diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t --- a/tests/test-debugcommands.t +++ b/tests/test-debugcommands.t @@ -22,6 +22,11 @@ full : 44 (100.00%) deltas: 0 ( 0.00%) + chunks: 1 + 0x75 (u) : 1 (100.00%) + chunks size : 44 + 0x75 (u) : 44 (100.00%) + avg chain length : 0 max chain length : 0 compression ratio : 0 ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] setup: add flag to build_ext to control building zstd
# HG changeset patch # User Gregory Szorc# Date 1479442150 28800 # Thu Nov 17 20:09:10 2016 -0800 # Node ID 0c36116df0e58677f78ff5ca19ed0e496fc12c11 # Parent 41a8106789cae9716c39d8381fa5da1d3ea0d74b setup: add flag to build_ext to control building zstd Downstream packagers will inevitably want to disable building the vendored python-zstandard Python package. Rather than force them to patch setup.py, let's give them a knob to use. distutils Command classes support defining custom options. It requires setting certain class attributes (yes, class attributes: instance attributes don't work because the class type is consulted before it is instantiated). We already have a custom child class of build_ext, so we set these class attributes, implement some scaffolding, and override build_extensions to filter the Extension instance for the zstd extension if the `--no-zstd` argument is specified. Example usage: $ python setup.py build_ext --no-zstd diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -276,7 +276,30 @@ class hgdist(Distribution): # too late for some cases return not self.pure and Distribution.has_ext_modules(self) +# This is ugly as a one-liner. So use a variable. +buildextnegops = dict(getattr(build_ext, 'negative_options', {})) +buildextnegops['no-zstd'] = 'zstd' + class hgbuildext(build_ext): +user_options = build_ext.user_options + [ +('zstd', None, 'compile zstd bindings [default]'), +('no-zstd', None, 'do not compile zstd bindings'), +] + +boolean_options = build_ext.boolean_options + ['zstd'] +negative_opt = buildextnegops + +def initialize_options(self): +self.zstd = True +return build_ext.initialize_options(self) + +def build_extensions(self): +# Filter out zstd if disabled via argument. +if not self.zstd: +self.extensions = [e for e in self.extensions + if e.name != 'mercurial.zstd'] + +return build_ext.build_extensions(self) def build_extension(self, ext): try: ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH patchbot] patchbot: do not crash for malicious mails without message-id
> On Nov 16, 2016, at 10:16 PM, Jun Wuwrote: > > # HG changeset patch > # User Jun Wu > # Date 1479352538 0 > # Thu Nov 17 03:15:38 2016 + > # Node ID 7ecfbdf2460df04c1c263875a12403d3cfe3134a > # Parent 9036c3d03d301378e67d4516937ea9c6672c3c67 > patchbot: do not crash for malicious mails without message-id Queued for patchbot, thanks > > Sometimes due to power failure, the on-disk files could get truncated and > lose message-id. Currently patchbot would raise "KeyError: message-id" but > it does not print clues about which mail is corrupted. > > This patch makes it print which mail does not contain message-id and just > ignore them. > > diff --git a/index.py b/index.py > --- a/index.py > +++ b/index.py > @@ -51,4 +51,7 @@ def index_mailbox(session, mb): > if key not in all_keys: > mail = mb[key] > +if 'message-id' not in mail: > +sys.stderr.write('%s: ignored - no message-id\n' % key) > +continue > if session.query(models.Message).filter(models.Message.id.is_( > mail['message-id'])).count() > 0: > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel signature.asc Description: Message signed with OpenPGP using GPGMail ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 8 of 8] util: implement zstd compression engine
On Fri, Nov 11, 2016 at 10:19 AM, Bryan O'Sullivanwrote: > > On Fri, Nov 11, 2016 at 1:23 AM, Gregory Szorc > wrote: >> >> zstd is wins all around. I can't wait to implement support for it >> on the wire protocol and in revlogs. > > > Very exciting! Yep. Now queued. Thanks, Greg. > > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel > ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V2] rebase: calculate ancestors for --base separately (issue5420)
# HG changeset patch # User Jun Wu# Date 1479426495 0 # Thu Nov 17 23:48:15 2016 + # Node ID e5451a607d1ca53b2446ab049375e5dd5a055bb8 # Parent 87e0edc93d87b88e2925877469d8c142e01737fc # Available At https://bitbucket.org/quark-zju/hg-draft # hg pull https://bitbucket.org/quark-zju/hg-draft -r e5451a607d1c rebase: calculate ancestors for --base separately (issue5420) Previously, the --base option only works with a single "branch" - if there are multiple branching points, "rebase" will error out with: abort: source is ancestor of destination This happens if the user has multiple draft branches with different branching points, and uses "hg rebase -b 'draft()' -d master". The error message looks cryptic to users who don't know the implementation detail. This patch changes the logic to calculate ancestors for each "base" branch so it would work in the multiple branching points case. Note: if there are multiple bases where some of them are rebasable, while some of them aren't because the branching point is the destination, the current behavior is to abort with "nothing to rebase", which seems wrong. However, that issue is not introduced by this patch - it exists for "-s" as well. I have reported it as issue5422 and should be solved separately. diff --git a/hgext/rebase.py b/hgext/rebase.py --- a/hgext/rebase.py +++ b/hgext/rebase.py @@ -722,10 +722,17 @@ def _definesets(ui, repo, destf=None, sr destf = str(dest) -commonanc = repo.revs('ancestor(%ld, %d)', base, dest).first() -if commonanc is not None: -rebaseset = repo.revs('(%d::(%ld) - %d)::', - commonanc, base, commonanc) -else: -rebaseset = [] +# calculate ancestors for individual bases +realbases = [] +for b in repo.revs('roots(%ld)', base): +# branching point +bp = repo.revs('ancestor(%d, %d)', b, dest).first() +if bp is None: +continue +# move b to be the direct child of the branching point +b = repo.revs('%d::%d - %d', bp, b, bp).first() +if b is not None: +realbases.append(b) + +rebaseset = repo.revs('%ld::', realbases) if not rebaseset: diff --git a/tests/test-rebase-base.t b/tests/test-rebase-base.t new file mode 100644 --- /dev/null +++ b/tests/test-rebase-base.t @@ -0,0 +1,94 @@ + $ cat >> $HGRCPATH < [extensions] + > rebase= + > drawdag=$TESTDIR/drawdag.py + > + > [phases] + > publish=False + > + > [alias] + > tglog = log -G --template "{rev}: {desc}" + > EOF + + $ hg init a + $ cd a + + $ hg debugdrawdag < g f + > |/ + > e c d + > | |/ + > | b + > |/ + > a + > EOS + + $ cd .. + +Pick a single base: + + $ cp -a a a1 && cd a1 + $ hg rebase -b c -d g -q + $ hg tglog + o 6: d + | + | o 5: c + |/ + o 4: b + | + o 3: g + | + | o 2: f + |/ + o 1: e + | + o 0: a + + $ cd .. + +Pick a base that is already an descendant of dest: + + $ cp -a a a2 && cd a2 + $ hg rebase -b g -d e + nothing to rebase + [1] + $ hg rebase -b d -d a + nothing to rebase + [1] + $ hg rebase -b d+c+f+e -d a + nothing to rebase + [1] + $ cd .. + +Pick multiple bases (issue5420): + + $ cp -a a a3 && cd a3 + $ hg rebase -b d+f -d g -q + $ hg tglog + o 6: f + | + | o 5: d + | | + | | o 4: c + | |/ + | o 3: b + |/ + o 2: g + | + o 1: e + | + o 0: a + + $ cd .. + +Mixed rebasable and non-rebasable bases (unresolved, issue5422): + + $ cp -a a a4 && cd a4 + $ hg debugdrawdag < h + > | + > g + > EOS + $ hg rebase -b d+f+h -d g + nothing to rebase + [1] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] manifest: move manifestctx creation into manifestlog.get()
# HG changeset patch # User Durham Goode# Date 1479425479 28800 # Thu Nov 17 15:31:19 2016 -0800 # Node ID 916fbbd3d9a628dcd4a63c352d4d70b678e92a68 # Parent c27614f2dec1405db606d1ef871dfabf72cc0737 manifest: move manifestctx creation into manifestlog.get() Most manifestctx creation already happened in manifestlog.get(), but there was one spot in the manifestctx class itself that created an instance manually. This patch makes that one instance go through the manifestlog. This means extensions can just wrap manifestlog.get() and it will cover all manifestctx creations. It also means this code path now hits the manifestlog cache. diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -1418,7 +1418,7 @@ class manifestctx(object): if revlog._usemanifestv2: # Need to perform a slow delta r0 = revlog.deltaparent(revlog.rev(self._node)) -m0 = manifestctx(self._repo, revlog.node(r0)).read() +m0 = self._repo.manifestlog[revlog.node(r0)].read() m1 = self.read() md = manifestdict() for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] patchbomb: make --git imply --plain
Am Thu, 17 Nov 2016 16:39:05 -0500 schrieb Augie Fackler: > On Thu, Nov 17, 2016 at 4:34 PM, Sean Farley wrote: > > Henning Schild writes: > > > >> Am Wed, 16 Nov 2016 17:41:03 -0500 > >> schrieb Augie Fackler : > >> > >>> > On Nov 16, 2016, at 15:41, Henning Schild > >>> > wrote: > >>> > > >>> > # HG changeset patch > >>> > # User Henning Schild > >>> > # Date 1479327376 -3600 > >>> > # Wed Nov 16 21:16:16 2016 +0100 > >>> > # Node ID 181aaf02c1542387651e66c22063bb276271ced2 > >>> > # Parent db8637a8a7464aca668e5aaa2fb9610edaf6f286 > >>> > patchbomb: make --git imply --plain > >>> > >>> No thanks. --git here is about using the git-diff format, which is > >>> still extremely valuable for hg users. > >> > >> Ok. I guess that consequently also means no to the diffstat patch > >> (2 of 2 out of this thread)? > >> > >> Let me see how to fit that change into hg-git. I did not have a > >> look at how that hooks into hg, but i can imagine a similar patch > >> would be much harder there - like parsing the mbox. I need that > >> change working with hg-git for contributing code back to upstream > >> mailing lists. I can imagine that similar patches would be > >> required for the reverse operation (hg mimport), but i did not try > >> that way yet. > > > > Since a git mailing list would be only for git clients, it'd be ok > > to have both import / export in the git style for hg-git. Maybe > > others feel different than me, though. > > It's probably Not Okay to have hg-git break hg email for users that > have hg-git turned on globally, rather than per-repo? > > > > >> How about a new option --git-format that implies --git and --plain > >> and takes care of the diffstat? > > > > We usually shy away from new arguments and instead suggest the > > improvement of templates which could be used here (maybe they > > already can, I haven't checked). > > I'm not sure that applies entirely to email tho - the email formats > aren't templated, though I suppose they could be... The whole template-story sounds like a good idea but kind of long-term. For the last series i have sent the question is whether the new argument would contradict such a change (if hg-git brought its own template and the new argument would not be required anymore). Today you can already send valid mails to git-based projects. Including the diffstat is just a style thing, if you skip it git-am wont care. Setting --plain and --git automatically is pure convenience. That being said, sending emails without diffstat - and maybe other additional information - might result in git upstream to "dislike" the mails and rejecting the contributions. While this is not an hg issue it could mean that hg-git is not a viable option if you are planning to contribute back to upstream git. > Now you've got me thinking. > ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 3 of 3 V2] memctx: allow the memlightctx thats reusing the manifest node
Jun Wuwrites: > Excerpts from Sean Farley's message of 2016-11-17 13:41:54 -0800: >> Jun Wu writes: >> >> > Per discussion offline, I think this (a new class) approach is the cleanest >> > and safest approach to reuse manifest nodes. >> > >> > "absorb" has an "overlaycontext" which takes an "original" context, but >> > overrides some file contents. In the future we may want to have such >> > overlaycontext, while the "memlightctx" is a special case where you don't >> > override any file contents. Note: that should not require a resend of this >> > patch to implement such overlaycontext feature right now. >> >> Why can't we refactor memctx? I was planning on removing committablectx >> in the future and just use memctx to build a commit (and even merge). > > Then you will either: > > a) worry about the "consistency" of "files" and "manifestnode" - people > (ex. new programmer) passing inconsistent "files" and "manifestnode" > will be able to add buggy content to the repo. > > or: > > b) have perf issues - if you calculate "files" from "manifestnode" and > its parent - it's slower than just getting "files" from the changelog. > > Therefore a new class taking a "changelog node" makes perfect sense. It > can generate "manifestnode" and "files" efficiently and have no perf or > consistency issues. I guess that's alright for now. I'll think about my future refactorings later. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] patchbomb: make --git imply --plain
Augie Facklerwrites: > On Thu, Nov 17, 2016 at 4:34 PM, Sean Farley wrote: >> Henning Schild writes: >> >>> Am Wed, 16 Nov 2016 17:41:03 -0500 >>> schrieb Augie Fackler : >>> > On Nov 16, 2016, at 15:41, Henning Schild > wrote: > > # HG changeset patch > # User Henning Schild > # Date 1479327376 -3600 > # Wed Nov 16 21:16:16 2016 +0100 > # Node ID 181aaf02c1542387651e66c22063bb276271ced2 > # Parent db8637a8a7464aca668e5aaa2fb9610edaf6f286 > patchbomb: make --git imply --plain No thanks. --git here is about using the git-diff format, which is still extremely valuable for hg users. >>> >>> Ok. I guess that consequently also means no to the diffstat patch (2 of >>> 2 out of this thread)? >>> >>> Let me see how to fit that change into hg-git. I did not have a look at >>> how that hooks into hg, but i can imagine a similar patch would be much >>> harder there - like parsing the mbox. I need that change working with >>> hg-git for contributing code back to upstream mailing lists. >>> I can imagine that similar patches would be required for the reverse >>> operation (hg mimport), but i did not try that way yet. >> >> Since a git mailing list would be only for git clients, it'd be ok to >> have both import / export in the git style for hg-git. Maybe others feel >> different than me, though. > > It's probably Not Okay to have hg-git break hg email for users that > have hg-git turned on globally, rather than per-repo? Whoops, yeah, I only meant for a git repo. >>> How about a new option --git-format that implies --git and --plain and >>> takes care of the diffstat? >> >> We usually shy away from new arguments and instead suggest the >> improvement of templates which could be used here (maybe they already >> can, I haven't checked). > > I'm not sure that applies entirely to email tho - the email formats > aren't templated, though I suppose they could be... > > Now you've got me thinking. :-) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 05 of 11] bundle2: use compression engines API to obtain decompressor
On Tue, Nov 1, 2016 at 5:08 PM, Gregory Szorcwrote: > # HG changeset patch > # User Gregory Szorc > # Date 1477158197 25200 > # Sat Oct 22 10:43:17 2016 -0700 > # Node ID 859682e2187c46bb7ab68aedd7dcbab5266d878e > # Parent b32a9f8f72bcaea41d23165cc26583b0dcfb0fc6 > bundle2: use compression engines API to obtain decompressor > > Like the recent change for the compressor side, this too is > relatively straightforward. We now store a compression engine > on the instance instead of a low-level decompressor. This will > allow us to transition to a new decompression API when one is > ready. > > diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py > --- a/mercurial/bundle2.py > +++ b/mercurial/bundle2.py > @@ -676,17 +676,17 @@ class unbundle20(unpackermixin): > This class is fed with a binary stream and yields parts through its > `iterparts` methods.""" > > _magicstring = 'HG20' > > def __init__(self, ui, fp): > """If header is specified, we do not read it out of the stream.""" > self.ui = ui > -self._decompressor = util.decompressors[None] > +self._compengine = util.compressionengines.forbundletype('UN') > self._compressed = None > super(unbundle20, self).__init__(fp) > > @util.propertycache > def params(self): > """dictionary of stream level parameters""" > indebug(self.ui, 'reading bundle2 stream parameters') > params = {} > @@ -750,19 +750,19 @@ class unbundle20(unpackermixin): > if paramssize < 0: > raise error.BundleValueError('negative bundle param size: %i' > % paramssize) > yield _pack(_fstreamparamsize, paramssize) > if paramssize: > params = self._readexact(paramssize) > self._processallparams(params) > yield params > -assert self._decompressor is util.decompressors[None] > +assert self._compengine.bundletype == 'UN' Should be bundletype()[0], right? Any idea why your version passes? I haven't even looked at what this code is doing, but it's unfortunate that we don't seem to have a test for it. > # From there, payload might need to be decompressed > -self._fp = self._decompressor(self._fp) > +self._fp = self._compengine.decompressorreader(self._fp) > emptycount = 0 > while emptycount < 2: > # so we can brainlessly loop > assert _fpartheadersize == _fpayloadsize > size = self._unpack(_fpartheadersize)[0] > yield _pack(_fpartheadersize, size) > if size: > emptycount = 0 > @@ -776,17 +776,17 @@ class unbundle20(unpackermixin): > yield self._readexact(size) > > > def iterparts(self): > """yield all parts contained in the stream""" > # make sure param have been loaded > self.params > # From there, payload need to be decompressed > -self._fp = self._decompressor(self._fp) > +self._fp = self._compengine.decompressorreader(self._fp) > indebug(self.ui, 'start extraction of bundle2 parts') > headerblock = self._readpartheader() > while headerblock is not None: > part = unbundlepart(self.ui, headerblock, self._fp) > yield part > part.seek(0, 2) > headerblock = self._readpartheader() > indebug(self.ui, 'end of bundle2 stream') > @@ -818,20 +818,20 @@ def b2streamparamhandler(name): > assert name not in formatmap > b2streamparamsmap[name] = func > return func > return decorator > > @b2streamparamhandler('compression') > def processcompression(unbundler, param, value): > """read compression parameter and install payload decompression""" > -if value not in util.decompressors: > +if value not in util.compressionengines.supportedbundletypes: > raise error.BundleUnknownFeatureError(params=(param,), >values=(value,)) > -unbundler._decompressor = util.decompressors[value] > +unbundler._compengine = util.compressionengines.forbundletype(value) > if value is not None: > unbundler._compressed = True > > class bundlepart(object): > """A bundle2 part contains application level payload > > The part `type` is used to route the part to the application level > handler. > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 3 of 3 V2] memctx: allow the memlightctx thats reusing the manifest node
Excerpts from Sean Farley's message of 2016-11-17 13:41:54 -0800: > Jun Wuwrites: > > > Per discussion offline, I think this (a new class) approach is the cleanest > > and safest approach to reuse manifest nodes. > > > > "absorb" has an "overlaycontext" which takes an "original" context, but > > overrides some file contents. In the future we may want to have such > > overlaycontext, while the "memlightctx" is a special case where you don't > > override any file contents. Note: that should not require a resend of this > > patch to implement such overlaycontext feature right now. > > Why can't we refactor memctx? I was planning on removing committablectx > in the future and just use memctx to build a commit (and even merge). Then you will either: a) worry about the "consistency" of "files" and "manifestnode" - people (ex. new programmer) passing inconsistent "files" and "manifestnode" will be able to add buggy content to the repo. or: b) have perf issues - if you calculate "files" from "manifestnode" and its parent - it's slower than just getting "files" from the changelog. Therefore a new class taking a "changelog node" makes perfect sense. It can generate "manifestnode" and "files" efficiently and have no perf or consistency issues. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 1 of 3] patchbomb: introduce new option --git-format-patch
Henning Schildwrites: > # HG changeset patch > # User Henning Schild > # Date 1478958881 -3600 > # Sat Nov 12 14:54:41 2016 +0100 > # Node ID 7885eea6837bbb30c12e0833b71a6691008b489e > # Parent bf876ac433c1859ab61d66ddf786049fd6e6bea8 > patchbomb: introduce new option --git-format-patch As I mentioned before, I'd rather not have a new option. If we could do this with templates, then you could just alias that: [templates] gitemail = "{foo} {bar}" [alias] gitemail = email -T "{gitemail}" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 3 of 3 V2] memctx: allow the memlightctx thats reusing the manifest node
Jun Wuwrites: > Per discussion offline, I think this (a new class) approach is the cleanest > and safest approach to reuse manifest nodes. > > "absorb" has an "overlaycontext" which takes an "original" context, but > overrides some file contents. In the future we may want to have such > overlaycontext, while the "memlightctx" is a special case where you don't > override any file contents. Note: that should not require a resend of this > patch to implement such overlaycontext feature right now. Why can't we refactor memctx? I was planning on removing committablectx in the future and just use memctx to build a commit (and even merge). ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] patchbomb: make --git imply --plain
On Thu, Nov 17, 2016 at 4:34 PM, Sean Farleywrote: > Henning Schild writes: > >> Am Wed, 16 Nov 2016 17:41:03 -0500 >> schrieb Augie Fackler : >> >>> > On Nov 16, 2016, at 15:41, Henning Schild >>> > wrote: >>> > >>> > # HG changeset patch >>> > # User Henning Schild >>> > # Date 1479327376 -3600 >>> > # Wed Nov 16 21:16:16 2016 +0100 >>> > # Node ID 181aaf02c1542387651e66c22063bb276271ced2 >>> > # Parent db8637a8a7464aca668e5aaa2fb9610edaf6f286 >>> > patchbomb: make --git imply --plain >>> >>> No thanks. --git here is about using the git-diff format, which is >>> still extremely valuable for hg users. >> >> Ok. I guess that consequently also means no to the diffstat patch (2 of >> 2 out of this thread)? >> >> Let me see how to fit that change into hg-git. I did not have a look at >> how that hooks into hg, but i can imagine a similar patch would be much >> harder there - like parsing the mbox. I need that change working with >> hg-git for contributing code back to upstream mailing lists. >> I can imagine that similar patches would be required for the reverse >> operation (hg mimport), but i did not try that way yet. > > Since a git mailing list would be only for git clients, it'd be ok to > have both import / export in the git style for hg-git. Maybe others feel > different than me, though. It's probably Not Okay to have hg-git break hg email for users that have hg-git turned on globally, rather than per-repo? > >> How about a new option --git-format that implies --git and --plain and >> takes care of the diffstat? > > We usually shy away from new arguments and instead suggest the > improvement of templates which could be used here (maybe they already > can, I haven't checked). I'm not sure that applies entirely to email tho - the email formats aren't templated, though I suppose they could be... Now you've got me thinking. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] patchbomb: make --git imply --plain
Henning Schildwrites: > Am Wed, 16 Nov 2016 17:41:03 -0500 > schrieb Augie Fackler : > >> > On Nov 16, 2016, at 15:41, Henning Schild >> > wrote: >> > >> > # HG changeset patch >> > # User Henning Schild >> > # Date 1479327376 -3600 >> > # Wed Nov 16 21:16:16 2016 +0100 >> > # Node ID 181aaf02c1542387651e66c22063bb276271ced2 >> > # Parent db8637a8a7464aca668e5aaa2fb9610edaf6f286 >> > patchbomb: make --git imply --plain >> >> No thanks. --git here is about using the git-diff format, which is >> still extremely valuable for hg users. > > Ok. I guess that consequently also means no to the diffstat patch (2 of > 2 out of this thread)? > > Let me see how to fit that change into hg-git. I did not have a look at > how that hooks into hg, but i can imagine a similar patch would be much > harder there - like parsing the mbox. I need that change working with > hg-git for contributing code back to upstream mailing lists. > I can imagine that similar patches would be required for the reverse > operation (hg mimport), but i did not try that way yet. Since a git mailing list would be only for git clients, it'd be ok to have both import / export in the git style for hg-git. Maybe others feel different than me, though. > How about a new option --git-format that implies --git and --plain and > takes care of the diffstat? We usually shy away from new arguments and instead suggest the improvement of templates which could be used here (maybe they already can, I haven't checked). ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH stable,v2] shelve: add missing space in help text
On Thu, Nov 17, 2016 at 07:35:03PM +0100, Mads Kiilerich wrote: > # HG changeset patch > # User Mads Kiilerich> # Date 1479159805 -3600 > # Mon Nov 14 22:43:25 2016 +0100 > # Node ID 543c83999f3e1b0f8f62e5f2150b48bc47d55b0c > # Parent 73b671fbed41d82a5dd46e485c61ddb8afe42faf > shelve: add missing space in help text Queued for stable, thanks! > > The change is trivial and unlikely to have been translated so we update > translation files too. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 3] patchbomb: make --git-format-patch imply --plain
# HG changeset patch # User Henning Schild# Date 1479415557 -3600 # Thu Nov 17 21:45:57 2016 +0100 # Node ID de2b03a509491020f728f1955e39e2bfb9a77426 # Parent a9be53e26cb1ac19d1c0156062e8ae23f8366d8b patchbomb: make --git-format-patch imply --plain Not using --plain would generate mails with the hg header in the git commit message. Since --git-format-patch already caters for git, might as well make sure users do not forget --plain. diff --git a/hgext/patchbomb.py b/hgext/patchbomb.py --- a/hgext/patchbomb.py +++ b/hgext/patchbomb.py @@ -418,7 +418,7 @@ @command('email', [('g', 'git', None, _('use git extended diff format')), ('', 'git-format-patch', None, _('use git-format-patch email format ' - '(implies --git)')), + '(implies --git and --plain)')), ('', 'plain', None, _('omit hg patch header')), ('o', 'outgoing', None, _('send changes not found in the target repository')), @@ -527,6 +527,7 @@ if (opts.get('git_format_patch')): opts['git'] = True +opts['plain'] = True if not (opts.get('test') or mbox): # really sending diff --git a/tests/test-patchbomb.t b/tests/test-patchbomb.t --- a/tests/test-patchbomb.t +++ b/tests/test-patchbomb.t @@ -755,13 +755,6 @@ To: foo Cc: bar - # HG changeset patch - # User test - # Date 3 0 - # Thu Jan 01 00:00:03 1970 + - # Node ID ff2c9fa2018b15fa74b33363bda9527323e2a99f - # Parent 97d72e5f12c7e84f85064aa72e5a297142c36ed9 - c --- c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) @@ -953,13 +946,6 @@ To: foo Cc: bar - # HG changeset patch - # User test - # Date 1 0 - # Thu Jan 01 00:00:01 1970 + - # Node ID 8580ff50825a50c8f716709acdf8de0deddcd6ab - # Parent - a --- a | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) @@ -989,13 +975,6 @@ To: foo Cc: bar - # HG changeset patch - # User test - # Date 2 0 - # Thu Jan 01 00:00:02 1970 + - # Node ID 97d72e5f12c7e84f85064aa72e5a297142c36ed9 - # Parent 8580ff50825a50c8f716709acdf8de0deddcd6ab - b --- b | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 3] patchbomb: introduce new option --git-format-patch
# HG changeset patch # User Henning Schild# Date 1478958881 -3600 # Sat Nov 12 14:54:41 2016 +0100 # Node ID 7885eea6837bbb30c12e0833b71a6691008b489e # Parent bf876ac433c1859ab61d66ddf786049fd6e6bea8 patchbomb: introduce new option --git-format-patch When sending patch mails to a git-based project the diffstat should be placed right under the commit message. Setting that option makes hg email behave like git-format-patch. diff --git a/hgext/patchbomb.py b/hgext/patchbomb.py --- a/hgext/patchbomb.py +++ b/hgext/patchbomb.py @@ -140,8 +140,10 @@ desc = [] node = None body = '' +linenr = 0 for line in patchlines: +linenr += 1 if line.startswith('#'): if line.startswith('# Node ID'): node = line.split()[-1] @@ -150,6 +152,16 @@ break desc.append(line) +ds = patch.diffstat(patchlines) +if opts.get('diffstat') and opts.get('git_format_patch'): +linenr -= 1 +if (patchlines[linenr - 1] == ''): +linenr -= 1 +patchlines.pop(linenr) +patchlines.insert(linenr, '---') +linenr += 1 +patchlines.insert(linenr, ds) + if not patchname and not node: raise ValueError @@ -166,8 +178,7 @@ while patchlines and not patchlines[0].strip(): patchlines.pop(0) -ds = patch.diffstat(patchlines) -if opts.get('diffstat'): +if opts.get('diffstat') and not opts.get('git_format_patch'): body += ds + '\n\n' addattachment = opts.get('attach') or opts.get('inline') @@ -406,6 +417,7 @@ @command('email', [('g', 'git', None, _('use git extended diff format')), +('', 'git-format-patch', None, _('use git-format-patch email format')), ('', 'plain', None, _('omit hg patch header')), ('o', 'outgoing', None, _('send changes not found in the target repository')), @@ -504,7 +516,6 @@ ''' _charsets = mail._charsets(ui) - bundle = opts.get('bundle') date = opts.get('date') mbox = opts.get('mbox') diff --git a/tests/test-patchbomb.t b/tests/test-patchbomb.t --- a/tests/test-patchbomb.t +++ b/tests/test-patchbomb.t @@ -722,6 +722,58 @@ +c +test diffstat for single patch for git: + $ hg email --git-format-patch --git --date '1970-1-1 0:1' -n -f quux -t foo \ + > -c bar -s test -d -y -r 2 + this patch series consists of 1 patches. + + + Final summary: + + From: quux + To: foo + Cc: bar + Subject: [PATCH] test + c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + + are you sure you want to send (yn)? y + + displaying [PATCH] test ... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Subject: [PATCH] test + X-Mercurial-Node: ff2c9fa2018b15fa74b33363bda9527323e2a99f + X-Mercurial-Series-Index: 1 + X-Mercurial-Series-Total: 1 + Message-Id: (glob) + X-Mercurial-Series-Id: (glob) + User-Agent: Mercurial-patchbomb/* (glob) + Date: Thu, 01 Jan 1970 00:01:00 + + From: quux + To: foo + Cc: bar + + # HG changeset patch + # User test + # Date 3 0 + # Thu Jan 01 00:00:03 1970 + + # Node ID ff2c9fa2018b15fa74b33363bda9527323e2a99f + # Parent 97d72e5f12c7e84f85064aa72e5a297142c36ed9 + c + --- + c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + + diff --git a/c b/c + new file mode 100644 + --- /dev/null + +++ b/c + @@ -0,0 +1,1 @@ + +c + + test diffstat for multiple patches: $ hg email --date '1970-1-1 0:1' -n -f quux -t foo -c bar -s test -d -y \ > -r 0:1 @@ -839,6 +891,123 @@ +b +test diffstat for multiple patches for git: + $ hg email --git-format-patch --git --date '1970-1-1 0:1' -n -f quux -t foo \ + > -c bar -s test -d -y -r 0:1 + this patch series consists of 2 patches. + + + Write the introductory message for the patch series. + + + Final summary: + + From: quux + To: foo + Cc: bar + Subject: [PATCH 0 of 2] test + a | 1 + + b | 1 + + 2 files changed, 2 insertions(+), 0 deletions(-) + Subject: [PATCH 1 of 2] a + a | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + Subject: [PATCH 2 of 2] b + b | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + + are you sure you want to send (yn)? y + + displaying [PATCH 0 of 2] test ... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Subject: [PATCH 0 of 2] test + Message-Id: (glob) + User-Agent: Mercurial-patchbomb/* (glob) + Date: Thu, 01 Jan 1970 00:01:00 + + From: quux + To: foo + Cc: bar + + + a | 1 + + b | 1 + + 2 files changed, 2 insertions(+), 0 deletions(-) + + displaying [PATCH 1 of 2] a ... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Subject: [PATCH 1 of 2]
[PATCH 2 of 3] patchbomb: make --git-format-patch imply --git
# HG changeset patch # User Henning Schild# Date 1479415556 -3600 # Thu Nov 17 21:45:56 2016 +0100 # Node ID a9be53e26cb1ac19d1c0156062e8ae23f8366d8b # Parent 7885eea6837bbb30c12e0833b71a6691008b489e patchbomb: make --git-format-patch imply --git --git-format-patch should not be used without --git, enforce that so the user does not have to specify both. diff --git a/hgext/patchbomb.py b/hgext/patchbomb.py --- a/hgext/patchbomb.py +++ b/hgext/patchbomb.py @@ -417,7 +417,8 @@ @command('email', [('g', 'git', None, _('use git extended diff format')), -('', 'git-format-patch', None, _('use git-format-patch email format')), +('', 'git-format-patch', None, _('use git-format-patch email format ' + '(implies --git)')), ('', 'plain', None, _('omit hg patch header')), ('o', 'outgoing', None, _('send changes not found in the target repository')), @@ -524,6 +525,9 @@ # internal option used by pbranches patches = opts.get('patches') +if (opts.get('git_format_patch')): +opts['git'] = True + if not (opts.get('test') or mbox): # really sending mail.validateconfig(ui) diff --git a/tests/test-patchbomb.t b/tests/test-patchbomb.t --- a/tests/test-patchbomb.t +++ b/tests/test-patchbomb.t @@ -723,8 +723,8 @@ test diffstat for single patch for git: - $ hg email --git-format-patch --git --date '1970-1-1 0:1' -n -f quux -t foo \ - > -c bar -s test -d -y -r 2 + $ hg email --git-format-patch --date '1970-1-1 0:1' -n -f quux -t foo -c bar \ + > -s test -d -y -r 2 this patch series consists of 1 patches. @@ -892,8 +892,8 @@ test diffstat for multiple patches for git: - $ hg email --git-format-patch --git --date '1970-1-1 0:1' -n -f quux -t foo \ - > -c bar -s test -d -y -r 0:1 + $ hg email --git-format-patch --date '1970-1-1 0:1' -n -f quux -t foo -c bar \ + > -s test -d -y -r 0:1 this patch series consists of 2 patches. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, Nov 17, 2016 at 11:26 AM, Gregory Szorcwrote: > On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerich > wrote: > >> # HG changeset patch >> # User Mads Kiilerich >> # Date 1479322051 -3600 >> # Wed Nov 16 19:47:31 2016 +0100 >> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 >> # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 >> bdiff: early pruning of common suffix before doing expensive computations >> >> Similar to the previous change, but trimming trailing lines. >> >> On mozilla-unified: >> perfbdiff -m 3041e4d59df2 >> ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to >> ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to >> perfbdiff 0e9928989e9c --alldata --count 10 >> ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) >> ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) >> >> > I can collaborate the perf improvements. On mozilla-unified (using a > different clone and machine from what I wrote in my commit messages > reporting bdiff perf - but the CPU model is the same), these 2 patches > result in: > > $ perfbdiff -m 3041e4d59df2 > ! wall 0.040755 comb 0.04 user 0.04 sys 0.00 (best of 100) > ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) > > $ perfbdiff 0e9928989e9c --alldata --count 100 > ! wall 4.59 comb 4.58 user 4.58 sys 0.00 (best of 3) > ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) > > Compared to 4.0: > $ perfbdiff -m 3041e4d59df2 > ! wall 0.076924 comb 0.08 user 0.08 sys 0.00 (best of 100) > ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) > > $ perfbdiff 0e9928989e9c --alldata --count 100 > ! wall 8.892216 comb 8.88 user 8.88 sys 0.00 (best of 3) > ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) > > Looking at the assembly and profiling output, I believe I've confirmed my > suspicions from my comment on part 1: there is still room to optimize this > code. Again, that can be deferred to a follow-up. > It's worth looking at where we're spending CPU time after this series. Measuring `perfbdiff 0e9928989e9c --alldata --count 100`: This series 40% bdiff_module.c:bdiff() 38% bidff.c:bdiff_splitlines() 12% bdiff.c:bdiff_diff() 5% __memcmp_sse4_1 (not sure where exactly this is called from) 4% bdiff.c:recurse() Before prefix and suffix matching: 62% bdiff.c:bdiff_splitlines() 23% bdiff.c.bdiff_diff() 8% memcmp_sse4_1 5% bdiff.c:recurse() 4.0 82% bdiff.c:bdiff_splitlines() 10% bdiff.c:bdiff_diff() 4.5% __memcmp_sse4_1 2.5% bdiff.c:recurse() This series shifted time out of bdiff_splitlines() *and* bdiff_diff(). Furthermore, from 4.0 to this series, the ratio of time spent before bdiff_diff() and inside is roughly the same, despite things getting 5-10x faster in that time! It is alarming we're still spending so much time in what is effectively set-up to run an algorithm. But this is also a good thing: we know the setup code can still be optimized which means there are still significant perf wins on the table. > >> diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c >> --- a/mercurial/bdiff_module.c >> +++ b/mercurial/bdiff_module.c >> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P >> struct bdiff_line *al, *bl; >> struct bdiff_hunk l, *h; >> int an, bn, count; >> - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; >> + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, >> lmax; >> PyThreadState *_save; >> >> l.next = NULL; >> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P >> if (*ia == '\n') >> lcommon = li + 1; >> /* we can almost add: if (li == lmax) lcommon = li; */ >> + lmax -= lcommon; >> + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; >> +li <= lmax && *ia == *ib; >> +++li, --ia, --ib) >> + if (*ia == '\n') >> + lcommonend = li; >> + /* printf("common %i %i\n", lcommon, lcommonend); */ >> >> - an = bdiff_splitlines(sa + lcommon, la - lcommon, ); >> - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, ); >> + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, >> ); >> + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, >> ); >> if (!al || !bl) >> goto nomem; >> >> diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out >> --- a/tests/test-bdiff.py.out >> +++ b/tests/test-bdiff.py.out >> @@ -28,9 +28,9 @@ showdiff( >>'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): >> 'x\n\nx\n\n' >> 6 6 '' -> 'y\n\n' >> - 'x\n\n' >> - 9 9 '' -> 'y\n\n' >> - 'x\n\nz\n' >> + 'x\n' >> + 8 8 '' -> '\ny\n' >> + '\nx\n\nz\n' >> showdiff( >>'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', >>
Re: [PATCH 3 of 3 V2] memctx: allow the memlightctx thats reusing the manifest node
Per discussion offline, I think this (a new class) approach is the cleanest and safest approach to reuse manifest nodes. "absorb" has an "overlaycontext" which takes an "original" context, but overrides some file contents. In the future we may want to have such overlaycontext, while the "memlightctx" is a special case where you don't override any file contents. Note: that should not require a resend of this patch to implement such overlaycontext feature right now. Excerpts from Mateusz Kwapich's message of 2016-11-17 11:31:12 -0800: > # HG changeset patch > # User Mateusz Kwapich> # Date 1479410909 28800 > # Thu Nov 17 11:28:29 2016 -0800 > # Node ID 0d41689f79cf22d8761dd6af9cb5da86008afe94 > # Parent 4a0824bead3ba5980bd8528937fba5f7bb31ba9f > memctx: allow the memlightctx thats reusing the manifest node > > When we have a lot of files writing a new manifest revision can be expensive. > This commit adds a possibility for memctx to reuse a manifest from a different > commit. This can be beneficial for commands that are creating metadata changes > without any actual files changed like "hg metaedit" in evolve extension. > > I will send the change for evolve that leverages this once this is accepted. > > diff --git a/mercurial/context.py b/mercurial/context.py > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -1975,3 +1975,99 @@ class memfilectx(committablefilectx): > def write(self, data, flags): > """wraps repo.wwrite""" > self._data = data > + > +class memlightctx(committablectx): > +"""Like memctx but it's reusing the manifest of different commit. > +Intended to be used by lightweight operations that are creating > +metadata-only changes. > + > +Revision information is supplied at initialization time. 'repo' is the > +current localrepo, 'ctx' is original revision which manifest we're > reuisng > +'parents' is a sequence of two parent revisions identifiers (pass None > for > +every missing parent), 'text' is the commit. > + > +user receives the committer name and defaults to current repository > +username, date is the commit date in any format supported by > +util.parsedate() and defaults to current date, extra is a dictionary of > +metadata or is left empty. > +""" > +def __new__(cls, repo, path, *args, **kwargs): > +return super(memlightctx, cls).__new__(cls, repo) > + > +def __init__(self, repo, originalctx, parents, text, user=None, > date=None, > + extra=None, editor=False): > +super(memlightctx, self).__init__(repo, text, user, date, extra) > +self._rev = None > +self._node = None > +self._originalctx = originalctx > +self._manifestnode = originalctx.manifestnode() > +parents = [(p or nullid) for p in parents] > +p1, p2 = self._parents = [changectx(self._repo, p) for p in parents] > + > +# sanity check to ensure that the reused manifest parents are > +# manifests of our commit parents > +mp1, mp2 = self.manifestctx().parents > +if p1 != nullid and p1.manifestctx().node() != mp1: > +raise > +if p2 != nullid and p2.manifestctx().node() != mp2: > +raise Maybe raise RuntimeError(...) to give the user (a new programmer) some hints about what's wrong. > + > +self._files = originalctx.files() > +self.substate = {} > + > +if extra: > +self._extra = extra.copy() > +else: > +self._extra = {} > + > +if self._extra.get('branch', '') == '': > +self._extra['branch'] = 'default' > + > +if editor: > +self._text = editor(self._repo, self, []) > +self._repo.savecommitmessage(self._text) > + > +def manifestnode(self): > +return self._manifestnode > + > +@propertycache > +def _manifestctx(self): > +return self._repo.manifestlog[self._manifestnode] > + > +def filectx(self, path, filelog=None): > +return self._originalctx.filectx(path, filelog=filelog) > + > +def commit(self): > +"""commit context to the repo""" > +return self._repo.commitctx(self) > + > +@property > +def _manifest(self): > +return self._originalctx.manifest() > + > +@propertycache > +def _status(self): Could we just delegate to orignalctx._status ? > +"""Calculate exact status from ``files`` specified in the ``origctx`` > +and parents manifests. > +""" > +man1 = self.p1().manifest() > +p2 = self._parents[1] > +# "1 < len(self._parents)" can't be used for checking > +# existence of the 2nd parent, because "memlightctx._parents" is > +# explicitly initialized by the list, of which length is 2. > +if p2.node() != nullid: > +man2 = p2.manifest() > +managing = lambda f: f in man1 or f in man2 > +
Re: [PATCH 1 of 3 V2] manifest: expose the parents() method
At the "*ctx" level, I think "parents" should return the same type of classes (manifestctx). Be aware that propertycache such classes could make refcount GC harder. I guess we can just use the low-level revlog's parents() in this case. Excerpts from Mateusz Kwapich's message of 2016-11-17 11:31:10 -0800: > # HG changeset patch > # User Mateusz Kwapich> # Date 1479409155 28800 > # Thu Nov 17 10:59:15 2016 -0800 > # Node ID 7dfd4c184ee087f2c05e1bdae8a10ccefbff7a92 > # Parent 96f2f50d923f94c23999df198ff16409e7539af8 > manifest: expose the parents() method > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > --- a/mercurial/manifest.py > +++ b/mercurial/manifest.py > @@ -1381,6 +1381,10 @@ class manifestctx(object): > memmf._manifestdict = self.read().copy() > return memmf > > +@propertycache > +def parents(self): > +return self._revlog().parents(self._node) > + > def read(self): > if not self._data: > if self._node == revlog.nullid: > @@ -1515,6 +1519,10 @@ class treemanifestctx(object): > memmf._treemanifest = self.read().copy() > return memmf > > +@propertycache > +def parents(self): > +return self._revlog().parents(self._node) > + > def readdelta(self, shallow=False): > '''Returns a manifest containing just the entries that are present > in this manifest, but not in its p1 manifest. This is efficient to > read ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 3 V2] memctx: allow the memlightctx thats reusing the manifest node
# HG changeset patch # User Mateusz Kwapich# Date 1479410909 28800 # Thu Nov 17 11:28:29 2016 -0800 # Node ID 0d41689f79cf22d8761dd6af9cb5da86008afe94 # Parent 4a0824bead3ba5980bd8528937fba5f7bb31ba9f memctx: allow the memlightctx thats reusing the manifest node When we have a lot of files writing a new manifest revision can be expensive. This commit adds a possibility for memctx to reuse a manifest from a different commit. This can be beneficial for commands that are creating metadata changes without any actual files changed like "hg metaedit" in evolve extension. I will send the change for evolve that leverages this once this is accepted. diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -1975,3 +1975,99 @@ class memfilectx(committablefilectx): def write(self, data, flags): """wraps repo.wwrite""" self._data = data + +class memlightctx(committablectx): +"""Like memctx but it's reusing the manifest of different commit. +Intended to be used by lightweight operations that are creating +metadata-only changes. + +Revision information is supplied at initialization time. 'repo' is the +current localrepo, 'ctx' is original revision which manifest we're reuisng +'parents' is a sequence of two parent revisions identifiers (pass None for +every missing parent), 'text' is the commit. + +user receives the committer name and defaults to current repository +username, date is the commit date in any format supported by +util.parsedate() and defaults to current date, extra is a dictionary of +metadata or is left empty. +""" +def __new__(cls, repo, path, *args, **kwargs): +return super(memlightctx, cls).__new__(cls, repo) + +def __init__(self, repo, originalctx, parents, text, user=None, date=None, + extra=None, editor=False): +super(memlightctx, self).__init__(repo, text, user, date, extra) +self._rev = None +self._node = None +self._originalctx = originalctx +self._manifestnode = originalctx.manifestnode() +parents = [(p or nullid) for p in parents] +p1, p2 = self._parents = [changectx(self._repo, p) for p in parents] + +# sanity check to ensure that the reused manifest parents are +# manifests of our commit parents +mp1, mp2 = self.manifestctx().parents +if p1 != nullid and p1.manifestctx().node() != mp1: +raise +if p2 != nullid and p2.manifestctx().node() != mp2: +raise + +self._files = originalctx.files() +self.substate = {} + +if extra: +self._extra = extra.copy() +else: +self._extra = {} + +if self._extra.get('branch', '') == '': +self._extra['branch'] = 'default' + +if editor: +self._text = editor(self._repo, self, []) +self._repo.savecommitmessage(self._text) + +def manifestnode(self): +return self._manifestnode + +@propertycache +def _manifestctx(self): +return self._repo.manifestlog[self._manifestnode] + +def filectx(self, path, filelog=None): +return self._originalctx.filectx(path, filelog=filelog) + +def commit(self): +"""commit context to the repo""" +return self._repo.commitctx(self) + +@property +def _manifest(self): +return self._originalctx.manifest() + +@propertycache +def _status(self): +"""Calculate exact status from ``files`` specified in the ``origctx`` +and parents manifests. +""" +man1 = self.p1().manifest() +p2 = self._parents[1] +# "1 < len(self._parents)" can't be used for checking +# existence of the 2nd parent, because "memlightctx._parents" is +# explicitly initialized by the list, of which length is 2. +if p2.node() != nullid: +man2 = p2.manifest() +managing = lambda f: f in man1 or f in man2 +else: +managing = lambda f: f in man1 + +modified, added, removed = [], [], [] +for f in self._files: +if not managing(f): +added.append(f) +elif self[f]: +modified.append(f) +else: +removed.append(f) + +return scmutil.status(modified, added, removed, [], [], [], []) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 3 V2] manifest: expose the parents() method
# HG changeset patch # User Mateusz Kwapich# Date 1479409155 28800 # Thu Nov 17 10:59:15 2016 -0800 # Node ID 7dfd4c184ee087f2c05e1bdae8a10ccefbff7a92 # Parent 96f2f50d923f94c23999df198ff16409e7539af8 manifest: expose the parents() method diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -1381,6 +1381,10 @@ class manifestctx(object): memmf._manifestdict = self.read().copy() return memmf +@propertycache +def parents(self): +return self._revlog().parents(self._node) + def read(self): if not self._data: if self._node == revlog.nullid: @@ -1515,6 +1519,10 @@ class treemanifestctx(object): memmf._treemanifest = self.read().copy() return memmf +@propertycache +def parents(self): +return self._revlog().parents(self._node) + def readdelta(self, shallow=False): '''Returns a manifest containing just the entries that are present in this manifest, but not in its p1 manifest. This is efficient to read ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 1 of 2] bdiff: early pruning of common prefix before doing expensive computations
On 11/17/2016 07:53 PM, Gregory Szorc wrote: On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerich> wrote: # HG changeset patch # User Mads Kiilerich > # Date 1479321935 -3600 # Wed Nov 16 19:45:35 2016 +0100 # Node ID 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 # Parent fe6a3576b863955a6c40ca46bd1d6c8e5384dedf bdiff: early pruning of common prefix before doing expensive computations It seems quite common that files don't change completely. New lines are often pretty much appended, and modifications will often only change a small section of the file which on average will be in the middle. There can thus be a big win by pruning a common prefix before starting the more expensive search for longest common substrings. Worst case, it will scan through a long sequence of similar bytes without encountering a newline. Splitlines will then have to do the same again ... twice for each side. If similar lines are found, splitlines will save the double iteration and hashing of the lines ... plus there will be less lines to find common substrings in. This change might in some cases make the algorith pick shorter or less optimal common substrings. We can't have the cake and eat it. This make hg --time bundle --base null -r 4.0 go from 14.5 to 15 s - a 3% increase. On mozilla-unified: perfbdiff -m 3041e4d59df2 ! wall 0.053088 comb 0.06 user 0.06 sys 0.00 (best of 100) to ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) perfbdiff 0e9928989e9c --alldata --count 10 ! wall 0.702075 comb 0.70 user 0.70 sys 0.00 (best of 15) to ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) \o/ Thank you for implementing this! This is likely the single biggest "easy" perf win in bdiff to be found. diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c --- a/mercurial/bdiff_module.c +++ b/mercurial/bdiff_module.c @@ -61,12 +61,12 @@ nomem: static PyObject *bdiff(PyObject *self, PyObject *args) Implementing this in the Python module means that CFFI/PyPy won't be able to realize the perf wins :( How do you feel about implementing this in bdiff.c? I guess other logic also should move from bdiff_module to bdiff.c? This was just the easy way to hook in before the two sides got split into lines. { - char *sa, *sb, *rb; + char *sa, *sb, *rb, *ia, *ib; PyObject *result = NULL; struct bdiff_line *al, *bl; struct bdiff_hunk l, *h; int an, bn, count; - Py_ssize_t len = 0, la, lb; + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; PyThreadState *_save; l.next = NULL; @@ -80,8 +80,17 @@ static PyObject *bdiff(PyObject *self, P } _save = PyEval_SaveThread(); - an = bdiff_splitlines(sa, la, ); - bn = bdiff_splitlines(sb, lb, ); + + lmax = la > lb ? lb : la; + for (ia = sa, ib = sb; +li < lmax && *ia == *ib; +++li, ++ia, ++ib) + if (*ia == '\n') + lcommon = li + 1; + /* we can almost add: if (li == lmax) lcommon = li; */ I can't get this to apply cleanly locally against the committed repo, so I can't look at the assembly. But, my guess is there are still some performance optimizations to be found. Absolutely. This is the "simple" way of doing it. With this as baseline, we can try to optimize it. If we compare words instead of bytes, this will require fewer loop iterations and fewer executed instructions. My work with bdiff_splitlines() tells me that the \n check inside the tight loop is likely adding a bit of overhead. It might be worth backtracking after first mismatch to find the last newline. Alternatively, if you iterate on word boundaries, you can do a bitwise AND against 0x0a0a0a0a... and compare against 0x0 and store the last known word offset containing a newline. On 64 bit machines, the cost of that newline check is reduced by 8x. Yes, that sound like good ideas. I guess the biggest win is to first find common prefix on words while ignoring newlines, then adjust and backtrack. When backtracking and looking for newlines, we could perhaps also use something along the lines of this draft: x = w ^ 0x0a0a0a0a x = x | x >> 4 x = x | x >> 2 x = x | x >> 1 x = x & x >> 16 x = x & x >> 8 if x&1==0, then we have a 0a in one of the bytes. But I have no idea how efficient all this bit shuffling would be compared to a simpler approach ;-) But also, we could probably win a lot by having different handling for "source code" and "binary files". The backtracking cost is probably not
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerichwrote: > # HG changeset patch > # User Mads Kiilerich > # Date 1479322051 -3600 > # Wed Nov 16 19:47:31 2016 +0100 > # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 > # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 > bdiff: early pruning of common suffix before doing expensive computations > > Similar to the previous change, but trimming trailing lines. > > On mozilla-unified: > perfbdiff -m 3041e4d59df2 > ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to > ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to > perfbdiff 0e9928989e9c --alldata --count 10 > ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) > ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) > > I can collaborate the perf improvements. On mozilla-unified (using a different clone and machine from what I wrote in my commit messages reporting bdiff perf - but the CPU model is the same), these 2 patches result in: $ perfbdiff -m 3041e4d59df2 ! wall 0.040755 comb 0.04 user 0.04 sys 0.00 (best of 100) ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) $ perfbdiff 0e9928989e9c --alldata --count 100 ! wall 4.59 comb 4.58 user 4.58 sys 0.00 (best of 3) ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) Compared to 4.0: $ perfbdiff -m 3041e4d59df2 ! wall 0.076924 comb 0.08 user 0.08 sys 0.00 (best of 100) ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) $ perfbdiff 0e9928989e9c --alldata --count 100 ! wall 8.892216 comb 8.88 user 8.88 sys 0.00 (best of 3) ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) Looking at the assembly and profiling output, I believe I've confirmed my suspicions from my comment on part 1: there is still room to optimize this code. Again, that can be deferred to a follow-up. > diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c > --- a/mercurial/bdiff_module.c > +++ b/mercurial/bdiff_module.c > @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P > struct bdiff_line *al, *bl; > struct bdiff_hunk l, *h; > int an, bn, count; > - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; > + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, > lmax; > PyThreadState *_save; > > l.next = NULL; > @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P > if (*ia == '\n') > lcommon = li + 1; > /* we can almost add: if (li == lmax) lcommon = li; */ > + lmax -= lcommon; > + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; > +li <= lmax && *ia == *ib; > +++li, --ia, --ib) > + if (*ia == '\n') > + lcommonend = li; > + /* printf("common %i %i\n", lcommon, lcommonend); */ > > - an = bdiff_splitlines(sa + lcommon, la - lcommon, ); > - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, ); > + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, > ); > + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, > ); > if (!al || !bl) > goto nomem; > > diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out > --- a/tests/test-bdiff.py.out > +++ b/tests/test-bdiff.py.out > @@ -28,9 +28,9 @@ showdiff( >'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): > 'x\n\nx\n\n' > 6 6 '' -> 'y\n\n' > - 'x\n\n' > - 9 9 '' -> 'y\n\n' > - 'x\n\nz\n' > + 'x\n' > + 8 8 '' -> '\ny\n' > + '\nx\n\nz\n' > showdiff( >'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', >'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): > diff --git a/tests/test-check-code.t b/tests/test-check-code.t > --- a/tests/test-check-code.t > +++ b/tests/test-check-code.t > @@ -15,10 +15,6 @@ New errors are not allowed. Warnings are >Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code > (glob) >Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code > (glob) >Skipping i18n/polib.py it has no-che?k-code (glob) > - mercurial/bdiff_module.c:89: > - > //printf("common prefix: %i next line: %i\n", li, llf); > - don't use //-style comments >Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob) >Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob) >Skipping mercurial/statprof.py it has no-che?k-code (glob) > - [1] > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel > ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 5] posix: move checkexec test file to .hg/cache
# HG changeset patch # User Mads Kiilerich# Date 1479383976 -3600 # Thu Nov 17 12:59:36 2016 +0100 # Node ID 1b5e959ebd859c27b3369124c926a512e222545c # Parent 854190becacb8abecbf5c18a777b02bbc045 posix: move checkexec test file to .hg/cache This avoids unnecessary churn in the working directory. It is not necessarily a fully valid assumption that .hg/cache is on the same filesystem as the working directory, but I think it is an acceptable approximation. It could also be the case that different parts of the working directory is on different mount points so checking in the root folder could also be wrong. diff --git a/mercurial/posix.py b/mercurial/posix.py --- a/mercurial/posix.py +++ b/mercurial/posix.py @@ -160,7 +160,10 @@ def checkexec(path): try: EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH -fh, fn = tempfile.mkstemp(dir=path, prefix='hg-checkexec-') +cachedir = os.path.join(path, '.hg', 'cache') +if not os.path.isdir(cachedir): +cachedir = path +fh, fn = tempfile.mkstemp(dir=cachedir, prefix='hg-checkexec-') try: os.close(fh) m = os.stat(fn).st_mode & 0o777 ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 4 of 5] posix: move checklink test file to .hg/cache
# HG changeset patch # User Mads Kiilerich# Date 1479383976 -3600 # Thu Nov 17 12:59:36 2016 +0100 # Node ID 5409e0c5e6c0764e802360a3912f7719885ba2b5 # Parent 23e2868f6dfd50d4d7ca3e0748cb62bcf4354a52 posix: move checklink test file to .hg/cache This avoids unnecessary churn in the working directory. It is not necessarily a fully valid assumption that .hg/cache is on the same filesystem as the working directory, but I think it is an acceptable approximation. It could also be the case that different parts of the working directory is on different mount points so checking in the root folder could also be wrong. diff --git a/mercurial/posix.py b/mercurial/posix.py --- a/mercurial/posix.py +++ b/mercurial/posix.py @@ -219,9 +219,16 @@ def checklink(path): # mktemp is not racy because symlink creation will fail if the # file already exists while True: -name = tempfile.mktemp(dir=path, prefix='hg-checklink-') +cachedir = os.path.join(path, '.hg', 'cache') +if os.path.isdir(cachedir): +checkdir = cachedir +else: +checkdir = path +cachedir = None +name = tempfile.mktemp(dir=checkdir, prefix='checklink-') try: -fd = tempfile.NamedTemporaryFile(dir=path, prefix='hg-checklink-') +fd = tempfile.NamedTemporaryFile(dir=checkdir, + prefix='hg-checklink-') try: os.symlink(os.path.basename(fd.name), name) os.unlink(name) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 5 of 5] posix: give checklink a fast path that cache the check file and is read only
# HG changeset patch # User Mads Kiilerich# Date 1421194526 -3600 # Wed Jan 14 01:15:26 2015 +0100 # Node ID 73b671fbed41d82a5dd46e485c61ddb8afe42faf # Parent 5409e0c5e6c0764e802360a3912f7719885ba2b5 posix: give checklink a fast path that cache the check file and is read only util.checklink would create a symlink and remove it again. That would sometimes happen multiple times. Write operations are relatively expensive and give disk tear and noise for applications monitoring file system activity. Instead of creating a symlink and deleting it again, just create it once and leave it in .hg/cache/check-link . If the file exists, just verify that os.islink reports true. We will assume that this check is as good as symlink creation not failing. Note: The symlink left in .hg/cache has to resolve to a file - otherwise 'make dist' will fail ... test-symlink-os-yes-fs-no.py does some monkey patching to simulate a platform without symlink support. The slightly different testing method requires additional monkeying. diff --git a/mercurial/posix.py b/mercurial/posix.py --- a/mercurial/posix.py +++ b/mercurial/posix.py @@ -220,6 +220,10 @@ def checklink(path): # file already exists while True: cachedir = os.path.join(path, '.hg', 'cache') +checklink = os.path.join(cachedir, 'checklink') +# try fast path, read only +if os.path.islink(checklink): +return True if os.path.isdir(cachedir): checkdir = cachedir else: @@ -231,7 +235,13 @@ def checklink(path): prefix='hg-checklink-') try: os.symlink(os.path.basename(fd.name), name) -os.unlink(name) +if cachedir is None: +os.unlink(name) +else: +try: +os.rename(name, checklink) +except OSError: +os.unlink(name) return True except OSError as inst: # link creation might race, try again diff --git a/tests/test-clone.t b/tests/test-clone.t --- a/tests/test-clone.t +++ b/tests/test-clone.t @@ -32,6 +32,7 @@ Trigger branchcache creation: $ ls .hg/cache branch2-served checkisexec + checklink checknoexec rbc-names-v1 rbc-revs-v1 @@ -48,6 +49,7 @@ Ensure branchcache got copied over: $ ls .hg/cache branch2-served checkisexec + checklink $ cat a a diff --git a/tests/test-symlink-os-yes-fs-no.py b/tests/test-symlink-os-yes-fs-no.py --- a/tests/test-symlink-os-yes-fs-no.py +++ b/tests/test-symlink-os-yes-fs-no.py @@ -35,6 +35,9 @@ commands.status(u, repo) def symlink_failure(src, dst): raise OSError(1, "Operation not permitted") os.symlink = symlink_failure +def islink_failure(path): +return False +os.path.islink = islink_failure # dereference links as if a Samba server has exported this to a # Windows client diff --git a/tests/test-tags.t b/tests/test-tags.t --- a/tests/test-tags.t +++ b/tests/test-tags.t @@ -673,6 +673,7 @@ Missing tags2* files means the cache was $ ls tagsclient/.hg/cache branch2-served checkisexec + checklink hgtagsfnodes1 rbc-names-v1 rbc-revs-v1 @@ -698,6 +699,7 @@ Running hg tags should produce tags2* fi $ ls tagsclient/.hg/cache branch2-served checkisexec + checklink hgtagsfnodes1 rbc-names-v1 rbc-revs-v1 ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 5] posix: give checkexec a fast path; keep the check files and test read only
# HG changeset patch # User Mads Kiilerich# Date 1421194526 -3600 # Wed Jan 14 01:15:26 2015 +0100 # Node ID 23e2868f6dfd50d4d7ca3e0748cb62bcf4354a52 # Parent f383046bc26d39d307523521de2df5885ba111ff posix: give checkexec a fast path; keep the check files and test read only Before, Mercurial would create a new temporary file every time, stat it, change its exec mode, stat it again, and delete it. Most of this dance was done to handle the rare and not-so-essential case of VFAT mounts on unix. The cost of that was paid by the much more common and important case of using normal file systems. Instead, try to create and preserve .hg/cache/checkisexec and .hg/cache/checknoexec with and without exec flag set. If the files exist and have correct exec flags set, we can conclude that that file system supports the exec flag. Best case, the whole exec check can thus be done with two stat calls. Worst case, we delete the wrong files and check as usual. That will be because temporary loss of exec bit or on file systems without support for the exec bit. In that case we check as we did before, with the additional overhead of one extra stat call. It is possible that this different test algorithm in some cases on odd file systems will give different behaviour. Again, I think it will be rare and special cases and I think it is worth the risk. test-clone.t happens to show the situation where checkisexec is left behind from the old style check, while checknoexec only will be created next time a exec check will be performed. diff --git a/mercurial/posix.py b/mercurial/posix.py --- a/mercurial/posix.py +++ b/mercurial/posix.py @@ -161,18 +161,55 @@ def checkexec(path): try: EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH cachedir = os.path.join(path, '.hg', 'cache') -if not os.path.isdir(cachedir): -cachedir = path -fh, fn = tempfile.mkstemp(dir=cachedir, prefix='hg-checkexec-') +if os.path.isdir(cachedir): +checkisexec = os.path.join(cachedir, 'checkisexec') +checknoexec = os.path.join(cachedir, 'checknoexec') + +try: +m = os.stat(checkisexec).st_mode +except OSError as e: +if e.errno != errno.ENOENT: +raise +# checkisexec does not exist - fall through ... +else: +# checkisexec exists, check if it actually is exec +if m & EXECFLAGS != 0: +# ensure checkisexec exists, check it isn't exec +try: +m = os.stat(checknoexec).st_mode +except OSError as e: +if e.errno != errno.ENOENT: +raise +file(checknoexec, 'w').close() # might fail +m = os.stat(checknoexec).st_mode +if m & EXECFLAGS == 0: +# check-exec is exec and check-no-exec is not exec +return True +# checknoexec exists but is exec - delete it +os.unlink(checknoexec) +# checkisexec exists but is not exec - delete it +os.unlink(checkisexec) + +# check using one file, leave it as checkisexec +checkdir = cachedir +else: +# check directly in path and don't leave checkisexec behind +checkdir = path +checkisexec = None +fh, fn = tempfile.mkstemp(dir=checkdir, prefix='hg-checkexec-') try: os.close(fh) m = os.stat(fn).st_mode -if m & EXECFLAGS: -return False -os.chmod(fn, m & 0o777 | EXECFLAGS) -return os.stat(fn).st_mode & EXECFLAGS +if m & EXECFLAGS == 0: +os.chmod(fn, m & 0o777 | EXECFLAGS) +if os.stat(fn).st_mode & EXECFLAGS != 0: +if checkisexec is not None: +os.rename(fn, checkisexec) +fn = None +return True finally: -os.unlink(fn) +if fn is not None: +os.unlink(fn) except (IOError, OSError): # we don't care, the user probably won't be able to commit anyway return False diff --git a/tests/test-clone.t b/tests/test-clone.t --- a/tests/test-clone.t +++ b/tests/test-clone.t @@ -31,6 +31,8 @@ Trigger branchcache creation: default 10:a7949464abda $ ls .hg/cache branch2-served + checkisexec + checknoexec rbc-names-v1 rbc-revs-v1 @@ -45,6 +47,7 @@ Ensure branchcache got copied over: $ ls .hg/cache branch2-served + checkisexec $ cat a a diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t --- a/tests/test-hardlinks.t +++ b/tests/test-hardlinks.t @@ -211,6 +211,8 @@ r4 has hardlinks in the
[PATCH 2 of 5] posix: simplify checkexec check
# HG changeset patch # User Mads Kiilerich# Date 1421194526 -3600 # Wed Jan 14 01:15:26 2015 +0100 # Node ID f383046bc26d39d307523521de2df5885ba111ff # Parent 1b5e959ebd859c27b3369124c926a512e222545c posix: simplify checkexec check Use a slightly simpler logic that in some cases can avoid an unnecessary chmod and stat. Instead of flipping the X bits, make it more clear that we rely on no X bits being set on initial file creation, and that at least some of them stick after they all have been set. diff --git a/mercurial/posix.py b/mercurial/posix.py --- a/mercurial/posix.py +++ b/mercurial/posix.py @@ -166,16 +166,16 @@ def checkexec(path): fh, fn = tempfile.mkstemp(dir=cachedir, prefix='hg-checkexec-') try: os.close(fh) -m = os.stat(fn).st_mode & 0o777 -new_file_has_exec = m & EXECFLAGS -os.chmod(fn, m ^ EXECFLAGS) -exec_flags_cannot_flip = ((os.stat(fn).st_mode & 0o777) == m) +m = os.stat(fn).st_mode +if m & EXECFLAGS: +return False +os.chmod(fn, m & 0o777 | EXECFLAGS) +return os.stat(fn).st_mode & EXECFLAGS finally: os.unlink(fn) except (IOError, OSError): # we don't care, the user probably won't be able to commit anyway return False -return not (new_file_has_exec or exec_flags_cannot_flip) def checklink(path): """check whether the given path is on a symlink-capable filesystem""" ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH stable,v2] shelve: add missing space in help text
# HG changeset patch # User Mads Kiilerich# Date 1479159805 -3600 # Mon Nov 14 22:43:25 2016 +0100 # Node ID 543c83999f3e1b0f8f62e5f2150b48bc47d55b0c # Parent 73b671fbed41d82a5dd46e485c61ddb8afe42faf shelve: add missing space in help text The change is trivial and unlikely to have been translated so we update translation files too. diff --git a/hgext/shelve.py b/hgext/shelve.py --- a/hgext/shelve.py +++ b/hgext/shelve.py @@ -882,7 +882,7 @@ def shelvecmd(ui, repo, *pats, **opts): files. If specific files or directories are named, only changes to those files are shelved. -In bare shelve(when no files are specified, without interactive, +In bare shelve (when no files are specified, without interactive, include and exclude option), shelving remembers information if the working directory was on newly created branch, in other words working directory was on different branch than its first parent. In this diff --git a/i18n/ja.po b/i18n/ja.po --- a/i18n/ja.po +++ b/i18n/ja.po @@ -10464,7 +10464,7 @@ msgstr "" "指定対象の変更のみが退避されます。" msgid "" -"In bare shelve(when no files are specified, without interactive,\n" +"In bare shelve (when no files are specified, without interactive,\n" "include and exclude option), shelving remembers information if the\n" "working directory was on newly created branch, in other words working\n" "directory was on different branch than its first parent. In this\n" diff --git a/i18n/pt_BR.po b/i18n/pt_BR.po --- a/i18n/pt_BR.po +++ b/i18n/pt_BR.po @@ -10582,7 +10582,7 @@ msgstr "" "engavetadas." msgid "" -"In bare shelve(when no files are specified, without interactive,\n" +"In bare shelve (when no files are specified, without interactive,\n" "include and exclude option), shelving remembers information if the\n" "working directory was on newly created branch, in other words working\n" "directory was on different branch than its first parent. In this\n" diff --git a/tests/test-shelve.t b/tests/test-shelve.t --- a/tests/test-shelve.t +++ b/tests/test-shelve.t @@ -36,7 +36,7 @@ shelve has a help message specific files or directories are named, only changes to those files are shelved. - In bare shelve(when no files are specified, without interactive, include + In bare shelve (when no files are specified, without interactive, include and exclude option), shelving remembers information if the working directory was on newly created branch, in other words working directory was on different branch than its first parent. In this situation ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH stable] shelve: add missing space in help text
On 11/17/2016 07:05 PM, Jun Wu wrote: Curious: in situations like this, do i18n translators need to re-translate the text? Yeah, the sentence has been translated to Japanese and Portuguese, but the missing space has probably already been "lost in translation". There should thus not be any need for re-translation in this case. Also, I did apparently not run tests and missed tests/test-shelve.t . I will resend. So much ado for so little - how hard can it be ;-) /Mads ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[Bug 5428] New: rebase should "rebase" obsmarkers to avoid divergence
https://bz.mercurial-scm.org/show_bug.cgi?id=5428 Bug ID: 5428 Summary: rebase should "rebase" obsmarkers to avoid divergence Product: Mercurial Version: default branch Hardware: All OS: All Status: UNCONFIRMED Severity: bug Priority: wish Component: rebase Assignee: bugzi...@selenic.com Reporter: arcppzju+hg...@gmail.com CC: mercurial-de...@selenic.com Before rebase: o c5f1a8 quark draft/rebase | rebase: calculate ancestors for --base separately (issue5420) | | o bb3f84 quark rebase unstable | | warn | | | x f35e20 quark |/ rebase: calculate ancestors for --base separately (issue5420) | o 3f18df quark | drawdag: update test repos by drawing the changelog DAG in ASCII After rebase -s 3f18df -d remote/@ --config experimental.allowdivergence=1: o 5fbcf9 quark divergent | rebase: calculate ancestors for --base separately (issue5420) | | o ad6aa4 quark rebase | | warn | | | o 976883 quark divergent |/ rebase: calculate ancestors for --base separately (issue5420) | o ee473a quark | drawdag: update test repos by drawing the changelog DAG in ASCII I'd expect an extra obs marker of "976883 is obsoleted by 5fbcf9" is created so no divergent will be introduced. This may be related to bug4849 but it seems they are not the same thing. -- You are receiving this mail because: You are on the CC list for the bug. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH stable] shelve: add missing space in help text
Curious: in situations like this, do i18n translators need to re-translate the text? Excerpts from Mads Kiilerich's message of 2016-11-17 18:53:37 +0100: > # HG changeset patch > # User Mads Kiilerich> # Date 1479159805 -3600 > # Mon Nov 14 22:43:25 2016 +0100 > # Branch stable > # Node ID 649dbbabbd067aea7fe96bab700835f60a9b1ef3 > # Parent e0ff47999b1384e42bdc99f5026c7e2ed5405047 > shelve: add missing space in help text > > diff --git a/hgext/shelve.py b/hgext/shelve.py > --- a/hgext/shelve.py > +++ b/hgext/shelve.py > @@ -865,7 +865,7 @@ def shelvecmd(ui, repo, *pats, **opts): > files. If specific files or directories are named, only changes to > those files are shelved. > > -In bare shelve(when no files are specified, without interactive, > +In bare shelve (when no files are specified, without interactive, > include and exclude option), shelving remembers information if the > working directory was on newly created branch, in other words working > directory was on different branch than its first parent. In this ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH stable] shelve: add missing space in help text
# HG changeset patch # User Mads Kiilerich# Date 1479159805 -3600 # Mon Nov 14 22:43:25 2016 +0100 # Branch stable # Node ID 649dbbabbd067aea7fe96bab700835f60a9b1ef3 # Parent e0ff47999b1384e42bdc99f5026c7e2ed5405047 shelve: add missing space in help text diff --git a/hgext/shelve.py b/hgext/shelve.py --- a/hgext/shelve.py +++ b/hgext/shelve.py @@ -865,7 +865,7 @@ def shelvecmd(ui, repo, *pats, **opts): files. If specific files or directories are named, only changes to those files are shelved. -In bare shelve(when no files are specified, without interactive, +In bare shelve (when no files are specified, without interactive, include and exclude option), shelving remembers information if the working directory was on newly created branch, in other words working directory was on different branch than its first parent. In this ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 7 of 7 v2] bdiff: give slight preference to removing trailing lines
On Tue, Nov 15, 2016 at 09:57:08PM +0100, Mads Kiilerich wrote: > # HG changeset patch > # User Mads Kiilerich> # Date 1479243409 -3600 > # Tue Nov 15 21:56:49 2016 +0100 > # Node ID efa30442953a6e1f096f8833c4ee8047375600d6 > # Parent e022e995415bd48ffe6d4d03dc97b99dd547cde1 > bdiff: give slight preference to removing trailing lines My own cursory perfbdiff runs suggest this is a perf wash (using `perfbdiff -m 3041e4d59df2` in the mozilla repo). Queued. Thanks! > > [This change could be folded into the previous changeset to minimize the repo > churn ...] > > Similar to the previous change, introduce an exception to the general > preference for matches in the middle of bdiff ranges: If the best match on the > B side starts at the beginning of the bdiff range, don't aim for the > middle-most A side match but for the earliest. > > New (later) matches on the A side will only be considered better if the > corresponding match on the B side *not* is at the beginning of the range. > Thus, if the best (middle-most) match on the B side turns out to be at the > beginning of the range, the earliest match on the A side will be used. > > The bundle size for 4.0 (hg bundle --base null -r 4.0 x.hg) happens to go from > 22807275 to 22808120 bytes - a 0.004% increase. > > diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c > --- a/mercurial/bdiff.c > +++ b/mercurial/bdiff.c > @@ -184,7 +184,7 @@ static int longest_match(struct bdiff_li > mj = j; > mk = k; > } else if (k == mk) { > - if (i > mi && i <= half) { > + if (i > mi && i <= half && j > b1) { > /* same match but closer to half */ > mi = i; > mj = j; > diff --git a/tests/test-bdiff.py b/tests/test-bdiff.py > --- a/tests/test-bdiff.py > +++ b/tests/test-bdiff.py > @@ -88,7 +88,7 @@ print("Diff 1 to 3 lines - preference fo > showdiff('a\n', 'a\n' * 3) > print("Diff 1 to 5 lines - preference for appending:") > showdiff('a\n', 'a\n' * 5) > -print("Diff 3 to 1 lines - preference for balanced recursion:") > +print("Diff 3 to 1 lines - preference for removing trailing lines:") > showdiff('a\n' * 3, 'a\n') > -print("Diff 5 to 1 lines - preference for balanced recursion:") > +print("Diff 5 to 1 lines - preference for removing trailing lines:") > showdiff('a\n' * 5, 'a\n') > diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out > --- a/tests/test-bdiff.py.out > +++ b/tests/test-bdiff.py.out > @@ -68,17 +68,15 @@ showdiff( >'a\na\na\na\na\n'): > 'a\n' > 2 2 '' -> 'a\na\na\na\n' > -Diff 3 to 1 lines - preference for balanced recursion: > +Diff 3 to 1 lines - preference for removing trailing lines: > showdiff( >'a\na\na\n', >'a\n'): > - 0 2 'a\n' -> '' > 'a\n' > - 4 6 'a\n' -> '' > -Diff 5 to 1 lines - preference for balanced recursion: > + 2 6 'a\na\n' -> '' > +Diff 5 to 1 lines - preference for removing trailing lines: > showdiff( >'a\na\na\na\na\n', >'a\n'): > - 0 4 'a\na\n' -> '' > 'a\n' > - 6 10 'a\na\n' -> '' > + 2 10 'a\na\na\na\n' -> '' > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] perf: unbust perfbdiff --alldata
Queued, thanks. On Thu, Nov 17, 2016 at 8:53 AM, Gregory Szorcwrote: > # HG changeset patch > # User Gregory Szorc > # Date 1479401572 28800 > # Thu Nov 17 08:52:52 2016 -0800 > # Node ID afec1b124bc35acde475fb2f5963b401c6d35203 > # Parent 21772a6a7861bc312b1ba862334d105e56b38d07 > perf: unbust perfbdiff --alldata > > This broke in f84fc6a92817 due to a refactored manifest API. > > The fix is a bit hacky - perfbdiff doesn't yet support tree manifests > for example. But it gets the job done. > > A test has been added for --alldata so this doesn't happen again. > > diff --git a/contrib/perf.py b/contrib/perf.py > --- a/contrib/perf.py > +++ b/contrib/perf.py > @@ -781,9 +781,9 @@ def perfbdiff(ui, repo, file_, rev=None, > if opts['alldata']: > # Load revisions associated with changeset. > ctx = repo[rev] > -mtext = repo.manifest.revision(ctx.manifestnode()) > +mtext = repo.manifestlog._revlog.revision(ctx.manifestnode()) > for pctx in ctx.parents(): > -pman = repo.manifest.revision(pctx.manifestnode()) > +pman = repo.manifestlog._revlog.revision(pctx.manifestnode()) > textpairs.append((pman, mtext)) > > # Load filelog revisions by iterating manifest delta. > diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t > --- a/tests/test-contrib-perf.t > +++ b/tests/test-contrib-perf.t > @@ -114,6 +114,7 @@ perfstatus >$ hg perfancestorset 2 >$ hg perfannotate a >$ hg perfbdiff -c 1 > + $ hg perfbdiff --alldata 1 >$ hg perfbranchmap >$ hg perfcca >$ hg perfchangegroupchangelog > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 4 of 4] worker: discard waited pid by anyone who noticed it first
Queued this. Thanks, and thanks to Jun for reviewing. On Thu, Nov 17, 2016 at 5:20 AM, Yuya Nishiharawrote: > # HG changeset patch > # User Yuya Nishihara > # Date 1479383829 -32400 > # Thu Nov 17 20:57:09 2016 +0900 > # Node ID 5e9721975f8cda7daea6ee0cc6aaad7af5096a08 > # Parent 4be342413ddecd0eb6d76c4e5d8bb38fee28061d > worker: discard waited pid by anyone who noticed it first > > This makes sure all waited pids are removed before calling killworkers() > even if waitpid()-pids.discard() sequence is interrupted by another SIGCHLD. > > diff --git a/mercurial/worker.py b/mercurial/worker.py > --- a/mercurial/worker.py > +++ b/mercurial/worker.py > @@ -111,11 +111,14 @@ def _posixworker(ui, func, staticargs, a > if e.errno == errno.EINTR: > continue > elif e.errno == errno.ECHILD: > -break # ignore ECHILD > +# child would already be reaped, but pids yet been > +# updated (maybe interrupted just after waitpid) > +pids.discard(pid) > +break > else: > raise > if p: > -pids.remove(p) > +pids.discard(p) > st = _exitstatus(st) > if st and not problem[0]: > problem[0] = st > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
# HG changeset patch # User Mads Kiilerich# Date 1479322051 -3600 # Wed Nov 16 19:47:31 2016 +0100 # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 bdiff: early pruning of common suffix before doing expensive computations Similar to the previous change, but trimming trailing lines. On mozilla-unified: perfbdiff -m 3041e4d59df2 ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to perfbdiff 0e9928989e9c --alldata --count 10 ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c --- a/mercurial/bdiff_module.c +++ b/mercurial/bdiff_module.c @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P struct bdiff_line *al, *bl; struct bdiff_hunk l, *h; int an, bn, count; - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; PyThreadState *_save; l.next = NULL; @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P if (*ia == '\n') lcommon = li + 1; /* we can almost add: if (li == lmax) lcommon = li; */ + lmax -= lcommon; + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; +li <= lmax && *ia == *ib; +++li, --ia, --ib) + if (*ia == '\n') + lcommonend = li; + /* printf("common %i %i\n", lcommon, lcommonend); */ - an = bdiff_splitlines(sa + lcommon, la - lcommon, ); - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, ); + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, ); + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, ); if (!al || !bl) goto nomem; diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out --- a/tests/test-bdiff.py.out +++ b/tests/test-bdiff.py.out @@ -28,9 +28,9 @@ showdiff( 'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): 'x\n\nx\n\n' 6 6 '' -> 'y\n\n' - 'x\n\n' - 9 9 '' -> 'y\n\n' - 'x\n\nz\n' + 'x\n' + 8 8 '' -> '\ny\n' + '\nx\n\nz\n' showdiff( 'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', 'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): diff --git a/tests/test-check-code.t b/tests/test-check-code.t --- a/tests/test-check-code.t +++ b/tests/test-check-code.t @@ -15,10 +15,6 @@ New errors are not allowed. Warnings are Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code (glob) Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code (glob) Skipping i18n/polib.py it has no-che?k-code (glob) - mercurial/bdiff_module.c:89: - > //printf("common prefix: %i next line: %i\n", li, llf); - don't use //-style comments Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob) Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob) Skipping mercurial/statprof.py it has no-che?k-code (glob) - [1] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] perf: unbust perfbdiff --alldata
# HG changeset patch # User Gregory Szorc# Date 1479401572 28800 # Thu Nov 17 08:52:52 2016 -0800 # Node ID afec1b124bc35acde475fb2f5963b401c6d35203 # Parent 21772a6a7861bc312b1ba862334d105e56b38d07 perf: unbust perfbdiff --alldata This broke in f84fc6a92817 due to a refactored manifest API. The fix is a bit hacky - perfbdiff doesn't yet support tree manifests for example. But it gets the job done. A test has been added for --alldata so this doesn't happen again. diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -781,9 +781,9 @@ def perfbdiff(ui, repo, file_, rev=None, if opts['alldata']: # Load revisions associated with changeset. ctx = repo[rev] -mtext = repo.manifest.revision(ctx.manifestnode()) +mtext = repo.manifestlog._revlog.revision(ctx.manifestnode()) for pctx in ctx.parents(): -pman = repo.manifest.revision(pctx.manifestnode()) +pman = repo.manifestlog._revlog.revision(pctx.manifestnode()) textpairs.append((pman, mtext)) # Load filelog revisions by iterating manifest delta. diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t --- a/tests/test-contrib-perf.t +++ b/tests/test-contrib-perf.t @@ -114,6 +114,7 @@ perfstatus $ hg perfancestorset 2 $ hg perfannotate a $ hg perfbdiff -c 1 + $ hg perfbdiff --alldata 1 $ hg perfbranchmap $ hg perfcca $ hg perfchangegroupchangelog ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 3 of 4] worker: kill workers after all zombie processes are reaped
The 4 patches LGTM. Upon closer investigation, I think this is a brilliant solution that solves the redundant "os.kill" problem. A minor issue is that "killworkers" becomes easier to be called multiple times, there could be redundant "signal.signal" calls. But that does not affect correctness and can be improved later. Excerpts from Yuya Nishihara's message of 2016-11-17 22:20:01 +0900: > # HG changeset patch > # User Yuya Nishihara> # Date 1479384538 -32400 > # Thu Nov 17 21:08:58 2016 +0900 > # Node ID 4be342413ddecd0eb6d76c4e5d8bb38fee28061d > # Parent 60c9574e29b43c6d9579b99249ba11ff19af77a0 > worker: kill workers after all zombie processes are reaped > > Since we now wait child processes in non-blocking way (changed by 7bc25549e084 > and e8fb03cfbbde), we don't have to kill them in the middle of the waitpid() > loop. This change will help solving a possible race of > waitpid()-pids.discard() > sequence and another SIGCHLD. > > waitforworkers() is called by cleanup(), in which case we do killworkers() > beforehand so we can remove killworkers() from waitforworkers(). > > diff --git a/mercurial/worker.py b/mercurial/worker.py > --- a/mercurial/worker.py > +++ b/mercurial/worker.py > @@ -119,9 +119,10 @@ def _posixworker(ui, func, staticargs, a > st = _exitstatus(st) > if st and not problem[0]: > problem[0] = st > -killworkers() > def sigchldhandler(signum, frame): > waitforworkers(blocking=False) > +if problem[0]: With a traditional multi-thread mindset, I thought SIGCHLD happening here could cause trouble, while it's actually okay. > +killworkers() > oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler) > for pargs in partition(args, workers): > pid = os.fork() ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] match: adding non-recursive directory matching
(sorry for late reply) At Wed, 26 Oct 2016 14:02:48 -0700, Rodrigo Damazio wrote: > > On Wed, Oct 26, 2016 at 12:17 AM, FUJIWARA Katsunori> wrote: > > > > > At Tue, 25 Oct 2016 19:51:59 -0700, > > Rodrigo Damazio wrote: > > > > > > On Tue, Oct 25, 2016 at 4:31 PM, FUJIWARA Katsunori < > > fo...@lares.dti.ne.jp> > > > wrote: > > > > > > > > > > > At Mon, 24 Oct 2016 10:34:52 -0700, > > > > Rodrigo Damazio wrote: [snip] > > > On the other hand, you assume that newly introduced *path syntaxes > > > > will be recursive, as below. Would you assume that default > > > > recursive-ness is different between *glob and *path syntaxes ? > > > > > > > > > > path would be recursive, as will glob that ends with ** or regex that > > ends > > > with .* > > > > > > > > > > > Also, for discussion: I assume the *path patterns will be recursive > > when > > > > > they reference a directory. Do we also want a non-recursive > > equivalent > > > > > (rootexact, rootfiles, rootnonrecursive or something like that)? > > > > How about adding syntax type "file"/"dir" ? > > > > = = = > > type for recursive for non-recursive > > = = = > > glob use "**" use "*" > > reomit "$" append "$" > > path always(*1) > > file always > > dir always(*2) > > = = = > > > > (*1) match against both file and directory > > (*2) match against only directory > > > > "dir" might be overkill, though :-) (is it useful in resolving name > > collision at merging or so ?) > > > > foozy, thanks so much for the review and discussion. > Sounds like we do agree about the glob behavior then, so let me know if > you'd like any changes to the latest version of this patch, other than > improving documentation. I'm happy to send an updated version as soon as > someone is ready to review. > > I understand the difference between dir and path (and between the original > version of this patch and file) would be that they'd validate the type of > entry being matched (so that passing a filename to dir or dir name to file > would be an error) - is that what you have in mind? Yes > "passing a filename to dir or dir name to file would be an error" > The current matchers > don't have a good mechanism to verify the type, so some significant > rewiring would need to be done to pass that information down. Current match implement uses two additional pattern suffix '(?:/|$)' and '$' to control recursive matching of "glob" and "path". The former allows to match recursively (for "glob" and "path"), and the latter doesn't (only for "glob"). I simply think using this technique to implement pattern types "file" and "dir". path:PATTERN => ESCAPED-PATTERN(?:/|$) file:PATTERN => ESCAPED-PATTERN$ dif:PATTERN => ESCAPED-PATTERN/ > Another thought is that by supporting file and dir, you're incentivizing > developers to rely on smarter name collision support (and also case > collisions) - one could argue that there's no reason for the complexity > caused by that. -- [FUJIWARA Katsunori] fo...@lares.dti.ne.jp ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] memctx: allow the memctx to reuse the manifest node
On Wed, Nov 16, 2016 at 08:15:28PM +, Mateusz Kwapich wrote: > # HG changeset patch > # User Mateusz Kwapich> # Date 1479327311 0 > # Wed Nov 16 20:15:11 2016 + > # Node ID 0fd8175aa4e8a3a0cd6f637b34bfa25a103c454e > # Parent c27614f2dec1405db606d1ef871dfabf72cc0737 > memctx: allow the memctx to reuse the manifest node > > When we have a lot of files writing a new manifest revision can be expensive. > This commit adds a possibility for memctx to reuse a manifest from a different > commit. This can be beneficial for commands that are creating metadata changes > without any actual files changed like "hg metaedit" in evolve extension. > > I will send the change for evolve that leverages this once this is accepted. > > diff --git a/mercurial/context.py b/mercurial/context.py > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -1160,6 +1160,7 @@ > changes=None): > self._repo = repo > self._rev = None > +self._manifestnode = None > self._node = None > self._text = text > if date: > @@ -1268,7 +1269,8 @@ > return None > > def manifestnode(self): > -return None > +return self._manifestnode > + > def user(self): > return self._user or self._repo.ui.username() > def date(self): > @@ -1833,11 +1835,12 @@ > # this field to determine what to do in filectxfn. > _returnnoneformissingfiles = True > > -def __init__(self, repo, parents, text, files, filectxfn, user=None, > - date=None, extra=None, editor=False): > +def __init__(self, repo, parents, text, files, filectxfn=None, user=None, > + date=None, extra=None, editor=False, manifestnode=None): > super(memctx, self).__init__(repo, text, user, date, extra) > self._rev = None > self._node = None > +self._manifestnode = manifestnode > parents = [(p or nullid) for p in parents] > p1, p2 = parents > self._parents = [changectx(self._repo, p) for p in (p1, p2)] > diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py > --- a/mercurial/localrepo.py > +++ b/mercurial/localrepo.py > @@ -1695,7 +1695,11 @@ > tr = self.transaction("commit") > trp = weakref.proxy(tr) > > -if ctx.files(): > +if ctx.manifestnode(): As-is, this puts a very sharp edge on the API. Please add an assert that the files list is empty if a manifestnode is passed in the ctor. Thanks! > +# reuse an existing manifest revision > +mn = ctx.manifestnode() > +files = ctx.files() > +elif ctx.files(): > m1ctx = p1.manifestctx() > m2ctx = p2.manifestctx() > mctx = m1ctx.copy() > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 8 of 9 v3] auditvfs: forward options property from nested vfs
> # HG changeset patch > # User Augie Fackler > # Date 1470410362 14400 > # Fri Aug 05 11:19:22 2016 -0400 > # Node ID 7ec7023b4cb4acaeb42b4fa4ed28b82ebed41b71 > # Parent d8942cf57007c0d18dd0400c3c0ee881c3ca2a31 > auditvfs: forward options property from nested vfs > > This was breaking my ability to use treemanifests in bundlerepos, and > was deeply mysterious. We should probably just make the options > property a formal part of the vfs API, and make it a required > construction parameter. Sadly, I don't have time to dive into that > refactor right now. > > diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py > --- a/mercurial/scmutil.py > +++ b/mercurial/scmutil.py > @@ -638,6 +638,14 @@ class auditvfs(object): > def mustaudit(self, onoff): > self.vfs.mustaudit = onoff > > +@property > +def options(self): > +return self.vfs.options > + > +@options.setter > +def options(self, value): > +self.vfs.options = value > + > class filtervfs(abstractvfs, auditvfs): > '''Wrapper vfs for filtering filenames with a function.''' > Are you aware that this very changeset makes a dramatic performance difference when cloning from a HG20 bundle with generaldelta? Our product repository is around 100MB when bundled, and a clone from this bundle would take 30min on my machine. Tested with hg rev 43924f3a55fa, which is the immediate predecessor. However, with this patch applied (i.e., rev 69109052d9ac), the time needed for the operation drops to 1min, which is something I can live with ;). added 53460 changesets with 98526 changes to 14774 files (+6 heads) real0m56.661s user0m54.604s ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
mercurial@30418: new changeset
New changeset in mercurial: http://selenic.com/repo/hg//rev/1156ec81f709 changeset: 30418:1156ec81f709 bookmark:@ tag: tip user:Jun Wudate:Tue Nov 15 20:25:51 2016 + summary: util: improve iterfile so it chooses code path wisely -- Repository URL: http://selenic.com/repo/hg/ ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 8 of 9 v3] auditvfs: forward options property from nested vfs
> On Nov 17, 2016, at 7:32 AM, stefan...@gmail.com wrote: > >> # HG changeset patch >> # User Augie Fackler >> # Date 1470410362 14400 >> # Fri Aug 05 11:19:22 2016 -0400 >> # Node ID 7ec7023b4cb4acaeb42b4fa4ed28b82ebed41b71 >> # Parent d8942cf57007c0d18dd0400c3c0ee881c3ca2a31 >> auditvfs: forward options property from nested vfs >> >> This was breaking my ability to use treemanifests in bundlerepos, and >> was deeply mysterious. We should probably just make the options >> property a formal part of the vfs API, and make it a required >> construction parameter. Sadly, I don't have time to dive into that >> refactor right now. >> >> diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py >> --- a/mercurial/scmutil.py >> +++ b/mercurial/scmutil.py >> @@ -638,6 +638,14 @@ class auditvfs(object): >> def mustaudit(self, onoff): >> self.vfs.mustaudit = onoff >> >> +@property >> +def options(self): >> +return self.vfs.options >> + >> +@options.setter >> +def options(self, value): >> +self.vfs.options = value >> + >> class filtervfs(abstractvfs, auditvfs): >> '''Wrapper vfs for filtering filenames with a function.''' >> > > Are you aware that this very changeset makes a dramatic performance difference > when cloning from a HG20 bundle with generaldelta? Our product repository is > around 100MB when bundled, and a clone from this bundle would take 30min on my > machine. Tested with hg rev 43924f3a55fa, which is the immediate predecessor. > However, with this patch applied (i.e., rev 69109052d9ac), the time needed for > the operation drops to 1min, which is something I can live with ;). > > added 53460 changesets with 98526 changes to 14774 files (+6 heads) > real0m56.661s > user0m54.604s I had no idea, but I’ll certainly take an accidental 30x performance win! Thanks! signature.asc Description: Message signed with OpenPGP using GPGMail ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] memctx: allow the memctx to reuse the manifest node
Hi, I’ll try write some tests for it – but it’s hard since we don’t have any feature that could directly benefit from it in core. Best, Mateusz On 11/16/16, 10:02 PM, "Jun Wu"wrote: I like the direction! I'm a bit concerned about how other existing "memctx" methods (ex. "_status", "__contains__") would work. I'd like to make sure they still have reasonable behavior with this change. See inline comments. Excerpts from Mateusz Kwapich's message of 2016-11-16 20:15:28 +: > # HG changeset patch > # User Mateusz Kwapich > # Date 1479327311 0 > # Wed Nov 16 20:15:11 2016 + > # Node ID 0fd8175aa4e8a3a0cd6f637b34bfa25a103c454e > # Parent c27614f2dec1405db606d1ef871dfabf72cc0737 > memctx: allow the memctx to reuse the manifest node > > When we have a lot of files writing a new manifest revision can be expensive. > This commit adds a possibility for memctx to reuse a manifest from a different > commit. This can be beneficial for commands that are creating metadata changes > without any actual files changed like "hg metaedit" in evolve extension. > > I will send the change for evolve that leverages this once this is accepted. > > diff --git a/mercurial/context.py b/mercurial/context.py > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -1160,6 +1160,7 @@ > changes=None): > self._repo = repo > self._rev = None > +self._manifestnode = None > self._node = None > self._text = text > if date: > @@ -1268,7 +1269,8 @@ > return None > > def manifestnode(self): > -return None > +return self._manifestnode > + > def user(self): > return self._user or self._repo.ui.username() > def date(self): > @@ -1833,11 +1835,12 @@ > # this field to determine what to do in filectxfn. > _returnnoneformissingfiles = True > > -def __init__(self, repo, parents, text, files, filectxfn, user=None, > - date=None, extra=None, editor=False): > +def __init__(self, repo, parents, text, files, filectxfn=None, user=None, > + date=None, extra=None, editor=False, manifestnode=None): IIUC, "manifestnode" conflicts with "files" / "filectxfn" here. If that's true, I think we want: - memctx constructor change: - "files" / "filectxfn" are optional, and conflict with "manifestnode" - probably worthwhile to update the class docstring - if "manifestnode" is provided: - change "memctx._manifest" to read "self._manifestnode" directly - change "memctx._files" to get the files changed lazily from manifests Then "memctx"'s other methods like "status", "filenode", "__contains__" would probably work as expected and feel consistent. If the new implementation diverges significantly, I think it may be a good idea to do it in a different class, like "memlightctx" which must reuse a manifest node. By providing the “files” we can commit the ctx without reading the manifest which gives us a serious perf boost if the manifest is large. > super(memctx, self).__init__(repo, text, user, date, extra) > self._rev = None > self._node = None > +self._manifestnode = manifestnode > parents = [(p or nullid) for p in parents] > p1, p2 = parents > self._parents = [changectx(self._repo, p) for p in (p1, p2)] > diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py > --- a/mercurial/localrepo.py > +++ b/mercurial/localrepo.py > @@ -1695,7 +1695,11 @@ > tr = self.transaction("commit") > trp = weakref.proxy(tr) > > -if ctx.files(): > +if ctx.manifestnode(): I think we want a sanity check here to prevent buggy linkrevs that can give us trouble in the future. If ctx.parents' manifestnodes are differnet from ctx.manifestnode's parents or ctx.manifestnode (for the empty commit case), abort. I’m fine with doing sanity checks. Thanks for suggestion. > +# reuse an existing manifest revision > +mn = ctx.manifestnode() > +files = ctx.files() > +elif ctx.files(): > m1ctx = p1.manifestctx() > m2ctx = p2.manifestctx() > mctx = m1ctx.copy() ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 3 of 4] worker: kill workers after all zombie processes are reaped
# HG changeset patch # User Yuya Nishihara# Date 1479384538 -32400 # Thu Nov 17 21:08:58 2016 +0900 # Node ID 4be342413ddecd0eb6d76c4e5d8bb38fee28061d # Parent 60c9574e29b43c6d9579b99249ba11ff19af77a0 worker: kill workers after all zombie processes are reaped Since we now wait child processes in non-blocking way (changed by 7bc25549e084 and e8fb03cfbbde), we don't have to kill them in the middle of the waitpid() loop. This change will help solving a possible race of waitpid()-pids.discard() sequence and another SIGCHLD. waitforworkers() is called by cleanup(), in which case we do killworkers() beforehand so we can remove killworkers() from waitforworkers(). diff --git a/mercurial/worker.py b/mercurial/worker.py --- a/mercurial/worker.py +++ b/mercurial/worker.py @@ -119,9 +119,10 @@ def _posixworker(ui, func, staticargs, a st = _exitstatus(st) if st and not problem[0]: problem[0] = st -killworkers() def sigchldhandler(signum, frame): waitforworkers(blocking=False) +if problem[0]: +killworkers() oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler) for pargs in partition(args, workers): pid = os.fork() ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 4 of 4] worker: discard waited pid by anyone who noticed it first
# HG changeset patch # User Yuya Nishihara# Date 1479383829 -32400 # Thu Nov 17 20:57:09 2016 +0900 # Node ID 5e9721975f8cda7daea6ee0cc6aaad7af5096a08 # Parent 4be342413ddecd0eb6d76c4e5d8bb38fee28061d worker: discard waited pid by anyone who noticed it first This makes sure all waited pids are removed before calling killworkers() even if waitpid()-pids.discard() sequence is interrupted by another SIGCHLD. diff --git a/mercurial/worker.py b/mercurial/worker.py --- a/mercurial/worker.py +++ b/mercurial/worker.py @@ -111,11 +111,14 @@ def _posixworker(ui, func, staticargs, a if e.errno == errno.EINTR: continue elif e.errno == errno.ECHILD: -break # ignore ECHILD +# child would already be reaped, but pids yet been +# updated (maybe interrupted just after waitpid) +pids.discard(pid) +break else: raise if p: -pids.remove(p) +pids.discard(p) st = _exitstatus(st) if st and not problem[0]: problem[0] = st ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel