I must say (and you are going to say that those are idle complaints) that asserts and prints are far inferior than a proper debugger which is surprisingly hard to use in the case of mercurial tests
On Tue, Sep 20, 2016 at 4:39 PM, Augie Fackler <r...@durin42.com> wrote: > On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: >> This should fix the issues presented. >> >> There is one problem which is that the hash in test-rebase-detach >> changes. The way I see it is just an RNG phase-shift, but it might be >> a real bug. Some actual input will be appreciated. > > It's not possibly RNG related - there's no RNG that goes into the sha1 > of a node. I've added --debug to the failure in test-rebase-detach and > compared them between cpython and pypy: > > cpython: > @@ -366,11 +366,24 @@ > > > $ hg log --rev tip --debug > - changeset: 8:9472f4b1d736 > + changeset: 8:9472f4b1d7366902d0098aa1401e3f170000cc56 > tag: tip > + phase: draft > + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 > + parent: -1:0000000000000000000000000000000000000000 > + manifest: 8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 > user: test > date: Thu Jan 01 00:00:00 1970 +0000 > - summary: Collapsed revision > + files: F > + files+: E > + extra: branch=default > + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 > + description: > + Collapsed revision > + * I > + * Merge > + * J > + > > > pypy: > @@ -366,11 +366,24 @@ > > > $ hg log --rev tip --debug > - changeset: 8:9472f4b1d736 > + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 > tag: tip > + phase: draft > + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 > + parent: -1:0000000000000000000000000000000000000000 > + manifest: 8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 > user: test > date: Thu Jan 01 00:00:00 1970 +0000 > - summary: Collapsed revision > + files: F > + files+: E > + extra: branch=default > + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 > + description: > + Collapsed revision > + * I > + * Merge > + * J > + > > Which means the manifest is hashing out differently. That suggests > there's a subtle bug in how the manifest is hashing out in the pypy > version. Adding a `hg debugdata -m 8` (which prints raw manifest > data), and this is what I get on cpython: > > $ hg debugdata -m 8 > + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) > + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) > + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) > + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) > > on pypy: > $ hg debugdata -m 8 > + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) > + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) > + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) > + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) > + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) > > > So you've got a subtle bug here where the compaction of the > lazymanifest isn't working right - probably around merging manifests > somehow. Does this give you enough to get started? > > (I'd probably start debugging this by littering _lazymanifest._compact > and _lazymanifest.text with asserts until I managed to trip over the > un-sorted lines, and then backtrack from there to figure out how they > managed to survive.) > >> >> >> >> On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fij...@gmail.com> wrote: >> > # HG changeset patch >> > # User Maciej Fijalkowski <fij...@gmail.com> >> > # Date 1473680234 -7200 >> > # Mon Sep 12 13:37:14 2016 +0200 >> > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >> > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> > lazymanifest: write a more efficient, pypy friendly version of lazymanifest >> > >> > diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> > --- a/mercurial/manifest.py >> > +++ b/mercurial/manifest.py >> > @@ -104,69 +104,297 @@ >> > _checkforbidden(files) >> > return ''.join(lines) >> > >> > -class _lazymanifest(dict): >> > - """This is the pure implementation of lazymanifest. >> > - >> > - It has not been optimized *at all* and is not lazy. >> > - """ >> > - >> > - def __init__(self, data): >> > - dict.__init__(self) >> > - for f, n, fl in _parse(data): >> > - self[f] = n, fl >> > - >> > - def __setitem__(self, k, v): >> > - node, flag = v >> > - assert node is not None >> > - if len(node) > 21: >> > - node = node[:21] # match c implementation behavior >> > - dict.__setitem__(self, k, (node, flag)) >> > +class lazymanifestiter(object): >> > + def __init__(self, lm): >> > + self.pos = 0 >> > + self.lm = lm >> > >> > def __iter__(self): >> > - return iter(sorted(dict.keys(self))) >> > + return self >> > >> > - def iterkeys(self): >> > - return iter(sorted(dict.keys(self))) >> > + def next(self): >> > + try: >> > + data, pos = self.lm._get(self.pos) >> > + except IndexError: >> > + raise StopIteration >> > + if pos == -1: >> > + self.pos += 1 >> > + return data[0] >> > + self.pos += 1 >> > + zeropos = data.find('\x00', pos) >> > + return data[pos:zeropos] >> > >> > - def iterentries(self): >> > - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) >> > +class lazymanifestiterentries(object): >> > + def __init__(self, lm): >> > + self.lm = lm >> > + self.pos = 0 >> > + >> > + def __iter__(self): >> > + return self >> > + >> > + def next(self): >> > + try: >> > + data, pos = self.lm._get(self.pos) >> > + except IndexError: >> > + raise StopIteration >> > + if pos == -1: >> > + self.pos += 1 >> > + return data >> > + zeropos = data.find('\x00', pos) >> > + hashval = unhexlify(data, self.lm.extrainfo[self.pos], >> > + zeropos + 1, 40) >> > + flags = self.lm._getflags(data, self.pos, zeropos) >> > + self.pos += 1 >> > + return (data[pos:zeropos], hashval, flags) >> > + >> > +def unhexlify(data, extra, pos, length): >> > + s = data[pos:pos + length].decode('hex') >> > + if extra: >> > + s += chr(extra & 0xff) >> > + return s >> > + >> > +def _cmp(a, b): >> > + return (a > b) - (a < b) >> > + >> > +class _lazymanifest(object): >> > + def __init__(self, data, positions=None, extrainfo=None, >> > extradata=None): >> > + if positions is None: >> > + self.positions = self.findlines(data) >> > + self.extrainfo = [0] * len(self.positions) >> > + self.data = data >> > + self.extradata = [] >> > + else: >> > + self.positions = positions[:] >> > + self.extrainfo = extrainfo[:] >> > + self.extradata = extradata[:] >> > + self.data = data >> > + >> > + def findlines(self, data): >> > + if not data: >> > + return [] >> > + pos = data.find("\n") >> > + positions = [0] >> > + while pos < len(data) - 1 and pos != -1: >> > + positions.append(pos + 1) >> > + pos = data.find("\n", pos + 1) >> > + return positions >> > + >> > + def _get(self, index): >> > + # get the position encoded in pos: >> > + # positive number is an index in 'data' >> > + # negative number is in extrapieces >> > + pos = self.positions[index] >> > + if pos >= 0: >> > + return self.data, pos >> > + return self.extradata[-pos - 1], -1 >> > + >> > + def _getkey(self, pos): >> > + if pos >= 0: >> > + return self.data[pos:self.data.find('\x00', pos + 1)] >> > + return self.extradata[-pos - 1][0] >> > + >> > + def bsearch(self, key): >> > + first = 0 >> > + last = len(self.positions) - 1 >> > + >> > + while first <= last: >> > + midpoint = (first + last)//2 >> > + nextpos = self.positions[midpoint] >> > + candidate = self._getkey(nextpos) >> > + r = _cmp(key, candidate) >> > + if r == 0: >> > + return midpoint >> > + else: >> > + if r < 0: >> > + last = midpoint - 1 >> > + else: >> > + first = midpoint + 1 >> > + return -1 >> > + >> > + def bsearch2(self, key): >> > + # same as the above, but will always return the position >> > + # done for performance reasons >> > + first = 0 >> > + last = len(self.positions) - 1 >> > + >> > + while first <= last: >> > + midpoint = (first + last)//2 >> > + nextpos = self.positions[midpoint] >> > + candidate = self._getkey(nextpos) >> > + r = _cmp(key, candidate) >> > + if r == 0: >> > + return (midpoint, True) >> > + else: >> > + if r < 0: >> > + last = midpoint - 1 >> > + else: >> > + first = midpoint + 1 >> > + return (first, False) >> > + >> > + def __contains__(self, key): >> > + return self.bsearch(key) != -1 >> > + >> > + def _getflags(self, data, needle, pos): >> > + start = pos + 41 >> > + end = data.find("\n", start) >> > + if end == -1: >> > + end = len(data) - 1 >> > + if start == end: >> > + return '' >> > + return self.data[start:end] >> > + >> > + def __getitem__(self, key): >> > + if not isinstance(key, str): >> > + raise TypeError("getitem: manifest keys must be a string.") >> > + needle = self.bsearch(key) >> > + if needle == -1: >> > + raise KeyError >> > + data, pos = self._get(needle) >> > + if pos == -1: >> > + return (data[1], data[2]) >> > + zeropos = data.find('\x00', pos) >> > + assert 0 <= needle <= len(self.positions) >> > + assert len(self.extrainfo) == len(self.positions) >> > + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) >> > + flags = self._getflags(data, needle, zeropos) >> > + return (hashval, flags) >> > + >> > + def __delitem__(self, key): >> > + needle, found = self.bsearch2(key) >> > + if not found: >> > + raise KeyError >> > + cur = self.positions[needle] >> > + self.positions = self.positions[:needle] + self.positions[needle >> > + 1:] >> > + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle >> > + 1:] >> > + if cur >= 0: >> > + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] >> > + >> > + def __setitem__(self, key, value): >> > + if not isinstance(key, str): >> > + raise TypeError("setitem: manifest keys must be a string.") >> > + if not isinstance(value, tuple) or len(value) != 2: >> > + raise TypeError("Manifest values must be a tuple of (node, >> > flags).") >> > + hashval = value[0] >> > + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: >> > + raise TypeError("node must be a 20-byte string") >> > + flags = value[1] >> > + if not isinstance(flags, str) or len(flags) > 1: >> > + raise TypeError("flags must a 0 or 1 byte string, got %r", >> > flags) >> > + needle, found = self.bsearch2(key) >> > + if found: >> > + # put the item >> > + pos = self.positions[needle] >> > + if pos < 0: >> > + self.extradata[-pos - 1] = (key, value[0], value[1]) >> > + else: >> > + # just don't bother >> > + self.extradata.append((key, value[0], value[1])) >> > + self.positions[needle] = -len(self.extradata) >> > + else: >> > + # not found, put it in with extra positions >> > + self.extradata.append((key, value[0], value[1])) >> > + self.positions = (self.positions[:needle] + >> > [-len(self.extradata)] >> > + + self.positions[needle:]) >> > + self.extrainfo = (self.extrainfo[:needle] + [0] + >> > + self.extrainfo[needle:]) >> > >> > def copy(self): >> > - c = _lazymanifest('') >> > - c.update(self) >> > - return c >> > + # XXX call _compact like in C? >> > + return _lazymanifest(self.data, self.positions, self.extrainfo, >> > + self.extradata) >> > + >> > + def _compact(self): >> > + # hopefully not called TOO often >> > + def findnextposition(positions, start, lgt): >> > + i = start >> > + while i < len(positions): >> > + if positions[i] >= 0: >> > + return positions[i] >> > + i += 1 >> > + return lgt >> > + >> > + if len(self.extradata) == 0: >> > + return >> > + l = [] >> > + last_cut = 0 >> > + i = 0 >> > + offset = 0 >> > + self.extrainfo = [0] * len(self.positions) >> > + while i < len(self.positions): >> > + if self.positions[i] >= 0: >> > + cur = self.positions[i] >> > + last_cut = cur >> > + while True: >> > + self.positions[i] = offset >> > + i += 1 >> > + if i == len(self.positions) or self.positions[i] < 0: >> > + break >> > + offset += self.positions[i] - cur >> > + cur = self.positions[i] >> > + end_cut = findnextposition(self.positions, i, >> > len(self.data)) >> > + offset += end_cut - cur >> > + l.append(self.data[last_cut:end_cut]) >> > + else: >> > + while i < len(self.positions) and self.positions[i] < 0: >> > + cur = self.positions[i] >> > + t = self.extradata[-cur - 1] >> > + l.append(self._pack(t)) >> > + self.positions[i] = offset >> > + if len(t[1]) > 20: >> > + self.extrainfo[i] = ord(t[1][21]) >> > + offset += len(l[-1]) >> > + i += 1 >> > + self.data = ''.join(l) >> > + self.extradata = [] >> > + >> > + def _pack(self, d): >> > + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' >> > + >> > + def text(self): >> > + self._compact() >> > + return self.data >> > >> > def diff(self, m2, clean=False): >> > '''Finds changes between the current manifest and m2.''' >> > + # XXX think whether efficiency matters here >> > diff = {} >> > >> > - for fn, e1 in self.iteritems(): >> > + for fn, e1, flags in self.iterentries(): >> > if fn not in m2: >> > - diff[fn] = e1, (None, '') >> > + diff[fn] = (e1, flags), (None, '') >> > else: >> > e2 = m2[fn] >> > - if e1 != e2: >> > - diff[fn] = e1, e2 >> > + if (e1, flags) != e2: >> > + diff[fn] = (e1, flags), e2 >> > elif clean: >> > diff[fn] = None >> > >> > - for fn, e2 in m2.iteritems(): >> > + for fn, e2, flags in m2.iterentries(): >> > if fn not in self: >> > - diff[fn] = (None, ''), e2 >> > + diff[fn] = (None, ''), (e2, flags) >> > >> > return diff >> > >> > + def iterentries(self): >> > + return lazymanifestiterentries(self) >> > + >> > + def iterkeys(self): >> > + return lazymanifestiter(self) >> > + >> > + def __iter__(self): >> > + return lazymanifestiter(self) >> > + >> > + def __len__(self): >> > + return len(self.positions) >> > + >> > def filtercopy(self, filterfn): >> > + # XXX should be optimized >> > c = _lazymanifest('') >> > for f, n, fl in self.iterentries(): >> > if filterfn(f): >> > c[f] = n, fl >> > return c >> > >> > - def text(self): >> > - """Get the full data of this manifest as a bytestring.""" >> > - return _textv1(self.iterentries()) >> > - >> > try: >> > _lazymanifest = parsers.lazymanifest >> > except AttributeError: >> > diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py >> > --- a/mercurial/pure/bdiff.py >> > +++ b/mercurial/pure/bdiff.py >> > @@ -111,8 +111,8 @@ >> > def blocks(sa, sb): >> > a = ffi.new("struct bdiff_line**") >> > b = ffi.new("struct bdiff_line**") >> > - ac = ffi.new("char[]", sa) >> > - bc = ffi.new("char[]", sb) >> > + ac = ffi.new("char[]", str(sa)) >> > + bc = ffi.new("char[]", str(sb)) >> > l = ffi.new("struct bdiff_hunk*") >> > try: >> > an = lib.bdiff_splitlines(ac, len(sa), a) >> > @@ -138,8 +138,8 @@ >> > def bdiff(sa, sb): >> > a = ffi.new("struct bdiff_line**") >> > b = ffi.new("struct bdiff_line**") >> > - ac = ffi.new("char[]", sa) >> > - bc = ffi.new("char[]", sb) >> > + ac = ffi.new("char[]", str(sa)) >> > + bc = ffi.new("char[]", str(sb)) >> > l = ffi.new("struct bdiff_hunk*") >> > try: >> > an = lib.bdiff_splitlines(ac, len(sa), a) >> _______________________________________________ >> Mercurial-devel mailing list >> Mercurial-devel@mercurial-scm.org >> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel