> On Sep 17, 2016, at 14:23, Maciej Fijalkowski <fij...@gmail.com> wrote: > > This should fix the issues presented. > > There is one problem which is that the hash in test-rebase-detach > changes. The way I see it is just an RNG phase-shift, but it might be > a real bug. Some actual input will be appreciated.
I'll get a look at this in the morning America/New_York - my other miscellany occupied me for far too long today and I just don't have brainpower for this tonight. Sorry to keep you waiting. AF > > > > On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fij...@gmail.com> wrote: >> # HG changeset patch >> # User Maciej Fijalkowski <fij...@gmail.com> >> # Date 1473680234 -7200 >> # Mon Sep 12 13:37:14 2016 +0200 >> # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> lazymanifest: write a more efficient, pypy friendly version of lazymanifest >> >> diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> --- a/mercurial/manifest.py >> +++ b/mercurial/manifest.py >> @@ -104,69 +104,297 @@ >> _checkforbidden(files) >> return ''.join(lines) >> >> -class _lazymanifest(dict): >> - """This is the pure implementation of lazymanifest. >> - >> - It has not been optimized *at all* and is not lazy. >> - """ >> - >> - def __init__(self, data): >> - dict.__init__(self) >> - for f, n, fl in _parse(data): >> - self[f] = n, fl >> - >> - def __setitem__(self, k, v): >> - node, flag = v >> - assert node is not None >> - if len(node) > 21: >> - node = node[:21] # match c implementation behavior >> - dict.__setitem__(self, k, (node, flag)) >> +class lazymanifestiter(object): >> + def __init__(self, lm): >> + self.pos = 0 >> + self.lm = lm >> >> def __iter__(self): >> - return iter(sorted(dict.keys(self))) >> + return self >> >> - def iterkeys(self): >> - return iter(sorted(dict.keys(self))) >> + def next(self): >> + try: >> + data, pos = self.lm._get(self.pos) >> + except IndexError: >> + raise StopIteration >> + if pos == -1: >> + self.pos += 1 >> + return data[0] >> + self.pos += 1 >> + zeropos = data.find('\x00', pos) >> + return data[pos:zeropos] >> >> - def iterentries(self): >> - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) >> +class lazymanifestiterentries(object): >> + def __init__(self, lm): >> + self.lm = lm >> + self.pos = 0 >> + >> + def __iter__(self): >> + return self >> + >> + def next(self): >> + try: >> + data, pos = self.lm._get(self.pos) >> + except IndexError: >> + raise StopIteration >> + if pos == -1: >> + self.pos += 1 >> + return data >> + zeropos = data.find('\x00', pos) >> + hashval = unhexlify(data, self.lm.extrainfo[self.pos], >> + zeropos + 1, 40) >> + flags = self.lm._getflags(data, self.pos, zeropos) >> + self.pos += 1 >> + return (data[pos:zeropos], hashval, flags) >> + >> +def unhexlify(data, extra, pos, length): >> + s = data[pos:pos + length].decode('hex') >> + if extra: >> + s += chr(extra & 0xff) >> + return s >> + >> +def _cmp(a, b): >> + return (a > b) - (a < b) >> + >> +class _lazymanifest(object): >> + def __init__(self, data, positions=None, extrainfo=None, >> extradata=None): >> + if positions is None: >> + self.positions = self.findlines(data) >> + self.extrainfo = [0] * len(self.positions) >> + self.data = data >> + self.extradata = [] >> + else: >> + self.positions = positions[:] >> + self.extrainfo = extrainfo[:] >> + self.extradata = extradata[:] >> + self.data = data >> + >> + def findlines(self, data): >> + if not data: >> + return [] >> + pos = data.find("\n") >> + positions = [0] >> + while pos < len(data) - 1 and pos != -1: >> + positions.append(pos + 1) >> + pos = data.find("\n", pos + 1) >> + return positions >> + >> + def _get(self, index): >> + # get the position encoded in pos: >> + # positive number is an index in 'data' >> + # negative number is in extrapieces >> + pos = self.positions[index] >> + if pos >= 0: >> + return self.data, pos >> + return self.extradata[-pos - 1], -1 >> + >> + def _getkey(self, pos): >> + if pos >= 0: >> + return self.data[pos:self.data.find('\x00', pos + 1)] >> + return self.extradata[-pos - 1][0] >> + >> + def bsearch(self, key): >> + first = 0 >> + last = len(self.positions) - 1 >> + >> + while first <= last: >> + midpoint = (first + last)//2 >> + nextpos = self.positions[midpoint] >> + candidate = self._getkey(nextpos) >> + r = _cmp(key, candidate) >> + if r == 0: >> + return midpoint >> + else: >> + if r < 0: >> + last = midpoint - 1 >> + else: >> + first = midpoint + 1 >> + return -1 >> + >> + def bsearch2(self, key): >> + # same as the above, but will always return the position >> + # done for performance reasons >> + first = 0 >> + last = len(self.positions) - 1 >> + >> + while first <= last: >> + midpoint = (first + last)//2 >> + nextpos = self.positions[midpoint] >> + candidate = self._getkey(nextpos) >> + r = _cmp(key, candidate) >> + if r == 0: >> + return (midpoint, True) >> + else: >> + if r < 0: >> + last = midpoint - 1 >> + else: >> + first = midpoint + 1 >> + return (first, False) >> + >> + def __contains__(self, key): >> + return self.bsearch(key) != -1 >> + >> + def _getflags(self, data, needle, pos): >> + start = pos + 41 >> + end = data.find("\n", start) >> + if end == -1: >> + end = len(data) - 1 >> + if start == end: >> + return '' >> + return self.data[start:end] >> + >> + def __getitem__(self, key): >> + if not isinstance(key, str): >> + raise TypeError("getitem: manifest keys must be a string.") >> + needle = self.bsearch(key) >> + if needle == -1: >> + raise KeyError >> + data, pos = self._get(needle) >> + if pos == -1: >> + return (data[1], data[2]) >> + zeropos = data.find('\x00', pos) >> + assert 0 <= needle <= len(self.positions) >> + assert len(self.extrainfo) == len(self.positions) >> + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) >> + flags = self._getflags(data, needle, zeropos) >> + return (hashval, flags) >> + >> + def __delitem__(self, key): >> + needle, found = self.bsearch2(key) >> + if not found: >> + raise KeyError >> + cur = self.positions[needle] >> + self.positions = self.positions[:needle] + self.positions[needle + >> 1:] >> + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + >> 1:] >> + if cur >= 0: >> + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] >> + >> + def __setitem__(self, key, value): >> + if not isinstance(key, str): >> + raise TypeError("setitem: manifest keys must be a string.") >> + if not isinstance(value, tuple) or len(value) != 2: >> + raise TypeError("Manifest values must be a tuple of (node, >> flags).") >> + hashval = value[0] >> + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: >> + raise TypeError("node must be a 20-byte string") >> + flags = value[1] >> + if not isinstance(flags, str) or len(flags) > 1: >> + raise TypeError("flags must a 0 or 1 byte string, got %r", >> flags) >> + needle, found = self.bsearch2(key) >> + if found: >> + # put the item >> + pos = self.positions[needle] >> + if pos < 0: >> + self.extradata[-pos - 1] = (key, value[0], value[1]) >> + else: >> + # just don't bother >> + self.extradata.append((key, value[0], value[1])) >> + self.positions[needle] = -len(self.extradata) >> + else: >> + # not found, put it in with extra positions >> + self.extradata.append((key, value[0], value[1])) >> + self.positions = (self.positions[:needle] + >> [-len(self.extradata)] >> + + self.positions[needle:]) >> + self.extrainfo = (self.extrainfo[:needle] + [0] + >> + self.extrainfo[needle:]) >> >> def copy(self): >> - c = _lazymanifest('') >> - c.update(self) >> - return c >> + # XXX call _compact like in C? >> + return _lazymanifest(self.data, self.positions, self.extrainfo, >> + self.extradata) >> + >> + def _compact(self): >> + # hopefully not called TOO often >> + def findnextposition(positions, start, lgt): >> + i = start >> + while i < len(positions): >> + if positions[i] >= 0: >> + return positions[i] >> + i += 1 >> + return lgt >> + >> + if len(self.extradata) == 0: >> + return >> + l = [] >> + last_cut = 0 >> + i = 0 >> + offset = 0 >> + self.extrainfo = [0] * len(self.positions) >> + while i < len(self.positions): >> + if self.positions[i] >= 0: >> + cur = self.positions[i] >> + last_cut = cur >> + while True: >> + self.positions[i] = offset >> + i += 1 >> + if i == len(self.positions) or self.positions[i] < 0: >> + break >> + offset += self.positions[i] - cur >> + cur = self.positions[i] >> + end_cut = findnextposition(self.positions, i, >> len(self.data)) >> + offset += end_cut - cur >> + l.append(self.data[last_cut:end_cut]) >> + else: >> + while i < len(self.positions) and self.positions[i] < 0: >> + cur = self.positions[i] >> + t = self.extradata[-cur - 1] >> + l.append(self._pack(t)) >> + self.positions[i] = offset >> + if len(t[1]) > 20: >> + self.extrainfo[i] = ord(t[1][21]) >> + offset += len(l[-1]) >> + i += 1 >> + self.data = ''.join(l) >> + self.extradata = [] >> + >> + def _pack(self, d): >> + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' >> + >> + def text(self): >> + self._compact() >> + return self.data >> >> def diff(self, m2, clean=False): >> '''Finds changes between the current manifest and m2.''' >> + # XXX think whether efficiency matters here >> diff = {} >> >> - for fn, e1 in self.iteritems(): >> + for fn, e1, flags in self.iterentries(): >> if fn not in m2: >> - diff[fn] = e1, (None, '') >> + diff[fn] = (e1, flags), (None, '') >> else: >> e2 = m2[fn] >> - if e1 != e2: >> - diff[fn] = e1, e2 >> + if (e1, flags) != e2: >> + diff[fn] = (e1, flags), e2 >> elif clean: >> diff[fn] = None >> >> - for fn, e2 in m2.iteritems(): >> + for fn, e2, flags in m2.iterentries(): >> if fn not in self: >> - diff[fn] = (None, ''), e2 >> + diff[fn] = (None, ''), (e2, flags) >> >> return diff >> >> + def iterentries(self): >> + return lazymanifestiterentries(self) >> + >> + def iterkeys(self): >> + return lazymanifestiter(self) >> + >> + def __iter__(self): >> + return lazymanifestiter(self) >> + >> + def __len__(self): >> + return len(self.positions) >> + >> def filtercopy(self, filterfn): >> + # XXX should be optimized >> c = _lazymanifest('') >> for f, n, fl in self.iterentries(): >> if filterfn(f): >> c[f] = n, fl >> return c >> >> - def text(self): >> - """Get the full data of this manifest as a bytestring.""" >> - return _textv1(self.iterentries()) >> - >> try: >> _lazymanifest = parsers.lazymanifest >> except AttributeError: >> diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py >> --- a/mercurial/pure/bdiff.py >> +++ b/mercurial/pure/bdiff.py >> @@ -111,8 +111,8 @@ >> def blocks(sa, sb): >> a = ffi.new("struct bdiff_line**") >> b = ffi.new("struct bdiff_line**") >> - ac = ffi.new("char[]", sa) >> - bc = ffi.new("char[]", sb) >> + ac = ffi.new("char[]", str(sa)) >> + bc = ffi.new("char[]", str(sb)) >> l = ffi.new("struct bdiff_hunk*") >> try: >> an = lib.bdiff_splitlines(ac, len(sa), a) >> @@ -138,8 +138,8 @@ >> def bdiff(sa, sb): >> a = ffi.new("struct bdiff_line**") >> b = ffi.new("struct bdiff_line**") >> - ac = ffi.new("char[]", sa) >> - bc = ffi.new("char[]", sb) >> + ac = ffi.new("char[]", str(sa)) >> + bc = ffi.new("char[]", str(sb)) >> l = ffi.new("struct bdiff_hunk*") >> try: >> an = lib.bdiff_splitlines(ac, len(sa), a) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel