joerg.sonnenberger created this revision. Herald added a reviewer: hg-reviewers. Herald added a subscriber: mercurial-patches.
REVISION SUMMARY Computing tags requires parsing .hgtags for all heads. Mercurial therefore keeps a cache to efficiently find the .hgtags version of a revision without having to parse the manifest, but this cache is computed lazily and is often incomplete. The new implementation of the cache works a lot more like the revbranchcache and updates the cache in two stages: (1) When a new changeset is added, check if .hgtags is touched. The common case is that it didn't change and it is therefore inherited from the parent. Now the parent might not be fully resolved yet (missing manifest), so just keep a dictionary mapping to the parent revision that potentially touched it. (2) At the end of the transaction, resolve entries before writing the cache to disk. At this point, manifests are all known, so they can be parsed as necessary. The fast path here is reading just the delta, but this doesn't necessarily answer the question, since the delta could have been computed against a non-parent. If the cache logic hits an invalid or missing node, it will recheck all nodes. This is a bit more expensive, but simplifies the logic and avoids recursion. This penalty is normally hit only once, but read-only clients should run debugupdatecaches once, and again after strips. REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D10082 AFFECTED FILES mercurial/bundle2.py mercurial/debugcommands.py mercurial/interfaces/repository.py mercurial/localrepo.py mercurial/statichttprepo.py mercurial/tags.py CHANGE DETAILS diff --git a/mercurial/tags.py b/mercurial/tags.py --- a/mercurial/tags.py +++ b/mercurial/tags.py @@ -492,7 +492,7 @@ without a '.hgtags' file. 
""" starttime = util.timer() - fnodescache = hgtagsfnodescache(repo.unfiltered()) + fnodescache = repo.hgtagsfnodescache() cachefnode = {} for node in nodes: fnode = fnodescache.getfnode(node) @@ -504,9 +504,7 @@ duration = util.timer() - starttime ui.log( b'tagscache', - b'%d/%d cache hits/lookups in %0.4f seconds\n', - fnodescache.hitcount, - fnodescache.lookupcount, + b'cache update took %0.4f seconds\n', duration, ) return cachefnode @@ -695,186 +693,140 @@ def __init__(self, repo): assert repo.filtername is None - self._repo = repo - - # Only for reporting purposes. - self.lookupcount = 0 - self.hitcount = 0 - + self._delayed = {} + self._delayed_clr = {} try: - data = repo.cachevfs.read(_fnodescachefile) - except (OSError, IOError): - data = b"" - self._raw = bytearray(data) + self._hgtagsrevs = bytearray(repo.cachevfs.read(_fnodescachefile)) + except (IOError, OSError): + self._hgtagsrevs = bytearray() + self._hgtagsrevslen = min( + len(self._hgtagsrevs) // _fnodesrecsize, len(repo) + ) + expected = self._hgtagsrevslen * _fnodesrecsize + if len(self._hgtagsrevs) != expected: + self._hgtagsrevs = self._hgtagsrevs[:expected] + self._oldhgtagsrevslen = self._hgtagsrevslen - # The end state of self._raw is an array that is of the exact length - # required to hold a record for every revision in the repository. - # We truncate or extend the array as necessary. self._dirtyoffset is - # defined to be the start offset at which we need to write the output - # file. This offset is also adjusted when new entries are calculated - # for array members. - cllen = len(repo.changelog) - wantedlen = cllen * _fnodesrecsize - rawlen = len(self._raw) - - self._dirtyoffset = None + def _clear(self): + self._hgtagsrevs = bytearray() + self._oldhgtagsrevslen = 0 + self._hgtagsrevslen = 0 - rawlentokeep = min( - wantedlen, (rawlen // _fnodesrecsize) * _fnodesrecsize - ) - if rawlen > rawlentokeep: - # There's no easy way to truncate array instances. 
This seems - # slightly less evil than copying a potentially large array slice. - for i in range(rawlen - rawlentokeep): - self._raw.pop() - rawlen = len(self._raw) - self._dirtyoffset = rawlen - if rawlen < wantedlen: - if self._dirtyoffset is None: - self._dirtyoffset = rawlen - # TODO: zero fill entire record, because it's invalid not missing? - self._raw.extend(b'\xff' * (wantedlen - rawlen)) - - def getfnode(self, node, computemissing=True): + def getfnode(self, nodeorrev, computemissing=True): """Obtain the filenode of the .hgtags file at a specified revision. + If an .hgtags does not exist at the specified revision, nullid is used + as filenode. + If the value is in the cache, the entry will be validated and returned. - Otherwise, the filenode will be computed and returned unless - "computemissing" is False. In that case, None will be returned if - the entry is missing or False if the entry is invalid without - any potentially expensive computation being performed. - - If an .hgtags does not exist at the specified revision, nullid is - returned. + If "computemissing" is False and the value is not in the cache or the + cached entry is invalid, None is returned. + If "computemissing" is True, a missing or invalid value will be + computed and the cache updated. """ - if node == nullid: + if nodeorrev == nullid or nodeorrev == nullrev: return nullid - ctx = self._repo[node] - rev = ctx.rev() - - self.lookupcount += 1 - + if isinstance(nodeorrev, int): + rev = nodeorrev + node = self._repo.changelog.node(rev) + else: + node = nodeorrev + rev = self._repo.changelog.rev(node) offset = rev * _fnodesrecsize - record = b'%s' % self._raw[offset : offset + _fnodesrecsize] - properprefix = node[0:4] - - # Validate and return existing entry. 
- if record != _fnodesmissingrec and len(record) == _fnodesrecsize: - fileprefix = record[0:4] + if rev < self._hgtagsrevslen: + record = b'%s' % self._hgtagsrevs[offset : offset + _fnodesrecsize] + if record[:4] == node[:4]: + return record[4:] + if not computemissing: + return None + # If there should have been a cache entrance, force a full recompute. + # Otherwise assume that the cache was valid and just compute new + # entries. + self._updatecache(rev < self._hgtagsrevslen) + record = b'%s' % self._hgtagsrevs[offset : offset + _fnodesrecsize] + return record[4:] - if fileprefix == properprefix: - self.hitcount += 1 - return record[4:] - - # Fall through. - - # If we get here, the entry is either missing or invalid. - - if not computemissing: - if record != _fnodesmissingrec: - return False - return None - - fnode = None + def _computefnode(self, rev): + # Used by _updatecache cl = self._repo.changelog - p1rev, p2rev = cl._uncheckedparentrevs(rev) - p1node = cl.node(p1rev) - p1fnode = self.getfnode(p1node, computemissing=False) - if p2rev != nullrev: - # There is some no-merge changeset where p1 is null and p2 is set - # Processing them as merge is just slower, but still gives a good - # result. - p2node = cl.node(p1rev) - p2fnode = self.getfnode(p2node, computemissing=False) - if p1fnode != p2fnode: - # we cannot rely on readfast because we don't know against what - # parent the readfast delta is computed - p1fnode = None - if p1fnode: - mctx = ctx.manifestctx() - fnode = mctx.readfast().get(b'.hgtags') - if fnode is None: - fnode = p1fnode - if fnode is None: - # Populate missing entry. - try: - fnode = ctx.filenode(b'.hgtags') - except error.LookupError: - # No .hgtags file on this revision. 
- fnode = nullid - - self._writeentry(offset, properprefix, fnode) - return fnode - - def setfnode(self, node, fnode): - """Set the .hgtags filenode for a given changeset.""" - assert len(fnode) == 20 - ctx = self._repo[node] + if rev in self._delayed: + # New node + p1, p2 = cl._uncheckedparentrevs(rev) + return self.getfnode(p1 if p1 != nullrev else p2, False) + changelogrevision = self._delayed_clr.get(rev) + if changelogrevision is None: + changelogrevision = cl.changelogrevision(rev) + if b'.hgtags' not in changelogrevision.files: + # Since .hgtags didn't change from the parents, + # just look up the first parent. Topological order + # of the repository means we should already know this. + p1, p2 = cl._uncheckedparentrevs(rev) + return self.getfnode(p1 if p1 != nullrev else p2, False) + mfl = self._repo.manifestlog[changelogrevision.manifest] + mctx = self._repo[rev]._manifestctx + fnode = mctx.readdelta().get(b'.hgtags') + if fnode: + return fnode + try: + return mctx.find(b'.hgtags')[0] + except (error.ManifestLookupError, KeyError): + return nullid - # Do a lookup first to avoid writing if nothing has changed. - if self.getfnode(ctx.node(), computemissing=False) == fnode: - return - - self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode) - - def _writeentry(self, offset, prefix, fnode): - # Slices on array instances only accept other array. - entry = bytearray(prefix + fnode) - self._raw[offset : offset + _fnodesrecsize] = entry - # self._dirtyoffset could be None. 
- self._dirtyoffset = min(self._dirtyoffset or 0, offset or 0) + def _updatecache(self, full): + repo = self._repo + cl = repo.changelog + startrev = 0 if full else self._hgtagsrevslen + for rev in pycompat.xrange(startrev, self._hgtagsrevslen): + offset = rev * _fnodesrecsize + node = cl.node(rev) + if node[:4] == self._hgtagsrevs[offset : offset + 4]: + continue + fnode = self._computefnode(rev) + if fnode is None: + raise error.Abort(_(b'invalid topology')) + record = node[:4] + fnode + self._hgtagsrevs[offset : offset + _fnodesrecsize] = record + self._oldhgtagsrevslen = min(self._oldhgtagsrevslen, rev) + self._hgtagsrevs.extend( + b'\x00' * (len(repo) - self._hgtagsrevslen) * _fnodesrecsize + ) + for rev in pycompat.xrange(self._hgtagsrevslen, len(repo)): + offset = rev * _fnodesrecsize + fnode = self._computefnode(rev) + if fnode is None: + if full: + raise error.Abort(_(b'invalid topology')) + self._updatecache(True) + return + record = cl.node(rev)[:4] + fnode + self._hgtagsrevs[offset : offset + _fnodesrecsize] = record + self._hgtagsrevslen += 1 - def write(self): - """Perform all necessary writes to cache file. + def setdata(self, rev, changelogrevision): + """add new data information to the cache""" + if b'.hgtags' not in changelogrevision.files: + p1, p2 = self._repo.changelog._uncheckedparentrevs(rev) + p = p1 if p1 != nullrev else p2 + if p in self._delayed: + p = self._delayed[p] + self._delayed[rev] = p + else: + self._delayed_clr[rev] = changelogrevision - This may no-op if no writes are needed or if a write lock could - not be obtained. 
- """ - if self._dirtyoffset is None: + def write(self, tr=None): + """Save branch cache if it is dirty.""" + self._updatecache(False) + if self._oldhgtagsrevslen == self._hgtagsrevslen: return - - data = self._raw[self._dirtyoffset :] - if not data: - return - repo = self._repo - try: - lock = repo.lock(wait=False) - except error.LockError: - repo.ui.log( - b'tagscache', - b'not writing .hg/cache/%s because ' - b'lock cannot be acquired\n' % _fnodescachefile, + f = repo.cachevfs.open(_fnodescachefile, b'wb', atomictemp=True) + f.write(self._hgtagsrevs) + f.close() + except (IOError, OSError) as inst: + repo.ui.debug( + b"couldn't write .hgtags revision cache: %s\n" + % stringutil.forcebytestr(inst) ) - return - - try: - f = repo.cachevfs.open(_fnodescachefile, b'ab') - try: - # if the file has been truncated - actualoffset = f.tell() - if actualoffset < self._dirtyoffset: - self._dirtyoffset = actualoffset - data = self._raw[self._dirtyoffset :] - f.seek(self._dirtyoffset) - f.truncate() - repo.ui.log( - b'tagscache', - b'writing %d bytes to cache/%s\n' - % (len(data), _fnodescachefile), - ) - f.write(data) - self._dirtyoffset = None - finally: - f.close() - except (IOError, OSError) as inst: - repo.ui.log( - b'tagscache', - b"couldn't write cache/%s: %s\n" - % (_fnodescachefile, stringutil.forcebytestr(inst)), - ) - finally: - lock.release() diff --git a/mercurial/statichttprepo.py b/mercurial/statichttprepo.py --- a/mercurial/statichttprepo.py +++ b/mercurial/statichttprepo.py @@ -215,6 +215,7 @@ self.nodetagscache = None self._branchcaches = branchmap.BranchMapCache() self._revbranchcache = None + self._hgtagsfnodescache = None self.encodepats = None self.decodepats = None self._transref = None diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -1356,6 +1356,7 @@ self._filterpats = {} self._datafilters = {} self._transref = self._lockref = self._wlockref = None + self._hgtagsfnodescache = 
None # A cache for various files under .hg/ that tracks file changes, # (used by the filecache decorator) @@ -1468,6 +1469,8 @@ def _writecaches(self): if self._revbranchcache: self._revbranchcache.write() + if self._hgtagsfnodescache: + self._hgtagsfnodescache.write() def _restrictcapabilities(self, caps): if self.ui.configbool(b'experimental', b'bundle2-advertise'): @@ -2065,8 +2068,17 @@ self._revbranchcache = branchmap.revbranchcache(self.unfiltered()) return self._revbranchcache + @unfilteredmethod + def hgtagsfnodescache(self): + if not self._hgtagsfnodescache: + self._hgtagsfnodescache = tagsmod.hgtagsfnodescache( + self.unfiltered() + ) + return self._hgtagsfnodescache + def register_changeset(self, rev, changelogrevision): self.revbranchcache().setdata(rev, changelogrevision) + self.hgtagsfnodescache().setdata(rev, changelogrevision) def branchtip(self, branch, ignoremissing=False): """return the tip node for a given branch @@ -2702,6 +2714,8 @@ self.filtered(b'served').branchmap() self.filtered(b'served.hidden').branchmap() + self.hgtagsfnodescache()._updatecache(full) + if full: unfi = self.unfiltered() @@ -2717,8 +2731,6 @@ for ctx in self[b'.'].parents(): ctx.manifest() # accessing the manifest is enough - # accessing fnode cache warms the cache - tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs()) # accessing tags warm the cache self.tags() self.filtered(b'served').tags() diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py --- a/mercurial/interfaces/repository.py +++ b/mercurial/interfaces/repository.py @@ -1648,6 +1648,9 @@ def revbranchcache(): pass + def hgtagsfnodescache(): + pass + def register_changeset(rev, changelogrevision): """Extension point for caches for new nodes. 
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -3864,16 +3864,14 @@ @command(b'debugtagscache', []) def debugtagscache(ui, repo): """display the contents of .hg/cache/hgtagsfnodes1""" - cache = tagsmod.hgtagsfnodescache(repo.unfiltered()) + cache = repo.hgtagsfnodescache() for r in repo: node = repo[r].node() tagsnode = cache.getfnode(node, computemissing=False) if tagsnode: tagsnodedisplay = hex(tagsnode) - elif tagsnode is False: - tagsnodedisplay = b'invalid' else: - tagsnodedisplay = b'missing' + tagsnodedisplay = b'missing or invalid' ui.write(b'%d %s %s\n' % (r, hex(node), tagsnodedisplay)) diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py --- a/mercurial/bundle2.py +++ b/mercurial/bundle2.py @@ -1754,7 +1754,7 @@ def addparttagsfnodescache(repo, bundler, outgoing): # we include the tags fnode cache for the bundle changeset # (as an optional parts) - cache = tags.hgtagsfnodescache(repo.unfiltered()) + cache = repo.hgtagsfnodescache() chunks = [] # .hgtags fnodes are only relevant for head changesets. While we could @@ -2450,27 +2450,10 @@ @parthandler(b'hgtagsfnodes') def handlehgtagsfnodes(op, inpart): - """Applies .hgtags fnodes cache entries to the local repo. - - Payload is pairs of 20 byte changeset nodes and filenodes. - """ - # Grab the transaction so we ensure that we have the lock at this point. - if op.ui.configbool(b'experimental', b'bundle2lazylocking'): - op.gettransaction() - cache = tags.hgtagsfnodescache(op.repo.unfiltered()) - - count = 0 - while True: - node = inpart.read(20) - fnode = inpart.read(20) - if len(node) < 20 or len(fnode) < 20: - op.ui.debug(b'ignoring incomplete received .hgtags fnodes data\n') - break - cache.setfnode(node, fnode) - count += 1 - - cache.write() - op.ui.debug(b'applied %i hgtags fnodes cache entries\n' % count) + """Legacy part, ignored for compatibility with bundles from + or for Mercurial before 5.9. 
Newer Mercurial computes the cache + efficiently enough during unbundling that the additional transfer + is unnecessary.""" rbcstruct = struct.Struct(b'>III') To: joerg.sonnenberger, #hg-reviewers Cc: mercurial-patches, mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel