# HG changeset patch # User Boris Feld <boris.f...@octobus.net> # Date 1546605681 -3600 # Fri Jan 04 13:41:21 2019 +0100 # Node ID 73926c4ab24d6c01723ed050601b134bdc89562f # Parent 4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa # EXP-Topic revs-efficiency # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d revset: introduce an API that avoids `formatspec` input serialization
Instead of having the data fully serialized, the input can be replaced with a `__internal_input__(<idx>)` entry in the revspec. The actual value at `<idx>` has to be passed along with the format spec, but the operation can get much more efficient. Just using it for the simple "%ld" case provides a significant boost. For example, here is the impact on a sample discovery run between two pypy repositories with arbitrary differences (using hg perfdiscovery). $ hg perfdiscovery before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15) after: ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20) diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -1362,8 +1362,8 @@ class localrepository(object): Returns a revset.abstractsmartset, which is a list-like interface that contains integer revisions. ''' - expr = revsetlang.formatspec(expr, *args) - m = revset.match(None, expr) + expr, inputs = revsetlang.formatspecargs(expr, *args) + m = revset.matchany(None, [expr], inputs=inputs) return m(self) def set(self, expr, *args): diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -2194,6 +2194,14 @@ def _hexlist(repo, subset, x, order): else: return _orderedhexlist(repo, subset, x) +@predicate(revsetlang.internal_input_func, takeorder=True) +def _internal_input(repo, subset, x, order): + # access subtituted value during internal revset runs + if order == followorder: + return subset & x[1] + else: + return x[1] & subset + methods = { "range": rangeset, "rangeall": rangeall, @@ -2230,7 +2238,7 @@ def match(ui, spec, lookup=None): """Create a matcher for a single revision spec""" return matchany(ui, [spec], lookup=lookup) -def matchany(ui, specs, lookup=None, localalias=None): +def matchany(ui, specs, lookup=None, localalias=None, inputs=()): """Create a matcher that will include any revisions matching one of the given specs 
@@ -2239,6 +2247,9 @@ def matchany(ui, specs, lookup=None, loc If localalias is not None, it is a dict {name: definitionstring}. It takes precedence over [revsetalias] config section. + + inputs containts value for __internal_input__ reference. This is used by + internal revset runs. """ if not specs: def mfunc(repo, subset=None): @@ -2261,6 +2272,8 @@ def matchany(ui, specs, lookup=None, loc aliases.extend(localalias.items()) if aliases: tree = revsetlang.expandaliases(tree, aliases, warn=warn) + if inputs: + tree = revsetlang.expandinputs(inputs, tree) tree = revsetlang.foldconcat(tree) tree = revsetlang.analyze(tree) tree = revsetlang.optimize(tree) diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py --- a/mercurial/revsetlang.py +++ b/mercurial/revsetlang.py @@ -69,6 +69,9 @@ symbols = {} # default set of valid characters for non-initial letters of symbols _symletters = _syminitletters | set(pycompat.iterbytestr('-/')) +_internal_input = '__internal_input_placeholder__' +internal_input_func = '__internal_input__' + def tokenize(program, lookup=None, syminitletters=None, symletters=None): ''' Parse a revset statement into a stream of tokens @@ -333,7 +336,7 @@ def _analyze(x): elif op == 'negate': s = getstring(x[1], _("can't negate that")) return _analyze(('string', '-' + s)) - elif op in ('string', 'symbol'): + elif op in ('string', 'symbol', 'smartset'): return x elif op == 'rangeall': return (op, None) @@ -373,7 +376,7 @@ def _optimize(x): return 0, x op = x[0] - if op in ('string', 'symbol'): + if op in ('string', 'symbol', 'smartset'): return 0.5, x # single revisions are small elif op == 'and': wa, ta = _optimize(x[1]) @@ -532,6 +535,26 @@ def expandaliases(tree, aliases, warn=No alias.warned = True return tree +class _inputrules(parser.basealiasrules): + """replace internal input reference by their actual value""" + + @classmethod + def _getalias(cls, inputs, tree): + if not isinstance(tree, tuple): + return None + if tree[0] != 'func': + 
return None + if getsymbol(tree[1]) != _internal_input: + return None + idx = int(getsymbol(tree[2])) + newtree = ('func', + ('symbol', internal_input_func), + ('smartset', inputs[idx]) + ) + return parser.alias(idx, None, None, newtree), None + +expandinputs = _inputrules.expand + def foldconcat(tree): """Fold elements to be concatenated by `##` """ @@ -686,12 +709,23 @@ def formatspec(expr, *args): if t == 'baseset': if isinstance(arg, set): arg = sorted(arg) - try: - ret.append(_formatintlist(list(arg))) - except (TypeError, ValueError): - raise error.ParseError(_('invalid argument for revspec')) + ret.append(_formatintlist(list(arg))) return b''.join(ret) +def formatspecargs(expr, *args): + """same as formatspec, but preserve some expensive arguments""" + parsed = _parseargs(expr, args) + ret = [] + inputs = [] + for t, arg in parsed: + if t is None: + ret.append(arg) + if t == 'baseset': + key = '%s(%d)' % (_internal_input, len(inputs)) + inputs.append(smartset.baseset(arg)) + ret.append(key) + return (b''.join(ret), inputs) + def _parseargs(expr, args): """parse the expression and replace all inexpensive args _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel