# HG changeset patch
# User Yuya Nishihara
# Date 1532161152 -32400
# Sat Jul 21 17:19:12 2018 +0900
# Node ID 33247ffb2c71b954e229da8424798d52c1e0d8e1
# Parent db488b9c65596088550b2b96ed25aff60a938219
fileset: combine union of basic patterns into single matcher
This appears to improve query performance in a big repository than I thought.
Writing less Python in a hot loop, faster computation we gain.
$ hg files --cwd mozilla-central --time 'set:a* + b* + c* + d* + e*'
(orig) time: real 0.670 secs (user 0.640+0.000 sys 0.030+0.000)
(new) time: real 0.210 secs (user 0.180+0.000 sys 0.020+0.000)
diff --git a/mercurial/fileset.py b/mercurial/fileset.py
--- a/mercurial/fileset.py
+++ b/mercurial/fileset.py
@@ -50,6 +50,12 @@ def kindpatmatch(mctx, x, y):
return stringmatch(mctx, _getkindpat(x, y, matchmod.allpatternkinds,
_("pattern must be a string")))
+def patternsmatch(mctx, *xs):
+allkinds = matchmod.allpatternkinds
+patterns = [getpattern(x, allkinds, _("pattern must be a string"))
+for x in xs]
+return mctx.matcher(patterns)
+
def andmatch(mctx, x, y):
xm = getmatch(mctx, x)
ym = getmatch(mctx, y)
@@ -436,6 +442,7 @@ methods = {
'string': stringmatch,
'symbol': stringmatch,
'kindpat': kindpatmatch,
+'patterns': patternsmatch,
'and': andmatch,
'or': ormatch,
'minus': minusmatch,
diff --git a/mercurial/filesetlang.py b/mercurial/filesetlang.py
--- a/mercurial/filesetlang.py
+++ b/mercurial/filesetlang.py
@@ -185,6 +185,21 @@ def _optimizeandops(op, ta, tb):
return ('minus', ta, tb[1])
return (op, ta, tb)
+def _optimizeunion(xs):
+# collect string patterns so they can be compiled into a single regexp
+ws, ts, ss = [], [], []
+for x in xs:
+w, t = _optimize(x)
+if t is not None and t[0] in {'string', 'symbol', 'kindpat'}:
+ss.append(t)
+continue
+ws.append(w)
+ts.append(t)
+if ss:
+ws.append(WEIGHT_CHECK_FILENAME)
+ts.append(('patterns',) + tuple(ss))
+return ws, ts
+
def _optimize(x):
if x is None:
return 0, x
@@ -206,7 +221,9 @@ def _optimize(x):
else:
return wb, _optimizeandops(op, tb, ta)
if op == 'or':
-ws, ts = zip(*(_optimize(y) for y in x[1:]))
+ws, ts = _optimizeunion(x[1:])
+if len(ts) == 1:
+return ws[0], ts[0] # 'or' operation is fully optimized out
ts = tuple(it[1] for it in sorted(enumerate(ts),
key=lambda it: ws[it[0]]))
return max(ws), (op,) + ts
diff --git a/mercurial/minifileset.py b/mercurial/minifileset.py
--- a/mercurial/minifileset.py
+++ b/mercurial/minifileset.py
@@ -40,7 +40,7 @@ def _compile(tree):
return f
raise error.ParseError(_("unsupported file pattern: %s") % name,
hint=_('paths must be prefixed with "path:"'))
-elif op == 'or':
+elif op in {'or', 'patterns'}:
funcs = [_compile(x) for x in tree[1:]]
return lambda n, s: any(f(n, s) for f in funcs)
elif op == 'and':
diff --git a/tests/test-fileset.t b/tests/test-fileset.t
--- a/tests/test-fileset.t
+++ b/tests/test-fileset.t
@@ -53,9 +53,7 @@ Test operators and basic patterns
(symbol 'glob')
(symbol 'b?')))
* matcher:
- ,
-]>
+
a1
b1
b2
@@ -182,8 +180,9 @@ Show parsed tree at stages:
None)))
* optimized:
(or
-(symbol 'a1')
-(symbol 'a2')
+(patterns
+ (symbol 'a1')
+ (symbol 'a2'))
(and
(func
(symbol 'clean')
@@ -193,8 +192,7 @@ Show parsed tree at stages:
(string 'b'
* matcher:
,
-,
+,
,
m2=>]>
@@ -203,13 +201,30 @@ Show parsed tree at stages:
b1
b2
+Union of basic patterns:
+
+ $ fileset -p optimized -s -r. 'a1 or a2 or path:b1'
+ * optimized:
+ (patterns
+(symbol 'a1')
+(symbol 'a2')
+(kindpat
+ (symbol 'path')
+ (symbol 'b1')))
+ * matcher:
+
+ a1
+ a2
+ b1
+
OR expression should be reordered by weight:
$ fileset -p optimized -s -r. 'grep("a") or a1 or grep("b") or b2'
* optimized:
(or
-(symbol 'a1')
-(symbol 'b2')
+(patterns
+ (symbol 'a1')
+ (symbol 'b2'))
(func
(symbol 'grep')
(string 'a'))
@@ -218,8 +233,7 @@ OR expression should be reordered by wei
(string 'b')))
* matcher:
,
-,
+,
,
]>
a1
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel