 hgext/git/gitlog.py |   2 +-
 hgext/git/index.py  |  13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)
# HG changeset patch
# User Josef 'Jeff' Sipek <jef...@josefsipek.net>
# Date 1728051924 14400
#      Fri Oct 04 10:25:24 2024 -0400
# Node ID 0715b44d7fe4cf909b632e661c643db9e6d67297
# Parent  4caa2e795ca6b8bdb2e3d39eb150519c73240980
git: cache the number of commits to speed up large repo operations

Instead of iterating over the whole changelog table every time we want to
know how many commits there are, we can cache the number between mercurial
invocations.

Unsurprisingly, this speeds up certain operations on repos with large
histories. The following measurements are all in seconds and they represent
the runtime of `hg log -T ' ' -l1 > /dev/null`. In other words, this
includes python startup overhead, etc. On small and medium repos, there is
no observable difference in runtime (because of the relatively large
overhead of python runtime startup, and the rest of mercurial doing useful
work), but on large repos the user-visible execution time drops by a factor
of 10 or more.

small repo (~600 commits):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
 0.1052  0.1076  0.1096  0.1102  0.1110  0.1210  (before)
 0.1049  0.1087  0.1106  0.1120  0.1127  0.1302  (after)

medium repo (12k commits):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
 0.1063  0.1095  0.1116  0.1129  0.1153  0.1349  (before)
 0.1044  0.1092  0.1108  0.1115  0.1130  0.1326  (after)

large repo (1.4M commits):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
  1.973   2.105   2.256   2.243   2.406   2.443  (before)
  0.144   0.147   0.148   0.150   0.151   0.176  (after)

diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
--- a/hgext/git/gitlog.py
+++ b/hgext/git/gitlog.py
@@ -43,7 +43,7 @@ class baselog:  # revlog.revlog):
 
     def __len__(self) -> int:
         return int(
-            self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()[0]
+            self._db.execute('SELECT ncommits FROM cache').fetchone()[0]
         )
 
     def files(self):
diff --git a/hgext/git/index.py b/hgext/git/index.py
--- a/hgext/git/index.py
+++ b/hgext/git/index.py
@@ -18,7 +18,7 @@ from . 
import gitutil pygit2 = gitutil.get_pygit2() -_CURRENT_SCHEMA_VERSION = 4 +_CURRENT_SCHEMA_VERSION = 5 _SCHEMA = ( """ CREATE TABLE refs ( @@ -72,6 +72,12 @@ CREATE TABLE changedfiles ( CREATE INDEX changedfiles_nodes_idx ON changedfiles(node); +-- Cached values to improve performance +CREATE TABLE cache ( + ncommits INTEGER +); +INSERT INTO cache (ncommits) VALUES (NULL); + PRAGMA user_version=%d """ % _CURRENT_SCHEMA_VERSION @@ -399,6 +405,11 @@ def _index_repo( prog.update(pos) _index_repo_commit(gitrepo, db, h) + db.execute(''' + UPDATE cache SET + ncommits = (SELECT COUNT(1) FROM changelog) + ''') + db.commit() if prog is not None: prog.complete() _______________________________________________ Mercurial-devel mailing list Mercurial-devel@lists.mercurial-scm.org https://lists.mercurial-scm.org/mailman/listinfo/mercurial-devel