On Sat, Jun 27, 2020, at 1:34 AM, Chun-Yu Shei wrote: > According to cProfile, catpkgsplit is called up to 1-5.5 million times > during "emerge -uDvpU --with-bdeps=y @world". Adding a dict to cache its > results reduces the time for this command from 43.53 -> 41.53 seconds -- > a 4.8% speedup. > --- > lib/portage/versions.py | 7 +++++++ > 1 file changed, 7 insertions(+) > > diff --git a/lib/portage/versions.py b/lib/portage/versions.py > index 0c21373cc..ffec316ce 100644 > --- a/lib/portage/versions.py > +++ b/lib/portage/versions.py > @@ -312,6 +312,7 @@ def _pkgsplit(mypkg, eapi=None): > > _cat_re = re.compile('^%s$' % _cat, re.UNICODE) > _missing_cat = 'null' > +_catpkgsplit_cache = {} > > def catpkgsplit(mydata, silent=1, eapi=None): > """ > @@ -331,6 +332,11 @@ def catpkgsplit(mydata, silent=1, eapi=None): > return mydata.cpv_split > except AttributeError: > pass > + > + cache_entry = _catpkgsplit_cache.get(mydata) > + if cache_entry is not None: > + return cache_entry > + > mysplit = mydata.split('/', 1) > p_split = None > if len(mysplit) == 1: > @@ -343,6 +349,7 @@ def catpkgsplit(mydata, silent=1, eapi=None): > if not p_split: > return None > retval = (cat, p_split[0], p_split[1], p_split[2]) > + _catpkgsplit_cache[mydata] = retval > return retval > > class _pkg_str(_unicode): > -- > 2.27.0.212.ge8ba1cc988-goog >
There are libraries that provide decorators, etc., for caching and memoization. Have you evaluated any of those? One is available in the standard library: https://docs.python.org/dev/library/functools.html#functools.lru_cache I mention this because using one of them would improve code clarity.