commit:     6aef2c5968d66e91dc083820db489d85697f3587
Author:     John Turner <jturner.usa <AT> gmail <DOT> com>
AuthorDate: Tue Feb 13 20:45:51 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Fri Feb 16 20:12:49 2024 +0000
URL:        https://gitweb.gentoo.org/proj/gentoolkit.git/commit/?id=6aef2c59

dependencies.py: replace hand rolled depcache with functools.cache

functools.cache caches the output of functions "automatically" without
requiring any manual management of a cache value. When used on class
methods, the cache is associated with each class instance and only
lives as long as the instance does.

The Dependencies.graph_reverse_depends method used a dict to cache
the output from pkg.get_all_depends. The get_all_depends method involves calling
portage's aux_get and parsing the DEPEND string that is returned by
it. This dict has been removed and replaced with functools.cache.

The graph_reverse_depends method did not cache the output of the
"raw=True" get_all_depends calls. This "raw" output is the literal
string value for the pkgdep's *DEPEND variables as returned by aux_get.
Searching this for a category/package sub-string allows quickly ruling
out non-matching pkgdeps, which allows skipping parsing the DEPEND
string into a list of Atoms.

Using functools.cache on the method that fetches the raw DEPEND string
massively improves performance for graph_reverse_depends when
searching for indirect reverse dependencies. "equery depends --indirect emacs"
runtime is ~2s with the raw value being cached, and ~60s
without. Searching for only direct reverse dependencies does not
utilize the cache and does not see any change in runtime for me.

Signed-off-by: John Turner <jturner.usa <AT> gmail.com>
Closes: https://github.com/gentoo/gentoolkit/pull/44
Signed-off-by: Sam James <sam <AT> gentoo.org>

 pym/gentoolkit/dependencies.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/pym/gentoolkit/dependencies.py b/pym/gentoolkit/dependencies.py
index 4564d8c..3bbc757 100644
--- a/pym/gentoolkit/dependencies.py
+++ b/pym/gentoolkit/dependencies.py
@@ -12,6 +12,7 @@ __all__ = ("Dependencies",)
 # =======
 
 import itertools
+from functools import cache
 from enum import StrEnum
 from typing import List, Dict
 
@@ -102,9 +103,11 @@ class Dependencies(Query):
         except portage.exception.InvalidPackageName as err:
             raise errors.GentoolkitInvalidCPV(err)
 
+    @cache
     def get_raw_depends(self) -> str:
         return self._get_depend([depkind for depkind in DependencyKind], raw=True)
 
+    @cache
     def get_depends(self) -> Dict[DependencyKind, List[Atom]]:
         depends = dict()
         for depkind in DependencyKind:
@@ -189,7 +192,6 @@ class Dependencies(Query):
         printer_fn=None,
         # The rest of these are only used internally:
         depth=0,
-        depcache=None,
         seen=None,
         result=None,
     ):
@@ -233,8 +235,6 @@ class Dependencies(Query):
             )
             raise errors.GentoolkitFatalError(err % (self.__class__.__name__,))
 
-        if depcache is None:
-            depcache = dict()
         if seen is None:
             seen = set()
         if result is None:
@@ -250,12 +250,8 @@ class Dependencies(Query):
                 # us the work of instantiating a whole Atom() for *every*
                 # dependency of *every* package in pkgset.
                 continue
-            try:
-                all_depends = depcache[pkgdep]
-            except KeyError:
-                all_depends = pkgdep.get_all_depends()
-                depcache[pkgdep] = all_depends
 
+            all_depends = pkgdep.get_all_depends()
             dep_is_displayed = False
             for dep in all_depends:
                 # TODO: Add ability to determine if dep is enabled by USE flag.
@@ -284,7 +280,6 @@ class Dependencies(Query):
                         only_direct=only_direct,
                         printer_fn=printer_fn,
                         depth=depth + 1,
-                        depcache=depcache,
                         seen=seen,
                         result=result,
                     )

Reply via email to