commit:     5424b91133b3b155b0e6ddc08fb46ba301d971f8
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Nov  1 15:06:01 2014 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sun Dec  7 23:10:48 2014 +0000
URL:        http://sources.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=5424b911

Add IndexedVardb class.

Searching of installed packages is optimized to take advantage of
vardbapi._aux_cache, which is backed by vdb_metadata.pickle.
This class only implements a subset of vardbapi functionality that is
useful for searching incrementally. For this reason, the cp_all method
returns an ordered iterator instead of a list, so that search results
can be displayed incrementally.

X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718

---
 pym/portage/dbapi/IndexedVardb.py | 94 +++++++++++++++++++++++++++++++++++++++
 pym/portage/dbapi/vartree.py      | 23 +++++++---
 2 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/pym/portage/dbapi/IndexedVardb.py b/pym/portage/dbapi/IndexedVardb.py
new file mode 100644
index 0000000..424defc
--- /dev/null
+++ b/pym/portage/dbapi/IndexedVardb.py
@@ -0,0 +1,94 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import portage
+from portage.dep import Atom
+from portage.versions import _pkg_str
+
+class IndexedVardb(object):
+       """
+       A vardbapi interface that sacrifices validation in order to
+       improve performance. It takes advantage of vardbdbapi._aux_cache,
+       which is backed by vdb_metadata.pickle. Since _aux_cache is
+       not updated for every single merge/unmerge (see
+       _aux_cache_threshold), the list of packages is obtained directly
+       from the real vardbapi instance. If a package is missing from
+       _aux_cache, then its metadata is obtained using the normal
+       (validated) vardbapi.aux_get method.
+
+       For performance reasons, the match method only supports package
+       name and version constraints.
+       """
+
+       # Match returns unordered results.
+       match_unordered = True
+
+       _copy_attrs = ('cpv_exists',
+               '_aux_cache_keys', '_cpv_sort_ascending')
+
+       def __init__(self, vardb):
+               self._vardb = vardb
+
+               for k in self._copy_attrs:
+                       setattr(self, k, getattr(vardb, k))
+
+               self._cp_map = None
+
+       def cp_all(self):
+               """
+               Returns an ordered iterator instead of a list, so that search
+               results can be displayed incrementally.
+               """
+               if self._cp_map is not None:
+                       return iter(sorted(self._cp_map))
+
+               return self._iter_cp_all()
+
+       def _iter_cp_all(self):
+               self._cp_map = cp_map = {}
+               previous_cp = None
+               for cpv in self._vardb._iter_cpv_all(sort = True):
+                       cp = portage.cpv_getkey(cpv)
+                       if cp is not None:
+                               cp_list = cp_map.get(cp)
+                               if cp_list is None:
+                                       cp_list = []
+                                       cp_map[cp] = cp_list
+                               cp_list.append(cpv)
+                               if previous_cp is not None and \
+                                       previous_cp != cp:
+                                       yield previous_cp
+                               previous_cp = cp
+
+               if previous_cp is not None:
+                       yield previous_cp
+
+       def match(self, atom):
+               """
+               For performance reasons, only package name and version
+               constraints are supported, and the returned list is
+               unordered.
+               """
+               if not isinstance(atom, Atom):
+                       atom = Atom(atom)
+               cp_list = self._cp_map.get(atom.cp)
+               if cp_list is None:
+                       return []
+
+               if atom == atom.cp:
+                       return cp_list[:]
+               else:
+                       return portage.match_from_list(atom, cp_list)
+
+       def aux_get(self, cpv, attrs, myrepo=None):
+               pkg_data = self._vardb._aux_cache["packages"].get(cpv)
+               if not isinstance(pkg_data, tuple) or \
+                       len(pkg_data) != 2 or \
+                       not isinstance(pkg_data[1], dict):
+                       pkg_data = None
+               if pkg_data is None:
+                       # It may be missing from _aux_cache due to
+                       # _aux_cache_threshold.
+                       return self._vardb.aux_get(cpv, attrs)
+               metadata = pkg_data[1]
+               return [metadata.get(k, "") for k in attrs]

diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py
index 4840492..9c8b276 100644
--- a/pym/portage/dbapi/vartree.py
+++ b/pym/portage/dbapi/vartree.py
@@ -435,6 +435,9 @@ class vardbapi(dbapi):
                (generally this is only necessary in critical sections that
                involve merge or unmerge of packages).
                """
+               return list(self._iter_cpv_all(use_cache=use_cache))
+
+       def _iter_cpv_all(self, use_cache=True, sort=False):
                returnme = []
                basepath = os.path.join(self._eroot, VDB_PATH) + os.path.sep
 
@@ -451,26 +454,32 @@ class vardbapi(dbapi):
                                        del e
                                        return []
 
-               for x in listdir(basepath, EmptyOnError=1, ignorecvs=1, dirsonly=1):
+               catdirs = listdir(basepath, EmptyOnError=1, ignorecvs=1, dirsonly=1)
+               if sort:
+                       catdirs.sort()
+
+               for x in catdirs:
                        if self._excluded_dirs.match(x) is not None:
                                continue
                        if not self._category_re.match(x):
                                continue
-                       for y in listdir(basepath + x, EmptyOnError=1, dirsonly=1):
+
+                       pkgdirs = listdir(basepath + x, EmptyOnError=1, dirsonly=1)
+                       if sort:
+                               pkgdirs.sort()
+
+                       for y in pkgdirs:
                                if self._excluded_dirs.match(y) is not None:
                                        continue
                                subpath = x + "/" + y
                                # -MERGING- should never be a cpv, nor should files.
                                try:
-                                       if catpkgsplit(subpath) is None:
-                                               self.invalidentry(self.getpath(subpath))
-                                               continue
+                                       subpath = _pkg_str(subpath)
                                except InvalidData:
                                        self.invalidentry(self.getpath(subpath))
                                        continue
-                               returnme.append(subpath)
 
-               return returnme
+                               yield subpath
 
        def cp_all(self, use_cache=1):
                mylist = self.cpv_all(use_cache=use_cache)

Reply via email to