So anyplace we retrieve pkgIds as a unique identifier it would seem to
make sense to grab pkgKey, too and stuff that in the package object so
we can get to it later for doing joins with it instead of pkgId. Is that
what you were thinking?
I did that but it didn't help (see attachment). I tried to optimize
returnPrco() but it kept using up a lot of time. I was wondering if there
was a problem with the caching, but it turned out that it really has to load
lots of Prcos.
Reason for this is that the sqlitesack.search*(name) returns a list of
package objects. Because of that the surrounding code has to load the
provides for all returned packages and has to search through them.
Easiest solution would be to return a list of (pkg, name, flag, (e,v,r))
tuples or a dict {pkg -> list of (n, f, (e,v,r))} (or use new Requires and
Conflicts objects). Even better would be to also pass flag and (e,v,r) to
the search methods. That way the sqlitesack could put as much intelligence
into the SQL queries as it wants to further reduce the amount of data being
processed.
The attached patch changes all pkgId to pkgKey lookups and loads every
package only once. I renamed ._excluded to .excluded to avoid usage with
pkgId insted of pkgKey.
Florian
Index: yum/sqlitesack.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/sqlitesack.py,v
retrieving revision 1.87
diff -u -r1.87 sqlitesack.py
--- yum/sqlitesack.py 9 Mar 2007 17:05:57 -0000 1.87
+++ yum/sqlitesack.py 22 Mar 2007 15:00:57 -0000
@@ -65,10 +65,12 @@
except (IndexError, KeyError):
return None
- for item in ['name', 'arch', 'epoch', 'version', 'release', 'pkgId']:
+ for item in ['name', 'arch', 'epoch', 'version', 'release', 'pkgId', 'pkgKey']:
try:
setattr(self, item, db_obj[item])
except (IndexError, KeyError):
+ if item == 'pkgKey':
+ raise
pass
try:
@@ -99,8 +101,8 @@
dbname = db2simplemap[varname]
cache = self.sack.primarydb[self.repo]
c = cache.cursor()
- executeSQL(c, "select %s from packages where pkgId = ?" %(dbname,),
- (self.pkgId,))
+ executeSQL(c, "select %s from packages where pkgKey = ?" %(dbname,),
+ (self.pkgKey,))
r = c.fetchone()
setattr(self, varname, r[0])
@@ -116,11 +118,8 @@
self.sack.populate(self.repo, mdtype='filelists')
cache = self.sack.filelistsdb[self.repo]
cur = cache.cursor()
- executeSQL(cur, "select filelist.dirname as dirname, "
- "filelist.filetypes as filetypes, "
- "filelist.filenames as filenames from filelist,packages "
- "where packages.pkgId = ? and "
- "packages.pkgKey = filelist.pkgKey", (self.pkgId,))
+ executeSQL(cur, "select dirname, filetypes, filenames from filelist "
+ "where pkgKey = ?", (self.pkgKey,))
for ob in cur:
dirname = ob['dirname']
filetypes = decodefiletypelist(ob['filetypes'])
@@ -148,11 +147,8 @@
return
cache = self.sack.otherdb[self.repo]
cur = cache.cursor()
- executeSQL(cur, "select changelog.date as date, "
- "changelog.author as author, "
- "changelog.changelog as changelog "
- "from changelog,packages where packages.pkgId = ? "
- "and packages.pkgKey = changelog.pkgKey", (self.pkgId,))
+ executeSQL(cur, "select date, author, changelog "
+ "from changelog where pkgKey = ?", (self.pkgKey,))
for ob in cur:
result.append( (ob['date'], ob['author'], ob['changelog']) )
self._changelog = result
@@ -178,19 +174,13 @@
if not self.prco[prcotype]:
cache = self.sack.primarydb[self.repo]
cur = cache.cursor()
- query = "select %s.name as name, %s.version as version, "\
- "%s.release as release, %s.epoch as epoch, "\
- "%s.flags as flags from %s,packages "\
- "where packages.pkgId = '%s' and "\
- "packages.pkgKey = %s.pkgKey" % (prcotype, prcotype,
- prcotype, prcotype, prcotype, prcotype, self.pkgId,
- prcotype)
+ query = ("select name, version, release, epoch, flags from %s "
+ "where pkgKey = %s ") % (prcotype, self.pkgKey)
executeSQL(cur, query)
for ob in cur:
self.prco[prcotype].append((ob['name'], ob['flags'],
(ob['epoch'], ob['version'],
ob['release'])))
-
return RpmBase.returnPrco(self, prcotype, printable)
class YumSqlitePackageSack(yumRepo.YumPackageSack):
@@ -204,6 +194,7 @@
self.filelistsdb = {}
self.otherdb = {}
self.excludes = {}
+ self.pkgs = {}
def __len__(self):
for (rep,cache) in self.primarydb.items():
@@ -221,20 +212,19 @@
# Remove a package
# Because we don't want to remove a package from the database we just
# add it to the exclude list
- def delPackage(self, obj):
+ def delPackage(self, obj):
if not self.excludes.has_key(obj.repo):
self.excludes[obj.repo] = {}
- self.excludes[obj.repo][obj.pkgId] = 1
+ self.excludes[obj.repo][obj.pkgKey] = 1
self.pkglist = None
-
- def _excluded(self, repo, pkgId):
+ def excluded(self, repo, pkgKey):
if self.excludes.has_key(repo):
- if self.excludes[repo].has_key(pkgId):
+ if self.excludes[repo].has_key(pkgKey):
return True
return False
-
+
def addDict(self, repo, datatype, dataobj, callback=None):
if self.added.has_key(repo):
if datatype in self.added[repo]:
@@ -257,6 +247,47 @@
self.added[repo].append(datatype)
+
+ def getPackageByKey(self, repo, pkgKey):
+ if self.excluded(repo, pkgKey):
+ return None
+ pkgs = self.pkgs.setdefault(repo, {})
+ if not pkgKey in pkgs:
+ cur = self.primarydb[repo].cursor()
+ executeSQL(cur, "select * from packages where pkgKey = ?",
+ (pkgKey,))
+ pkgs[pkgKey] = self.pc(repo, cur.fetchone())
+ return pkgs[pkgKey]
+
+ def getPackagesByKeys(self, repo, cur):
+ needed = []
+ result = []
+
+ pkgs = self.pkgs.setdefault(repo, {})
+ for ob in cur:
+ pkgKey = ob['pkgKey']
+ if self.excluded(repo, pkgKey):
+ continue
+ if pkgKey in pkgs:
+ result.append(pkgs[pkgKey])
+ else:
+ needed.append(pkgKey)
+ if len(needed) == 1:
+ pkgKey = needed[0]
+ executeSQL(cur, "select * from packages where pkgKey = ?",
+ (pkgKey,))
+ pkgs[pkgKey] = self.pc(repo, cur.fetchone())
+ result.append(pkgs[pkgKey])
+ elif needed:
+ executeSQL(cur, "select * from packages where pkgKey in %s" %
+ str(tuple(needed)))
+ for ob in cur:
+ po = self.pc(repo, ob)
+ pkgs[po.pkgKey] = po
+ result.append(po)
+ return result
+
+
# Get all files for a certain pkgId from the filelists.xml metadata
# Search packages that either provide something containing name
@@ -291,17 +322,11 @@
# grab the entries that are a single file in the
# filenames section, use sqlites globbing if it is a glob
- executeSQL(cur, "select packages.pkgId as pkgId from filelist, \
- packages where packages.pkgKey = filelist.pkgKey and \
- length(filelist.filetypes) = 1 and \
- filelist.dirname || ? || filelist.filenames \
+ executeSQL(cur, "select pkgKey from filelist\
+ where length(filetypes) = 1 and \
+ dirname || ? || filenames \
%s ?" % querytype, ('/', name))
- for ob in cur:
- if self._excluded(rep, ob['pkgId']):
- continue
- pkg = self.getPackageDetails(ob['pkgId'])
- po = self.pc(rep, pkg)
- pkgs.append(po)
+ pkgs.extend(self.getPackagesByKeys(rep, cur))
def filelist_globber(dirname, filenames):
files = filenames.split('/')
@@ -315,18 +340,11 @@
cache.create_function("filelist_globber", 2, filelist_globber)
# for all the ones where filenames is multiple files,
# make the files up whole and use python's globbing method
- executeSQL(cur, "select packages.pkgID as pkgID \
- from filelist,packages where \
- packages.pkgKey = filelist.pkgKey \
- and length(filelist.filetypes) > 1 \
- and filelist_globber(filelist.dirname,filelist.filenames)")
+ executeSQL(cur, "select pkgKey from filelist where \
+ length(filetypes) > 1 and \
+ filelist_globber(dirname,filenames)")
- for ob in cur:
- if self._excluded(rep, ob['pkgId']):
- continue
- pkg = self.getPackageDetails(ob['pkgId'])
- po = self.pc(rep, pkg)
- pkgs.append(po)
+ pkgs.extend(self.getPackagesByKeys(rep, cur))
pkgs = misc.unique(pkgs)
return pkgs
@@ -337,7 +355,7 @@
if len(fields) < 1:
return result
- basestring="select DISTINCT pkgId from packages where %s like '%%%s%%' " % (fields[0], searchstring)
+ basestring="select DISTINCT pkgKey from packages where %s like '%%%s%%' " % (fields[0], searchstring)
for f in fields[1:]:
basestring = "%s or %s like '%%%s%%' " % (basestring, f, searchstring)
@@ -345,12 +363,7 @@
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
executeSQL(cur, basestring)
- for ob in cur:
- if self._excluded(rep, ob['pkgId']):
- continue
- pkg = self.getPackageDetails(ob['pkgId'])
- result.append((self.pc(rep,pkg)))
-
+ result.extend(self.getPackagesByKeys(rep, cur))
return result
def returnObsoletes(self):
@@ -358,7 +371,7 @@
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
executeSQL(cur, "select packages.name as name,\
- packages.pkgId as pkgId,\
+ packages.pkgKey as pkgKey,\
packages.arch as arch, packages.epoch as epoch,\
packages.release as release, packages.version as version,\
obsoletes.name as oname, obsoletes.epoch as oepoch,\
@@ -368,7 +381,7 @@
for ob in cur:
# If the package that is causing the obsoletes is excluded
# continue without processing the obsoletes
- if self._excluded(rep, ob['pkgId']):
+ if self.excluded(rep, ob['pkgKey']):
continue
key = ( ob['name'],ob['arch'],
@@ -416,11 +429,8 @@
results = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select DISTINCT packages.* from %s,packages where %s.name %s ? and %s.pkgKey=packages.pkgKey" % (prcotype,prcotype,querytype,prcotype), (name,))
- for x in cur:
- if self._excluded(rep, x['pkgId']):
- continue
- results.append(self.pc(rep, x))
+ executeSQL(cur, "select DISTINCT pkgKey from %s where name %s ?" % (prcotype,querytype), (name,))
+ results.extend(self.getPackagesByKeys(rep, cur))
# If it's not a provides or a filename, we are done
if prcotype != "provides" or name[0] != '/':
@@ -430,11 +440,8 @@
# If it is a filename, search the primary.xml file info
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select DISTINCT packages.* from files,packages where files.name %s ? and files.pkgKey = packages.pkgKey" % querytype, (name,))
- for x in cur:
- if self._excluded(rep,x['pkgId']):
- continue
- results.append(self.pc(rep,x))
+ executeSQL(cur, "select DISTINCT pkgKey from files where name %s ?" % querytype, (name,))
+ results.extend(self.getPackagesByKeys(rep, cur))
matched = 0
globs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
@@ -549,9 +556,9 @@
simplelist = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages")
+ executeSQL(cur, "select pkgId,pkgKey,name,epoch,version,release,arch from packages")
for pkg in cur:
- if self._excluded(rep, pkg['pkgId']):
+ if self.excluded(rep, pkg['pkgKey']):
continue
simplelist.append((pkg['name'], pkg['arch'], pkg['epoch'], pkg['version'], pkg['release']))
@@ -571,11 +578,8 @@
allpkg = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages where name=? and arch=?",naTup)
- for x in cur:
- if self._excluded(rep, x['pkgId']):
- continue
- allpkg.append(self.pc(rep,x))
+ executeSQL(cur, "select pkgKey from packages where name=? and arch=?",naTup)
+ allpkg.extend(self.getPackagesByKeys(rep, cur))
# if we've got zilch then raise
if not allpkg:
@@ -592,11 +596,8 @@
allpkg = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages where name=?", (name,))
- for x in cur:
- if self._excluded(rep, x['pkgId']):
- continue
- allpkg.append(self.pc(rep,x))
+ executeSQL(cur, "select pkgKey from packages where name=?", (name,))
+ allpkg.extend(self.getPackagesByKeys(rep, cur))
# if we've got zilch then raise
if not allpkg:
@@ -620,12 +621,10 @@
for (rep, db) in self.primarydb.items():
cur = db.cursor()
executeSQL(cur, query)
- for pkg in cur:
- if self._excluded(rep, pkg['pkgId']):
- continue
- if p in unmatched:
- unmatched.remove(p)
- matchres.append(self.pc(rep, pkg))
+ res = self.getPackagesByKeys(rep, cur)
+ if res and p in unmatched:
+ unmatched.remove(p)
+ matchres.extend(res)
exactmatch = misc.unique(exactmatch)
matched = misc.unique(matched)
@@ -634,12 +633,11 @@
def returnPackages(self, repoid=None):
"""Returns a list of packages, only containing nevra information """
-
returnList = []
if hasattr(self, 'pkgobjlist'):
if self.pkgobjlist:
for po in self.pkgobjlist:
- if self._excluded(po.repo, po.pkgId):
+ if self.excluded(po.repo, po.pkgKey):
continue
returnList.append(po)
return returnList
@@ -648,15 +646,10 @@
if (repoid == None or repoid == repo.id):
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages")
- for x in cur:
- if self._excluded(repo,x['pkgId']):
- continue
-
- returnList.append(self.pc(repo,x))
+ executeSQL(cur, "select pkgKey from packages")
+ returnList.extend(self.getPackagesByKeys(repo, cur))
self.pkgobjlist = returnList
-
return returnList
def searchNevra(self, name=None, epoch=None, ver=None, rel=None, arch=None):
@@ -672,7 +665,7 @@
return returnList
# make up our execute string
- q = "select * from packages WHERE"
+ q = "select pkgKey from packages WHERE"
for (col, var) in [('name', name), ('epoch', epoch), ('version', ver),
('arch', arch), ('release', rel)]:
if var:
@@ -686,10 +679,7 @@
cur = cache.cursor()
#executeSQL(cur, "select * from packages WHERE name = %s AND epoch = %s AND version = %s AND release = %s AND arch = %s" , (name,epoch,ver,rel,arch))
executeSQL(cur, q)
- for x in cur:
- if self._excluded(rep, x['pkgId']):
- continue
- returnList.append(self.pc(rep,x))
+ returnList.extend(self.getPackagesByKeys(rep, cur))
return returnList
def excludeArchs(self, archlist):
@@ -698,11 +688,12 @@
for (rep, cache) in self.primarydb.items():
cur = cache.cursor()
- myq = "select pkgId from packages where arch not in %s" % arch_query
+ myq = "select pkgKey from packages where arch not in %s" % arch_query
executeSQL(cur, myq)
+ d = self.excludes.setdefault(rep, {})
for row in cur:
- obj = self.pc(rep,row)
- self.delPackage(obj)
+ d[row['pkgKey']] = 1
+ self.pkglist = None
# Simple helper functions
@@ -735,7 +726,7 @@
# Check against name, nameArch, nameVerRelArch, nameVer, nameVerRel,
# envra, nevra
PARSE_QUERY = """
-select pkgId, name, arch, epoch, version, release from packages
+select pkgKey, pkgId, name, arch, epoch, version, release from packages
where name %(op)s '%(q)s'
or name || '.' || arch %(op)s '%(q)s'
or name || '-' || version %(op)s '%(q)s'
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel