Revision: 23295 http://sourceforge.net/p/gar/code/23295 Author: wahwah Date: 2014-03-30 16:55:59 +0000 (Sun, 30 Mar 2014) Log Message: ----------- safe-remove-package: Do not cache on disk
Reusing the for-generation endpoint still gives us reasonable running times and lets us let go of caching on local disk. Modified Paths: -------------- csw/mgar/gar/v2/lib/python/representations.py csw/mgar/gar/v2/lib/python/rest.py csw/mgar/gar/v2/lib/python/safe_remove_package.py Modified: csw/mgar/gar/v2/lib/python/representations.py =================================================================== --- csw/mgar/gar/v2/lib/python/representations.py 2014-03-30 16:24:22 UTC (rev 23294) +++ csw/mgar/gar/v2/lib/python/representations.py 2014-03-30 16:55:59 UTC (rev 23295) @@ -27,3 +27,6 @@ ElfSymInfo = collections.namedtuple('Symbol', ['bind', 'flags', 'shndx', 'soname', 'symbol', 'version']) + +# Since we use this idea everywhere, it deserves its own named tuple. +CatalogSpec = collections.namedtuple('CatalogSpec', ['catrel', 'arch', 'osrel']) Modified: csw/mgar/gar/v2/lib/python/rest.py =================================================================== --- csw/mgar/gar/v2/lib/python/rest.py 2014-03-30 16:24:22 UTC (rev 23294) +++ csw/mgar/gar/v2/lib/python/rest.py 2014-03-30 16:55:59 UTC (rev 23295) @@ -382,8 +382,8 @@ url += '?use_in_catalogs=0' return self._CurlPut(url, []) - def GetCatalogForGeneration(self, catrel, arch, osrel): - url = (self.pkgdb_url + "/catalogs/%s/%s/%s/for-generation/" + def GetCatalogForGenerationAsDicts(self, catrel, arch, osrel): + url = (self.pkgdb_url + "/catalogs/%s/%s/%s/for-generation/as-dicts/" % (catrel, arch, osrel)) logging.debug("GetCatalogForGeneration(): url=%r", url) data = urllib2.urlopen(url).read() Modified: csw/mgar/gar/v2/lib/python/safe_remove_package.py =================================================================== --- csw/mgar/gar/v2/lib/python/safe_remove_package.py 2014-03-30 16:24:22 UTC (rev 23294) +++ csw/mgar/gar/v2/lib/python/safe_remove_package.py 2014-03-30 16:55:59 UTC (rev 23295) @@ -19,9 +19,10 @@ import sys import urllib2 +from lib.python import common_constants from lib.python import configuration +from lib.python.representations import CatalogSpec from lib.python import rest -from lib.python import common_constants USAGE = """%prog --os-releases=SunOS5.10,SunOS5.11 -c <catalogname> @@ -63,47 +64,44 @@ """ def __init__(self, rest_client): + """Initialize class instance. + + self.cached_catalogs is a dict of: + CatalogSpec: [(md5, pkgname), ...] + """ self.cached_catalogs = {} + self.pkgs_by_md5 = {} + self.pkgs_by_pkgname = {} self.cp = rest.CachedPkgstats(fn_pkgstatsdb, rest_client) self.rest_client = rest_client def MakeRevIndex(self, catrel, arch, osrel, quiet=False): - key = (catrel, arch, osrel) + def PkgnameListFromString(s): + if s == 'none': + return [] + return s.split('|') + logging.info('MakeRevIndex(%r, %r, %r, %r)', catrel, arch, osrel, quiet) + key = CatalogSpec(catrel=catrel, arch=arch, osrel=osrel) if key in self.cached_catalogs: return - fn = fn_revdeps % key - if os.path.exists(fn): - with open(fn, "r") as fd: - self.cached_catalogs[key] = cjson.decode(fd.read()) - return - # This should be rewritten to use RestClient.GetCatalogForGeneration - logging.info( - "Building a database of reverse dependencies. " - "This can take up to multiple hours.") - catalog = self.rest_client.GetCatalog(*key) + # This takes 30-40s + catalog = self.rest_client.GetCatalogForGenerationAsDicts(catrel, arch, osrel) rev_deps = {} - counter = 0 - for pkg_simple in catalog: - md5 = pkg_simple["md5_sum"] - short_data = self.cp.GetDeps(md5) - pkgname = short_data["pkgname"] - for dep_pkgname, _ in short_data["deps"]: - rev_dep_set = rev_deps.setdefault(dep_pkgname, []) - rev_dep_set.append((md5, pkgname)) - if not quiet and not counter % EVERY_N_DOTS: - sys.stdout.write(".") - sys.stdout.flush() - counter += 1 - sys.stdout.write("\n") + for pkg in catalog: + self.pkgs_by_pkgname[pkg['pkgname']] = pkg + self.pkgs_by_md5[pkg['md5_sum']] = pkg + for pkg in catalog: + deps = PkgnameListFromString(pkg['deps']) + for dep_pkgname in deps: + rev_dep_lst = rev_deps.setdefault(dep_pkgname, []) + rev_dep_lst.append((pkg['md5_sum'], pkg['pkgname'])) self.cached_catalogs[key] = rev_deps - with open(fn, "w") as fd: - fd.write(cjson.encode(self.cached_catalogs[key])) def RevDepsByMD5(self, catrel, arch, osrel, md5_sum): self.MakeRevIndex(catrel, arch, osrel) - pkg = self.cp.GetPkgstats(md5_sum) - pkgname = pkg["basic_stats"]["pkgname"] - key = (catrel, arch, osrel) + pkg = self.pkgs_by_md5[md5_sum] + pkgname = pkg['pkgname'] + key = CatalogSpec(catrel=catrel, arch=arch, osrel=osrel) if pkgname in self.cached_catalogs[key]: return self.cached_catalogs[key][pkgname] else: @@ -111,7 +109,7 @@ def RevDepsByPkg(self, catrel, arch, osrel, pkgname): self.MakeRevIndex(catrel, arch, osrel) - key = (catrel, arch, osrel) + key = CatalogSpec(catrel=catrel, arch=arch, osrel=osrel) if pkgname in self.cached_catalogs[key]: return self.cached_catalogs[key][pkgname] else: @@ -189,7 +187,8 @@ def main(): parser = optparse.OptionParser(USAGE) - parser.add_option("-c", "--catalogname", dest="catalogname", help='the name of the package in catalog') + parser.add_option("-c", "--catalogname", dest="catalogname", + help='the name of the package in catalog') parser.add_option("--os-releases", dest="os_releases", help=("Comma separated OS releases, e.g. " "SunOS5.9,SunOS5.10")) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.