Hi Jan, Following your advice, I modified my code: it now gets the include and exclude filters from the database:
I ran the following SQL query on my Oracle XE Database: ALTER TABLE rhnContentSource ADD (include_filter VARCHAR(255), exclude_filter VARCHAR(255)); Regards. Baptiste ----- Mail original ----- De: "Baptiste AGASSE" <baptiste.aga...@lyra-network.com> À: spacewalk-devel@redhat.com Envoyé: Jeudi 18 Août 2011 22:50:34 Objet: Re: [Spacewalk-devel] [PATCH] Filters on reposync Hi Jan, OK, I can take a look at this next week. Regards. Baptiste ----- Mail original ----- De: "Jan Pazdziora" <jpazdzi...@redhat.com> À: spacewalk-devel@redhat.com Envoyé: Jeudi 18 Août 2011 13:39:57 Objet: Re: [Spacewalk-devel] [PATCH] Filters on reposync On Wed, Aug 17, 2011 at 08:23:36PM +0200, Baptiste AGASSE wrote: > > Following your advice I have modified my code: > - You can now include and / or exclude packages (with --include and / or > --exclude options) > - Include filter takes priority over exclude filter: if a package meets > both 'include' and 'exclude' rules, it will be included > eg: > exclude = [ 'openoffice.org-langpack-*', ...] > include = [ 'openoffice.org-langpack-en-*', ...] > > - Package filtering is in yum_src.py > - Yum's dependency resolver is now used to find the selected packages' dependencies > - All versions of the packages excluded by a filter are now deleted from DB > and filesystem > - Print elapsed time at end of sync > > Any comments are welcome. I don't really like the fact that the exclude/include options are specified on the spacewalk-repo-sync runtime, rather than being properties of the repository. In other words -- these exclude/include lists should be specified in the database, so that they would be used any time spacewalk-repo-sync is run, no matter if it is run from the command line or via scheduled event by taskomatic. Now that the core functionality is in place, could you amend it some more and have the lists stored in some database table and used from there? 
-- Jan Pazdziora Principal Software Engineer, Satellite Engineering, Red Hat _______________________________________________ Spacewalk-devel mailing list Spacewalk-devel@redhat.com https://www.redhat.com/mailman/listinfo/spacewalk-devel
diff --git a/backend/satellite_tools/repo_plugins/yum_src.py b/backend/satellite_tools/repo_plugins/yum_src.py index bfc6161..9871a32 100644 --- a/backend/satellite_tools/repo_plugins/yum_src.py +++ b/backend/satellite_tools/repo_plugins/yum_src.py @@ -74,14 +74,15 @@ class YumUpdateMetadata(UpdateMetadata): no = self._no_cache.setdefault(file['name'], set()) no.add(un) -class ContentSource: +class ContentSource(yum.YumBase): url = None name = None - repo = None cache_dir = '/var/cache/rhn/reposync/' - def __init__(self, url, name): - self.url = url - self.name = name + repo_id = None + filters = {'include': [], 'exclude': []} + + def __init__(self, url, name, filters = None ): + yum.YumBase.__init__(self) self._clean_cache(self.cache_dir + name) # read the proxy configuration in /etc/rhn/rhn.conf @@ -97,14 +98,14 @@ class ContentSource: else: self.proxy_url = None - def list_packages(self): - """ list packages""" - repo = yum.yumRepo.YumRepository(self.name) - self.repo = repo + if filters: + self.filters = filters + + repo = yum.yumRepo.YumRepository(name) repo.cache = 0 repo.metadata_expire = 0 - repo.mirrorlist = self.url - repo.baseurl = [self.url] + repo.mirrorlist = url + repo.baseurl = [url] repo.basecachedir = self.cache_dir if self.proxy_url is not None: repo.proxy = self.proxy_url @@ -113,13 +114,36 @@ class ContentSource: warnings.disable() repo.baseurlSetup() warnings.restore() - repo.setup(False) - sack = repo.getPackageSack() - sack.populate(repo, 'metadata', None, 0) - list = sack.returnPackages() - to_return = [] - for pack in list: + repos = self.repos.findRepos('*') + for rep in repos: + self.repos.disableRepo(rep.id) + self.repos.delete(rep.id) + + self.repo_id = repo.id + self.repos.add(repo) + self.pkgSack = self.repos.getRepo(self.repo_id).getPackageSack() + self.pkgSack.populate(self.repos.getRepo(self.repo_id), 'metadata', None, 0) + + def getName(self): + return self.repos.getRepo(self.repo_id).id + + def getUrl(self): + return 
self.repos.getRepo(self.repo_id).mirrorlist + + def getFilters(self): + return self.filters + + def getRepo(self): + return self.repos.getRepo(self.repo_id) + + def list_packages(self): + """ list packages""" + return self._list_packages(self.pkgSack.returnPackages()) + + def _list_packages(self, packages_list): + packages = [] + for pack in packages_list: if pack.arch == 'src': continue new_pack = ContentPackage() @@ -130,13 +154,13 @@ class ContentSource: if new_pack.checksum_type == 'sha': new_pack.checksum_type = 'sha1' new_pack.checksum = pack.checksums[0][1] - to_return.append(new_pack) - return to_return + packages.append(new_pack) + return packages def get_package(self, package): """ get package """ check = (self.verify_pkg, (package.unique_id ,1), {}) - return self.repo.getPackage(package.unique_id, checkfunc=check) + return self.repos.getRepo(self.repo_id).getPackage(package.unique_id, checkfunc=check) def verify_pkg(self, fo, pkg, fail): return pkg.verifyLocalPkg() @@ -145,8 +169,55 @@ class ContentSource: shutil.rmtree(directory, True) def get_updates(self): - if not self.repo.repoXML.repoData.has_key('updateinfo'): + if not self.repos.getRepo(self.repo_id).repoXML.repoData.has_key('updateinfo'): return [] um = YumUpdateMetadata() - um.add(self.repo, all=True) + um.add(self.repos.getRepo(self.repo_id), all=True) return um.notices + + def filter_packages(self): + """ filter packages """ + # Returns 2 lists : selected packages, and excluded + selected = [] + excluded = [] + if len(self.filters['include']) > 0: + exactmatch, matched, unmatched = yum.packages.parsePackages(self.pkgSack.returnPackages(), self.filters['include']) + selected = yum.misc.unique(exactmatch + matched) + selected = self._get_packages_dependencies(selected) + + if len(self.filters['exclude']) > 0: + exactmatch, matched, unmatched = yum.packages.parsePackages(self.pkgSack.returnPackages(), self.filters['exclude']) + excluded = yum.misc.unique(exactmatch + matched) + + if 
len(self.filters['exclude']) > 0 and len(self.filters['include']) == 0: + # Only exclude filter: Add others packages in included + packages = self.pkgSack.returnPackages() + for pack in packages: + if excluded.count(pack) > 0: + continue + selected.append(pack) + + selected = self._get_packages_dependencies(selected) + + elif len(self.filters['include']) > 0: + # Only include filter or both include and exclude filters : Add others packages in excluded + packages = self.pkgSack.returnPackages() + for pack in packages: + if selected.count(pack) > 0 or excluded.count(pack) > 0: + continue + excluded.append(pack) + + # Drop packages from excluded if they are also in included or a dependency of included package + for pack in selected: + if excluded.count(pack) > 0: + excluded.remove(pack) + + return (self._list_packages(selected), self._list_packages(excluded)) + + def _get_packages_dependencies(self, packages): + deps = self.findDeps(packages) + for pkg in deps: + for dep in deps[pkg]: + packages.extend(deps[pkg][dep]) + + return yum.misc.unique(packages) diff --git a/backend/satellite_tools/reposync.py b/backend/satellite_tools/reposync.py index 6834adb..af9b960 100644 --- a/backend/satellite_tools/reposync.py +++ b/backend/satellite_tools/reposync.py @@ -38,7 +38,7 @@ class RepoSync: parser = None type = None - urls = None + urls = [] channel_label = None channel = None fail = False @@ -69,7 +69,7 @@ class RepoSync: if not options.url: if options.channel_label: # TODO:need to look at user security across orgs - h = rhnSQL.prepare("""select s.source_url + h = rhnSQL.prepare("""select s.source_url, s.include_filter, s.exclude_filter from rhnContentSource s, rhnChannelContentSource cs, rhnChannel c @@ -79,12 +79,26 @@ class RepoSync: h.execute(label=options.channel_label) source_urls = h.fetchall_dict() or [] if source_urls: - self.urls = [row['source_url'] for row in source_urls] + for row in source_urls: + url = {'url': row['source_url'], 'filters': {'include': [], 
'exclude': []}} + self.print_msg("url : " + row['source_url']) + self.print_msg("include : " + str(row['include_filter'])) + self.print_msg("exclude : " + str(row['exclude_filter'])) + if row['include_filter']: + url['filters']['include'] = row['include_filter'].split() + if row['exclude_filter']: + url['filters']['exclude'] = row['exclude_filter'].split() + self.urls.append(url) else: quit = True self.error_msg("Channel has no URL associated") else: - self.urls = [options.url] + url = {'url': options.url, 'filters': {'include': [], 'exclude': []}} + if options.include: + url['filters']['include'] = options.include.split() + if options.include: + url['filters']['exclude'] = options.exclude.split() + self.urls = [url] if not options.channel_label: quit = True self.error_msg("--channel must be specified") @@ -105,18 +119,21 @@ class RepoSync: if not self.channel or not rhnChannel.isCustomChannel(self.channel['id']): print "Channel does not exist or is not custom" sys.exit(1) - + start = datetime.now() for url in self.urls: - plugin = self.load_plugin()(url, self.channel_label) - self.import_packages(plugin, url) - self.import_updates(plugin, url) + plugin = self.load_plugin()(url['url'], self.channel_label, url['filters']) + self.import_packages(plugin) + self.import_updates(plugin) if self.regen: taskomatic.add_to_repodata_queue_for_channel_package_subscription( [self.channel_label], [], "server.app.yumreposync") taskomatic.add_to_erratacache_queue(self.channel_label) self.update_date() - rhnSQL.commit() - self.print_msg("Sync complete") + rhnSQL.commit() + exec_time = datetime.now() - start + minutes, seconds = divmod(exec_time.seconds, 60) + hours, minutes = divmod(minutes, 60) + self.print_msg("Sync complete in %d day(s) %d hour(s) %d minute(s) %d second(s)" % (exec_time.days, hours, minutes, seconds)) def update_date(self): @@ -132,7 +149,11 @@ class RepoSync: self.parser.add_option('-t', '--type', action='store', dest='type', help='The type of repo, currently 
only "yum" is supported', default='yum') self.parser.add_option('-f', '--fail', action='store_true', dest='fail', default=False , help="If a package import fails, fail the entire operation") self.parser.add_option('-q', '--quiet', action='store_true', dest='quiet', default=False, help="Print no output, still logs output") + self.parser.add_option('-i', '--include', action='store', dest='include', help="Synchronize only the packets that meet the filter and their dependencies") + self.parser.add_option('-e', '--exclude', action='store', dest='exclude', help="Don't synchronize packets that meet the filter") + return self.parser.parse_args() + def load_plugin(self): name = self.type + "_src" @@ -140,9 +161,9 @@ class RepoSync: submod = getattr(mod, name) return getattr(submod, "ContentSource") - def import_updates(self, plug, url): + def import_updates(self, plug): notices = plug.get_updates() - self.print_msg("Repo " + url + " has " + str(len(notices)) + " errata.") + self.print_msg("Repo " + plug.getUrl() + " has " + str(len(notices)) + " errata.") if len(notices) > 0: self.upload_updates(notices) @@ -307,10 +328,40 @@ class RepoSync: importer.run() self.regen = True - def import_packages(self, plug, url): - packages = plug.list_packages() + def import_packages(self, plug): + repo_packages = plug.list_packages() + filters = plug.getFilters() to_process = [] - self.print_msg("Repo " + url + " has " + str(len(packages)) + " packages.") + to_delete = [] + self.print_msg("Repo " + plug.getUrl() + " has " + str(len(repo_packages)) + " packages.") + + if len(filters['include']) > 0 or len(filters['exclude']) > 0: + selected, excluded = plug.filter_packages() + packages = selected + self.print_msg("Repo " + plug.getUrl() + " has " + str(len(selected)) + " packages selected by filters.") + self.print_msg("Repo " + plug.getUrl() + " has " + str(len(excluded)) + " packages excluded by filters.") + + for pack in excluded: + db_pack = rhnPackage.get_info_for_package( + 
[pack.name, pack.version, pack.release, pack.epoch, pack.arch], + self.channel_label) + + to_remove = False + to_unlink = False + if db_pack['path']: + pack.path = os.path.join(CFG.MOUNT_POINT, db_pack['path']) + if os.path.exists(pack.path): + to_remove = True + if db_pack['channel_label'] == self.channel_label: + # package is already in the channel + to_unlink = True + + if to_remove or to_unlink: + to_delete.append((pack, db_pack, to_remove, to_unlink)) + else: + self.print_msg("Repo " + plug.getUrl() + " has no filters set.") + packages = repo_packages + for pack in packages: db_pack = rhnPackage.get_info_for_package( [pack.name, pack.version, pack.release, pack.epoch, pack.arch], @@ -327,30 +378,47 @@ class RepoSync: if db_pack['channel_label'] == self.channel_label: # package is already in the channel to_link = False - elif db_pack['channel_label'] == self.channel_label: - # different package with SAME NVREA - self.disassociate_package(db_pack) + elif db_pack['channel_label'] == self.channel_label: + # different package with SAME NVREA + self.disassociate_package(db_pack) if to_download or to_link: to_process.append((pack, to_download, to_link)) num_to_process = len(to_process) - if num_to_process == 0: + num_to_delete = len(to_delete) + if num_to_process == 0 and num_to_delete == 0: self.print_msg("No new packages to sync.") return self.regen=True - is_non_local_repo = (url.find("file://") < 0) + is_non_local_repo = (plug.getUrl().find("file://") < 0) # try/except/finally doesn't work in python 2.4 (RHEL5), so here's a hack def finally_remove(path): if is_non_local_repo and path and os.path.exists(path): os.remove(path) + for (index, what) in enumerate(to_delete): + pack, db_pack, to_remove, to_unlink = what + try: + self.print_msg("%d/%d : Delete %s" % (index+1, num_to_delete, pack.getNVREA())) + if to_remove: + os.remove(pack.path) + if to_unlink: + self.disassociate_package(db_pack) + except KeyboardInterrupt: + raise + except Exception, e: + 
self.error_msg(e) + if self.fail: + raise + continue + for (index, what) in enumerate(to_process): pack, to_download, to_link = what localpath = None try: - self.print_msg("%d/%d : %s" % (index+1, num_to_process, pack.getNVREA())) + self.print_msg("%d/%d : Sync %s" % (index+1, num_to_process, pack.getNVREA())) if to_download: pack.path = localpath = plug.get_package(pack) pack.load_checksum_from_header()
_______________________________________________ Spacewalk-devel mailing list Spacewalk-devel@redhat.com https://www.redhat.com/mailman/listinfo/spacewalk-devel