Hi Michael,
Following your advice, I have modified my code:
- You can now include and / or exclude packages (with --include and / or
--exclude options)
- The include filter takes priority over the exclude filter: if a package
matches both 'include' and 'exclude' rules, it will be included
eg:
exclude = [ 'openoffice.org-langpack-*', ...]
include = [ 'openoffice.org-langpack-en-*', ...]
- Package filtering is in yum_src.py
- Yum's dependency resolver is now used to find the dependencies of the selected packages
- All versions of the packages excluded by a filter are now deleted from the DB
and the filesystem
- Print elapsed time at end of sync
Any comments are welcome.
Regards.
Baptiste
----- Mail original -----
De: "Michael Mraka" <michael.mr...@redhat.com>
À: spacewalk-devel@redhat.com
Envoyé: Lundi 8 Août 2011 11:45:41
Objet: Re: [Spacewalk-devel] [PATCH] Filters on reposync
Baptiste AGASSE wrote:
% Hi all,
%
% I've modified "/backend/satellite_tools/reposync.py" to add "filters"
% support with dependencies solving (like the "--rpm-list" option of
% cobbler) to spacewalk reposync command.
Hi Baptiste,
this is a good feature, thanks for sharing it.
% It add the "--filters" option to reposync eg: --filters "osad rhncfg-*
% foo* bar"
So the --filters 'xyz*' means to include xyz* packages only; It would be
great to have also the opposite option to exclude some packages which can
even stack on each other. E.g.
spacewalk-repo-sync --exclude=openoffice.org-langpack-* \
--include=openoffice.org-langpack-en-* ...
% It allow to download only selected packages from a repository in order
% to save disk space, mainly if you use only few packages from it, and
% deletes packages already present on your system (in spacewalk server
% and on the filesystem) that don't meet any filters (only packages with
% the same NVREA)
%
% TODO: - Search in spacewalk server older versions of packages
% downloaded previously that don't match filters and remove them (I will
% work on it). - Make it available from web UI in repositories
% management part.
%
% Maybe someone can work on the web UI and the database schema to make
% this option available directly from web UI ? (i'm not familiar with
% java and oracle DB)
%
% Any comments are welcome, it's the first time that i'm programming in
% python :).
% diff --git a/backend/satellite_tools/reposync.py
b/backend/satellite_tools/reposync.py
% index 6834adb..0428bb9 100644
% --- a/backend/satellite_tools/reposync.py
% +++ b/backend/satellite_tools/reposync.py
...
% @@ -132,6 +137,7 @@ class RepoSync:
% self.parser.add_option('-t', '--type', action='store', dest='type',
help='The type of repo, currently only "yum" is supported', default='yum')
% self.parser.add_option('-f', '--fail', action='store_true',
dest='fail', default=False , help="If a package import fails, fail the entire
operation")
% self.parser.add_option('-q', '--quiet', action='store_true',
dest='quiet', default=False, help="Print no output, still logs output")
% + self.parser.add_option('-p', '--filters', action='store_true',
dest='filters', help="Synchronize only the packets that meet the filter and
their dependencies")
action='store_true' means it's a True/False option, but you likely want to
return a string (a list of patterns)
% return self.parser.parse_args()
%
% def load_plugin(self):
% @@ -308,9 +314,43 @@ class RepoSync:
% self.regen = True
%
...
% + def filter_packages(self, filters, packages):
% + # Returns 3 lists : selected packages, dependencies, and others
% + selected = []
% + dependencies = []
% + others = []
% + for pack in packages:
% + # Select all packages that match one filter
% + match = False
% + for filter_str in filters:
% + reg = re.compile("^" + filter_str.replace("*",".*") + "$")
% + if reg.match(pack.name):
Wouldn't fnmatch.filter() be better/easier to use here?
% + match = True
% + break
% + if match:
...
% +
% + def package_deps(self, package, packages_list):
...
This is yum repo plugin specific code and it would be better to
implement it in the plugin itself, i.e. repo_plugins/yum_src.py in this case.
Moreover it would be better to call yum's internal depsolver and not
reinvent it again.
Regards,
--
Michael Mráka
Satellite Engineering, Red Hat
_______________________________________________
Spacewalk-devel mailing list
Spacewalk-devel@redhat.com
https://www.redhat.com/mailman/listinfo/spacewalk-devel
diff --git a/backend/satellite_tools/repo_plugins/yum_src.py b/backend/satellite_tools/repo_plugins/yum_src.py
index bfc6161..db6ee3b 100644
--- a/backend/satellite_tools/repo_plugins/yum_src.py
+++ b/backend/satellite_tools/repo_plugins/yum_src.py
@@ -74,14 +74,14 @@ class YumUpdateMetadata(UpdateMetadata):
no = self._no_cache.setdefault(file['name'], set())
no.add(un)
-class ContentSource:
+class ContentSource(yum.YumBase):
url = None
name = None
- repo = None
cache_dir = '/var/cache/rhn/reposync/'
+ repo_id = None
+
def __init__(self, url, name):
- self.url = url
- self.name = name
+ yum.YumBase.__init__(self)
self._clean_cache(self.cache_dir + name)
# read the proxy configuration in /etc/rhn/rhn.conf
@@ -97,14 +97,11 @@ class ContentSource:
else:
self.proxy_url = None
- def list_packages(self):
- """ list packages"""
- repo = yum.yumRepo.YumRepository(self.name)
- self.repo = repo
+ repo = yum.yumRepo.YumRepository(name)
repo.cache = 0
repo.metadata_expire = 0
- repo.mirrorlist = self.url
- repo.baseurl = [self.url]
+ repo.mirrorlist = url
+ repo.baseurl = [url]
repo.basecachedir = self.cache_dir
if self.proxy_url is not None:
repo.proxy = self.proxy_url
@@ -113,13 +110,33 @@ class ContentSource:
warnings.disable()
repo.baseurlSetup()
warnings.restore()
-
repo.setup(False)
- sack = repo.getPackageSack()
- sack.populate(repo, 'metadata', None, 0)
- list = sack.returnPackages()
- to_return = []
- for pack in list:
+ repos = self.repos.findRepos('*')
+ for rep in repos:
+ self.repos.disableRepo(rep.id)
+ self.repos.delete(rep.id)
+
+ self.repo_id = repo.id
+ self.repos.add(repo)
+ self.pkgSack = self.repos.getRepo(self.repo_id).getPackageSack()
+ self.pkgSack.populate(self.repos.getRepo(self.repo_id), 'metadata', None, 0)
+
+ def getName(self):
+ return self.repos.getRepo(self.repo_id).id
+
+ def getUrl(self):
+ return self.repos.getRepo(self.repo_id).mirrorlist
+
+ def getRepo(self):
+ return self.repos.getRepo(self.repo_id)
+
+ def list_packages(self):
+ """ list packages"""
+ return self._list_packages(self.pkgSack.returnPackages())
+
+ def _list_packages(self, packages_list):
+ packages = []
+ for pack in packages_list:
if pack.arch == 'src':
continue
new_pack = ContentPackage()
@@ -130,13 +147,13 @@ class ContentSource:
if new_pack.checksum_type == 'sha':
new_pack.checksum_type = 'sha1'
new_pack.checksum = pack.checksums[0][1]
- to_return.append(new_pack)
- return to_return
+ packages.append(new_pack)
+ return packages
def get_package(self, package):
""" get package """
check = (self.verify_pkg, (package.unique_id ,1), {})
- return self.repo.getPackage(package.unique_id, checkfunc=check)
+ return self.repos.getRepo(self.repo_id).getPackage(package.unique_id, checkfunc=check)
def verify_pkg(self, fo, pkg, fail):
return pkg.verifyLocalPkg()
@@ -145,8 +162,55 @@ class ContentSource:
shutil.rmtree(directory, True)
def get_updates(self):
- if not self.repo.repoXML.repoData.has_key('updateinfo'):
+ if not self.repos.getRepo(self.repo_id).repoXML.repoData.has_key('updateinfo'):
return []
um = YumUpdateMetadata()
- um.add(self.repo, all=True)
+ um.add(self.repos.getRepo(self.repo_id), all=True)
return um.notices
+
+ def filter_packages(self, filters):
+ """ filter packages """
+ # Returns 2 lists : selected packages, and excluded
+ selected = []
+ excluded = []
+ if len(filters['include']) > 0:
+ exactmatch, matched, unmatched = yum.packages.parsePackages(self.pkgSack.returnPackages(), filters['include'])
+ selected = yum.misc.unique(exactmatch + matched)
+ selected = self._get_packages_dependencies(selected)
+
+ if len(filters['exclude']) > 0:
+ exactmatch, matched, unmatched = yum.packages.parsePackages(self.pkgSack.returnPackages(), filters['exclude'])
+ excluded = yum.misc.unique(exactmatch + matched)
+
+ if len(filters['exclude']) > 0 and len(filters['include']) == 0:
+ # Only exclude filter: Add others packages in included
+ packages = self.pkgSack.returnPackages()
+ for pack in packages:
+ if excluded.count(pack) > 0:
+ continue
+ selected.append(pack)
+
+ selected = self._get_packages_dependencies(selected)
+
+ elif len(filters['include']) > 0:
+ # Only include filter or both include and exclude filters : Add others packages in excluded
+ packages = self.pkgSack.returnPackages()
+ for pack in packages:
+ if selected.count(pack) > 0 or excluded.count(pack) > 0:
+ continue
+ excluded.append(pack)
+
+ # Drop packages from excluded if they are also in included or a dependency of included package
+ for pack in selected:
+ if excluded.count(pack) > 0:
+ excluded.remove(pack)
+
+ return (self._list_packages(selected), self._list_packages(excluded))
+
+ def _get_packages_dependencies(self, packages):
+ deps = self.findDeps(packages)
+ for pkg in deps:
+ for dep in deps[pkg]:
+ packages.extend(deps[pkg][dep])
+
+ return yum.misc.unique(packages)
diff --git a/backend/satellite_tools/reposync.py b/backend/satellite_tools/reposync.py
index 6834adb..25aeb19 100644
--- a/backend/satellite_tools/reposync.py
+++ b/backend/satellite_tools/reposync.py
@@ -44,6 +44,7 @@ class RepoSync:
fail = False
quiet = False
regen = False
+ filters = {'include': [], 'exclude': []}
def main(self):
initCFG('server')
@@ -89,6 +90,11 @@ class RepoSync:
quit = True
self.error_msg("--channel must be specified")
+ if options.include:
+ self.filters['include'] = options.include.split()
+ if options.include:
+ self.filters['exclude'] = options.exclude.split()
+
self.log_msg("\nSync started: %s" % (time.asctime(time.localtime())))
self.log_msg(str(sys.argv))
@@ -105,18 +111,21 @@ class RepoSync:
if not self.channel or not rhnChannel.isCustomChannel(self.channel['id']):
print "Channel does not exist or is not custom"
sys.exit(1)
-
+ start = datetime.now()
for url in self.urls:
plugin = self.load_plugin()(url, self.channel_label)
- self.import_packages(plugin, url)
- self.import_updates(plugin, url)
+ self.import_packages(plugin)
+ self.import_updates(plugin)
if self.regen:
taskomatic.add_to_repodata_queue_for_channel_package_subscription(
[self.channel_label], [], "server.app.yumreposync")
taskomatic.add_to_erratacache_queue(self.channel_label)
self.update_date()
- rhnSQL.commit()
- self.print_msg("Sync complete")
+ rhnSQL.commit()
+ exec_time = datetime.now() - start
+ minutes, seconds = divmod(exec_time.seconds, 60)
+ hours, minutes = divmod(minutes, 60)
+ self.print_msg("Sync complete in %d day(s) %d hour(s) %d minute(s) %d second(s)" % (exec_time.days, hours, minutes, seconds))
def update_date(self):
@@ -132,6 +141,8 @@ class RepoSync:
self.parser.add_option('-t', '--type', action='store', dest='type', help='The type of repo, currently only "yum" is supported', default='yum')
self.parser.add_option('-f', '--fail', action='store_true', dest='fail', default=False , help="If a package import fails, fail the entire operation")
self.parser.add_option('-q', '--quiet', action='store_true', dest='quiet', default=False, help="Print no output, still logs output")
+ self.parser.add_option('-i', '--include', action='store', dest='include', help="Synchronize only the packets that meet the filter and their dependencies")
+ self.parser.add_option('-e', '--exclude', action='store', dest='exclude', help="Don't synchronize packets that meet the filter")
return self.parser.parse_args()
def load_plugin(self):
@@ -140,9 +151,9 @@ class RepoSync:
submod = getattr(mod, name)
return getattr(submod, "ContentSource")
- def import_updates(self, plug, url):
+ def import_updates(self, plug):
notices = plug.get_updates()
- self.print_msg("Repo " + url + " has " + str(len(notices)) + " errata.")
+ self.print_msg("Repo " + plug.getUrl() + " has " + str(len(notices)) + " errata.")
if len(notices) > 0:
self.upload_updates(notices)
@@ -307,10 +318,39 @@ class RepoSync:
importer.run()
self.regen = True
- def import_packages(self, plug, url):
- packages = plug.list_packages()
+ def import_packages(self, plug):
+ repo_packages = plug.list_packages()
to_process = []
- self.print_msg("Repo " + url + " has " + str(len(packages)) + " packages.")
+ to_delete = []
+ self.print_msg("Repo " + plug.getUrl() + " has " + str(len(repo_packages)) + " packages.")
+
+ if len(self.filters['include']) > 0 or len(self.filters['exclude']) > 0:
+ selected, excluded = plug.filter_packages(self.filters)
+ packages = selected
+ self.print_msg("Repo " + plug.getUrl() + " has " + str(len(selected)) + " packages selected by filters.")
+ self.print_msg("Repo " + plug.getUrl() + " has " + str(len(excluded)) + " packages excluded by filters.")
+
+ for pack in excluded:
+ db_pack = rhnPackage.get_info_for_package(
+ [pack.name, pack.version, pack.release, pack.epoch, pack.arch],
+ self.channel_label)
+
+ to_remove = False
+ to_unlink = False
+ if db_pack['path']:
+ pack.path = os.path.join(CFG.MOUNT_POINT, db_pack['path'])
+ if os.path.exists(pack.path):
+ to_remove = True
+ if db_pack['channel_label'] == self.channel_label:
+ # package is already in the channel
+ to_unlink = True
+
+ if to_remove or to_unlink:
+ to_delete.append((pack, db_pack, to_remove, to_unlink))
+ else:
+ self.print_msg("Repo " + plug.getUrl() + " has no filters set.")
+ packages = repo_packages
+
for pack in packages:
db_pack = rhnPackage.get_info_for_package(
[pack.name, pack.version, pack.release, pack.epoch, pack.arch],
@@ -327,30 +367,47 @@ class RepoSync:
if db_pack['channel_label'] == self.channel_label:
# package is already in the channel
to_link = False
- elif db_pack['channel_label'] == self.channel_label:
- # different package with SAME NVREA
- self.disassociate_package(db_pack)
+ elif db_pack['channel_label'] == self.channel_label:
+ # different package with SAME NVREA
+ self.disassociate_package(db_pack)
if to_download or to_link:
to_process.append((pack, to_download, to_link))
num_to_process = len(to_process)
- if num_to_process == 0:
+ num_to_delete = len(to_delete)
+ if num_to_process == 0 and num_to_delete == 0:
self.print_msg("No new packages to sync.")
return
self.regen=True
- is_non_local_repo = (url.find("file://") < 0)
+ is_non_local_repo = (plug.getUrl().find("file://") < 0)
# try/except/finally doesn't work in python 2.4 (RHEL5), so here's a hack
def finally_remove(path):
if is_non_local_repo and path and os.path.exists(path):
os.remove(path)
+ for (index, what) in enumerate(to_delete):
+ pack, db_pack, to_remove, to_unlink = what
+ try:
+ self.print_msg("%d/%d : Delete %s" % (index+1, num_to_delete, pack.getNVREA()))
+ if to_remove:
+ os.remove(pack.path)
+ if to_unlink:
+ self.disassociate_package(db_pack)
+ except KeyboardInterrupt:
+ raise
+ except Exception, e:
+ self.error_msg(e)
+ if self.fail:
+ raise
+ continue
+
for (index, what) in enumerate(to_process):
pack, to_download, to_link = what
localpath = None
try:
- self.print_msg("%d/%d : %s" % (index+1, num_to_process, pack.getNVREA()))
+ self.print_msg("%d/%d : Sync %s" % (index+1, num_to_process, pack.getNVREA()))
if to_download:
pack.path = localpath = plug.get_package(pack)
pack.load_checksum_from_header()
_______________________________________________
Spacewalk-devel mailing list
Spacewalk-devel@redhat.com
https://www.redhat.com/mailman/listinfo/spacewalk-devel