Hi all,

I've modified "/backend/satellite_tools/reposync.py" to add "filters" support
with dependency resolution (like the "--rpm-list" option of cobbler) to the
spacewalk reposync command.

It adds the "--filters" option to reposync,
e.g.: --filters "osad rhncfg-* foo* bar"

It allows you to download only selected packages from a repository in order to
save disk space, which is mainly useful if you use only a few packages from it.
It also deletes packages already present on your system (in the spacewalk server
and on the filesystem) that don't match any filter (only packages with the same
NVREA).

TODO: 
 - Search the spacewalk server for older versions of previously downloaded
packages that don't match the filters and remove them (I will work on it).
 - Make it available from the web UI, in the repository management section.

Maybe someone could work on the web UI and the database schema to make this
option available directly from the web UI? (I'm not familiar with Java and
Oracle DB.)

Any comments are welcome; this is the first time I've programmed in Python :).

Regards.

Baptiste.

diff --git a/backend/satellite_tools/reposync.py b/backend/satellite_tools/reposync.py
index 6834adb..0428bb9 100644
--- a/backend/satellite_tools/reposync.py
+++ b/backend/satellite_tools/reposync.py
@@ -15,6 +15,7 @@
 #
 import sys, os, time
 import hashlib
+import re
 from datetime import datetime
 import traceback
 from optparse import OptionParser
@@ -44,6 +45,7 @@ class RepoSync:
     fail = False
     quiet = False
     regen = False
+    filters = []
 
     def main(self):
         initCFG('server')
@@ -89,6 +91,9 @@ class RepoSync:
             quit = True
             self.error_msg("--channel must be specified")
 
+        if options.filters:
+            self.filters = options.filters.split()
+
         self.log_msg("\nSync started: %s" % (time.asctime(time.localtime())))
         self.log_msg(str(sys.argv))
 
@@ -132,6 +137,7 @@ class RepoSync:
         self.parser.add_option('-t', '--type', action='store', dest='type', help='The type of repo, currently only "yum" is supported', default='yum')
         self.parser.add_option('-f', '--fail', action='store_true', dest='fail', default=False , help="If a package import fails, fail the entire operation")
         self.parser.add_option('-q', '--quiet', action='store_true', dest='quiet', default=False, help="Print no output, still logs output")
+        self.parser.add_option('-p', '--filters', action='store_true', dest='filters', help="Synchronize only the packets that meet the filter and their dependencies")
         return self.parser.parse_args()
 
     def load_plugin(self):
@@ -308,9 +314,43 @@ class RepoSync:
         self.regen = True
 
     def import_packages(self, plug, url):
-        packages = plug.list_packages()
+        repo_packages = plug.list_packages()
         to_process = []
-        self.print_msg("Repo " + url + " has " + str(len(packages)) + " packages.")
+        to_delete = []
+        self.print_msg("Repo " + url + " has " + str(len(repo_packages)) + " packages.")
+
+        if len(self.filters) > 0:
+            selected, dependencies, others = self.filter_packages(self.filters, repo_packages)
+            packages = selected
+            packages.extend(dependencies)
+            self.print_msg("Repo " + url + " has " + str(len(selected)) + " packages selected by filters.")
+            self.print_msg("Repo " + url + " has " + str(len(dependencies)) + " packages dependencies.")
+            self.print_msg("Repo " + url + " has " + str(len(others)) + " packages excluded.")
+
+            for pack in others:
+                # TODO: search for older versions of package and drop them too
+                db_pack = rhnPackage.get_info_for_package(
+                       [pack.name, pack.version, pack.release, pack.epoch, pack.arch],
+                       self.channel_label)
+
+                to_remove = False
+                to_unlink = False
+                if db_pack['path']:
+                    pack.path = os.path.join(CFG.MOUNT_POINT, db_pack['path'])
+                    if self.match_package_checksum(pack.path,
+                                    pack.checksum_type, pack.checksum):
+                        # package is already on disk
+                        to_remove = True
+                        if db_pack['channel_label'] == self.channel_label or db_pack['channel_label'] == self.channel_label:
+                            # package is already in the channel
+                            to_unlink = True
+
+                if to_remove or to_unlink:
+                    to_delete.append((pack, to_remove, to_unlink))
+        else:
+            self.print_msg("Repo " + url + " has no filters set.")
+            packages = repo_packages
+
         for pack in packages:
             db_pack = rhnPackage.get_info_for_package(
                    [pack.name, pack.version, pack.release, pack.epoch, pack.arch],
@@ -327,15 +367,16 @@ class RepoSync:
                     if db_pack['channel_label'] == self.channel_label:
                         # package is already in the channel
                         to_link = False
-		elif db_pack['channel_label'] == self.channel_label:
-		    # different package with SAME NVREA
-		    self.disassociate_package(db_pack)
+                elif db_pack['channel_label'] == self.channel_label:
+                  # different package with SAME NVREA
+                  self.disassociate_package(db_pack)
 
             if to_download or to_link:
                 to_process.append((pack, to_download, to_link))
 
         num_to_process = len(to_process)
-        if num_to_process == 0:
+        num_to_delete = len(to_delete)
+        if num_to_process == 0 and num_to_delete == 0:
             self.print_msg("No new packages to sync.")
             return
 
@@ -346,11 +387,27 @@ class RepoSync:
             if is_non_local_repo and path and os.path.exists(path):
                 os.remove(path)
 
+        for (index, what) in enumerate(to_delete):
+            pack, to_remove, to_unlink = what
+            try:
+                self.print_msg("%d/%d : Delete %s" % (index+1, num_to_delete, pack.getNVREA()))
+                if to_remove:
+                    os.remove(pack.path)
+                if to_unlink:
+                    self.disassociate_package(pack)
+            except KeyboardInterrupt:
+                raise
+            except Exception, e:
+                self.error_msg(e)
+                if self.fail:
+                    raise
+                continue
+
         for (index, what) in enumerate(to_process):
             pack, to_download, to_link = what
             localpath = None
             try:
-                self.print_msg("%d/%d : %s" % (index+1, num_to_process, pack.getNVREA()))
+                self.print_msg("%d/%d : Sync %s" % (index+1, num_to_process, pack.getNVREA()))
                 if to_download:
                     pack.path = localpath = plug.get_package(pack)
                 pack.load_checksum_from_header()
@@ -452,6 +509,59 @@ class RepoSync:
     def short_hash(self, str):
         return hashlib.new(default_hash, str).hexdigest()[0:8]
 
+    def filter_packages(self, filters, packages):
+        # Returns 3 lists : selected packages, dependencies, and others
+        selected = []
+        dependencies = []
+        others = []
+        for pack in packages:
+            # Select all packages that match one filter
+            match = False
+            for filter_str in filters:
+                reg = re.compile("^" + filter_str.replace("*",".*") + "$")
+                if reg.match(pack.name):
+                    match = True
+                    break
+            if match:
+                # Package match one of filters
+                selected.append(pack)
+            else:
+                # Others, packages
+                others.append(pack)
+
+        for pack in selected:
+            # find all dependencies for all selected packages
+            if not len(others) > 0:
+                break
+            deps, others = self.package_deps(pack, others)
+            dependencies.extend(deps)
+
+        return selected, dependencies, others
+
+    def package_deps(self, package, packages_list):
+        # Returns all dependencies for packages and new package_list
+        dependencies, package_list = self.package_deps_one_level(package, packages_list)
+        for dep in dependencies:
+            deps, packages_list = self.package_deps_one_level(dep, packages_list)
+            dependencies.extend(deps)
+    
+        return dependencies, package_list
+        
+    def package_deps_one_level(self, package, packages_list):
+        # Returns 1st level of dependencies for packages and new package_list
+        dependencies = []
+        requires = package.unique_id.returnPrco('requires', True)
+        for req in requires:
+            for pack in packages_list:
+                provides = pack.unique_id.returnPrco('provides', True)
+                if provides.count(req) > 0:
+                    # pack provide one of requ of package, 
+                    # pop it from packages_list and add it to dependencies
+                    dependencies.append(packages_list.pop(packages_list.index(pack)))
+                    break
+
+        return dependencies, packages_list
+
     def _to_db_date(self, date):
         ret = ""
         if date.isdigit():
_______________________________________________
Spacewalk-devel mailing list
Spacewalk-devel@redhat.com
https://www.redhat.com/mailman/listinfo/spacewalk-devel

Reply via email to