Hi Jan,

Following your advice, I modified my code.
It now gets the include and exclude filters from the database:

I ran the following SQL query on my Oracle XE Database:
ALTER TABLE rhnContentSource ADD (include_filter VARCHAR(255), exclude_filter 
VARCHAR(255));

Regards.

Baptiste

----- Mail original -----
De: "Baptiste AGASSE" <baptiste.aga...@lyra-network.com>
À: spacewalk-devel@redhat.com
Envoyé: Jeudi 18 Août 2011 22:50:34
Objet: Re: [Spacewalk-devel] [PATCH] Filters on reposync

Hi Jan,
OK, I can take a look at this next week.

Regards.

Baptiste

----- Mail original -----
De: "Jan Pazdziora" <jpazdzi...@redhat.com>
À: spacewalk-devel@redhat.com
Envoyé: Jeudi 18 Août 2011 13:39:57
Objet: Re: [Spacewalk-devel] [PATCH] Filters on reposync

On Wed, Aug 17, 2011 at 08:23:36PM +0200, Baptiste AGASSE wrote:
> 
> Following your advice, I have modified my code:
> - You can now include and / or exclude packages (with --include and / or 
> --exclude options)
> - Include filter takes priority over exclude filter: if a package matches
> both 'include' and 'exclude' rules, it will be included
>   eg:
>     exclude = [ 'openoffice.org-langpack-*', ...]
>     include = [ 'openoffice.org-langpack-en-*', ...]
> 
> - Package filtering is in yum_src.py
> - Yum's dependency resolver is now used to find the dependencies of the selected packages
> - All versions of the packages excluded by a filter are now deleted from the DB
> and the filesystem
> - Print elapsed time at end of sync
> 
> Any comments are welcome.

I don't really like the fact that the exclude/include options are
specified on the spacewalk-repo-sync runtime, rather than being
properties of the repository. In other words -- these exclude/include
lists should be specified in the database, so that they would be used
any time spacewalk-repo-sync is run, no matter if it is run from the
command line or via scheduled event by taskomatic.

Now that the core functionality is in place, could you amend it some
more and have the lists stored in some database table and used from
there?

-- 
Jan Pazdziora
Principal Software Engineer, Satellite Engineering, Red Hat

_______________________________________________
Spacewalk-devel mailing list
Spacewalk-devel@redhat.com
https://www.redhat.com/mailman/listinfo/spacewalk-devel
diff --git a/backend/satellite_tools/repo_plugins/yum_src.py b/backend/satellite_tools/repo_plugins/yum_src.py
index bfc6161..9871a32 100644
--- a/backend/satellite_tools/repo_plugins/yum_src.py
+++ b/backend/satellite_tools/repo_plugins/yum_src.py
@@ -74,14 +74,15 @@ class YumUpdateMetadata(UpdateMetadata):
                             no = self._no_cache.setdefault(file['name'], set())
                             no.add(un)
 
-class ContentSource:
+class ContentSource(yum.YumBase):
     url = None
     name = None
-    repo = None
     cache_dir = '/var/cache/rhn/reposync/'
-    def __init__(self, url, name):
-        self.url = url
-        self.name = name
+    repo_id = None
+    filters = {'include': [], 'exclude': []}
+
+    def __init__(self, url, name, filters = None ):
+        yum.YumBase.__init__(self)
         self._clean_cache(self.cache_dir + name)
 
         # read the proxy configuration in /etc/rhn/rhn.conf
@@ -97,14 +98,14 @@ class ContentSource:
         else:
             self.proxy_url = None
 
-    def list_packages(self):
-        """ list packages"""
-        repo = yum.yumRepo.YumRepository(self.name)
-        self.repo = repo
+        if filters:
+            self.filters = filters
+
+        repo = yum.yumRepo.YumRepository(name)
         repo.cache = 0
         repo.metadata_expire = 0
-        repo.mirrorlist = self.url
-        repo.baseurl = [self.url]
+        repo.mirrorlist = url
+        repo.baseurl = [url]
         repo.basecachedir = self.cache_dir
         if self.proxy_url is not None:
             repo.proxy = self.proxy_url
@@ -113,13 +114,36 @@ class ContentSource:
         warnings.disable()
         repo.baseurlSetup()
         warnings.restore()
-
         repo.setup(False)
-        sack = repo.getPackageSack()
-        sack.populate(repo, 'metadata', None, 0)
-        list = sack.returnPackages()
-        to_return = []
-        for pack in list:
+        repos = self.repos.findRepos('*')
+        for rep in repos:
+            self.repos.disableRepo(rep.id)
+            self.repos.delete(rep.id)
+        
+        self.repo_id = repo.id
+        self.repos.add(repo)
+        self.pkgSack = self.repos.getRepo(self.repo_id).getPackageSack()
+        self.pkgSack.populate(self.repos.getRepo(self.repo_id), 'metadata', None, 0)
+
+    def getName(self):
+        return self.repos.getRepo(self.repo_id).id
+  
+    def getUrl(self):
+        return self.repos.getRepo(self.repo_id).mirrorlist
+
+    def getFilters(self):
+        return self.filters
+
+    def getRepo(self):
+        return self.repos.getRepo(self.repo_id)
+    
+    def list_packages(self):
+        """ list packages"""
+        return self._list_packages(self.pkgSack.returnPackages())
+
+    def _list_packages(self, packages_list):
+        packages = []
+        for pack in packages_list:
             if pack.arch == 'src':
                 continue
             new_pack = ContentPackage()
@@ -130,13 +154,13 @@ class ContentSource:
             if new_pack.checksum_type == 'sha':
                 new_pack.checksum_type = 'sha1'
             new_pack.checksum      = pack.checksums[0][1]
-            to_return.append(new_pack)
-        return to_return
+            packages.append(new_pack)
+        return packages
 
     def get_package(self, package):
         """ get package """
         check = (self.verify_pkg, (package.unique_id ,1), {})
-        return self.repo.getPackage(package.unique_id, checkfunc=check)
+        return self.repos.getRepo(self.repo_id).getPackage(package.unique_id, checkfunc=check)
 
     def verify_pkg(self, fo, pkg, fail):
         return pkg.verifyLocalPkg()
@@ -145,8 +169,55 @@ class ContentSource:
         shutil.rmtree(directory, True)
 
     def get_updates(self):
-      if not self.repo.repoXML.repoData.has_key('updateinfo'):
+      if not self.repos.getRepo(self.repo_id).repoXML.repoData.has_key('updateinfo'):
         return []
       um = YumUpdateMetadata()
-      um.add(self.repo, all=True)
+      um.add(self.repos.getRepo(self.repo_id), all=True)
       return um.notices
+
+    def filter_packages(self):
+        """ filter packages """
+        # Returns 2 lists : selected packages, and excluded
+        selected = []
+        excluded = []
+        if len(self.filters['include']) > 0:
+            exactmatch, matched, unmatched = yum.packages.parsePackages(self.pkgSack.returnPackages(), self.filters['include'])
+            selected = yum.misc.unique(exactmatch + matched)
+            selected = self._get_packages_dependencies(selected)
+
+        if len(self.filters['exclude']) > 0:
+            exactmatch, matched, unmatched = yum.packages.parsePackages(self.pkgSack.returnPackages(), self.filters['exclude'])
+            excluded = yum.misc.unique(exactmatch + matched)
+
+        if len(self.filters['exclude']) > 0 and len(self.filters['include']) == 0:
+            # Only exclude filter: Add others packages in included
+            packages = self.pkgSack.returnPackages()
+            for pack in packages:
+                if excluded.count(pack) > 0:
+                    continue
+                selected.append(pack)
+
+            selected = self._get_packages_dependencies(selected)
+
+        elif len(self.filters['include']) > 0:
+            # Only include filter or both include and exclude filters : Add others packages in excluded
+            packages = self.pkgSack.returnPackages()
+            for pack in packages:
+                if selected.count(pack) > 0 or excluded.count(pack) > 0:
+                    continue
+                excluded.append(pack)
+
+        # Drop packages from excluded if they are also in included or a dependency of included package
+        for pack in selected:
+            if excluded.count(pack) > 0:
+                excluded.remove(pack)
+
+        return (self._list_packages(selected), self._list_packages(excluded))
+
+    def _get_packages_dependencies(self, packages):
+        deps = self.findDeps(packages)
+        for pkg in deps:
+            for dep in deps[pkg]:
+                packages.extend(deps[pkg][dep])
+
+        return yum.misc.unique(packages)
diff --git a/backend/satellite_tools/reposync.py b/backend/satellite_tools/reposync.py
index 6834adb..af9b960 100644
--- a/backend/satellite_tools/reposync.py
+++ b/backend/satellite_tools/reposync.py
@@ -38,7 +38,7 @@ class RepoSync:
 
     parser = None
     type = None
-    urls = None
+    urls = []
     channel_label = None
     channel = None
     fail = False
@@ -69,7 +69,7 @@ class RepoSync:
         if not options.url:
             if options.channel_label:
                 # TODO:need to look at user security across orgs
-                h = rhnSQL.prepare("""select s.source_url
+                h = rhnSQL.prepare("""select s.source_url, s.include_filter, s.exclude_filter
                                       from rhnContentSource s,
                                            rhnChannelContentSource cs,
                                            rhnChannel c
@@ -79,12 +79,26 @@ class RepoSync:
                 h.execute(label=options.channel_label)
                 source_urls = h.fetchall_dict() or []
                 if source_urls:
-                    self.urls = [row['source_url'] for row in source_urls]
+                    for row in source_urls:
+                        url = {'url': row['source_url'], 'filters': {'include': [], 'exclude': []}}
+                        self.print_msg("url : " + row['source_url'])
+                        self.print_msg("include : " + str(row['include_filter']))
+                        self.print_msg("exclude : " + str(row['exclude_filter']))
+                        if row['include_filter']:
+                            url['filters']['include'] = row['include_filter'].split()
+                        if row['exclude_filter']:
+                            url['filters']['exclude'] = row['exclude_filter'].split()
+                        self.urls.append(url)
                 else:
                     quit = True
                     self.error_msg("Channel has no URL associated")
         else:
-            self.urls = [options.url]
+            url = {'url': options.url, 'filters': {'include': [], 'exclude': []}}
+            if options.include:
+                url['filters']['include'] = options.include.split()
+            if options.include:
+                url['filters']['exclude'] = options.exclude.split()
+            self.urls = [url]
         if not options.channel_label:
             quit = True
             self.error_msg("--channel must be specified")
@@ -105,18 +119,21 @@ class RepoSync:
         if not self.channel or not rhnChannel.isCustomChannel(self.channel['id']):
             print "Channel does not exist or is not custom"
             sys.exit(1)
-
+        start = datetime.now()
         for url in self.urls:
-            plugin = self.load_plugin()(url, self.channel_label)
-            self.import_packages(plugin, url)
-            self.import_updates(plugin, url)
+            plugin = self.load_plugin()(url['url'], self.channel_label, url['filters'])
+            self.import_packages(plugin)
+            self.import_updates(plugin)
         if self.regen:
             taskomatic.add_to_repodata_queue_for_channel_package_subscription(
                 [self.channel_label], [], "server.app.yumreposync")
             taskomatic.add_to_erratacache_queue(self.channel_label)
         self.update_date()
-        rhnSQL.commit()        
-        self.print_msg("Sync complete")
+        rhnSQL.commit()
+        exec_time = datetime.now() - start
+        minutes, seconds =  divmod(exec_time.seconds, 60)
+        hours, minutes =  divmod(minutes, 60)
+        self.print_msg("Sync complete in %d day(s) %d hour(s) %d minute(s) %d second(s)" % (exec_time.days, hours, minutes, seconds))
 
 
     def update_date(self):
@@ -132,7 +149,11 @@ class RepoSync:
         self.parser.add_option('-t', '--type', action='store', dest='type', help='The type of repo, currently only "yum" is supported', default='yum')
         self.parser.add_option('-f', '--fail', action='store_true', dest='fail', default=False , help="If a package import fails, fail the entire operation")
         self.parser.add_option('-q', '--quiet', action='store_true', dest='quiet', default=False, help="Print no output, still logs output")
+        self.parser.add_option('-i', '--include', action='store', dest='include', help="Synchronize only the packets that meet the filter and their dependencies")
+        self.parser.add_option('-e', '--exclude', action='store', dest='exclude', help="Don't synchronize packets that meet the filter")
+        
         return self.parser.parse_args()
+        
 
     def load_plugin(self):
         name = self.type + "_src"
@@ -140,9 +161,9 @@ class RepoSync:
         submod = getattr(mod, name)
         return getattr(submod, "ContentSource")
 
-    def import_updates(self, plug, url):
+    def import_updates(self, plug):
       notices = plug.get_updates()
-      self.print_msg("Repo " + url + " has " + str(len(notices)) + " errata.")
+      self.print_msg("Repo " + plug.getUrl() + " has " + str(len(notices)) + " errata.")
       if len(notices) > 0:
         self.upload_updates(notices)
 
@@ -307,10 +328,40 @@ class RepoSync:
         importer.run()
         self.regen = True
 
-    def import_packages(self, plug, url):
-        packages = plug.list_packages()
+    def import_packages(self, plug):
+        repo_packages = plug.list_packages()
+        filters = plug.getFilters()
         to_process = []
-        self.print_msg("Repo " + url + " has " + str(len(packages)) + " packages.")
+        to_delete = []
+        self.print_msg("Repo " + plug.getUrl() + " has " + str(len(repo_packages)) + " packages.")
+
+        if len(filters['include']) > 0 or len(filters['exclude']) > 0:
+            selected, excluded = plug.filter_packages()
+            packages = selected
+            self.print_msg("Repo " + plug.getUrl() + " has " + str(len(selected)) + " packages selected by filters.")
+            self.print_msg("Repo " + plug.getUrl() + " has " + str(len(excluded)) + " packages excluded by filters.")
+
+            for pack in excluded:
+                db_pack = rhnPackage.get_info_for_package(
+                   [pack.name, pack.version, pack.release, pack.epoch, pack.arch],
+                   self.channel_label)
+
+                to_remove = False
+                to_unlink = False
+                if db_pack['path']:
+                    pack.path = os.path.join(CFG.MOUNT_POINT, db_pack['path'])
+                    if os.path.exists(pack.path):
+                       to_remove = True
+                    if db_pack['channel_label'] == self.channel_label:
+                        # package is already in the channel
+                        to_unlink = True
+
+                if to_remove or to_unlink:
+                    to_delete.append((pack, db_pack, to_remove, to_unlink))
+        else:
+            self.print_msg("Repo " + plug.getUrl() + " has no filters set.")
+            packages = repo_packages
+
         for pack in packages:
             db_pack = rhnPackage.get_info_for_package(
                    [pack.name, pack.version, pack.release, pack.epoch, pack.arch],
@@ -327,30 +378,47 @@ class RepoSync:
                     if db_pack['channel_label'] == self.channel_label:
                         # package is already in the channel
                         to_link = False
-		elif db_pack['channel_label'] == self.channel_label:
-		    # different package with SAME NVREA
-		    self.disassociate_package(db_pack)
+                elif db_pack['channel_label'] == self.channel_label:
+                  # different package with SAME NVREA
+                  self.disassociate_package(db_pack)
 
             if to_download or to_link:
                 to_process.append((pack, to_download, to_link))
 
         num_to_process = len(to_process)
-        if num_to_process == 0:
+        num_to_delete = len(to_delete)
+        if num_to_process == 0 and num_to_delete == 0:
             self.print_msg("No new packages to sync.")
             return
 
         self.regen=True
-        is_non_local_repo = (url.find("file://") < 0)
+        is_non_local_repo = (plug.getUrl().find("file://") < 0)
         # try/except/finally doesn't work in python 2.4 (RHEL5), so here's a hack
         def finally_remove(path):
             if is_non_local_repo and path and os.path.exists(path):
                 os.remove(path)
 
+        for (index, what) in enumerate(to_delete):
+            pack, db_pack, to_remove, to_unlink = what
+            try:
+                self.print_msg("%d/%d : Delete %s" % (index+1, num_to_delete, pack.getNVREA()))          
+                if to_remove:
+                    os.remove(pack.path)
+                if to_unlink:
+                    self.disassociate_package(db_pack)
+            except KeyboardInterrupt:
+                raise
+            except Exception, e:
+                self.error_msg(e)
+                if self.fail:
+                    raise
+                continue
+
         for (index, what) in enumerate(to_process):
             pack, to_download, to_link = what
             localpath = None
             try:
-                self.print_msg("%d/%d : %s" % (index+1, num_to_process, pack.getNVREA()))
+                self.print_msg("%d/%d : Sync %s" % (index+1, num_to_process, pack.getNVREA()))
                 if to_download:
                     pack.path = localpath = plug.get_package(pack)
                 pack.load_checksum_from_header()
_______________________________________________
Spacewalk-devel mailing list
Spacewalk-devel@redhat.com
https://www.redhat.com/mailman/listinfo/spacewalk-devel

Reply via email to