Add a --content-db option which is required for the content-hash
layout because its file listings return content digests instead of
distfile names.

The content db includes a reverse mapping, for use during garbage
collection. All keys have a prefix separated by a colon. For digest
keys, the prefix is the hash algorithm name. For filename keys,
the prefix is "filename". The values for digest keys are plain
filenames, and the values for filename keys are dictionaries
of digests suitable for construction of DistfileName instances.

Bug: https://bugs.gentoo.org/756778
Signed-off-by: Zac Medico <zmed...@gentoo.org>
---
 lib/portage/_emirrordist/Config.py           |  7 +++-
 lib/portage/_emirrordist/DeletionIterator.py | 38 ++++++++++++++++++--
 lib/portage/_emirrordist/DeletionTask.py     | 26 ++++++++++++++
 lib/portage/_emirrordist/FetchTask.py        | 16 ++++++++-
 lib/portage/_emirrordist/main.py             | 15 +++++++-
 lib/portage/tests/ebuild/test_fetch.py       | 14 ++++++++
 man/emirrordist.1                            |  6 +++-
 7 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/lib/portage/_emirrordist/Config.py 
b/lib/portage/_emirrordist/Config.py
index 4bee4f45e..53f6582fe 100644
--- a/lib/portage/_emirrordist/Config.py
+++ b/lib/portage/_emirrordist/Config.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2020 Gentoo Authors
+# Copyright 2013-2021 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import copy
@@ -65,6 +65,11 @@ class Config:
                        self.distfiles_db = self._open_shelve(
                                options.distfiles_db, 'distfiles')
 
+               self.content_db = None
+               if options.content_db is not None:
+                       self.content_db = self._open_shelve(
+                               options.content_db, 'content')
+
                self.deletion_db = None
                if options.deletion_db is not None:
                        self.deletion_db = self._open_shelve(
diff --git a/lib/portage/_emirrordist/DeletionIterator.py 
b/lib/portage/_emirrordist/DeletionIterator.py
index 08985ed6c..24fb096bf 100644
--- a/lib/portage/_emirrordist/DeletionIterator.py
+++ b/lib/portage/_emirrordist/DeletionIterator.py
@@ -1,10 +1,12 @@
-# Copyright 2013-2019 Gentoo Authors
+# Copyright 2013-2021 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import logging
 import stat
+import typing
 
 from portage import os
+from portage.package.ebuild.fetch import DistfileName
 from .DeletionTask import DeletionTask
 
 class DeletionIterator:
@@ -12,6 +14,37 @@ class DeletionIterator:
        def __init__(self, config):
                self._config = config
 
+       def _map_filename(self, filename: typing.Union[str, DistfileName]) -> 
typing.Union[str, DistfileName]:
+               """
+               Map a filename listed by the layout get_filenames method,
+               translating it from a content digest to a distfile name.
+               If filename is already a distfile name, then it will pass
+               through unchanged.
+
+               @param filename: A filename listed by layout get_filenames
+               @return: The distfile name, mapped from the corresponding
+                       content digest when necessary
+               """
+               if not isinstance(filename, DistfileName):
+                       if self._config.content_db is not None:
+                               distfile_key = "filename:{}".format(filename)
+                               try:
+                                       digests = 
self._config.content_db[distfile_key]
+                               except KeyError:
+                                       pass
+                               else:
+                                       return DistfileName(filename, 
digests=digests)
+                       return DistfileName(filename)
+               if filename.digests and self._config.content_db is not None:
+                       for k, v in filename.digests.items():
+                               digest_key = "{}:{}".format(k, v).lower()
+                               try:
+                                       distfile_str = 
self._config.content_db[digest_key]
+                               except KeyError:
+                                       continue
+                               return DistfileName(distfile_str, digests={k:v})
+               return filename
+
        def __iter__(self):
                distdir = self._config.options.distfiles
                file_owners = self._config.file_owners
@@ -22,7 +55,8 @@ class DeletionIterator:
                start_time = self._config.start_time
                distfiles_set = set()
                for layout in self._config.layouts:
-                       distfiles_set.update(layout.get_filenames(distdir))
+                       distfiles_set.update(self._map_filename(filename)
+                               for filename in layout.get_filenames(distdir))
                for filename in distfiles_set:
                        # require at least one successful stat()
                        exceptions = []
diff --git a/lib/portage/_emirrordist/DeletionTask.py 
b/lib/portage/_emirrordist/DeletionTask.py
index 5eb01d840..96c52fa93 100644
--- a/lib/portage/_emirrordist/DeletionTask.py
+++ b/lib/portage/_emirrordist/DeletionTask.py
@@ -5,6 +5,7 @@ import errno
 import logging
 
 from portage import os
+from portage.package.ebuild.fetch import ContentHashLayout
 from portage.util._async.FileCopier import FileCopier
 from _emerge.CompositeTask import CompositeTask
 
@@ -99,6 +100,10 @@ class DeletionTask(CompositeTask):
        def _delete_links(self):
                success = True
                for layout in self.config.layouts:
+                       if isinstance(layout, ContentHashLayout) and not 
self.distfile.digests:
+                               logging.debug(("_delete_links: '%s' has "
+                                       "no digests") % self.distfile)
+                               continue
                        distfile_path = os.path.join(
                                self.config.options.distfiles,
                                layout.get_path(self.distfile))
@@ -134,6 +139,27 @@ class DeletionTask(CompositeTask):
                                logging.debug(("drop '%s' from "
                                        "distfiles db") % self.distfile)
 
+               if self.config.content_db is not None:
+                       distfile_key = "filename:{}".format(self.distfile)
+                       try:
+                               digests = self.config.content_db[distfile_key]
+                       except KeyError:
+                               pass
+                       else:
+                               for k, v in digests.items():
+                                       digest_key = "{}:{}".format(k, v)
+                                       try:
+                                               del 
self.config.content_db[digest_key]
+                                       except KeyError:
+                                               pass
+
+                               logging.debug(("drop '%s' from "
+                                       "content db") % self.distfile)
+                               try:
+                                       del self.config.content_db[distfile_key]
+                               except KeyError:
+                                       pass
+
                if self.config.deletion_db is not None:
                        try:
                                del self.config.deletion_db[self.distfile]
diff --git a/lib/portage/_emirrordist/FetchTask.py 
b/lib/portage/_emirrordist/FetchTask.py
index 997762082..5a39cdb1a 100644
--- a/lib/portage/_emirrordist/FetchTask.py
+++ b/lib/portage/_emirrordist/FetchTask.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2020 Gentoo Authors
+# Copyright 2013-2021 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import collections
@@ -47,6 +47,20 @@ class FetchTask(CompositeTask):
                        # Convert _pkg_str to str in order to prevent pickle 
problems.
                        self.config.distfiles_db[self.distfile] = str(self.cpv)
 
+               if self.config.content_db is not None:
+                       # The content db includes a reverse mapping, for use 
during garbage
+                       # collection. All keys have a prefix separated by a 
colon. For digest
+                       # keys, the prefix is the hash algorithm name. For 
filename keys,
+                       # the prefix is "filename". The values for digest keys 
are plain
+                       # filenames, and the values for filename keys are 
dictionaries
+                       # of digests suitable for construction of DistfileName 
instances.
+                       distfile_str = str(self.distfile)
+                       distfile_key = 'filename:{}'.format(distfile_str)
+                       for k, v in self.distfile.digests.items():
+                               digest_key = '{}:{}'.format(k, v).lower()
+                               self.config.content_db[digest_key] = 
distfile_str
+                               self.config.content_db.setdefault(distfile_key, 
{})[k] = v
+
                if not self._have_needed_digests():
                        msg = "incomplete digests: %s" % " ".join(self.digests)
                        self.scheduler.output(msg, background=self.background,
diff --git a/lib/portage/_emirrordist/main.py b/lib/portage/_emirrordist/main.py
index 8d00a05f5..2200ec715 100644
--- a/lib/portage/_emirrordist/main.py
+++ b/lib/portage/_emirrordist/main.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2020 Gentoo Authors
+# Copyright 2013-2021 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import argparse
@@ -7,6 +7,7 @@ import sys
 
 import portage
 from portage import os
+from portage.package.ebuild.fetch import ContentHashLayout
 from portage.util import normalize_path, _recursive_file_list
 from portage.util._async.run_main_scheduler import run_main_scheduler
 from portage.util._async.SchedulerInterface import SchedulerInterface
@@ -151,6 +152,12 @@ common_options = (
                        "distfile belongs to",
                "metavar"  : "FILE"
        },
+       {
+               "longopt"  : "--content-db",
+               "help"     : "database file used to map content digests to"
+                       "distfiles names (required for content-hash layout)",
+               "metavar"  : "FILE"
+       },
        {
                "longopt"  : "--recycle-dir",
                "help"     : "directory for extended retention of files that "
@@ -441,6 +448,12 @@ def emirrordist_main(args):
                if not options.mirror:
                        parser.error('No action specified')
 
+               if options.delete and config.content_db is None:
+                       for layout in config.layouts:
+                               if isinstance(layout, ContentHashLayout):
+                                       parser.error("content-hash layout 
requires "
+                                               "--content-db to be specified")
+
                returncode = os.EX_OK
 
                if options.mirror:
diff --git a/lib/portage/tests/ebuild/test_fetch.py 
b/lib/portage/tests/ebuild/test_fetch.py
index d50a4cbfc..881288cdc 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -172,6 +172,16 @@ class EbuildFetchTestCase(TestCase):
                                with open(os.path.join(settings['DISTDIR'], 
'layout.conf'), 'wt') as f:
                                        f.write(layout_data)
 
+                               if any(isinstance(layout, ContentHashLayout) 
for layout in layouts):
+                                       content_db = 
os.path.join(playground.eprefix, 'var/db/emirrordist/content.db')
+                                       
os.makedirs(os.path.dirname(content_db), exist_ok=True)
+                                       try:
+                                               os.unlink(content_db)
+                                       except OSError:
+                                               pass
+                               else:
+                                       content_db = None
+
                                # Demonstrate that fetch preserves a stale file 
in DISTDIR when no digests are given.
                                foo_uri = {'foo': 
('{scheme}://{host}:{port}/distfiles/foo'.format(scheme=scheme, host=host, 
port=server.server_port),)}
                                foo_path = os.path.join(settings['DISTDIR'], 
'foo')
@@ -233,9 +243,13 @@ class EbuildFetchTestCase(TestCase):
                                        os.path.join(self.bindir, 
'emirrordist'),
                                        '--distfiles', settings['DISTDIR'],
                                        '--config-root', settings['EPREFIX'],
+                                       '--delete',
                                        '--repositories-configuration', 
settings.repositories.config_string(),
                                        '--repo', 'test_repo', '--mirror')
 
+                               if content_db is not None:
+                                       emirrordist_cmd = emirrordist_cmd + 
('--content-db', content_db,)
+
                                env = settings.environ()
                                env['PYTHONPATH'] = ':'.join(
                                        filter(None, [PORTAGE_PYM_PATH] + 
os.environ.get('PYTHONPATH', '').split(':')))
diff --git a/man/emirrordist.1 b/man/emirrordist.1
index 45108ef8c..7ad10dfd0 100644
--- a/man/emirrordist.1
+++ b/man/emirrordist.1
@@ -1,4 +1,4 @@
-.TH "EMIRRORDIST" "1" "Dec 2015" "Portage VERSION" "Portage"
+.TH "EMIRRORDIST" "1" "Feb 2021" "Portage VERSION" "Portage"
 .SH "NAME"
 emirrordist \- a fetch tool for mirroring of package distfiles
 .SH SYNOPSIS
@@ -66,6 +66,10 @@ reporting purposes. Opened in append mode.
 Log file for scheduled deletions, with tab\-delimited output, for
 reporting purposes. Overwritten with each run.
 .TP
+\fB\-\-content\-db\fR=\fIFILE\fR
+Database file used to pair content digests with distfile names
+(required for content\-hash layout).
+.TP
 \fB\-\-delete\fR
 Enable deletion of unused distfiles.
 .TP
-- 
2.26.2


Reply via email to