Add a --content-db option, which is required for the content-hash
layout because its file listings return content digests instead of
distfile names.
The content db includes a reverse mapping, for use during garbage
collection. All keys have a prefix separated by a colon. For digest
keys, the prefix is the hash algorithm name. For filename keys,
the prefix is "filename". The values for digest keys are plain
filenames, and the values for filename keys are dictionaries
of digests suitable for construction of DistfileName instances.
Bug: https://bugs.gentoo.org/756778
Signed-off-by: Zac Medico
---
lib/portage/_emirrordist/Config.py | 7 +++-
lib/portage/_emirrordist/DeletionIterator.py | 38 ++--
lib/portage/_emirrordist/DeletionTask.py | 26 ++
lib/portage/_emirrordist/FetchTask.py| 16 -
lib/portage/_emirrordist/main.py | 15 +++-
lib/portage/tests/ebuild/test_fetch.py | 14
man/emirrordist.1| 6 +++-
7 files changed, 116 insertions(+), 6 deletions(-)
diff --git a/lib/portage/_emirrordist/Config.py
b/lib/portage/_emirrordist/Config.py
index 4bee4f45e..53f6582fe 100644
--- a/lib/portage/_emirrordist/Config.py
+++ b/lib/portage/_emirrordist/Config.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2020 Gentoo Authors
+# Copyright 2013-2021 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
import copy
@@ -65,6 +65,11 @@ class Config:
self.distfiles_db = self._open_shelve(
options.distfiles_db, 'distfiles')
+ self.content_db = None
+ if options.content_db is not None:
+ self.content_db = self._open_shelve(
+ options.content_db, 'content')
+
self.deletion_db = None
if options.deletion_db is not None:
self.deletion_db = self._open_shelve(
diff --git a/lib/portage/_emirrordist/DeletionIterator.py
b/lib/portage/_emirrordist/DeletionIterator.py
index 08985ed6c..24fb096bf 100644
--- a/lib/portage/_emirrordist/DeletionIterator.py
+++ b/lib/portage/_emirrordist/DeletionIterator.py
@@ -1,10 +1,12 @@
-# Copyright 2013-2019 Gentoo Authors
+# Copyright 2013-2021 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
import logging
import stat
+import typing
from portage import os
+from portage.package.ebuild.fetch import DistfileName
from .DeletionTask import DeletionTask
class DeletionIterator:
@@ -12,6 +14,37 @@ class DeletionIterator:
def __init__(self, config):
self._config = config
+ def _map_filename(self, filename: typing.Union[str, DistfileName]) ->
typing.Union[str, DistfileName]:
+ """
+ Map a filename listed by the layout get_filenames method,
+ translating it from a content digest to a distfile name.
+ If filename is already a distfile name, then it will pass
+ through unchanged.
+
+ @param filename: A filename listed by layout get_filenames
+ @return: The distfile name, mapped from the corresponding
+ content digest when necessary
+ """
+ if not isinstance(filename, DistfileName):
+ if self._config.content_db is not None:
+ distfile_key = "filename:{}".format(filename)
+ try:
+ digests =
self._config.content_db[distfile_key]
+ except KeyError:
+ pass
+ else:
+ return DistfileName(filename,
digests=digests)
+ return DistfileName(filename)
+ if filename.digests and self._config.content_db is not None:
+ for k, v in filename.digests.items():
+ digest_key = "{}:{}".format(k, v).lower()
+ try:
+ distfile_str =
self._config.content_db[digest_key]
+ except KeyError:
+ continue
+ return DistfileName(distfile_str, digests={k:v})
+ return filename
+
def __iter__(self):
distdir = self._config.options.distfiles
file_owners = self._config.file_owners
@@ -22,7 +55,8 @@ class DeletionIterator:
start_time = self._config.start_time
distfiles_set = set()
for layout in self._config.layouts:
- distfiles_set.update(layout.get_filenames(distdir))
+ distfiles_set.update(self._map_filename(filename)
+ for