Add support for the subset of GLEP 75 needed by Gentoo Infra. This includes fetching and parsing layout.conf, and support for the flat layout and the filename-hash layout with cutoffs being multiples of 4.
Bug: https://bugs.gentoo.org/646898 Signed-off-by: Michał Górny <mgo...@gentoo.org> --- lib/portage/package/ebuild/fetch.py | 158 ++++++++++++++++++++++++- lib/portage/tests/ebuild/test_fetch.py | 94 ++++++++++++++- 2 files changed, 247 insertions(+), 5 deletions(-) Changes in v3: - mirrors are evaluated lazily (i.e. Portage doesn't fetch layouts for all mirrors prematurely), - garbage layout.conf is handled gracefully, - cache updates are done atomically, - layout.conf argument verification has been moved to individual classes, - a few unit and integration tests have been added. diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py index 227bf45ae..fa250535f 100644 --- a/lib/portage/package/ebuild/fetch.py +++ b/lib/portage/package/ebuild/fetch.py @@ -6,13 +6,17 @@ from __future__ import print_function __all__ = ['fetch'] import errno +import functools import io +import itertools +import json import logging import random import re import stat import sys import tempfile +import time from collections import OrderedDict @@ -27,14 +31,19 @@ portage.proxy.lazyimport.lazyimport(globals(), 'portage.package.ebuild.doebuild:doebuild_environment,' + \ '_doebuild_spawn', 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs', + 'portage.util:atomic_ofstream', + 'portage.util.configparser:SafeConfigParser,read_configs,' + + 'NoOptionError,ConfigParserError', + 'portage.util._urlopen:urlopen', ) from portage import os, selinux, shutil, _encodings, \ _movefile, _shell_quote, _unicode_encode from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all, - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter) + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter, + checksum_str) from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \ - GLOBAL_CONFIG_PATH + GLOBAL_CONFIG_PATH, CACHE_PATH from portage.data import portage_gid, portage_uid, secpass, userpriv_groups from portage.exception import 
FileNotFound, OperationNotPermitted, \ PortageException, TryAgain @@ -253,6 +262,144 @@ _size_suffix_map = { 'Y' : 80, } + +class FlatLayout(object): + def get_path(self, filename): + return filename + + @staticmethod + def verify_args(args): + return len(args) == 1 + + +class FilenameHashLayout(object): + def __init__(self, algo, cutoffs): + self.algo = algo + self.cutoffs = [int(x) for x in cutoffs.split(':')] + + def get_path(self, filename): + fnhash = checksum_str(filename.encode('utf8'), self.algo) + ret = '' + for c in self.cutoffs: + assert c % 4 == 0 + c = c // 4 + ret += fnhash[:c] + '/' + fnhash = fnhash[c:] + return ret + filename + + @staticmethod + def verify_args(args): + if len(args) != 3: + return False + if args[1] not in get_valid_checksum_keys(): + return False + # argsidate cutoffs + for c in args[2].split(':'): + try: + c = int(c) + except ValueError: + break + else: + if c % 4 != 0: + break + else: + return True + return False + + +class MirrorLayoutConfig(object): + """ + Class to read layout.conf from a mirror. 
+ """ + + def __init__(self): + self.structure = () + + def read_from_file(self, f): + cp = SafeConfigParser() + read_configs(cp, [f]) + vals = [] + for i in itertools.count(): + try: + vals.append(tuple(cp.get('structure', '%d' % i).split())) + except NoOptionError: + break + self.structure = tuple(vals) + + def serialize(self): + return self.structure + + def deserialize(self, data): + self.structure = data + + @staticmethod + def validate_structure(val): + if val[0] == 'flat': + return FlatLayout.verify_args(val) + if val[0] == 'filename-hash': + return FilenameHashLayout.verify_args(val) + return False + + def get_best_supported_layout(self): + for val in self.structure: + if self.validate_structure(val): + if val[0] == 'flat': + return FlatLayout(*val[1:]) + elif val[0] == 'filename-hash': + return FilenameHashLayout(*val[1:]) + else: + # fallback + return FlatLayout() + + +def get_mirror_url(mirror_url, filename, eroot): + """ + Get correct fetch URL for a given file, accounting for mirror + layout configuration. 
+ + @param mirror_url: Base URL to the mirror (without '/distfiles') + @param filename: Filename to fetch + @param eroot: EROOT to use for the cache file + @return: Full URL to fetch + """ + + mirror_conf = MirrorLayoutConfig() + + cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json') + try: + with open(cache_file, 'r') as f: + cache = json.load(f) + except (IOError, ValueError): + cache = {} + + ts, data = cache.get(mirror_url, (0, None)) + # refresh at least daily + if ts >= time.time() - 86400: + mirror_conf.deserialize(data) + else: + try: + f = urlopen(mirror_url + '/distfiles/layout.conf') + try: + data = io.StringIO(f.read().decode('utf8')) + finally: + f.close() + + try: + mirror_conf.read_from_file(data) + except ConfigParserError: + pass + except IOError: + pass + + cache[mirror_url] = (time.time(), mirror_conf.serialize()) + f = atomic_ofstream(cache_file, 'w') + json.dump(cache, f) + f.close() + + return (mirror_url + "/distfiles/" + + mirror_conf.get_best_supported_layout().get_path(filename)) + + def fetch(myuris, mysettings, listonly=0, fetchonly=0, locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None, allow_missing_digests=True): @@ -434,8 +581,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, for myfile, myuri in file_uri_tuples: if myfile not in filedict: filedict[myfile]=[] - for y in range(0,len(locations)): - filedict[myfile].append(locations[y]+"/distfiles/"+myfile) + for l in locations: + filedict[myfile].append(functools.partial( + get_mirror_url, l, myfile, mysettings["EROOT"])) if myuri is None: continue if myuri[:9]=="mirror://": @@ -895,6 +1043,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, tried_locations = set() while uri_list: loc = uri_list.pop() + if isinstance(loc, functools.partial): + loc = loc() # Eliminate duplicates here in case we've switched to # "primaryuri" mode on the fly due to a checksum failure. 
if loc in tried_locations: diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py index 83321fed7..f2254c468 100644 --- a/lib/portage/tests/ebuild/test_fetch.py +++ b/lib/portage/tests/ebuild/test_fetch.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import functools +import io import tempfile import portage @@ -11,12 +12,14 @@ from portage import shutil, os from portage.tests import TestCase from portage.tests.resolver.ResolverPlayground import ResolverPlayground from portage.tests.util.test_socks5 import AsyncHTTPServer +from portage.util.configparser import ConfigParserError from portage.util.futures.executor.fork import ForkExecutor from portage.util._async.SchedulerInterface import SchedulerInterface from portage.util._eventloop.global_event_loop import global_event_loop from portage.package.ebuild.config import config from portage.package.ebuild.digestgen import digestgen -from portage.package.ebuild.fetch import _download_suffix +from portage.package.ebuild.fetch import (_download_suffix, FlatLayout, + FilenameHashLayout, MirrorLayoutConfig) from _emerge.EbuildFetcher import EbuildFetcher from _emerge.Package import Package @@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase): finally: shutil.rmtree(ro_distdir) playground.cleanup() + + def test_flat_layout(self): + self.assertTrue(FlatLayout.verify_args(('flat',))) + self.assertFalse(FlatLayout.verify_args(('flat', 'extraneous-arg'))) + self.assertEqual(FlatLayout().get_path('foo-1.tar.gz'), 'foo-1.tar.gz') + + def test_filename_hash_layout(self): + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash',))) + self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '8'))) + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'INVALID-HASH', '8'))) + self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:12'))) + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '3'))) + 
self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', 'junk'))) + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:junk'))) + + self.assertEqual(FilenameHashLayout('SHA1', '4').get_path('foo-1.tar.gz'), + '1/foo-1.tar.gz') + self.assertEqual(FilenameHashLayout('SHA1', '8').get_path('foo-1.tar.gz'), + '19/foo-1.tar.gz') + self.assertEqual(FilenameHashLayout('SHA1', '8:16').get_path('foo-1.tar.gz'), + '19/c3b6/foo-1.tar.gz') + self.assertEqual(FilenameHashLayout('SHA1', '8:16:24').get_path('foo-1.tar.gz'), + '19/c3b6/37a94b/foo-1.tar.gz') + + def test_mirror_layout_config(self): + mlc = MirrorLayoutConfig() + self.assertEqual(mlc.serialize(), ()) + self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout) + + conf = ''' +[structure] +0=flat +''' + mlc.read_from_file(io.StringIO(conf)) + self.assertEqual(mlc.serialize(), (('flat',),)) + self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout) + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'), + 'foo-1.tar.gz') + + conf = ''' +[structure] +0=filename-hash SHA1 8:16 +1=flat +''' + mlc.read_from_file(io.StringIO(conf)) + self.assertEqual(mlc.serialize(), ( + ('filename-hash', 'SHA1', '8:16'), + ('flat',) + )) + self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout) + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'), + '19/c3b6/foo-1.tar.gz') + serialized = mlc.serialize() + + # test fallback + conf = ''' +[structure] +0=filename-hash INVALID-HASH 8:16 +1=filename-hash SHA1 32 +2=flat +''' + mlc.read_from_file(io.StringIO(conf)) + self.assertEqual(mlc.serialize(), ( + ('filename-hash', 'INVALID-HASH', '8:16'), + ('filename-hash', 'SHA1', '32'), + ('flat',) + )) + self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout) + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'), + '19c3b637/foo-1.tar.gz') + + # test deserialization + 
mlc.deserialize(serialized) + self.assertEqual(mlc.serialize(), ( + ('filename-hash', 'SHA1', '8:16'), + ('flat',) + )) + self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout) + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'), + '19/c3b6/foo-1.tar.gz') + + # test erraneous input + conf = ''' +[#(*DA*&*F +[structure] +0=filename-hash SHA1 32 +''' + self.assertRaises(ConfigParserError, mlc.read_from_file, + io.StringIO(conf)) -- 2.23.0