On Thu, Oct 3, 2019 at 7:52 AM Michał Górny <mgo...@gentoo.org> wrote:
> Add support for the subset of GLEP 75 needed by Gentoo Infra. This
> includes fetching and parsing layout.conf, and support for flat layout
> and filename-hash layout with cutoffs being multiples of 4.
>
> Bug: https://bugs.gentoo.org/646898
> Signed-off-by: Michał Górny <mgo...@gentoo.org>
> ---
>  lib/portage/package/ebuild/fetch.py | 113 +++++++++++++++++++++++++++-
>  1 file changed, 109 insertions(+), 4 deletions(-)
>
> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> index 227bf45ae..692efcc01 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -7,12 +7,15 @@ __all__ = ['fetch']
>
>  import errno
>  import io
> +import itertools
> +import json
>  import logging
>  import random
>  import re
>  import stat
>  import sys
>  import tempfile
> +import time
>
>  from collections import OrderedDict
>
> @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
>  	'portage.package.ebuild.doebuild:doebuild_environment,' + \
>  		'_doebuild_spawn',
>  	'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> +	'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> +	'portage.util._urlopen:urlopen',
>  )
>
>  from portage import os, selinux, shutil, _encodings, \
>  	_movefile, _shell_quote, _unicode_encode
>  from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
> -	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> +	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> +	checksum_str)
>  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> -	GLOBAL_CONFIG_PATH
> +	GLOBAL_CONFIG_PATH, CACHE_PATH
>  from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
>  from portage.exception import FileNotFound, OperationNotPermitted, \
>  	PortageException, TryAgain
> @@ -253,6 +259,104 @@ _size_suffix_map = {
>  	'Y' : 80,
>  }
>
> +
> +def filename_hash_path(filename, algo, cutoffs):
> +	"""
> +	Get directory path for filename in filename-hash mirror structure.
> +
> +	@param filename: Filename to fetch
> +	@param algo: Hash algorithm
> +	@param cutoffs: Cutoff values (n:n...)
> +	@return: Directory path
> +	"""
> +
> +	fnhash = checksum_str(filename.encode('utf8'), algo)
> +	ret = ''
> +	for c in cutoffs.split(':'):
> +		c = int(c) // 4
> +		ret += fnhash[:c] + '/'

When making a path, please use os.path.join().

> +		fnhash = fnhash[c:]
> +	return ret
> +
> +
> +def get_mirror_url(mirror_url, filename, eroot):
> +	"""
> +	Get correct fetch URL for a given file, accounting for mirror
> +	layout configuration.
> +
> +	@param mirror_url: Base URL to the mirror (without '/distfiles')
> +	@param filename: Filename to fetch
> +	@param eroot: EROOT to use for the cache file
> +	@return: Full URL to fetch
> +	"""
> +
> +	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
> +	try:
> +		with open(cache_file, 'r') as f:
> +			cache = json.load(f)
> +	except (IOError, ValueError):
> +		cache = {}

I'm a bit worried that we are opening this cache file off of disk every
time we call get_mirror_url(). Can we just cache the contents in memory
between calls, or better yet, pass the cache in as an argument rather
than keeping it inside get_mirror_url()?
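To make both of the comments above concrete, here is a rough sketch of
the shape I have in mind. Caveats: hashlib only stands in for
portage.checksum.checksum_str(), and load_mirror_cache() plus the
mirror_cache parameter are names I made up for illustration, not
anything in the patch:

import hashlib
import json
import os.path


def filename_hash_path(filename, algo, cutoffs):
	# Same cutoff math as the patch, but joined via os.path.join().
	# (checksum_str() would do the real algorithm handling; hashlib
	# is only a stand-in here.)
	fnhash = hashlib.new(algo.lower(), filename.encode('utf8')).hexdigest()
	parts = []
	for c in cutoffs.split(':'):
		c = int(c) // 4  # cutoffs are in bits; a hex digit covers 4
		parts.append(fnhash[:c])
		fnhash = fnhash[c:]
	return os.path.join(*parts)


def load_mirror_cache(cache_file):
	# Read the JSON metadata cache from disk exactly once, up front.
	try:
		with open(cache_file, 'r') as f:
			return json.load(f)
	except (IOError, ValueError):
		return {}

get_mirror_url() would then take the loaded dict as a parameter, and
fetch() would call load_mirror_cache() once before its mirror loop and
write the dict back once afterwards. Two things to note: os.path.join()
does not produce the trailing separator the current code relies on, so
the caller would have to join the filename in as well; and since the
result is interpolated into a URL, posixpath.join() might be the safer
spelling on non-POSIX platforms.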
> +
> +	ts, layout = cache.get(mirror_url, (0, None))
> +	# refresh at least daily
> +	if ts < time.time() - 86400:
> +		# the default
> +		layout = ('flat',)
> +
> +		try:
> +			f = urlopen(mirror_url + '/distfiles/layout.conf')
> +			try:
> +				data = io.StringIO(f.read().decode('utf8'))
> +			finally:
> +				f.close()
> +			cp = SafeConfigParser()
> +			read_configs(cp, [data])
> +
> +			for i in itertools.count():
> +				try:
> +					val = tuple(cp.get('structure', '%d' % i).split())
> +					if val == ('flat',):
> +						pass
> +					elif val[0] == 'filename-hash' and len(val) == 3:
> +						if val[1] not in get_valid_checksum_keys():
> +							continue
> +						# validate cutoffs
> +						cutoffs_good = False
> +						for c in val[2].split(':'):
> +							try:
> +								c = int(c)
> +							except ValueError:
> +								break
> +							else:
> +								if c % 4 != 0:
> +									break
> +						else:
> +							cutoffs_good = True
> +						if not cutoffs_good:
> +							continue
> +					else:
> +						# (skip unsupported variant)
> +						continue
> +					layout = val
> +					break
> +				except NoOptionError:
> +					break
> +		except IOError:
> +			pass
> +
> +		cache[mirror_url] = (time.time(), layout)
> +		with open(cache_file, 'w') as f:
> +			json.dump(cache, f)
> +
> +	if layout[0] == 'flat':
> +		return mirror_url + "/distfiles/" + filename
> +	elif layout[0] == 'filename-hash':
> +		return (mirror_url + "/distfiles/" +
> +			filename_hash_path(filename, *layout[1:]) + filename)
> +	else:
> +		raise AssertionError("get_mirror_url() got unknown layout type")

raise AssertionError("get_mirror_url() got unknown layout type %s, wanted one of %s" %
	(layout[0], ('flat', 'filename-hash')))

E.g. if you get an unknown value, it's nice to print what you got and
what you expected so callers can fix it.

> +
> +
>  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>  	locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
>  	allow_missing_digests=True):
> @@ -434,8 +538,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>  	for myfile, myuri in file_uri_tuples:
>  		if myfile not in filedict:
>  			filedict[myfile]=[]
> -		for y in range(0,len(locations)):
> -			filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> +		for l in locations:
> +			filedict[myfile].append(get_mirror_url(l, myfile,
> +				mysettings["EROOT"]))
>  		if myuri is None:
>  			continue
>  		if myuri[:9]=="mirror://":
> --
> 2.23.0
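One more note for the archives, since the cutoff arithmetic took me a
second to untangle: the cutoffs are bit counts, and each hex digit of
the hash carries 4 bits, hence the int(c) // 4. A quick illustration
(hashlib.blake2b again standing in for checksum_str(); the filename and
the '4:8' cutoffs are made-up example values):

import hashlib

fnhash = hashlib.blake2b(b'foo-1.0.tar.gz').hexdigest()
for c in '4:8'.split(':'):
	width = int(c) // 4    # 4 bits -> 1 hex digit, 8 bits -> 2
	print(fnhash[:width])  # first level: 1 digit; second level: 2
	fnhash = fnhash[width:]

With those cutoffs the fetch URL comes out as
<mirror>/distfiles/<1 hex digit>/<2 hex digits>/foo-1.0.tar.gz, which
matches what filename_hash_path() returns once the filename is
appended.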