On Thu, Oct 3, 2019 at 7:52 AM Michał Górny <mgo...@gentoo.org> wrote:

> Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
> includes fetching and parsing layout.conf, and support for flat layout
> and filename-hash layout with cutoffs that are multiples of 4.
>
> Bug: https://bugs.gentoo.org/646898
> Signed-off-by: Michał Górny <mgo...@gentoo.org>
> ---
>  lib/portage/package/ebuild/fetch.py | 113 +++++++++++++++++++++++++++-
>  1 file changed, 109 insertions(+), 4 deletions(-)
>
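
For context: the layout.conf being parsed here declares the mirror layout
in a numbered [structure] section (GLEP 75). A hypothetical example using
only the variants this patch supports would be:

    [structure]
    0=filename-hash BLAKE2B 8
    1=flat

Clients walk the entries in order and use the first layout they support,
so 'flat' acts as the fallback here.
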
> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> index 227bf45ae..692efcc01 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -7,12 +7,15 @@ __all__ = ['fetch']
>
>  import errno
>  import io
> +import itertools
> +import json
>  import logging
>  import random
>  import re
>  import stat
>  import sys
>  import tempfile
> +import time
>
>  from collections import OrderedDict
>
> @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
>         'portage.package.ebuild.doebuild:doebuild_environment,' + \
>                 '_doebuild_spawn',
>         'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> +       'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> +       'portage.util._urlopen:urlopen',
>  )
>
>  from portage import os, selinux, shutil, _encodings, \
>         _movefile, _shell_quote, _unicode_encode
>  from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
> -       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> +       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> +       checksum_str)
>  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> -       GLOBAL_CONFIG_PATH
> +       GLOBAL_CONFIG_PATH, CACHE_PATH
>  from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
>  from portage.exception import FileNotFound, OperationNotPermitted, \
>         PortageException, TryAgain
> @@ -253,6 +259,104 @@ _size_suffix_map = {
>         'Y' : 80,
>  }
>
> +
> +def filename_hash_path(filename, algo, cutoffs):
> +       """
> +       Get directory path for filename in filename-hash mirror structure.
> +
> +       @param filename: Filename to fetch
> +       @param algo: Hash algorithm
> +       @param cutoffs: Cutoff values (n:n...)
> +       @return: Directory path
> +       """
> +
> +       fnhash = checksum_str(filename.encode('utf8'), algo)
> +       ret = ''
> +       for c in cutoffs.split(':'):
> +               c = int(c) // 4
> +               ret += fnhash[:c] + '/'
>

When making a path, please use os.path.join()
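
Something like this is what I have in mind (a rough, untested sketch with
the same cutoff semantics as the original; the empty final component keeps
the trailing separator that callers concatenate the filename onto):

    def filename_hash_path(filename, algo, cutoffs):
        fnhash = checksum_str(filename.encode('utf8'), algo)
        parts = []
        for c in cutoffs.split(':'):
            c = int(c) // 4
            parts.append(fnhash[:c])
            fnhash = fnhash[c:]
        # os.path.join() inserts the separators for us
        return os.path.join(*(parts + ['']))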


> +               fnhash = fnhash[c:]
> +       return ret
> +
> +
> +def get_mirror_url(mirror_url, filename, eroot):
> +       """
> +       Get correct fetch URL for a given file, accounting for mirror
> +       layout configuration.
> +
> +       @param mirror_url: Base URL to the mirror (without '/distfiles')
> +       @param filename: Filename to fetch
> +       @param eroot: EROOT to use for the cache file
> +       @return: Full URL to fetch
> +       """
> +
> +       cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
> +       try:
> +               with open(cache_file, 'r') as f:
> +                       cache = json.load(f)
> +       except (IOError, ValueError):
> +               cache = {}
>

I'm a bit worried that we are opening this cache file off of disk every
time we call get_mirror_url(). Can we just cache the contents in memory
between calls; or, even better, pass the cache in as an argument rather
than have it contained in get_mirror_url()?
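
Something along these lines (hypothetical 'cache' keyword argument, not
tested) would let fetch() read the JSON once and reuse it for every URL:

    # in fetch(), load the cache once:
    cache_file = os.path.join(mysettings["EROOT"], CACHE_PATH,
            'mirror-metadata.json')
    try:
        with open(cache_file, 'r') as f:
            mirror_cache = json.load(f)
    except (IOError, ValueError):
        mirror_cache = {}

    # ... and pass it down on each call:
    filedict[myfile].append(get_mirror_url(l, myfile,
            mysettings["EROOT"], cache=mirror_cache))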


> +
> +       ts, layout = cache.get(mirror_url, (0, None))
> +       # refresh at least daily
> +       if ts < time.time() - 86400:
> +               # the default
> +               layout = ('flat',)
> +
> +               try:
> +                       f = urlopen(mirror_url + '/distfiles/layout.conf')
> +                       try:
> +                               data = io.StringIO(f.read().decode('utf8'))
> +                       finally:
> +                               f.close()
> +                       cp = SafeConfigParser()
> +                       read_configs(cp, [data])
> +
> +                       for i in itertools.count():
> +                               try:
> +                                       val = tuple(cp.get('structure', '%d' % i).split())
> +                                       if val == ('flat',):
> +                                               pass
> +                                       elif val[0] == 'filename-hash' and len(val) == 3:
> +                                               if val[1] not in get_valid_checksum_keys():
> +                                                       continue
> +                                               # validate cutoffs
> +                                               cutoffs_good = False
> +                                               for c in val[2].split(':'):
> +                                                       try:
> +                                                               c = int(c)
> +                                                       except ValueError:
> +                                                               break
> +                                                       else:
> +                                                               if c % 4 != 0:
> +                                                                       break
> +                                               else:
> +                                                       cutoffs_good = True
> +                                               if not cutoffs_good:
> +                                                       continue
> +                                       else:
> +                                               # (skip unsupported variant)
> +                                               continue
> +                                       layout = val
> +                                       break
> +                               except NoOptionError:
> +                                       break
> +               except IOError:
> +                       pass
> +
> +               cache[mirror_url] = (time.time(), layout)
> +               with open(cache_file, 'w') as f:
> +                       json.dump(cache, f)
> +
> +       if layout[0] == 'flat':
> +               return mirror_url + "/distfiles/" + filename
> +       elif layout[0] == 'filename-hash':
> +               return (mirror_url + "/distfiles/" +
> +                               filename_hash_path(filename, *layout[1:]) + filename)
> +       else:
> +               raise AssertionError("get_mirror_url() got unknown layout type")
>

raise AssertionError("get_mirror_url() got unknown layout type %s, wanted one of %s"
        % (layout[0], ('flat', 'filename-hash')))

E.g. if you get an unknown thing, it's nice to print what you got and what
you wanted so callers can fix it.


> +
> +
>  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>         locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
>         allow_missing_digests=True):
> @@ -434,8 +538,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>         for myfile, myuri in file_uri_tuples:
>                 if myfile not in filedict:
>                         filedict[myfile]=[]
> -                       for y in range(0,len(locations)):
> -                               filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> +                       for l in locations:
> +                               filedict[myfile].append(get_mirror_url(l, myfile,
> +                                               mysettings["EROOT"]))
>                 if myuri is None:
>                         continue
>                 if myuri[:9]=="mirror://":
> --
> 2.23.0
>
>
>
