Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for the flat layout
and the filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <mgo...@gentoo.org>
---
 lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

Changes in v2: switched to a class-based layout design to make the code
reusable in emirrordist.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..18e3d390a 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -7,12 +7,15 @@ __all__ = ['fetch']
 
 import errno
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.package.ebuild.doebuild:doebuild_environment,' + \
                '_doebuild_spawn',
        'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+       'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
+       'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
        _movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+       checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
-       GLOBAL_CONFIG_PATH
+       GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
        PortageException, TryAgain
@@ -253,6 +259,130 @@ _size_suffix_map = {
        'Y' : 80,
 }
 
+
+class FlatLayout(object):
+       """
+       Mirror layout in which the path of a file is just its filename.
+       """
+
+       def get_path(self, filename):
+               """
+               Return the path for the given file under this layout.
+
+               @param filename: name of the file
+               @return: the filename, unchanged
+               """
+               relative_path = filename
+               return relative_path
+
+
+class FilenameHashLayout(object):
+       """
+       Mirror layout in which files are placed in nested directories
+       named after leading characters of the hash of their filename.
+
+       @param algo: hash algorithm name, passed to checksum_str()
+       @param cutoffs: colon-separated integers, one per directory level
+               (e.g. '8' or '4:8'); each must be a multiple of 4
+       """
+
+       def __init__(self, algo, cutoffs):
+               self.algo = algo
+               self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+       def get_path(self, filename):
+               """
+               Return the path for the given file under this layout.
+
+               @param filename: name of the file
+               @return: '<dir>/.../<filename>', with one directory per cutoff
+               @raise ValueError: if a cutoff is not a multiple of 4
+               """
+               fnhash = checksum_str(filename.encode('utf8'), self.algo)
+               parts = []
+               for c in self.cutoffs:
+                       # Every 4 units of cutoff consume one character of the
+                       # hash string.  Raise explicitly instead of asserting:
+                       # asserts are stripped under "python -O", and c // 4
+                       # would then silently truncate a bad cutoff.
+                       if c % 4 != 0:
+                               raise ValueError(
+                                       'cutoff is not a multiple of 4: %d' % c)
+                       c = c // 4
+                       parts.append(fnhash[:c])
+                       fnhash = fnhash[c:]
+               parts.append(filename)
+               return '/'.join(parts)
+
+
+class MirrorLayoutConfig(object):
+       """
+       Class to read layout.conf from a mirror.
+
+       The parsed configuration is kept in self.structure as a tuple of
+       tuples, one per numbered option of the [structure] section, in
+       option order (0, 1, 2, ...).
+       """
+
+       def __init__(self):
+               self.structure = ()
+
+       def read_from_file(self, f):
+               """
+               Parse layout.conf and store its [structure] entries.
+
+               @param f: config source, passed on to read_configs()
+               """
+               cp = SafeConfigParser()
+               read_configs(cp, [f])
+               vals = []
+               for i in itertools.count():
+                       key = '%d' % i
+                       # has_option() is false both when the option and when
+                       # the whole [structure] section is missing, so a file
+                       # without that section gives an empty structure
+                       # instead of an unhandled NoSectionError.
+                       if not cp.has_option('structure', key):
+                               break
+                       vals.append(tuple(cp.get('structure', key).split()))
+               self.structure = tuple(vals)
+
+       def serialize(self):
+               """
+               Return the structure in a form suitable for storing.
+               """
+               return self.structure
+
+       def deserialize(self, data):
+               """
+               Restore the structure from data returned by serialize().
+
+               @param data: sequence of sequences, or None for no entries
+               """
+               # After a JSON round trip the tuples come back as lists;
+               # convert them so the structure has the same shape as after
+               # read_from_file().
+               self.structure = tuple(tuple(x) for x in data or ())
+
+       @staticmethod
+       def validate_structure(val):
+               """
+               Check whether a single structure entry is supported.
+
+               @param val: one entry of the structure (tuple or list)
+               @return: True if the entry describes a supported layout
+               """
+               # Accept lists as well, since entries read back from JSON
+               # are lists and would never compare equal to a tuple.
+               val = tuple(val)
+               if val == ('flat',):
+                       return True
+               # Checking the length first also covers an empty entry,
+               # for which val[0] would raise IndexError.
+               if len(val) == 3 and val[0] == 'filename-hash':
+                       if val[1] not in get_valid_checksum_keys():
+                               return False
+                       # validate cutoffs
+                       for c in val[2].split(':'):
+                               try:
+                                       c = int(c)
+                               except ValueError:
+                                       return False
+                               # Zero or negative cutoffs would yield empty
+                               # or nonsensical directory names.
+                               if c <= 0 or c % 4 != 0:
+                                       return False
+                       return True
+               return False
+
+       def get_best_supported_layout(self):
+               """
+               Return a layout object for the first supported entry of the
+               structure, or FlatLayout if there is none.
+               """
+               for val in self.structure:
+                       if not self.validate_structure(val):
+                               continue
+                       if val[0] == 'flat':
+                               return FlatLayout()
+                       # the only other layout validate_structure() accepts
+                       return FilenameHashLayout(val[1], val[2])
+               # fallback
+               return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+       """
+       Get correct fetch URL for a given file, accounting for mirror
+       layout configuration.
+
+       The mirror's layout.conf is cached in a JSON file below EROOT and
+       fetched again once the cached copy is more than a day old.  Cache
+       problems (unreadable, malformed or unwritable file) are not fatal.
+
+       @param mirror_url: Base URL to the mirror (without '/distfiles')
+       @param filename: Filename to fetch
+       @param eroot: EROOT to use for the cache file
+       @return: Full URL to fetch
+       """
+
+       mirror_conf = MirrorLayoutConfig()
+
+       cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+       try:
+               with open(cache_file, 'r') as f:
+                       cache = json.load(f)
+       except (IOError, ValueError):
+               cache = {}
+       # Valid JSON of an unexpected shape is treated like a missing cache.
+       if not isinstance(cache, dict):
+               cache = {}
+
+       try:
+               ts, data = cache.get(mirror_url, (0, None))
+               # refresh at least daily
+               fresh = ts >= time.time() - 86400
+       except (TypeError, ValueError):
+               # malformed cache entry, fetch layout.conf again
+               fresh = False
+
+       if fresh:
+               mirror_conf.deserialize(data)
+       else:
+               try:
+                       f = urlopen(mirror_url + '/distfiles/layout.conf')
+                       try:
+                               data = io.StringIO(f.read().decode('utf8'))
+                       finally:
+                               f.close()
+
+                       mirror_conf.read_from_file(data)
+               except (IOError, UnicodeDecodeError):
+                       # No usable layout.conf: keep the empty configuration,
+                       # which falls back to the flat layout.
+                       pass
+
+               cache[mirror_url] = (time.time(), mirror_conf.serialize())
+               # Writing the cache is best-effort: a missing or unwritable
+               # cache directory must not abort fetching.
+               try:
+                       with open(cache_file, 'w') as f:
+                               json.dump(cache, f)
+               except (IOError, OSError):
+                       pass
+
+       return (mirror_url + "/distfiles/" +
+                       mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
        allow_missing_digests=True):
@@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        for myfile, myuri in file_uri_tuples:
                if myfile not in filedict:
                        filedict[myfile]=[]
-                       for y in range(0,len(locations)):
-                               
filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+                       for l in locations:
+                               filedict[myfile].append(get_mirror_url(l, 
myfile,
+                                               mysettings["EROOT"]))
                if myuri is None:
                        continue
                if myuri[:9]=="mirror://":
-- 
2.23.0


Reply via email to