Attaching the rebased mash multilib optimization patch. In testing with dgilmore today, this reduced the multilib section of mash to about 1.5 minutes of wall-clock time, which is a 50% savings.
-Toshio
From 30f3fc732fe4bd25fbb1e249136b0855b4f54ec2 Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi <[email protected]> Date: Wed, 6 May 2015 12:14:15 -0700 Subject: [PATCH] Multilib mashing optimization * Only create the lists of packages and directories once, when the class is created (not at instantiation or, worse, every time the method is invoked) * Use sets instead of lists for containment tests * Reduce calling of fnmatch * Bypass groups of tests when we know earlier that they will never be true --- mash/config.py | 56 +++++++++++++++++++--- mash/multilib.py | 141 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 127 insertions(+), 70 deletions(-) diff --git a/mash/config.py b/mash/config.py index 57b6668..a2f928a 100644 --- a/mash/config.py +++ b/mash/config.py @@ -19,6 +19,48 @@ import string from ConfigParser import RawConfigParser from yum import config +from yum.misc import read_in_items_from_dot_dir + +class SetOption(config.Option): + """An option that contains a set of strings. + + This is a port of :class:`yum.config.ListOption` to return sets + """ + def __init__(self, default=None, parse_default=False): + if default is None: + default = set() + super(SetOption, self).__init__(default, parse_default) + + def parse(self, s): + """Convert a string from the config file into a set. parses + globdir:paths as foo.d-style dirs. + + :param s: The string to be converted to a set. Commas and + whitespace are used as separators for the set. + :return: *s* converted to a set + """ + # we need to allow for the '\n[whitespace]' continuation - easier + # to sub the \n with a space and then read the lines + s = s.replace('\n', ' ') + s = s.replace(',', ' ') + results = set() + for item in s.split(): + if item.startswith('glob:'): + thisglob = item.replace('glob:', '') + results.update(read_in_items_from_dot_dir(thisglob)) + continue + results.add(item) + + return results + + def tostring(self, value): + """Convert a set of strings to a string value.
This does the + opposite of the :meth:`parse` method above. + + :param value: a list of values + :return: string representation of input + """ + return '\n '.join(value) class MashConfig(config.BaseConfig): rpm_path = config.Option('Mash') @@ -29,23 +71,23 @@ class MashConfig(config.BaseConfig): multilib = config.BoolOption(True) multilib_method = config.Option('devel') multilib_file = config.Option() - multilib_devel_whitelist = config.ListOption() - multilib_devel_blacklist = config.ListOption([ + multilib_devel_whitelist = SetOption() + multilib_devel_blacklist = SetOption(set(( 'dmraid-devel', 'kdeutils-devel', 'mkinitrd-devel', 'java-1.5.0-gcj-devel', 'java-1.7.0-icedtea-devel', 'php-devel', 'java-1.6.0-openjdk-devel', 'java-1.7.0-openjdk-devel', 'java-1.8.0-openjdk-devel', 'httpd-devel', - ]) - multilib_runtime_whitelist = config.ListOption([ + ))) + multilib_runtime_whitelist = SetOption(set(( 'libgnat', 'wine', 'lmms-vst', 'nspluginwrapper', 'libflashsupport', 'valgrind', 'perl-libs', 'redhat-lsb', 'yaboot', 'syslinux-extlinux-nonlinux', 'syslinux-nonlinux', 'syslinux-tftpboot', - ]) - multilib_runtime_blacklist = config.ListOption([ + ))) + multilib_runtime_blacklist = SetOption(set(( 'tomcat-native', 'php', 'httpd', - ]) + ))) arches = config.ListOption() keys = config.ListOption() configdir = config.Option('/etc/mash') diff --git a/mash/multilib.py b/mash/multilib.py index e18b87b..b3b7ce2 100644 --- a/mash/multilib.py +++ b/mash/multilib.py @@ -11,15 +11,18 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+import os from fnmatch import fnmatch -class MultilibMethod: +class MultilibMethod(object): + PREFER_64 = frozenset(( 'gdb', 'frysk', 'systemtap', 'systemtap-runtime', 'ltrace', 'strace' )) + def __init__(self, config): self.name = 'base' + def select(self, po): - prefer_64 = [ 'gdb', 'frysk', 'systemtap', 'systemtap-runtime', 'ltrace', 'strace' ] if po.arch.find('64') != -1: - if po.name in prefer_64: + if po.name in self.PREFER_64: return True if po.name.startswith('kernel'): for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides: @@ -27,7 +30,7 @@ class MultilibMethod: return True return False -class NoMultilibMethod: +class NoMultilibMethod(object): def __init__(self, config): self.name = 'none' @@ -40,7 +43,7 @@ class AllMultilibMethod(MultilibMethod): def select(self, po): return True - + class FileMultilibMethod(MultilibMethod): def __init__(self, config): file = config.multilib_file @@ -54,14 +57,14 @@ class FileMultilibMethod(MultilibMethod): line = line.strip() if not line.startswith('#'): self.list.append(line) - + def select(self, po): for item in self.list: if fnmatch(po.name, item): return True return False -class KernelMultilibMethod: +class KernelMultilibMethod(object): def __init__(self, config): self.name = 'base' @@ -73,7 +76,7 @@ class KernelMultilibMethod: return True return False -class YabootMultilibMethod: +class YabootMultilibMethod(object): def __init__(self, config): self.name = 'base' @@ -83,18 +86,41 @@ class YabootMultilibMethod: return True return False + class RuntimeMultilibMethod(MultilibMethod): + ROOTLIBDIRS = frozenset(('/lib', '/lib64')) + USRLIBDIRS = frozenset(('/usr/lib', '/usr/lib64')) + LIBDIRS = ROOTLIBDIRS.union(USRLIBDIRS) + OPROFILEDIRS = frozenset(('/usr/lib/oprofile', '/usr/lib64/oprofile')) + WINEDIRS = frozenset(('/usr/lib/wine', '/usr/lib64/wine')) + SANEDIRS = frozenset(('/usr/lib/sane', '/usr/lib64/sane')) + + by_dir = set() + + # alsa, dri, gtk-accessibility, scim-bridge-gtk, krb5, sasl, vdpau + 
by_dir.update(frozenset(os.path.join('/usr/lib', p) for p in ('alsa-lib', + 'dri', 'gtk-2.0/modules', 'gtk-2.0/immodules', 'krb5/plugins', + 'sasl2', 'vdpau'))) + by_dir.update(frozenset(os.path.join('/usr/lib64', p) for p in ('alsa-lib', + 'dri', 'gtk-2.0/modules', 'gtk-2.0/immodules', 'krb5/plugins', + 'sasl2', 'vdpau'))) + + # pam + by_dir.update(frozenset(os.path.join(p, 'security') for p in ROOTLIBDIRS)) + + # lsb + by_dir.add('/etc/lsb-release.d') + def __init__(self, config): self.name = 'runtime' self.config = config def select(self, po): - libdirs = [ '/usr/lib', '/usr/lib64', '/lib', '/lib64' ] if po.name in self.config.multilib_runtime_blacklist: return False if po.name in self.config.multilib_runtime_whitelist: return True - if MultilibMethod.select(self,po): + if MultilibMethod.select(self, po): return True if po.name.startswith('kernel'): for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides: @@ -102,57 +128,49 @@ class RuntimeMultilibMethod(MultilibMethod): return False for file in po.returnFileEntries(): (dirname, filename) = file.rsplit('/', 1) + # libraries in standard dirs - if dirname in libdirs and fnmatch(filename, '*.so.*'): - return True - # dri - if dirname in [ '/usr/lib/dri', '/usr/lib64/dri' ]: - return True - # vdpau - if dirname in [ '/usr/lib/vdpau', '/usr/lib64/vdpau' ]: - return True - # krb5 - if dirname in [ '/usr/lib/krb5/plugins', '/usr/lib64/krb5/plugins' ]: - return True - # pam - if dirname in [ '/lib/security', '/lib64/security' ]: + if dirname in self.LIBDIRS and fnmatch(filename, '*.so.*'): return True - # sasl - if dirname in [ '/usr/lib/sasl2', '/usr/lib64/sasl2' ]: - return True - # nss - if dirname in [ '/lib', '/lib64' ] and filename.startswith('libnss_'): - return True - # alsa - if dirname in [ '/usr/lib/alsa-lib', '/usr/lib64/alsa-lib' ]: - return True - # lsb - if dirname == '/etc/lsb-release.d': + if dirname in self.by_dir: return True # mysql, qt, etc. 
if dirname == '/etc/ld.so.conf.d' and filename.endswith('.conf'): return True - # gtk2-engines - if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/engines'): - return True - # accessibility - if fnmatch(dirname, '/usr/lib*/gtk-2.0/modules'): - return True - if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/modules'): - return True - # scim-bridge-gtk - if fnmatch(dirname, '/usr/lib*/gtk-2.0/immodules'): + # nss (Some nss modules end in .so instead of .so.X) + # db (db modules end in .so instead of .so.X) + if dirname in self.ROOTLIBDIRS and (filename.startswith('libnss_') or filename.startswith('libdb-')): return True - if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/immodules'): - return True - # images - if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/loaders'): - return True - if fnmatch(dirname, '/usr/lib*/gdk-pixbuf-2.0/*/loaders'): - return True - if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/printbackends'): - return True - if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/filesystems'): + # Optimization: + # All tests beyond here are for things in USRLIBDIRS + if not dirname.startswith(tuple(self.USRLIBDIRS)): + # The dirname does not start with a USRLIBDIR so we can move + # on to the next file + continue + + if dirname.startswith(('/usr/lib/gtk-2.0', '/usr/lib64/gtk-2.0')): + # gtk2-engines + if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/engines'): + return True + # accessibility + if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/modules'): + return True + # scim-bridge-gtk + if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/immodules'): + return True + # images + if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/loaders'): + return True + if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/printbackends'): + return True + if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/filesystems'): + return True + # Optimization: + # No tests beyond here for things in /usr/lib*/gtk-2.0 + continue + + # gstreamer + if dirname.startswith(('/usr/lib/gstreamer-', '/usr/lib64/gstreamer-')): return True # qt/kde fun if fnmatch(dirname, '/usr/lib*/qt*/plugins/*'): @@ 
-162,23 +180,20 @@ class RuntimeMultilibMethod(MultilibMethod): # qml if fnmatch(dirname, '/usr/lib*/qt5/qml/*'): return True - # gstreamer - if fnmatch(dirname, '/usr/lib*/gstreamer-*'): + # images + if fnmatch(dirname, '/usr/lib*/gdk-pixbuf-2.0/*/loaders'): return True # xine-lib if fnmatch(dirname, '/usr/lib*/xine/plugins/*'): return True # oprofile - if fnmatch(dirname, '/usr/lib*/oprofile') and fnmatch(filename, '*.so.*'): + if dirname in self.OPROFILEDIRS and fnmatch(filename, '*.so.*'): return True # wine - if fnmatch(dirname, '/usr/lib*/wine') and filename.endswith('.so'): - return True - # db - if dirname in [ '/lib', '/lib64' ] and filename.startswith('libdb-'): + if dirname in self.WINEDIRS and filename.endswith('.so'): return True # sane drivers - if dirname in [ '/usr/lib/sane', '/usr/lib64/sane' ] and filename.startswith('libsane-'): + if dirname in self.SANEDIRS and filename.startswith('libsane-'): return True return False @@ -192,7 +207,7 @@ class DevelMultilibMethod(RuntimeMultilibMethod): return False if po.name in self.config.multilib_devel_whitelist: return True - if RuntimeMultilibMethod.select(self,po): + if RuntimeMultilibMethod.select(self, po): return True if po.name.startswith('ghc-'): return False -- 2.1.0
pgpFxtNd3F6Pq.pgp
Description: PGP signature
-- buildsys mailing list [email protected] https://admin.fedoraproject.org/mailman/listinfo/buildsys
