Re: [gentoo-portage-dev] [PATCH] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)

Alec Warner Mon, 13 Nov 2017 13:11:56 -0800

On Sun, Nov 12, 2017 at 6:48 AM, Zac Medico <zmed...@gentoo.org> wrote:


> Deps like these:
>
>   || ( foo bar ) || ( bar baz )
>
> Translate to disjunctive normal form (DNF):
>
>   || ( ( foo bar ) ( foo baz ) ( bar bar ) ( bar baz ) )
>
> Using DNF, if none of the packages are currently installed,
> then the ( bar bar ) choice will be automatically preferred
> since it is satisfied by the fewest number of packages.
> If the ( foo baz ) choice is already satisfied, then that
> choice will be preferred instead.
>
> Since DNF results in exponential explosion of the formula,
> only use DNF for the parts of the dependencies that have
> overlapping atoms.
>
> In order to simplify the implementation of the dnf_convert
> function, this patch also fixes _expand_new_virtuals to
> normalize results in the same way as use_reduce (with no
> redundant nested lists).
>
> Bug: https://bugs.gentoo.org/632026
> ---
>  pym/portage/dep/_dnf.py                            |  97 ++++++++++++++++
>  pym/portage/dep/dep_check.py                       | 127
> +++++++++++++++++++--
>  pym/portage/tests/dep/test_dnf_convert.py          |  48 ++++++++
>  pym/portage/tests/dep/test_overlap_dnf.py          |  28 +++++
>  .../resolver/test_virtual_minimize_children.py     |  92 +++++++++++++++
>  5 files changed, 385 insertions(+), 7 deletions(-)
>  create mode 100644 pym/portage/dep/_dnf.py
>  create mode 100644 pym/portage/tests/dep/test_dnf_convert.py
>  create mode 100644 pym/portage/tests/dep/test_overlap_dnf.py
>  create mode 100644 pym/portage/tests/resolver/test_virtual_minimize_
> children.py
>
> diff --git a/pym/portage/dep/_dnf.py b/pym/portage/dep/_dnf.py
> new file mode 100644
> index 000000000..503b209c2
> --- /dev/null
> +++ b/pym/portage/dep/_dnf.py
> @@ -0,0 +1,97 @@
> +# Copyright 2017 Gentoo Foundation
> +# Distributed under the terms of the GNU General Public License v2
> +
> +from __future__ import unicode_literals
> +
> +import itertools
> +
> +
> +def dnf_convert(dep_struct):
> +       """
> +       Convert dep_struct to disjunctive normal form (DNF), where
> dep_struct
> +       is either a conjunction or disjunction of the form produced by
> +       use_reduce(opconvert=True).
> +       """
> +       # Normalize input to have a top-level conjunction.
> +       if isinstance(dep_struct, list):
> +               if dep_struct and dep_struct[0] == '||':
> +                       dep_struct = [dep_struct]
> +       else:
> +               dep_struct = [dep_struct]
> +
> +       conjunction = []
> +       disjunctions = []
> +
> +       for x in dep_struct:
> +               if isinstance (x, list):
> +                       assert x
>


I'm not a huge fan of asserts, but if we use them can we use them in the
expr, message form?

assert x, "Assertion failed, wanted x in list form in dep: %s" % dep_struct

or whatever.


> +                       if x[0] == '||':
> +                               if any(isinstance(element, list) for
> element in x):
> +                                       x_dnf = ['||']
> +                                       for element in x[1:]:
> +                                               if isinstance(element,
> list):
> +                                                       # Due to
> normalization, a disjunction must not be
> +                                                       # nested directly
> in another disjunction, so this
> +                                                       # must be a
> conjunction.
> +                                                       assert element and
> element[0] != '||'
> +                                                       element =
> dnf_convert(element)
> +                                                       if
> contains_disjunction(element):
> +                                                               assert
> (len(element) == 1 and
> +
>  element[0] and element[0][0] == '||')
> +
>  x_dnf.extend(element[0][1:])
> +                                                       else:
> +
>  x_dnf.append(element)
> +                                               else:
> +
>  x_dnf.append(element)
> +                                       x = x_dnf
> +                               disjunctions.append(x)
> +                       else:
> +                               for x in dnf_convert(x):
> +                                       if isinstance (x, list):
> +                                               # Due to normalization, a
> conjunction must not be
> +                                               # nested directly in
> another conjunction, so this
> +                                               # must be a disjunction.
> +                                               assert x and x[0] == '||'
> +                                               disjunctions.append(x)
> +                                       else:
> +                                               conjunction.append(x)
> +               else:
> +                       conjunction.append(x)
> +
> +       if disjunctions and (conjunction or len(disjunctions) > 1):
> +               dnf_form = ['||']
> +               for x in itertools.product(*[x[1:] for x in disjunctions]):
> +                       normalized = conjunction[:]
> +                       for element in x:
> +                               if isinstance(element, list):
> +                                       normalized.extend(element)
> +                               else:
> +                                       normalized.append(element)
> +                       dnf_form.append(normalized)
> +               result = [dnf_form]
> +       else:
> +               result = conjunction + disjunctions
> +
> +       return result
> +
> +
> +def contains_disjunction(dep_struct):
> +       """
> +       Search for a disjunction contained in dep_struct, where dep_struct
> +       is either a conjunction or disjunction of the form produced by
> +       use_reduce(opconvert=True). If dep_struct is a disjunction, then
> +       this only returns True if there is a nested disjunction. Due to
> +       normalization, recursion is only needed when dep_struct is a
> +       disjunction containing a conjunction. If dep_struct is a
> conjunction,
> +       then it is assumed that normalization has elevated any nested
> +       disjunctions to the top-level.
> +       """
> +       is_disjunction = dep_struct and dep_struct[0] == '||'
> +       for x in dep_struct:
> +               if isinstance(x, list):
> +                       assert x
> +                       if x[0] == '||':
> +                               return True
> +                       elif is_disjunction and contains_disjunction(x):
> +                               return True
> +       return False
> diff --git a/pym/portage/dep/dep_check.py b/pym/portage/dep/dep_check.py
> index b33f7e5db..6db4cc42e 100644
> --- a/pym/portage/dep/dep_check.py
> +++ b/pym/portage/dep/dep_check.py
> @@ -6,14 +6,20 @@ from __future__ import unicode_literals
>  __all__ = ['dep_check', 'dep_eval', 'dep_wordreduce', 'dep_zapdeps']
>
>  import collections
> +import itertools
>  import logging
>  import operator
>
>  import portage
>  from portage.dep import Atom, match_from_list, use_reduce
> +from portage.dep._dnf import (
> +       dnf_convert as _dnf_convert,
> +       contains_disjunction as _contains_disjunction,
> +)
>  from portage.exception import InvalidDependString, ParseError
>  from portage.localization import _
>  from portage.util import writemsg, writemsg_level
> +from portage.util.digraph import digraph
>  from portage.util.SlotObject import SlotObject
>  from portage.versions import vercmp, _pkg_str
>
> @@ -28,7 +34,11 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
> mysettings, myroot="/",
>         atom because it wouldn't necessarily make sense to block all the
> components
>         of a compound virtual.  When more than one new-style virtual is
> matched,
>         the matches are sorted from highest to lowest versions and the
> atom is
> -       expanded to || ( highest match ... lowest match )."""
> +       expanded to || ( highest match ... lowest match ).
> +
> +       The result is normalized in the same way as use_reduce, having a
> top-level
> +       conjuction, and no redundant nested lists.
> +       """
>         newsplit = []
>         mytrees = trees[myroot]
>         portdb = mytrees["porttree"].dbapi
> @@ -54,14 +64,30 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
> mysettings, myroot="/",
>                 portdb = trees[myroot]["bintree"].dbapi
>         pprovideddict = mysettings.pprovideddict
>         myuse = kwargs["myuse"]
> +       is_disjunction = mysplit and mysplit[0] == '||'
>         for x in mysplit:
>                 if x == "||":
>                         newsplit.append(x)
>                         continue
>                 elif isinstance(x, list):
> -                       newsplit.append(_expand_new_virtuals(x, edebug,
> mydbapi,
> +                       x = _expand_new_virtuals(x, edebug, mydbapi,
>                                 mysettings, myroot=myroot, trees=trees,
> use_mask=use_mask,
> -                               use_force=use_force, **kwargs))
> +                               use_force=use_force, **kwargs)
> +                       if is_disjunction:
> +                               if len(x) == 1:
> +                                       x = x[0]
> +                                       if isinstance(x, list):
> +                                               # Due to normalization, a
> conjunction must not be
> +                                               # nested directly in
> another conjunction, so this
> +                                               # must be a disjunction.
> +                                               assert x and x[0] == '||'
> +                                               newsplit.extend(x[1:])
> +                                       else:
> +                                               newsplit.append(x)
> +                               else:
> +                                       newsplit.append(x)
> +                       else:
> +                               newsplit.extend(x)
>                         continue
>
>                 if not isinstance(x, Atom):
> @@ -101,6 +127,8 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
> mysettings, myroot="/",
>                                         a.append(Atom(x.replace(x.cp,
> y.cp, 1)))
>                                 if not a:
>                                         newsplit.append(x)
> +                               elif is_disjunction:
> +                                       newsplit.extend(a)
>                                 elif len(a) == 1:
>                                         newsplit.append(a[0])
>                                 else:
> @@ -218,11 +246,18 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
> mysettings, myroot="/",
>                         newsplit.append(x)
>                         if atom_graph is not None:
>                                 atom_graph.add((x, id(x)), graph_parent)
> +               elif is_disjunction:
> +                       newsplit.extend(a)
>                 elif len(a) == 1:
> -                       newsplit.append(a[0])
> +                       newsplit.extend(a[0])
>                 else:
>                         newsplit.append(['||'] + a)
>
> +       # For consistency with related functions like use_reduce, always
> +       # normalize the result to have a top-level conjunction.
> +       if is_disjunction:
> +               newsplit = [newsplit]
> +
>         return newsplit
>
>  def dep_eval(deplist):
> @@ -612,9 +647,9 @@ def dep_zapdeps(unreduced, reduced, myroot,
> use_binaries=0, trees=None):
>         for choices in choice_bins:
>                 if len(choices) < 2:
>                         continue
> -               # Prefer choices with all_installed_slots for bug #480736.
> -               choices.sort(key=operator.attrgetter('all_installed_
> slots'),
> -                       reverse=True)
> +               # Prefer choices with all_installed_slots for bug #480736,
> and
> +               # choices with a smaller number of packages for bug
> #632026.
> +               choices.sort(key=lambda x: (not x.all_installed_slots,
> len(x.slot_map)))
>                 for choice_1 in choices[1:]:
>                         cps = set(choice_1.cp_map)
>                         for choice_2 in choices:
> @@ -741,6 +776,9 @@ def dep_check(depstring, mydbapi, mysettings,
> use="yes", mode=None, myuse=None,
>         except ParseError as e:
>                 return [0, "%s" % (e,)]
>
> +       if mysettings.local_config: # if not repoman
> +               mysplit = _overlap_dnf(mysplit)
> +
>         mysplit2 = dep_wordreduce(mysplit,
>                 mysettings, mydbapi, mode, use_cache=use_cache)
>         if mysplit2 is None:
> @@ -755,6 +793,81 @@ def dep_check(depstring, mydbapi, mysettings,
> use="yes", mode=None, myuse=None,
>
>         return [1, selected_atoms]
>
> +
> +def _overlap_dnf(dep_struct):
> +       """
> +       Combine overlapping || groups using disjunctive normal form (DNF),
> in
> +       order to minimize the number of packages chosen to satisfy cases
> like
> +       "|| ( foo bar ) || ( bar baz )" as in bug #632026. Non-overlapping
> +       groups are excluded from the conversion, since DNF leads to
> exponential
> +       explosion of the formula.
> +       """
> +       if not _contains_disjunction(dep_struct):
> +               return dep_struct
> +
> +       # map atom.cp to disjunctions
> +       cp_map = collections.defaultdict(list)
> +       # graph atom.cp, with edges connecting atoms in the same
> disjunction
> +       overlap_graph = digraph()
> +       # map id(disjunction) to index in dep_struct, for deterministic
> output
> +       order_map = {}
> +       order_key = lambda x: order_map[id(x)]
> +       result = []
> +       for i, x in enumerate(dep_struct):
> +               if isinstance(x, list):
> +                       assert x and x[0] == '||'
> +                       order_map[id(x)] = i
> +                       prev_cp = None
> +                       for atom in _iter_flatten(x):
> +                               if isinstance(atom, Atom) and not
> atom.blocker:
> +                                       cp_map[atom.cp].append(x)
> +                                       overlap_graph.add(atom.cp,
> parent=prev_cp)
> +                                       prev_cp = atom.cp
> +                       if prev_cp is None: # only contains blockers
> +                               result.append(x)
> +               else:
> +                       result.append(x)
> +
> +       # group together disjunctions having atom.cp overlap
> +       traversed = set()
> +       for cp in overlap_graph:
> +               if cp in traversed:
> +                       continue
> +               disjunctions = {}
> +               stack = [cp]
> +               while stack:
> +                       cp = stack.pop()
> +                       traversed.add(cp)
> +                       for x in cp_map[cp]:
> +                               disjunctions[id(x)] = x
> +                       for other_cp in itertools.chain(overlap_graph.
> child_nodes(cp),
> +                               overlap_graph.parent_nodes(cp)):
> +                               if other_cp not in traversed:
> +                                       stack.append(other_cp)
> +
> +               if len(disjunctions) > 1:
> +                       # convert overlapping disjunctions to DNF
> +                       result.extend(_dnf_convert(
> +                               sorted(disjunctions.values(),
> key=order_key)))
> +               else:
> +                       # pass through non-overlapping disjunctions
> +                       result.append(disjunctions.popitem()[1])
> +
> +       return result
> +
> +
> +def _iter_flatten(dep_struct):
> +       """
> +       Yield nested elements of dep_struct.
> +       """
> +       for x in dep_struct:
> +               if isinstance(x, list):
> +                       for x in _iter_flatten(x):
> +                               yield x
> +               else:
> +                       yield x
> +
> +
>  def dep_wordreduce(mydeplist,mysettings,mydbapi,mode,use_cache=1):
>         "Reduces the deplist to ones and zeros"
>         deplist=mydeplist[:]
> diff --git a/pym/portage/tests/dep/test_dnf_convert.py
> b/pym/portage/tests/dep/test_dnf_convert.py
> new file mode 100644
> index 000000000..b92778d4a
> --- /dev/null
> +++ b/pym/portage/tests/dep/test_dnf_convert.py
> @@ -0,0 +1,48 @@
> +# Copyright 2017 Gentoo Foundation
> +# Distributed under the terms of the GNU General Public License v2
> +
> +from portage.tests import TestCase
> +from portage.dep import use_reduce
> +from portage.dep._dnf import dnf_convert
> +
> +class DNFConvertTestCase(TestCase):
> +
> +       def testDNFConvert(self):
> +
> +               test_cases = (
> +                       (
> +                               '|| ( A B ) || ( C D )',
> +                               [['||', ['A', 'C'], ['A', 'D'], ['B',
> 'C'], ['B', 'D']]],
> +                       ),
> +                       (
> +                               '|| ( A B ) || ( B C )',
> +                               [['||', ['A', 'B'], ['A', 'C'], ['B',
> 'B'], ['B', 'C']]],
> +                       ),
> +                       (
> +                               '|| ( A ( B C D ) )',
> +                               [['||', 'A', ['B', 'C', 'D']]],
> +                       ),
> +                       (
> +                               '|| ( A ( B C D ) ) E',
> +                               [['||', ['E', 'A'], ['E', 'B', 'C', 'D']]],
> +                       ),
> +                       (
> +                               '|| ( A ( B C ) ) || ( D E ) F',
> +                               [['||', ['F', 'A', 'D'], ['F', 'A', 'E'],
> ['F', 'B', 'C', 'D'], ['F', 'B', 'C', 'E']]],
> +                       ),
> +                       (
> +                               '|| ( A ( B C || ( D E ) ) ( F G ) H )',
> +                               [['||', 'A', ['B', 'C', 'D'], ['B', 'C',
> 'E'], ['F', 'G'], 'H']],
> +                       ),
> +                       (
> +                               '|| ( A ( B C || ( D E ) ) F )',
> +                               [['||', 'A', ['B', 'C', 'D'], ['B', 'C',
> 'E'], 'F']],
> +                       ),
> +                       (
> +                               '|| ( A ( C || ( D E ) || ( F G ) ) H )',
> +                               [['||', 'A', ['C', 'D', 'F'], ['C', 'D',
> 'G'], ['C', 'E', 'F'], ['C', 'E', 'G'], 'H']],
> +                       ),
> +               )
> +
> +               for dep_str, result in test_cases:
> +                       self.assertEqual(dnf_convert(use_reduce(dep_str,
> opconvert=True)), result)
> diff --git a/pym/portage/tests/dep/test_overlap_dnf.py
> b/pym/portage/tests/dep/test_overlap_dnf.py
> new file mode 100644
> index 000000000..79b3e8e46
> --- /dev/null
> +++ b/pym/portage/tests/dep/test_overlap_dnf.py
> @@ -0,0 +1,28 @@
> +# Copyright 2017 Gentoo Foundation
> +# Distributed under the terms of the GNU General Public License v2
> +
> +from portage.tests import TestCase
> +from portage.dep import Atom, use_reduce
> +from portage.dep.dep_check import _overlap_dnf
> +
> +class OverlapDNFTestCase(TestCase):
> +
> +       def testOverlapDNF(self):
> +
> +               test_cases = (
> +                       (
> +                               '|| ( cat/A cat/B ) cat/E || ( cat/C cat/D
> )',
> +                               ['cat/E', ['||', 'cat/A', 'cat/B'], ['||',
> 'cat/C', 'cat/D']],
> +                       ),
> +                       (
> +                               '|| ( cat/A cat/B ) cat/D || ( cat/B cat/C
> )',
> +                               ['cat/D', ['||', ['cat/A', 'cat/B'],
> ['cat/A', 'cat/C'], ['cat/B', 'cat/B'], ['cat/B', 'cat/C']]],
> +                       ),
> +                       (
> +                               '|| ( cat/A cat/B ) || ( cat/C cat/D )  ||
> ( ( cat/B cat/E ) cat/F )',
> +                               [['||', ['cat/A', 'cat/B', 'cat/E'],
> ['cat/A', 'cat/F'], ['cat/B', 'cat/B', 'cat/E'], ['cat/B', 'cat/F']],
> ['||', 'cat/C', 'cat/D']],
> +                       ),
> +               )
> +
> +               for dep_str, result in test_cases:
> +                       self.assertEqual(_overlap_dnf(use_reduce(dep_str,
> token_class=Atom, opconvert=True)), result)
> diff --git a/pym/portage/tests/resolver/test_virtual_minimize_children.py
> b/pym/portage/tests/resolver/test_virtual_minimize_children.py
> new file mode 100644
> index 000000000..35b881bfb
> --- /dev/null
> +++ b/pym/portage/tests/resolver/test_virtual_minimize_children.py
> @@ -0,0 +1,92 @@
> +# Copyright 2017 Gentoo Foundation
> +# Distributed under the terms of the GNU General Public License v2
> +
> +from portage.tests import TestCase
> +from portage.tests.resolver.ResolverPlayground import (
> +       ResolverPlayground,
> +       ResolverPlaygroundTestCase,
> +)
> +
> +class VirtualMinimizeChildrenTestCase(TestCase):
> +
> +       def testVirtualMinimizeChildren(self):
> +               ebuilds = {
> +                       'app-misc/bar-1': {
> +                               'EAPI': '6',
> +                               'RDEPEND': 'virtual/foo'
> +                       },
> +                       'virtual/foo-1': {
> +                               'EAPI': '6',
> +                               'RDEPEND': '|| ( app-misc/A app-misc/B )
> || ( app-misc/B app-misc/C )'
> +                       },
> +                       'app-misc/A-1': {
> +                               'EAPI': '6',
> +                       },
> +                       'app-misc/B-1': {
> +                               'EAPI': '6',
> +                       },
> +                       'app-misc/C-1': {
> +                               'EAPI': '6',
> +                       },
> +               }
> +
> +               test_cases = (
> +                       # Test bug 632026, where we want to minimize the
> number of
> +                       # packages chosen to satisfy overlapping || deps
> like
> +                       # "|| ( foo bar ) || ( bar baz )".
> +                       ResolverPlaygroundTestCase(
> +                               ['app-misc/bar'],
> +                               success=True,
> +                               mergelist=[
> +                                       'app-misc/B-1',
> +                                       'virtual/foo-1',
> +                                       'app-misc/bar-1',
> +                               ],
> +                       ),
> +               )
> +
> +               playground = ResolverPlayground(debug=False,
> +                       ebuilds=ebuilds)
> +
> +               try:
> +                       for test_case in test_cases:
> +                               playground.run_TestCase(test_case)
> +                               self.assertEqual(test_case.test_success,
> True,
> +                                       test_case.fail_msg)
> +               finally:
> +                       playground.debug = False
> +                       playground.cleanup()
> +
> +               # If app-misc/A and app-misc/C are installed then
> +               # that choice should be preferred over app-misc/B.
> +               installed = {
> +                       'app-misc/A-1': {
> +                               'EAPI': '6',
> +                       },
> +                       'app-misc/C-1': {
> +                               'EAPI': '6',
> +                       },
> +               }
> +
> +               test_cases = (
> +                       ResolverPlaygroundTestCase(
> +                               ['app-misc/bar'],
> +                               success=True,
> +                               mergelist=[
> +                                       'virtual/foo-1',
> +                                       'app-misc/bar-1',
> +                               ],
> +                       ),
> +               )
> +
> +               playground = ResolverPlayground(debug=False,
> +                       ebuilds=ebuilds, installed=installed)
> +
> +               try:
> +                       for test_case in test_cases:
> +                               playground.run_TestCase(test_case)
> +                               self.assertEqual(test_case.test_success,
> True,
> +                                       test_case.fail_msg)
> +               finally:
> +                       playground.debug = False
> +                       playground.cleanup()
> --
> 2.13.5
>
>
>

Re: [gentoo-portage-dev] [PATCH] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)

Reply via email to