This is an automated email from the ASF dual-hosted git repository. not-in-ldap pushed a commit to branch shared/split_out_update_state in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 87cf8e95c848c350518882bdb0d9b94c69388721 Author: Jonathan Maw <[email protected]> AuthorDate: Wed Apr 17 17:32:44 2019 +0100 CacheKey: Add a CacheKey class which delegates logic to varying implementations i.e. a CacheKey class, with implementations such as: * StrictCacheKey - a normal element when buildstream runs in Strict mode * NonStrictCacheKey - a normal element when buildstream runs in non-strict mode * StrictWorkspacedCacheKey - a workspaced element in Strict mode * NonStrictWorkspacedCacheKey - a workspaced element in non-strict mode (note: only CacheKey and StrictCacheKey are implemented for now) This involves: * Creating the CacheKey and StrictCacheKey classes * In Element instantiation, create a __cache_key_obj to delegate various logic to. * For the moment, extending various functions to call a __cache_key_obj's methods if they're implemented. * This includes extending the _KeyStrength enum to include a STRICT version, so strict cache keys can be accessed through a common interface. --- buildstream/_cachekey.py | 68 --------------- buildstream/_cachekey/__init__.py | 22 +++++ buildstream/_cachekey/cachekey.py | 148 ++++++++++++++++++++++++++++++++ buildstream/_cachekey/strictcachekey.py | 110 ++++++++++++++++++++++++ buildstream/_loader/loader.py | 8 +- buildstream/_pipeline.py | 10 ++- buildstream/element.py | 148 ++++++++++++++++++++++++++++---- buildstream/types.py | 4 + 8 files changed, 429 insertions(+), 89 deletions(-) diff --git a/buildstream/_cachekey.py b/buildstream/_cachekey.py deleted file mode 100644 index e56b582..0000000 --- a/buildstream/_cachekey.py +++ /dev/null @@ -1,68 +0,0 @@ -# -# Copyright (C) 2018 Codethink Limited -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. 
-# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library. If not, see <http://www.gnu.org/licenses/>. -# -# Authors: -# Tristan Van Berkom <[email protected]> - - -import hashlib - -import ujson - -from . import _yaml - -# Internal record of the size of a cache key -_CACHEKEY_SIZE = len(hashlib.sha256().hexdigest()) - - -# Hex digits -_HEX_DIGITS = "0123456789abcdef" - - -# is_key() -# -# Check if the passed in string *could be* a cache key. This basically checks -# that the length matches a sha256 hex digest, and that the string does not -# contain any non-hex characters and is fully lower case. -# -# Args: -# key (str): The string to check -# -# Returns: -# (bool): Whether or not `key` could be a cache key -# -def is_key(key): - if len(key) != _CACHEKEY_SIZE: - return False - return not any(ch not in _HEX_DIGITS for ch in key) - - -# generate_key() -# -# Generate an sha256 hex digest from the given value. The value -# can be a simple value or recursive dictionary with lists etc, -# anything simple enough to serialize. 
-# -# Args: -# value: A value to get a key for -# -# Returns: -# (str): An sha256 hex digest of the given value -# -def generate_key(value): - ordered = _yaml.node_sanitize(value) - ustring = ujson.dumps(ordered, sort_keys=True, escape_forward_slashes=False).encode('utf-8') - return hashlib.sha256(ustring).hexdigest() diff --git a/buildstream/_cachekey/__init__.py b/buildstream/_cachekey/__init__.py new file mode 100644 index 0000000..17bab8a --- /dev/null +++ b/buildstream/_cachekey/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright (C) 2019 Bloomberg Finance LP +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Jonathan Maw <[email protected]> + + +from .cachekey import generate_key, is_key +from .strictcachekey import StrictCacheKey diff --git a/buildstream/_cachekey/cachekey.py b/buildstream/_cachekey/cachekey.py new file mode 100644 index 0000000..3a71458 --- /dev/null +++ b/buildstream/_cachekey/cachekey.py @@ -0,0 +1,148 @@ +# +# Copyright (C) 2018 Codethink Limited +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Tristan Van Berkom <[email protected]> + + +import hashlib + +import ujson + +from .. import _yaml +from .._exceptions import ImplError +from ..types import _KeyStrength, Scope + +# Internal record of the size of a cache key +_CACHEKEY_SIZE = len(hashlib.sha256().hexdigest()) + + +# Hex digits +_HEX_DIGITS = "0123456789abcdef" + + +# TODO: DOCSTRINGS +# XXX: Should this have a better name than CacheKey? e.g. CacheKeyController? +class CacheKey(): + def __init__(self, element): + self._element = element + self._weak_key = None + self._strict_key = None + self._strong_key = None + self._weak_cached = None + # TODO: Understand why there's no __strict_cached + self._strong_cached = None + + # ABSTRACT METHODS + def calculate_keys(self): + raise ImplError("CacheKey does not implement calculate_keys()") + + def get_key(self, strength): + raise ImplError("CacheKey does not implement get_key()") + + def maybe_schedule_assemble(self): + raise ImplError("CacheKey does not implement maybe_schedule_assemble()") + + def is_cached(self, strength): + raise ImplError("CacheKey does not implement is_cached()") + + def tracking_done(self): + raise ImplError("CacheKey does not implement tracking_done()") + + def pull_done(self): + raise ImplError("CacheKey does not implement pull_done()") + + def assemble_done(self): + raise ImplError("CacheKey does not implement assemble_done()") + + # PRIVATE METHODS + + def _update_weak_cached(self): + if self._weak_key and not self._weak_cached: + self._weak_cached = self._element._is_weak_cached_by_artifact() + + def 
_update_strong_cached(self): + if self._strict_key and not self._strong_cached: + self._strong_cached = self._element._is_strong_cached_by_artifact() + + # Set the weak key + def _calculate_weak_key(self): + if self._weak_key is None: + if self._element.BST_STRICT_REBUILD: + deps = [e._get_cache_key(strength=_KeyStrength.WEAK) + for e in self._element.dependencies(Scope.BUILD)] + else: + deps = [e.name for e in self._element.dependencies(Scope.BUILD, recurse=False)] + + # XXX: Perhaps it would be better to move all cache key calculation + # into CacheKey, and have Element use a function to generate + # the cache_key_dict. Generate, rather than store internally, + # because workspaces could have a different cache_key_dict after + # building. + self._weak_key = self._element._calculate_cache_key(deps) + + if self._weak_key is None: + return False + + return True + + # Set the strict key + def _calculate_strict_key(self): + if self._strict_key is None: + deps = [e._get_cache_key(strength=_KeyStrength.STRICT) + for e in self._element.dependencies(Scope.BUILD)] + self._strict_key = self._element._calculate_cache_key(deps) + + if self._strict_key is None: + return False + + return True + + +# is_key() +# +# Check if the passed in string *could be* a cache key. This basically checks +# that the length matches a sha256 hex digest, and that the string does not +# contain any non-hex characters and is fully lower case. +# +# Args: +# key (str): The string to check +# +# Returns: +# (bool): Whether or not `key` could be a cache key +# +def is_key(key): + if len(key) != _CACHEKEY_SIZE: + return False + return not any(ch not in _HEX_DIGITS for ch in key) + + +# generate_key() +# +# Generate an sha256 hex digest from the given value. The value +# can be a simple value or recursive dictionary with lists etc, +# anything simple enough to serialize. 
+# +# Args: +# value: A value to get a key for +# +# Returns: +# (str): An sha256 hex digest of the given value +# +def generate_key(value): + ordered = _yaml.node_sanitize(value) + ustring = ujson.dumps(ordered, sort_keys=True, escape_forward_slashes=False).encode('utf-8') + return hashlib.sha256(ustring).hexdigest() diff --git a/buildstream/_cachekey/strictcachekey.py b/buildstream/_cachekey/strictcachekey.py new file mode 100644 index 0000000..e984886 --- /dev/null +++ b/buildstream/_cachekey/strictcachekey.py @@ -0,0 +1,110 @@ +# +# Copyright (C) 2019 Bloomberg Finance LP +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Jonathan Maw <[email protected]> + + +from .cachekey import CacheKey +from ..types import _KeyStrength, Consistency + + +# TODO: DOCSTRINGS +class StrictCacheKey(CacheKey): + def calculate_keys(self): + if self._element._get_consistency() == Consistency.INCONSISTENT: + return + + if not self._calculate_weak_key(): + # Failure when calculating weak key + # This usually happens when the element is BST_STRICT_REBUILD, and + # its dependency is an uncached workspace, or pending track. 
+ return + + if not self._calculate_strict_key(): + # Failure when calculating strict key + # Usually because a dependency is pending track or is workspaced + # and not cached + return + + # Assemble the strict artifact + self._element._assemble_strict_artifact() + + if self._strong_key is None: + self._strong_key = self._strict_key + + self._update_strong_cached() + + # TODO: Figure out why _weak_cached is only set if it's identical + # NOTE: Elements with no dependencies have identical strict and weak keys. + if self._strict_key == self._weak_key: + self._update_weak_cached() + + self._element._check_ready_for_runtime() + + def get_key(self, strength): + # NOTE: KeyStrength numbers are not sequential + if strength == _KeyStrength.WEAK: + return self._weak_key + elif strength == _KeyStrength.STRICT: + return self._strict_key + elif strength == _KeyStrength.STRONG: + return self._strong_key + else: + raise AssertionError("Bad key strength value {}".format(strength)) + + def maybe_schedule_assemble(self): + # XXX: Should _cached_success take a _KeyStrength? 
+ if (self._weak_key and self._strong_key and + self._element._is_pending_assembly() and + self._element._is_required() and + not self._element._cached_success() and + not self._element._pull_pending()): + self._element._schedule_assemble() + + def is_cached(self, strength): + if strength == _KeyStrength.STRONG: + return self._strong_cached + elif strength == _KeyStrength.STRICT: + # TODO: Understand difference between strict cached and strong cached + raise AssertionError("I have no idea why it's strong_cached and not strict_cached") + elif strength == _KeyStrength.WEAK: + return self._weak_cached + else: + raise AssertionError("Bad key strength value {}".format(strength)) + + def tracking_done(self): + # this generator includes this corresponding element + for element in self._element._reverse_deps_for_update(): + element._calculate_keys() + element._maybe_schedule_assemble() + + def pull_done(self): + # Cache keys are already known before this. + # Element may become cached. + self._update_strong_cached() + if self._weak_key == self._strict_key: + self._update_weak_cached() + + # If it failed to pull, it should assemble. + self._element._maybe_schedule_assemble() + + def assemble_done(self): + # Cache keys are already known before this. + # Element may become cached. 
+ self._update_strong_cached() + if self._weak_key == self._strict_key: + self._update_weak_cached() diff --git a/buildstream/_loader/loader.py b/buildstream/_loader/loader.py index 6d8310c..c6a51e6 100644 --- a/buildstream/_loader/loader.py +++ b/buildstream/_loader/loader.py @@ -552,7 +552,13 @@ class Loader(): basedir = sources[0]._get_local_path() else: # Stage sources - element._update_state() + # TODO: Remove conditional once implemented wholly + if element._Element__cache_key_obj: + element._update_source_state() + element._calculate_keys() + else: + element._update_state() + basedir = os.path.join(self.project.directory, ".bst", "staged-junctions", filename, element._get_cache_key()) if not os.path.exists(basedir): diff --git a/buildstream/_pipeline.py b/buildstream/_pipeline.py index c176b82..99cbe9e 100644 --- a/buildstream/_pipeline.py +++ b/buildstream/_pipeline.py @@ -136,7 +136,15 @@ class Pipeline(): element._preflight() # Determine initial element state. - element._update_state() + if element._Element__cache_key_obj: + # Ensure consistency of sources + element._update_source_state() + + element._calculate_keys() + # TODO: maybe schedule assembly if workspaced + + else: + element._update_state() # dependencies() # diff --git a/buildstream/element.py b/buildstream/element.py index c619a10..8db746f 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -95,6 +95,7 @@ from ._exceptions import BstError, LoadError, LoadErrorReason, ImplError, \ from .utils import UtilError from . import utils from . import _cachekey +from ._cachekey import StrictCacheKey from . import _signals from . 
import _site from ._platform import Platform @@ -189,9 +190,15 @@ class Element(Plugin): self.__cache_key_dict = None # Dict for cache key calculation self.__cache_key = None # Our cached cache key + self.__cache_key_obj = None # Object for handling cache keys super().__init__(meta.name, context, project, meta.provenance, "element") + # TODO: Give a proper name when we've moved the cache keys completely out of element + # TODO: Cache the result of _get_workspace + if context.get_strict() and not self._get_workspace(): + self.__cache_key_obj = StrictCacheKey(self) + self.__is_junction = meta.kind == "junction" if not self.__is_junction: @@ -1040,7 +1047,7 @@ class Element(Plugin): # cached state) # # Args: - # (_KeyStrength) keystrength: The strength of the key to + # (_KeyStrength) keystrength: The strength of the key to # determine whether it is cached. # Returns: # (bool): Whether this element is already present in @@ -1050,7 +1057,10 @@ class Element(Plugin): if keystrength is None: keystrength = _KeyStrength.STRONG if self._get_context().get_strict() else _KeyStrength.WEAK - return self.__strong_cached if keystrength == _KeyStrength.STRONG else self.__weak_cached + if self.__cache_key_obj: + return self.__cache_key_obj.is_cached(keystrength) + else: + return self.__strong_cached if keystrength == _KeyStrength.STRONG else self.__weak_cached # _get_build_result(): # @@ -1140,10 +1150,16 @@ class Element(Plugin): # None is returned if information for the cache key is missing. 
# def _get_cache_key(self, strength=_KeyStrength.STRONG): - if strength == _KeyStrength.STRONG: - return self.__cache_key + # TODO: Remove conditional once all implementations are added + if self.__cache_key_obj: + return self.__cache_key_obj.get_key(strength) else: - return self.__weak_cache_key + if strength == _KeyStrength.STRONG: + return self.__cache_key + elif strength == _KeyStrength.STRICT: + return self.__strict_cache_key + else: + return self.__weak_cache_key # _can_query_cache(): # @@ -1162,7 +1178,7 @@ class Element(Plugin): return True # cache cannot be queried until strict cache key is available - return self.__strict_cache_key is not None + return self._get_cache_key(_KeyStrength.STRICT) is not None # _update_state() # @@ -1240,7 +1256,7 @@ class Element(Plugin): if self.__strict_cache_key is None: dependencies = [ - e.__strict_cache_key for e in self.dependencies(Scope.BUILD) + e._get_cache_key(_KeyStrength.STRICT) for e in self.dependencies(Scope.BUILD) ] self.__strict_cache_key = self._calculate_cache_key(dependencies) @@ -1304,6 +1320,14 @@ class Element(Plugin): self._check_ready_for_runtime() + def _assemble_strict_artifact(self): + context = self._get_context() + strict = self._get_cache_key(strength=_KeyStrength.STRICT) + weak = self._get_cache_key(strength=_KeyStrength.WEAK) + self.__strict_artifact = Artifact(self, context, strong_key=strict, + weak_key=weak) + self.__artifact = self.__strict_artifact + # _get_display_key(): # # Returns cache keys for display purposes @@ -1316,6 +1340,7 @@ class Element(Plugin): # Question marks are returned if information for the cache key is missing. 
# def _get_display_key(self): + context = self._get_context() dim_key = True @@ -1323,7 +1348,7 @@ class Element(Plugin): if not cache_key: cache_key = "{:?<64}".format('') - elif self._get_cache_key() == self.__strict_cache_key: + elif self._get_cache_key() == self._get_cache_key(_KeyStrength.STRICT): # Strong cache key used in this session matches cache key # that would be used in strict build mode dim_key = False @@ -1411,7 +1436,12 @@ class Element(Plugin): self.__tracking_scheduled = False self.__tracking_done = True - self.__update_state_recursively() + # TODO: Remove this conditional once implementations are in place + if self.__cache_key_obj: + self._update_source_state() + self.__cache_key_obj.tracking_done() + else: + self.__update_state_recursively() # _track(): # @@ -1567,14 +1597,17 @@ class Element(Plugin): if self.__required: # Already done return - self.__required = True # Request artifacts of runtime dependencies for dep in self.dependencies(Scope.RUN, recurse=False): dep._set_required() - self._update_state() + # TODO: Remove conditional once all implementations are done + if self.__cache_key_obj: + self.__cache_key_obj.maybe_schedule_assemble() + else: + self._update_state() # _is_required(): # @@ -1626,7 +1659,13 @@ class Element(Plugin): if workspace: workspace.invalidate_key() - self._update_state() + # NOTE: Ideally, we won't need to do any state handling here + # Currently needed for: + # * This is the first time that an uncached, non-strict, can't pull, + # element can determine it's strong cache key + # * For workspaces, just set ~everything to None (look at update state now) + if not self.__cache_key_obj: + self._update_state() # _assemble_done(): # @@ -1641,7 +1680,11 @@ class Element(Plugin): self.__assemble_scheduled = False self.__assemble_done = True - self.__update_state_recursively() + # TODO: Remove conditional once implementations are done + if self.__cache_key_obj: + self.__cache_key_obj.assemble_done() + else: + 
self.__update_state_recursively() if self._get_workspace() and self._cached_success(): assert utils._is_main_process(), \ @@ -1866,11 +1909,12 @@ class Element(Plugin): # in user context, as to complete a partial artifact subdir, _ = self.__pull_directories() - if self.__strong_cached and subdir: + strong_cached = self._cached(_KeyStrength.STRONG) + if strong_cached and subdir: # If we've specified a subdir, check if the subdir is cached locally - if self.__artifacts.contains_subdir_artifact(self, self.__strict_cache_key, subdir): + if self.__artifacts.contains_subdir_artifact(self, self._get_cache_key(_KeyStrength.STRICT), subdir): return False - elif self.__strong_cached: + elif strong_cached: return False # Pull is pending if artifact remote server available @@ -1890,7 +1934,11 @@ class Element(Plugin): def _pull_done(self): self.__pull_done = True - self.__update_state_recursively() + # TODO: Remove conditional when all implementations are done + if self.__cache_key_obj: + self.__cache_key_obj.pull_done() + else: + self.__update_state_recursively() # _pull(): # @@ -2323,6 +2371,20 @@ class Element(Plugin): source._update_state() self.__consistency = min(self.__consistency, source._get_consistency()) + # _calculate_keys(): + # + # TODO: DOCSTRING + # + def _calculate_keys(self): + return self.__cache_key_obj.calculate_keys() + + # _maybe_schedule_assemble(): + # + # TODO: DOCSTRING + # + def _maybe_schedule_assemble(self): + self.__cache_key_obj.maybe_schedule_assemble() + # _check_ready_for_runtime(): # # TODO: DOCSTRING @@ -2332,6 +2394,48 @@ class Element(Plugin): self.__ready_for_runtime = all( dep.__ready_for_runtime for dep in self.__runtime_dependencies) + # _is_strong_cached_by_artifact(): + # + # TODO: DOCSTRING + # + def _is_strong_cached_by_artifact(self): + return self.__strict_artifact.cached() + + # _is_weak_cached_by_artifact() + # + # TODO: Doctring + def _is_weak_cached_by_artifact(self): + return self.__artifact.cached() + + # 
_is_pending_assembly(): + # + # TODO: DOCSTRING + # + def _is_pending_assembly(self): + return not self.__assemble_scheduled and not self.__assemble_done + + # _reverse_deps_cachekeys_for_update() + # + # Yield every reverse dependency that's not ready for runtime + # + def _reverse_deps_for_update(self): + # XXX: Would this be nicer if the CacheKey owned __ready_for_runtime? + queue = _UniquePriorityQueue() + queue.push(self._unique_id, self) + + while queue: + element = queue.pop() + + # If ready, it never becomes unready + if element.__ready_for_runtime: + continue + + yield element + + # Element readiness changed, maybe rdeps will, too + if element.__ready_for_runtime: + for rdep in element.__reverse_dependencies: + queue.push(rdep._unique_id, rdep) ############################################################# # Private Local Methods # @@ -2857,7 +2961,7 @@ class Element(Plugin): # def __pull_strong(self, *, progress=None, subdir=None, excluded_subdirs=None): weak_key = self._get_cache_key(strength=_KeyStrength.WEAK) - key = self.__strict_cache_key + key = self._get_cache_key(strength=_KeyStrength.STRICT) if not self.__artifacts.pull(self, key, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs): return False @@ -2947,7 +3051,13 @@ class Element(Plugin): element = queue.pop() old_ready_for_runtime = element.__ready_for_runtime - element._update_state() + # TODO: Replace this once all cases are implemented + if element.__cache_key_obj: + element._update_source_state() + element.__cache_key_obj.calculate_keys() + element.__cache_key_obj.maybe_schedule_assemble() + else: + element._update_state() if element.__ready_for_runtime != old_ready_for_runtime: for rdep in element.__reverse_dependencies: diff --git a/buildstream/types.py b/buildstream/types.py index d54bf0b..2e37c2f 100644 --- a/buildstream/types.py +++ b/buildstream/types.py @@ -127,6 +127,10 @@ class _KeyStrength(Enum): # cache keys of dependencies. 
WEAK = 2 + # Includes strict cache keys of all build dependencies and their + # runtime dependencies. + STRICT = 3 + # _UniquePriorityQueue(): #
