This is an automated email from the ASF dual-hosted git repository. juergbi pushed a commit to branch juerg/cache-config in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 9559e7254518b0361effd4375f70289e791195e3 Author: Jürg Billeter <[email protected]> AuthorDate: Fri Aug 30 08:53:18 2024 +0200 Add `low-watermark` config option to the `cache` section BuildStream currently uses a hard-coded value of 50% for the low watermark. This adds a config option but doesn't change the default. This requires buildbox-casd 1.2.17+ to be effective. Older versions of buildbox-casd will continue to use 50% as low watermark. --- doc/source/using_config.rst | 12 ++++++++++++ src/buildstream/_cas/casdprocessmanager.py | 7 +++++-- src/buildstream/_context.py | 17 ++++++++++++++++- src/buildstream/data/userconfig.yaml | 3 +++ src/buildstream/utils.py | 29 +++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 3 deletions(-) diff --git a/doc/source/using_config.rst b/doc/source/using_config.rst index 59211c2cd..56411a866 100644 --- a/doc/source/using_config.rst +++ b/doc/source/using_config.rst @@ -130,6 +130,9 @@ toplevel of your configuration file, like so: # Keep 5% of disk space available reserved-disk-space: 5% + # Retain 50% of the cache on cleanup + low-watermark: 50% + # Avoid pulling large amounts of data we don't need locally pull-buildtrees: False @@ -196,6 +199,15 @@ Attributes ``reserved-disk-space`` can be specified in the same way as ``quota``, with the exception of the special ``infinity`` value. The default is ``5%``. +* ``low-watermark`` + + This controls how much of the cache should be retained on cleanup. + + ``low-watermark`` is specified as a percentage of the effective cache quota + as configured by ``quota`` and/or ``reserved-disk-space``. The default is + ``50%``, which means that when cleanup is triggered, 50% of the cache will + be pruned by removing CAS objects that haven't been used recently. + * ``pull-buildtrees`` Whether to pull *build trees* when downloading remote artifacts. 
diff --git a/src/buildstream/_cas/casdprocessmanager.py b/src/buildstream/_cas/casdprocessmanager.py index 3b5775409..5cc64853e 100644 --- a/src/buildstream/_cas/casdprocessmanager.py +++ b/src/buildstream/_cas/casdprocessmanager.py @@ -74,7 +74,8 @@ class CASDProcessManager: protect_session_blobs, messenger, *, - reserved=None + reserved=None, + low_watermark=None ): os.makedirs(path, exist_ok=True) @@ -92,7 +93,9 @@ class CASDProcessManager: if cache_quota is not None: casd_args.append("--quota-high={}".format(int(cache_quota))) - casd_args.append("--quota-low={}".format(int(cache_quota / 2))) + + if low_watermark is not None: + casd_args.append("--quota-low={}%".format(int(low_watermark * 100))) if reserved is not None: casd_args.append("--reserved={}".format(int(reserved))) diff --git a/src/buildstream/_context.py b/src/buildstream/_context.py index db6887b25..b71721f62 100644 --- a/src/buildstream/_context.py +++ b/src/buildstream/_context.py @@ -188,6 +188,9 @@ class Context: # Reserved disk space for local cache in bytes self.config_cache_reserved: Optional[int] = None + # Low watermark for local cache as a fraction of the cache quota (0.0 to 1.0) + self.config_cache_low_watermark: Optional[float] = None + # Remote cache server self.remote_cache_spec: Optional[RemoteSpec] = None @@ -365,7 +368,9 @@ class Context: # We need to find the first existing directory in the path of our # casdir - the casdir may not have been created yet. 
cache = defaults.get_mapping("cache") - cache.validate_keys(["quota", "reserved-disk-space", "storage-service", "pull-buildtrees", "cache-buildtrees"]) + cache.validate_keys( + ["quota", "reserved-disk-space", "low-watermark", "storage-service", "pull-buildtrees", "cache-buildtrees"] + ) cas_volume = self.casdir while not os.path.exists(cas_volume): @@ -398,6 +403,15 @@ class Context: LoadErrorReason.INVALID_DATA, ) from e + low_watermark_string = cache.get_str("low-watermark") + try: + self.config_cache_low_watermark = utils._parse_percentage(low_watermark_string) + except utils.UtilError as e: + raise LoadError( + "{}\nPlease specify the value as a % of the cache quota.".format(str(e)), + LoadErrorReason.INVALID_DATA, + ) from e + remote_cache = cache.get_mapping("storage-service", default=None) if remote_cache: self.remote_cache_spec = RemoteSpec.new_from_node(remote_cache) @@ -722,6 +736,7 @@ class Context: protect_session_blobs=True, messenger=self.messenger, reserved=self.config_cache_reserved, + low_watermark=self.config_cache_low_watermark, ) return self._casd diff --git a/src/buildstream/data/userconfig.yaml b/src/buildstream/data/userconfig.yaml index 3cd2b0df4..c28e35783 100644 --- a/src/buildstream/data/userconfig.yaml +++ b/src/buildstream/data/userconfig.yaml @@ -41,6 +41,9 @@ cache: # Keep 5% of disk space available reserved-disk-space: 5% + # Retain 50% of the cache on cleanup + low-watermark: 50% + # Whether to pull build trees when downloading element artifacts pull-buildtrees: False diff --git a/src/buildstream/utils.py b/src/buildstream/utils.py index 320d5332f..bf2b3ea0b 100644 --- a/src/buildstream/utils.py +++ b/src/buildstream/utils.py @@ -832,6 +832,35 @@ def _parse_size(size, volume): return int(num) * 1024 ** units.index(unit) +# _parse_percentage(): +# +# Convert a string representing a percentage between 0% and 100% to a float. +# E.g. "80%" -> 0.8. 
+# +# Arguments: +# percentage (str) The string to parse +# +# Returns: +# (float) The percentage as a float +# +# Raises: +# UtilError if the string is not a valid percentage. +# +def _parse_percentage(percentage): + if not percentage.endswith("%"): + raise UtilError("{} is not a valid percentage.".format(percentage)) + + try: + num = int(percentage[:-1]) + except ValueError: + raise UtilError("{} is not a valid percentage.".format(percentage)) + + if num < 0 or num > 100: + raise UtilError("{} is not between 0% and 100%.".format(percentage)) + + return num / 100 + + # _pretty_size() # # Converts a number of bytes into a string representation in KiB, MiB, GiB, TiB
