This is an automated email from the ASF dual-hosted git repository.

juergbi pushed a commit to branch juerg/cache-config
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 9559e7254518b0361effd4375f70289e791195e3
Author: Jürg Billeter <[email protected]>
AuthorDate: Fri Aug 30 08:53:18 2024 +0200

    Add `low-watermark` config option to the `cache` section
    
    BuildStream currently uses a hard-coded value of 50% for the low
    watermark. This adds a config option but doesn't change the default.
    
    This requires buildbox-casd 1.2.17+ to be effective. Older versions of
    buildbox-casd will continue to use 50% as low watermark.
---
 doc/source/using_config.rst                | 12 ++++++++++++
 src/buildstream/_cas/casdprocessmanager.py |  7 +++++--
 src/buildstream/_context.py                | 17 ++++++++++++++++-
 src/buildstream/data/userconfig.yaml       |  3 +++
 src/buildstream/utils.py                   | 29 +++++++++++++++++++++++++++++
 5 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/doc/source/using_config.rst b/doc/source/using_config.rst
index 59211c2cd..56411a866 100644
--- a/doc/source/using_config.rst
+++ b/doc/source/using_config.rst
@@ -130,6 +130,9 @@ toplevel of your configuration file, like so:
      # Keep 5% of disk space available
      reserved-disk-space: 5%
 
+     # Retain 50% of the cache on cleanup
+     low-watermark: 50%
+
      # Avoid pulling large amounts of data we don't need locally
      pull-buildtrees: False
 
@@ -196,6 +199,15 @@ Attributes
   ``reserved-disk-space`` can be specified in the same way as ``quota``, with
   the exception of the special ``infinity`` value. The default is ``5%``.
 
+* ``low-watermark``
+
+  This controls how much of the cache should be retained on cleanup.
+
+  ``low-watermark`` is specified as a percentage of the effective cache quota
+  as configured by ``quota`` and/or ``reserved-disk-space``. The default is
+  ``50%``, which means that when cleanup is triggered, the cache is pruned to
+  50% of that quota by removing CAS objects that haven't been used recently.
+
 * ``pull-buildtrees``
 
   Whether to pull *build trees* when downloading remote artifacts.
diff --git a/src/buildstream/_cas/casdprocessmanager.py b/src/buildstream/_cas/casdprocessmanager.py
index 3b5775409..5cc64853e 100644
--- a/src/buildstream/_cas/casdprocessmanager.py
+++ b/src/buildstream/_cas/casdprocessmanager.py
@@ -74,7 +74,8 @@ class CASDProcessManager:
         protect_session_blobs,
         messenger,
         *,
-        reserved=None
+        reserved=None,
+        low_watermark=None
     ):
         os.makedirs(path, exist_ok=True)
 
@@ -92,7 +93,9 @@ class CASDProcessManager:
 
         if cache_quota is not None:
             casd_args.append("--quota-high={}".format(int(cache_quota)))
-            casd_args.append("--quota-low={}".format(int(cache_quota / 2)))
+
+        if low_watermark is not None:
+            casd_args.append("--quota-low={}%".format(int(low_watermark * 100)))
 
         if reserved is not None:
             casd_args.append("--reserved={}".format(int(reserved)))
diff --git a/src/buildstream/_context.py b/src/buildstream/_context.py
index db6887b25..b71721f62 100644
--- a/src/buildstream/_context.py
+++ b/src/buildstream/_context.py
@@ -188,6 +188,9 @@ class Context:
         # Reserved disk space for local cache in bytes
         self.config_cache_reserved: Optional[int] = None
 
+        # Low watermark for local cache as a fraction (0.0 to 1.0) of the cache quota
+        self.config_cache_low_watermark: Optional[float] = None
+
         # Remote cache server
         self.remote_cache_spec: Optional[RemoteSpec] = None
 
@@ -365,7 +368,9 @@ class Context:
         # We need to find the first existing directory in the path of our
         # casdir - the casdir may not have been created yet.
         cache = defaults.get_mapping("cache")
-        cache.validate_keys(["quota", "reserved-disk-space", "storage-service", "pull-buildtrees", "cache-buildtrees"])
+        cache.validate_keys(
+            ["quota", "reserved-disk-space", "low-watermark", "storage-service", "pull-buildtrees", "cache-buildtrees"]
+        )
 
         cas_volume = self.casdir
         while not os.path.exists(cas_volume):
@@ -398,6 +403,15 @@ class Context:
                 LoadErrorReason.INVALID_DATA,
             ) from e
 
+        low_watermark_string = cache.get_str("low-watermark")
+        try:
+            self.config_cache_low_watermark = utils._parse_percentage(low_watermark_string)
+        except utils.UtilError as e:
+            raise LoadError(
+                "{}\nPlease specify the value as a % of the cache quota.".format(str(e)),
+                LoadErrorReason.INVALID_DATA,
+            ) from e
+
         remote_cache = cache.get_mapping("storage-service", default=None)
         if remote_cache:
             self.remote_cache_spec = RemoteSpec.new_from_node(remote_cache)
@@ -722,6 +736,7 @@ class Context:
                 protect_session_blobs=True,
                 messenger=self.messenger,
                 reserved=self.config_cache_reserved,
+                low_watermark=self.config_cache_low_watermark,
             )
         return self._casd
 
diff --git a/src/buildstream/data/userconfig.yaml b/src/buildstream/data/userconfig.yaml
index 3cd2b0df4..c28e35783 100644
--- a/src/buildstream/data/userconfig.yaml
+++ b/src/buildstream/data/userconfig.yaml
@@ -41,6 +41,9 @@ cache:
   # Keep 5% of disk space available
   reserved-disk-space: 5%
 
+  # Retain 50% of the cache on cleanup
+  low-watermark: 50%
+
   # Whether to pull build trees when downloading element artifacts
   pull-buildtrees: False
 
diff --git a/src/buildstream/utils.py b/src/buildstream/utils.py
index 320d5332f..bf2b3ea0b 100644
--- a/src/buildstream/utils.py
+++ b/src/buildstream/utils.py
@@ -832,6 +832,35 @@ def _parse_size(size, volume):
     return int(num) * 1024 ** units.index(unit)
 
 
+# _parse_percentage():
+#
+# Convert a string representing a percentage between 0% and 100% to a float.
+# E.g. "80%" -> 0.8.
+#
+# Arguments:
+#     percentage (str) The string to parse
+#
+# Returns:
+#     (float) The percentage as a float
+#
+# Raises:
+#     UtilError if the string is not a valid percentage.
+#
+def _parse_percentage(percentage):
+    if not percentage.endswith("%"):
+        raise UtilError("{} is not a valid percentage.".format(percentage))
+
+    try:
+        num = int(percentage[:-1])
+    except ValueError:
+        raise UtilError("{} is not a valid percentage.".format(percentage))
+
+    if num < 0 or num > 100:
+        raise UtilError("{} is not between 0% and 100%.".format(percentage))
+
+    return num / 100
+
+
 # _pretty_size()
 #
 # Converts a number of bytes into a string representation in KiB, MiB, GiB, TiB

Reply via email to