This is an automated email from the ASF dual-hosted git repository.
jscheffl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new ee458138da8 Support PR: Add TTL-enabled LRU cache for StatsD metrics
aggregation (#60933)
ee458138da8 is described below
commit ee458138da89ef260ae3ada9c22177056d761cfe
Author: AutomationDev85 <[email protected]>
AuthorDate: Fri Jan 23 13:18:50 2026 +0100
Support PR: Add TTL-enabled LRU cache for StatsD metrics aggregation
(#60933)
* Making statsd-exporter TTL & cache-size/type configurable in Airflow Helm
chart
* Added statsd configs in values.schema.json
* Fixing test errors
* updated the default values for statsd in schema.json
* Added default arguments for cache size, cache type and ttl; accommodated
user-defined args as well
* Removed EOF error from statsd deployment and improved description in
values.schema.json
* Added default values in deployment.yaml, removed spelling errors, formatted
schema.json properly
* added newlines and changed 'lru' to `lru`
* edited as per test output
* Making changes in deployment file
* Added newsfragments for statsd changes
* removed 0s from deployment.yaml
* resolving errors in newsfragment, and tests
* Patch errors related to test cases
* removed errors from test_statsd and test_apiserver
* removed default args for statsd
* added configs in default and removed overridden configs
* removed defaults for statsd
* removed default args to resolve the test
* removed syntax error
* Added values in render_chart
* removed unnecessary issues
* helm unit test
* Moved args into else block
* Revisited statsd args and made changes
* solved resources error
* Changed JSON for values schema
* resolved error in the statsd tests
* updated the deployment and values.yaml
* resolving ci formatting issues
* Update chart/newsfragments/51792.significant.rst
Co-authored-by: Przemysław Mirowski <[email protected]>
* Update chart/newsfragments/51792.significant.rst
Co-authored-by: Przemysław Mirowski <[email protected]>
* restructured cache options in statsd
* fixing indentation in deployment.yaml
* fix: updated newsfragment file and fixed CI static check
* updated the indentations in values and statsd deployment
* reverted the apiserver changes
* changed the formatting
* Remove TTL parameter from command line and add into the defaults part of
config map
---------
Co-authored-by: shubham36deshpande <[email protected]>
Co-authored-by: shubham36deshpande
<[email protected]>
Co-authored-by: Przemysław Mirowski <[email protected]>
Co-authored-by: AutomationDev85 <AutomationDev85>
---
chart/newsfragments/51792.significant.rst | 11 +++++
chart/templates/configmaps/statsd-configmap.yaml | 4 ++
chart/templates/statsd/statsd-deployment.yaml | 16 ++++++--
chart/values.schema.json | 22 ++++++++++
chart/values.yaml | 31 ++++++++++++++
helm-tests/tests/helm_tests/other/test_statsd.py | 51 +++++++++++++++++++++---
6 files changed, 125 insertions(+), 10 deletions(-)
diff --git a/chart/newsfragments/51792.significant.rst
b/chart/newsfragments/51792.significant.rst
new file mode 100644
index 00000000000..fea7210b84a
--- /dev/null
+++ b/chart/newsfragments/51792.significant.rst
@@ -0,0 +1,11 @@
+StatsD metrics aggregation now supports configurable TTL-enabled LRU cache to
prevent memory growth in long-running daemons.
+
+The Helm Chart now includes new configuration options for StatsD aggregation
management:
+
+* ``statsd.cache.type`` - Enable TTL-enabled ``lru`` cache or ``random`` cache
for metrics aggregation (default: ``lru``)
+* ``statsd.cache.size`` - Maximum number of metrics to cache (default: 1000)
+* ``statsd.cache.ttl`` - Time-to-live for cached metrics as a Go duration string, e.g. ``30s`` or ``5m`` (``0s``
disables TTL) (default: ``0s``)
+
+This feature addresses uncontrolled memory growth in StatsD daemons by
automatically cleaning up stale or unused metric entries. When enabled, the
cache uses both LRU (Least Recently Used) eviction and TTL (Time To Live)
expiration to manage memory usage effectively.
+
+To maintain backward compatibility, the default behaviour remains unchanged.
Users experiencing memory growth issues with StatsD can enable this feature by
setting ``statsd.cache.ttl`` to a non-zero duration (for example ``5m``) in their Helm values.
diff --git a/chart/templates/configmaps/statsd-configmap.yaml
b/chart/templates/configmaps/statsd-configmap.yaml
index 0529e16082f..b5aa884d1d0 100644
--- a/chart/templates/configmaps/statsd-configmap.yaml
+++ b/chart/templates/configmaps/statsd-configmap.yaml
@@ -49,4 +49,8 @@ data:
{{- toYaml .Values.statsd.extraMappings | nindent 6 }}
{{- end }}
{{- end }}
+ {{- if .Values.statsd.cache.ttl }}
+ defaults:
+ ttl: {{ .Values.statsd.cache.ttl }}
+ {{- end }}
{{- end }}
diff --git a/chart/templates/statsd/statsd-deployment.yaml
b/chart/templates/statsd/statsd-deployment.yaml
index 6fef2cb71c6..1d2662f08bd 100644
--- a/chart/templates/statsd/statsd-deployment.yaml
+++ b/chart/templates/statsd/statsd-deployment.yaml
@@ -68,7 +68,7 @@ spec:
annotations:
checksum/statsd-config: {{ include (print $.Template.BasePath
"/configmaps/statsd-configmap.yaml") . | sha256sum }}
{{- if .Values.statsd.podAnnotations }}
- {{- toYaml .Values.statsd.podAnnotations | nindent 8 }}
+ {{- toYaml .Values.statsd.podAnnotations | nindent 8 }}
{{- end }}
{{- end }}
spec:
@@ -95,10 +95,18 @@ spec:
{{- if $containerLifecycleHooks }}
lifecycle: {{- tpl (toYaml $containerLifecycleHooks) . | nindent 12
}}
{{- end }}
- {{- if .Values.statsd.args }}
- args: {{ tpl (toYaml .Values.statsd.args) . | nindent 12 }}
- {{- else}}
args:
+ {{- if .Values.statsd.cache.size }}
+ - "--statsd.cache-size={{ .Values.statsd.cache.size }}"
+ {{- end }}
+ {{- if .Values.statsd.cache.type }}
+ - "--statsd.cache-type={{ .Values.statsd.cache.type }}"
+ {{- end }}
+ {{- if .Values.statsd.args }}
+ {{- range $arg := .Values.statsd.args }}
+ - {{ $arg | quote }}
+ {{- end }}
+ {{- else }}
- "--statsd.mapping-config=/etc/statsd-exporter/mappings.yml"
{{- end }}
resources: {{- toYaml .Values.statsd.resources | nindent 12 }}
diff --git a/chart/values.schema.json b/chart/values.schema.json
index d09e10a4c07..7ef95c6f88f 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -7751,6 +7751,28 @@
"type": "boolean",
"default": true
},
+ "cache": {
+ "description": "StatsD cache configuration.",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "size": {
+ "description": "Maximum number of metric mappings
to cache in memory. Higher values improve performance for frequently used
metrics but consume more memory.",
+ "type": "integer",
+ "default": 1000
+ },
+ "type": {
+ "description": "Cache eviction strategy for metric
mappings. `lru` (Least Recently Used) evicts the least recently accessed items, `random`
evicts randomly selected items.",
+ "type": "string",
+ "default": "lru"
+ },
+ "ttl": {
+ "description": "Time-to-live for cached metric
mappings. Determines how long mappings remain in cache before expiring. Set to
'0s' to disable expiration.",
+ "type": "string",
+ "default": "0s"
+ }
+ }
+ },
"revisionHistoryLimit": {
"description": "Number of old replicasets to retain.",
"type": [
diff --git a/chart/values.yaml b/chart/values.yaml
index add1703308c..35ba98d5fff 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -2608,6 +2608,37 @@ statsd:
# Arguments for StatsD exporter command.
args: ["--statsd.mapping-config=/etc/statsd-exporter/mappings.yml"]
+ # Supplying your own array here replaces only the default
+ # mapping-config argument; the cache-size and cache-type flags are still
+ # rendered from `cache.size` / `cache.type` unless those are set to empty values.
+ # args:
+ # - "--statsd.cache-size=1000"
+ # - "--statsd.cache-type=random"
+ # - "--ttl=10m"
+ # -------------------------------------------------------------------
+
+ # Path in the container to the mapping config file.
+
+ cache:
+ # Maximum number of metric‐mapping entries to keep in cache.
+ # When you send more distinct metric names than this, older entries
+ # will be evicted according to the `type` setting below.
+ # Default: 1000
+ size: 1000
+
+ # Metrics Eviction policy for the mapping cache.
+ # - lru → Least‐Recently‐Used eviction
+ # - random → Random eviction
+ # Default: lru
+ type: lru
+
+ # Per‐metric time‐to‐live. When set to a non‐zero duration, any metric
+ # series that hasn't received an update in this interval will be dropped
+ # from the exported /metrics output.
+ # Format: Go duration string (e.g. "30s", "5m", "1h")
+ # Default: "0s" (disabled, never expires)
+ ttl: "0s"
+
# Annotations to add to the StatsD Deployment.
annotations: {}
diff --git a/helm-tests/tests/helm_tests/other/test_statsd.py
b/helm-tests/tests/helm_tests/other/test_statsd.py
index 711b7cd49b6..6f45d9cd4ec 100644
--- a/helm-tests/tests/helm_tests/other/test_statsd.py
+++ b/helm-tests/tests/helm_tests/other/test_statsd.py
@@ -26,7 +26,10 @@ class TestStatsd:
"""Tests statsd."""
def test_should_create_statsd_default(self):
- docs =
render_chart(show_only=["templates/statsd/statsd-deployment.yaml"])
+ docs = render_chart(
+ values={"statsd": {"enabled": True, "cache": {"size": 1000,
"type": "lru", "ttl": "0s"}}},
+ show_only=["templates/statsd/statsd-deployment.yaml"],
+ )
assert jmespath.search("metadata.name", docs[0]) ==
"release-name-statsd"
@@ -42,8 +45,34 @@ class TestStatsd:
"readOnly": True,
} in jmespath.search("spec.template.spec.containers[0].volumeMounts",
docs[0])
- default_args =
["--statsd.mapping-config=/etc/statsd-exporter/mappings.yml"]
- assert default_args ==
jmespath.search("spec.template.spec.containers[0].args", docs[0])
+ expected_args = [
+ "--statsd.cache-size=1000",
+ "--statsd.cache-type=lru",
+ "--statsd.mapping-config=/etc/statsd-exporter/mappings.yml",
+ ]
+ assert expected_args ==
jmespath.search("spec.template.spec.containers[0].args", docs[0])
+
+ @pytest.mark.parametrize(
+ ("ttl"),
+ [None, "5m"],
+ )
+ def test_statsd_configmap_check_ttl(self, ttl):
+ docs = render_chart(
+ values={"statsd": {"enabled": True, "cache": {"ttl": ttl} if ttl
else {}}},
+ show_only=["templates/configmaps/statsd-configmap.yaml"],
+ )
+
+ mappings_yml = jmespath.search('data."mappings.yml"', docs[0])
+ mappings_yml_obj = yaml.safe_load(mappings_yml)
+
+ assert "defaults" in mappings_yml_obj
+
+ if ttl:
+ assert mappings_yml_obj["defaults"]["ttl"] == ttl
+ else:
+ assert mappings_yml_obj["defaults"]["ttl"] == "0s"
+
+ assert "mappings" in mappings_yml_obj
def
test_should_add_volume_and_volume_mount_when_exist_extra_mappings(self):
extra_mapping = {
@@ -313,13 +342,23 @@ class TestStatsd:
assert mappings_yml_obj["mappings"][0]["name"] ==
"airflow_pool_queued_slots"
def test_statsd_args_can_be_overridden(self):
- args = ["--some-arg=foo"]
+ args = [
+ "--statsd.cache-size=",
+ "--statsd.cache-type=",
+ "--statsd.mapping-config=/custom/path",
+ ]
docs = render_chart(
- values={"statsd": {"enabled": True, "args": args}},
+ values={"statsd": {"enabled": True, "args": args, "cache":
{"size": 0, "type": "", "ttl": ""}}},
show_only=["templates/statsd/statsd-deployment.yaml"],
)
- assert jmespath.search("spec.template.spec.containers[0].args",
docs[0]) == args
+ expected_args = [
+ "--statsd.cache-size=",
+ "--statsd.cache-type=",
+ "--statsd.mapping-config=/custom/path",
+ ]
+
+ assert jmespath.search("spec.template.spec.containers[0].args",
docs[0]) == expected_args
def test_should_add_component_specific_annotations(self):
docs = render_chart(