This is an automated email from the ASF dual-hosted git repository.
jli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 290bcc1dbbb feat(cache): use configurable hash algorithm for
flask-caching (#37361)
290bcc1dbbb is described below
commit 290bcc1dbbbd2824639d8d2a405028c339104a6b
Author: Daniel Vaz Gaspar <[email protected]>
AuthorDate: Mon Jan 26 18:19:51 2026 +0000
feat(cache): use configurable hash algorithm for flask-caching (#37361)
---
superset/utils/cache.py | 3 +-
superset/utils/cache_manager.py | 141 ++++++++++++++++++++--
tests/unit_tests/utils/test_cache_manager.py | 171 +++++++++++++++++++++++++++
3 files changed, 307 insertions(+), 8 deletions(-)
diff --git a/superset/utils/cache.py b/superset/utils/cache.py
index 76294696e43..706a74dbde4 100644
--- a/superset/utils/cache.py
+++ b/superset/utils/cache.py
@@ -31,6 +31,7 @@ from superset import db
from superset.constants import CACHE_DISABLED_TIMEOUT
from superset.extensions import cache_manager
from superset.models.cache import CacheKey
+from superset.utils.cache_manager import configurable_hash_method
from superset.utils.hashing import hash_from_dict
from superset.utils.json import json_int_dttm_ser
@@ -273,7 +274,7 @@ def etag_cache( # noqa: C901
wrapper.uncached = f # type: ignore
wrapper.cache_timeout = timeout # type: ignore
wrapper.make_cache_key = cache._memoize_make_cache_key(  # type: ignore # pylint: disable=protected-access
- make_name=None, timeout=timeout
+ make_name=None, timeout=timeout, hash_method=configurable_hash_method
)
return wrapper
diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py
index d3b2dbdb00d..0804e0d4b5d 100644
--- a/superset/utils/cache_manager.py
+++ b/superset/utils/cache_manager.py
@@ -14,10 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+import hashlib
import logging
-from typing import Any, Optional, Union
+from typing import Any, Callable, Optional, Union
-from flask import Flask
+from flask import current_app, Flask
from flask_caching import Cache
from markupsafe import Markup
@@ -27,8 +28,134 @@ logger = logging.getLogger(__name__)
CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache"
+# Hash function lookup table matching superset.utils.hashing
+_HASH_METHODS: dict[str, Callable[..., Any]] = {
+ "sha256": hashlib.sha256,
+ "md5": hashlib.md5,
+}
+
+
+class ConfigurableHashMethod:
+ """
+ A callable that defers hash algorithm selection to runtime.
+
+ Flask-caching's memoize decorator evaluates hash_method at decoration time
+ (module import), but we need to read HASH_ALGORITHM config at function call
+ time when the app context is available.
+
+ This class acts like a hashlib function but looks up the configured
+ algorithm when called.
+ """
+
+ def __call__(self, data: bytes = b"") -> Any:
+ """
+ Create a hash object using the configured algorithm.
+
+ Args:
+ data: Optional initial data to hash
+
+ Returns:
+ A hashlib hash object (e.g., sha256 or md5)
+
+ Raises:
+ ValueError: If HASH_ALGORITHM is set to an unsupported value
+ """
+ algorithm = current_app.config["HASH_ALGORITHM"]
+ hash_func = _HASH_METHODS.get(algorithm)
+ if hash_func is None:
+ raise ValueError(f"Unsupported hash algorithm: {algorithm}")
+ return hash_func(data)
+
+
+# Singleton instance to use as default hash_method
+configurable_hash_method = ConfigurableHashMethod()
+
+
+class SupersetCache(Cache):
+ """
+ Cache subclass that uses the configured HASH_ALGORITHM instead of MD5.
+
+ Flask-caching uses MD5 by default for cache key generation, which fails
+ in FIPS mode where MD5 is disabled. This class overrides the default
+ hash method to use the algorithm specified by HASH_ALGORITHM config.
+
+ Note: Switching hash algorithms will invalidate existing cache keys,
+ causing a one-time cache miss on upgrade.
+ """
+
+ def memoize(
+ self,
+ timeout: int | None = None,
+ make_name: Callable[..., Any] | None = None,
+ unless: Callable[..., bool] | None = None,
+ forced_update: Callable[..., bool] | None = None,
+ response_filter: Callable[..., Any] | None = None,
+ hash_method: Callable[..., Any] = configurable_hash_method,
+ cache_none: bool = False,
+ source_check: bool | None = None,
+ args_to_ignore: Any | None = None,
+ ) -> Callable[..., Any]:
+ return super().memoize(
+ timeout=timeout,
+ make_name=make_name,
+ unless=unless,
+ forced_update=forced_update,
+ response_filter=response_filter,
+ hash_method=hash_method,
+ cache_none=cache_none,
+ source_check=source_check,
+ args_to_ignore=args_to_ignore,
+ )
+
+ def cached(
+ self,
+ timeout: int | None = None,
+ key_prefix: str = "view/%s",
+ unless: Callable[..., bool] | None = None,
+ forced_update: Callable[..., bool] | None = None,
+ response_filter: Callable[..., Any] | None = None,
+ query_string: bool = False,
+ hash_method: Callable[..., Any] = configurable_hash_method,
+ cache_none: bool = False,
+ make_cache_key: Callable[..., Any] | None = None,
+ source_check: bool | None = None,
+ response_hit_indication: bool | None = False,
+ ) -> Callable[..., Any]:
+ return super().cached(
+ timeout=timeout,
+ key_prefix=key_prefix,
+ unless=unless,
+ forced_update=forced_update,
+ response_filter=response_filter,
+ query_string=query_string,
+ hash_method=hash_method,
+ cache_none=cache_none,
+ make_cache_key=make_cache_key,
+ source_check=source_check,
+ response_hit_indication=response_hit_indication,
+ )
+
+ # pylint: disable=protected-access
+ def _memoize_make_cache_key(
+ self,
+ make_name: Callable[..., Any] | None = None,
+ timeout: Callable[..., Any] | None = None,
+ forced_update: bool = False,
+ hash_method: Callable[..., Any] = configurable_hash_method,
+ source_check: bool | None = False,
+ args_to_ignore: Any | None = None,
+ ) -> Callable[..., Any]:
+ return super()._memoize_make_cache_key(
+ make_name=make_name,
+ timeout=timeout,
+ forced_update=forced_update,
+ hash_method=hash_method,
+ source_check=source_check,
+ args_to_ignore=args_to_ignore,
+ )
+
-class ExploreFormDataCache(Cache):
+class ExploreFormDataCache(SupersetCache):
def get(self, *args: Any, **kwargs: Any) -> Optional[Union[str, Markup]]:
cache = self.cache.get(*args, **kwargs)
@@ -53,10 +180,10 @@ class CacheManager:
def __init__(self) -> None:
super().__init__()
- self._cache = Cache()
- self._data_cache = Cache()
- self._thumbnail_cache = Cache()
- self._filter_state_cache = Cache()
+ self._cache = SupersetCache()
+ self._data_cache = SupersetCache()
+ self._thumbnail_cache = SupersetCache()
+ self._filter_state_cache = SupersetCache()
self._explore_form_data_cache = ExploreFormDataCache()
@staticmethod
diff --git a/tests/unit_tests/utils/test_cache_manager.py b/tests/unit_tests/utils/test_cache_manager.py
new file mode 100644
index 00000000000..b7b10e4506e
--- /dev/null
+++ b/tests/unit_tests/utils/test_cache_manager.py
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import hashlib
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from superset.utils.cache_manager import (
+ configurable_hash_method,
+ ConfigurableHashMethod,
+ SupersetCache,
+)
+
+
+def test_configurable_hash_method_uses_sha256():
+ """Test ConfigurableHashMethod uses sha256 when configured."""
+ mock_app = MagicMock()
+ mock_app.config = {"HASH_ALGORITHM": "sha256"}
+
+ with patch("superset.utils.cache_manager.current_app", mock_app):
+ hash_obj = configurable_hash_method(b"test")
+ # Verify it returns a sha256 hash object
+ assert hash_obj.hexdigest() == hashlib.sha256(b"test").hexdigest()
+
+
+def test_configurable_hash_method_uses_md5():
+ """Test ConfigurableHashMethod uses md5 when configured."""
+ mock_app = MagicMock()
+ mock_app.config = {"HASH_ALGORITHM": "md5"}
+
+ with patch("superset.utils.cache_manager.current_app", mock_app):
+ hash_obj = configurable_hash_method(b"test")
+ # Verify it returns a md5 hash object
+ assert hash_obj.hexdigest() == hashlib.md5(b"test").hexdigest()  # noqa: S324
+
+
+def test_configurable_hash_method_empty_data():
+ """Test ConfigurableHashMethod with empty data."""
+ mock_app = MagicMock()
+ mock_app.config = {"HASH_ALGORITHM": "sha256"}
+
+ with patch("superset.utils.cache_manager.current_app", mock_app):
+ hash_obj = configurable_hash_method()
+ assert hash_obj.hexdigest() == hashlib.sha256(b"").hexdigest()
+
+
+def test_configurable_hash_method_is_callable():
+ """Test that ConfigurableHashMethod instance is callable."""
+ method = ConfigurableHashMethod()
+ assert callable(method)
+
+
+def test_superset_cache_memoize_uses_configurable_hash():
+ """Test that SupersetCache.memoize uses configurable_hash_method by default."""
+ cache = SupersetCache()
+
+ with patch.object(
+ cache.__class__.__bases__[0], "memoize", return_value=lambda f: f
+ ) as mock_memoize:
+ cache.memoize(timeout=300)
+
+ mock_memoize.assert_called_once()
+ call_kwargs = mock_memoize.call_args[1]
+ assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_memoize_allows_explicit_hash_method():
+ """Test that SupersetCache.memoize allows explicit hash_method override."""
+ cache = SupersetCache()
+
+ with patch.object(
+ cache.__class__.__bases__[0], "memoize", return_value=lambda f: f
+ ) as mock_memoize:
+ cache.memoize(timeout=300, hash_method=hashlib.md5)
+
+ mock_memoize.assert_called_once()
+ call_kwargs = mock_memoize.call_args[1]
+ assert call_kwargs["hash_method"] == hashlib.md5
+
+
+def test_superset_cache_cached_uses_configurable_hash():
+ """Test that SupersetCache.cached uses configurable_hash_method by default."""
+ cache = SupersetCache()
+
+ with patch.object(
+ cache.__class__.__bases__[0], "cached", return_value=lambda f: f
+ ) as mock_cached:
+ cache.cached(timeout=300)
+
+ mock_cached.assert_called_once()
+ call_kwargs = mock_cached.call_args[1]
+ assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_cached_allows_explicit_hash_method():
+ """Test that SupersetCache.cached allows explicit hash_method override."""
+ cache = SupersetCache()
+
+ with patch.object(
+ cache.__class__.__bases__[0], "cached", return_value=lambda f: f
+ ) as mock_cached:
+ cache.cached(timeout=300, hash_method=hashlib.md5)
+
+ mock_cached.assert_called_once()
+ call_kwargs = mock_cached.call_args[1]
+ assert call_kwargs["hash_method"] == hashlib.md5
+
+
+def test_superset_cache_memoize_make_cache_key_uses_configurable_hash():
+ """Test _memoize_make_cache_key uses configurable_hash_method by default."""
+ cache = SupersetCache()
+
+ with patch.object(
+ cache.__class__.__bases__[0],
+ "_memoize_make_cache_key",
+ return_value=lambda *args, **kwargs: "cache_key",
+ ) as mock_make_key:
+ cache._memoize_make_cache_key(make_name=None, timeout=300)
+
+ mock_make_key.assert_called_once()
+ call_kwargs = mock_make_key.call_args[1]
+ assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_memoize_make_cache_key_allows_explicit_hash():
+ """Test _memoize_make_cache_key allows explicit hash_method override."""
+ cache = SupersetCache()
+
+ with patch.object(
+ cache.__class__.__bases__[0],
+ "_memoize_make_cache_key",
+ return_value=lambda *args, **kwargs: "cache_key",
+ ) as mock_make_key:
+ cache._memoize_make_cache_key(
+ make_name=None, timeout=300, hash_method=hashlib.md5
+ )
+
+ mock_make_key.assert_called_once()
+ call_kwargs = mock_make_key.call_args[1]
+ assert call_kwargs["hash_method"] == hashlib.md5
+
+
+@pytest.mark.parametrize(
+ "algorithm,expected_digest",
+ [
+ ("sha256", hashlib.sha256(b"test_data").hexdigest()),
+ ("md5", hashlib.md5(b"test_data").hexdigest()), # noqa: S324
+ ],
+)
+def test_configurable_hash_method_parametrized(algorithm, expected_digest):
+ """Parametrized test for ConfigurableHashMethod with different algorithms."""
+ mock_app = MagicMock()
+ mock_app.config = {"HASH_ALGORITHM": algorithm}
+
+ with patch("superset.utils.cache_manager.current_app", mock_app):
+ hash_obj = configurable_hash_method(b"test_data")
+ assert hash_obj.hexdigest() == expected_digest