codeant-ai-for-open-source[bot] commented on code in PR #37361: URL: https://github.com/apache/superset/pull/37361#discussion_r2720552237
########## tests/unit_tests/utils/test_cache_manager.py: ########## @@ -0,0 +1,171 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import hashlib +from unittest.mock import MagicMock, patch + +import pytest + +from superset.utils.cache_manager import ( + configurable_hash_method, + ConfigurableHashMethod, + SupersetCache, +) + + +def test_configurable_hash_method_uses_sha256(): + """Test ConfigurableHashMethod uses sha256 when configured.""" + mock_app = MagicMock() + mock_app.config = {"HASH_ALGORITHM": "sha256"} + + with patch("superset.utils.cache_manager.current_app", mock_app): + hash_obj = configurable_hash_method(b"test") + # Verify it returns a sha256 hash object + assert hash_obj.hexdigest() == hashlib.sha256(b"test").hexdigest() + + +def test_configurable_hash_method_uses_md5(): + """Test ConfigurableHashMethod uses md5 when configured.""" + mock_app = MagicMock() + mock_app.config = {"HASH_ALGORITHM": "md5"} + + with patch("superset.utils.cache_manager.current_app", mock_app): + hash_obj = configurable_hash_method(b"test") + # Verify it returns a md5 hash object + assert hash_obj.hexdigest() == hashlib.md5(b"test").hexdigest() # noqa: S324 + + +def test_configurable_hash_method_empty_data(): + """Test ConfigurableHashMethod with empty data.""" + mock_app = MagicMock() + mock_app.config = {"HASH_ALGORITHM": "sha256"} + + with patch("superset.utils.cache_manager.current_app", mock_app): + hash_obj = configurable_hash_method() + assert hash_obj.hexdigest() == hashlib.sha256(b"").hexdigest() + + +def test_configurable_hash_method_is_callable(): + """Test that ConfigurableHashMethod instance is callable.""" + method = ConfigurableHashMethod() + assert callable(method) + + +def test_superset_cache_memoize_uses_configurable_hash(): + """Test that SupersetCache.memoize uses configurable_hash_method by default.""" + cache = SupersetCache() + + with patch.object( + cache.__class__.__bases__[0], "memoize", return_value=lambda f: f + ) as mock_memoize: + cache.memoize(timeout=300) + + mock_memoize.assert_called_once() + call_kwargs = mock_memoize.call_args[1] + assert call_kwargs["hash_method"] is configurable_hash_method + + +def test_superset_cache_memoize_allows_explicit_hash_method(): + """Test that SupersetCache.memoize allows explicit hash_method override.""" + cache = SupersetCache() + + with patch.object( + cache.__class__.__bases__[0], "memoize", return_value=lambda f: f + ) as mock_memoize: + cache.memoize(timeout=300, hash_method=hashlib.md5) + + mock_memoize.assert_called_once() + call_kwargs = mock_memoize.call_args[1] + assert call_kwargs["hash_method"] == hashlib.md5 + + +def test_superset_cache_cached_uses_configurable_hash(): + """Test that SupersetCache.cached uses configurable_hash_method by default.""" + cache = SupersetCache() + + with patch.object( + cache.__class__.__bases__[0], "cached", return_value=lambda f: f + ) as mock_cached: + cache.cached(timeout=300) + + mock_cached.assert_called_once() + call_kwargs = mock_cached.call_args[1] + assert call_kwargs["hash_method"] is configurable_hash_method + + +def test_superset_cache_cached_allows_explicit_hash_method(): + """Test that SupersetCache.cached allows explicit hash_method override.""" + cache = SupersetCache() + + with patch.object( + cache.__class__.__bases__[0], "cached", return_value=lambda f: f + ) as mock_cached: + cache.cached(timeout=300, hash_method=hashlib.md5) + + mock_cached.assert_called_once() + call_kwargs = mock_cached.call_args[1] + assert call_kwargs["hash_method"] == hashlib.md5 + + +def test_superset_cache_memoize_make_cache_key_uses_configurable_hash(): + """Test _memoize_make_cache_key uses configurable_hash_method by default.""" + cache = SupersetCache() + + with patch.object( + cache.__class__.__bases__[0], + "_memoize_make_cache_key", + return_value=lambda *args, **kwargs: "cache_key", + ) as mock_make_key: + cache._memoize_make_cache_key(make_name=None, timeout=300) + + mock_make_key.assert_called_once() + call_kwargs = mock_make_key.call_args[1] + assert call_kwargs["hash_method"] is configurable_hash_method + + +def test_superset_cache_memoize_make_cache_key_allows_explicit_hash(): + """Test _memoize_make_cache_key allows explicit hash_method override.""" + cache = SupersetCache() + + with patch.object( + cache.__class__.__bases__[0], + "_memoize_make_cache_key", + return_value=lambda *args, **kwargs: "cache_key", + ) as mock_make_key: + cache._memoize_make_cache_key( + make_name=None, timeout=300, hash_method=hashlib.md5 + ) + + mock_make_key.assert_called_once() + call_kwargs = mock_make_key.call_args[1] + assert call_kwargs["hash_method"] == hashlib.md5 + + [email protected]( + "algorithm,expected_digest", + [ + ("sha256", hashlib.sha256(b"test_data").hexdigest()), + ("md5", hashlib.md5(b"test_data").hexdigest()), # noqa: S324 + ], +) +def test_configurable_hash_method_parametrized(algorithm, expected_digest): + """Parametrized test for ConfigurableHashMethod with different algorithms.""" + mock_app = MagicMock() + mock_app.config = {"HASH_ALGORITHM": algorithm} + + with patch("superset.utils.cache_manager.current_app", mock_app): + hash_obj = configurable_hash_method(b"test_data") + assert hash_obj.hexdigest() == expected_digest Review Comment: **Suggestion:** Parametrized test uses `hashlib.md5(...)` to compute an expected digest value, which also calls MD5 at runtime and will fail under FIPS. Change the parametrization to avoid calling MD5 (for example assert hexdigest length for MD5) and make the assertion based on digest length rather than computing MD5 directly. [possible bug] <details> <summary><b>Severity Level:</b> Critical 🚨</summary> ```mdx - ❌ Test collection fails in FIPS-enabled environments. - ⚠️ CI pipeline test stage blocked by import error. - ⚠️ Prevents verification of cache HASH_ALGORITHM behavior. ``` </details> ```suggestion "algorithm,expected_length", [ ("sha256", len(hashlib.sha256(b"test_data").hexdigest())), ("md5", 32), # md5 hexdigest length is 32; avoid constructing md5 in FIPS mode ], ) def test_configurable_hash_method_parametrized(algorithm, expected_length): """Parametrized test for ConfigurableHashMethod with different algorithms.""" mock_app = MagicMock() mock_app.config = {"HASH_ALGORITHM": algorithm} with patch("superset.utils.cache_manager.current_app", mock_app): hash_obj = configurable_hash_method(b"test_data") assert isinstance(hash_obj.hexdigest(), str) assert len(hash_obj.hexdigest()) == expected_length ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `pytest tests/unit_tests/utils/test_cache_manager.py -q`. 2. Pytest imports the test module; at import time it evaluates the `@pytest.mark.parametrize(...)` decorator at lines 157-163. 3. Evaluation of the parametrization constructs `hashlib.md5(b"test_data").hexdigest()` at line 161 during import. 4. In a FIPS-enabled environment constructing `hashlib.md5()` can raise an exception, causing the test module import to fail before tests run. 5. Observed behavior: pytest fails quickly with an exception during test collection/import, blocking the test run. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/unit_tests/utils/test_cache_manager.py **Line:** 158:171 **Comment:** *Possible Bug: Parametrized test uses `hashlib.md5(...)` to compute an expected digest value, which also calls MD5 at runtime and will fail under FIPS. Change the parametrization to avoid calling MD5 (for example assert hexdigest length for MD5) and make the assertion based on digest length rather than computing MD5 directly. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> ########## superset/utils/cache_manager.py: ########## @@ -27,8 +28,134 @@ CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache" +# Hash function lookup table matching superset.utils.hashing +_HASH_METHODS: dict[str, Callable[..., Any]] = { + "sha256": hashlib.sha256, + "md5": hashlib.md5, +} + + +class ConfigurableHashMethod: + """ + A callable that defers hash algorithm selection to runtime. + + Flask-caching's memoize decorator evaluates hash_method at decoration time + (module import), but we need to read HASH_ALGORITHM config at function call + time when the app context is available. + + This class acts like a hashlib function but looks up the configured + algorithm when called. + """ + + def __call__(self, data: bytes = b"") -> Any: + """ + Create a hash object using the configured algorithm. + + Args: + data: Optional initial data to hash + + Returns: + A hashlib hash object (e.g., sha256 or md5) + + Raises: + ValueError: If HASH_ALGORITHM is set to an unsupported value + """ + algorithm = current_app.config["HASH_ALGORITHM"] Review Comment: **Suggestion:** Accessing `current_app.config["HASH_ALGORITHM"]` will raise a KeyError if the config key is missing and is case-sensitive; normalize and provide a safe default (e.g., "sha256") to avoid runtime KeyError and mismatches from different casings. [possible bug] <details> <summary><b>Severity Level:</b> Critical 🚨</summary> ```mdx - ❌ Cache key generation raises KeyError on missing config. - ⚠️ Any memoize/cached decorator may error during requests. - ⚠️ Affects SupersetCache.memoize and .cached wrappers. ``` </details> ```suggestion algorithm = current_app.config.get("HASH_ALGORITHM", "sha256") algorithm = str(algorithm).lower() ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Start the Superset Flask app or run the cache manager unit tests (see PR testing instructions). The CacheManager class constructs SupersetCache instances at `superset/utils/cache_manager.py:179-186` and exposes them via properties. 2. Any code using the SupersetCache.memoize wrapper will default its hash_method to the singleton `configurable_hash_method` defined in this file (`superset/utils/cache_manager.py`); see `SupersetCache.memoize` signature at `superset/utils/cache_manager.py:86-108` where `hash_method=configurable_hash_method`. 3. When a memoized function is invoked, Flask-Caching will call the provided hash_method which dispatches to `ConfigurableHashMethod.__call__` implemented at `superset/utils/cache_manager.py:63-67`. That method currently does direct indexing into the Flask config with `current_app.config["HASH_ALGORITHM"]`. 4. If the running Flask app configuration does not include the "HASH_ALGORITHM" key (e.g., misconfiguration, minimal test app, or environment where the default isn't set), the direct indexing at `superset/utils/cache_manager.py:63` raises a KeyError, causing the request or memoized call to error out. The improved code uses `.get(..., "sha256")` and normalizes casing to avoid this KeyError and mismatches. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** superset/utils/cache_manager.py **Line:** 63:63 **Comment:** *Possible Bug: Accessing `current_app.config["HASH_ALGORITHM"]` will raise a KeyError if the config key is missing and is case-sensitive; normalize and provide a safe default (e.g., "sha256") to avoid runtime KeyError and mismatches from different casings. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
