This is an automated email from the ASF dual-hosted git repository. villebro pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/superset.git
commit cc840cd6c7b652d925332f2b64bbdff146d1e40c Author: Ville Brofeldt <[email protected]> AuthorDate: Tue Jul 4 15:55:22 2023 +0300 chore(metastore-cache): add codec support (#24586) --- superset/config.py | 25 ++++++++--- superset/extensions/metastore_cache.py | 38 +++++++++++++---- .../extensions/metastore_cache_test.py | 49 +++++++++++++++++++++- 3 files changed, 96 insertions(+), 16 deletions(-) diff --git a/superset/config.py b/superset/config.py index af46d489c9..c089416505 100644 --- a/superset/config.py +++ b/superset/config.py @@ -62,6 +62,7 @@ from superset.advanced_data_type.plugins.internet_port import internet_port from superset.advanced_data_type.types import AdvancedDataType from superset.constants import CHANGE_ME_SECRET_KEY from superset.jinja_context import BaseTemplateProcessor +from superset.key_value.types import JsonKeyValueCodec from superset.stats_logger import DummyStatsLogger from superset.superset_typing import CacheConfig from superset.tasks.types import ExecutorType @@ -686,20 +687,32 @@ CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"} # Cache for datasource metadata and query results DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"} -# Cache for dashboard filter state (`CACHE_TYPE` defaults to `SimpleCache` when -# running in debug mode unless overridden) +# Cache for dashboard filter state. `CACHE_TYPE` defaults to `SupersetMetastoreCache` +# that stores the values in the key-value table in the Superset metastore, as it's +# required for Superset to operate correctly, but can be replaced by any +# `Flask-Caching` backend. FILTER_STATE_CACHE_CONFIG: CacheConfig = { + "CACHE_TYPE": "SupersetMetastoreCache", "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=90).total_seconds()), - # should the timeout be reset when retrieving a cached value + # Should the timeout be reset when retrieving a cached value? "REFRESH_TIMEOUT_ON_RETRIEVAL": True, + # The following parameter only applies to `MetastoreCache`: + # How should entries be serialized/deserialized? + "CODEC": JsonKeyValueCodec(), } -# Cache for explore form data state (`CACHE_TYPE` defaults to `SimpleCache` when -# running in debug mode unless overridden) +# Cache for explore form data state. `CACHE_TYPE` defaults to `SupersetMetastoreCache` +# that stores the values in the key-value table in the Superset metastore, as it's +# required for Superset to operate correctly, but can be replaced by any +# `Flask-Caching` backend. EXPLORE_FORM_DATA_CACHE_CONFIG: CacheConfig = { + "CACHE_TYPE": "SupersetMetastoreCache", "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()), - # should the timeout be reset when retrieving a cached value + # Should the timeout be reset when retrieving a cached value? "REFRESH_TIMEOUT_ON_RETRIEVAL": True, + # The following parameter only applies to `MetastoreCache`: + # How should entries be serialized/deserialized? + "CODEC": JsonKeyValueCodec(), } # store cache keys by datasource UID (via CacheKey) for custom processing/invalidation diff --git a/superset/extensions/metastore_cache.py b/superset/extensions/metastore_cache.py index f69276c908..0dbd42b2b4 100644 --- a/superset/extensions/metastore_cache.py +++ b/superset/extensions/metastore_cache.py @@ -14,26 +14,37 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +import logging from datetime import datetime, timedelta from typing import Any, Dict, List, Optional from uuid import UUID, uuid3 -from flask import Flask +from flask import current_app, Flask, has_app_context from flask_caching import BaseCache from superset.key_value.exceptions import KeyValueCreateFailedError -from superset.key_value.types import KeyValueResource, PickleKeyValueCodec +from superset.key_value.types import ( + KeyValueCodec, + KeyValueResource, + PickleKeyValueCodec, +) from superset.key_value.utils import get_uuid_namespace RESOURCE = KeyValueResource.METASTORE_CACHE -CODEC = PickleKeyValueCodec() + +logger = logging.getLogger(__name__) class SupersetMetastoreCache(BaseCache): - def __init__(self, namespace: UUID, default_timeout: int = 300) -> None: + def __init__( + self, + namespace: UUID, + codec: KeyValueCodec, + default_timeout: int = 300, + ) -> None: super().__init__(default_timeout) self.namespace = namespace + self.codec = codec @classmethod def factory( @@ -41,6 +52,17 @@ class SupersetMetastoreCache(BaseCache): ) -> BaseCache: seed = config.get("CACHE_KEY_PREFIX", "") kwargs["namespace"] = get_uuid_namespace(seed) + codec = config.get("CODEC") or PickleKeyValueCodec() + if ( + has_app_context() + and not current_app.debug + and isinstance(codec, PickleKeyValueCodec) + ): + logger.warning( + "Using PickleKeyValueCodec with SupersetMetastoreCache may be unsafe, " + "use at your own risk." + ) + kwargs["codec"] = codec return cls(*args, **kwargs) def get_key(self, key: str) -> UUID: @@ -69,7 +91,7 @@ class SupersetMetastoreCache(BaseCache): resource=RESOURCE, key=self.get_key(key), value=value, - codec=CODEC, + codec=self.codec, expires_on=self._get_expiry(timeout), ).run() return True @@ -82,7 +104,7 @@ class SupersetMetastoreCache(BaseCache): CreateKeyValueCommand( resource=RESOURCE, value=value, - codec=CODEC, + codec=self.codec, key=self.get_key(key), expires_on=self._get_expiry(timeout), ).run() @@ -98,7 +120,7 @@ class SupersetMetastoreCache(BaseCache): return GetKeyValueCommand( resource=RESOURCE, key=self.get_key(key), - codec=CODEC, + codec=self.codec, ).run() def has(self, key: str) -> bool: diff --git a/tests/integration_tests/extensions/metastore_cache_test.py b/tests/integration_tests/extensions/metastore_cache_test.py index d9e0e9ee26..eeb7279fe1 100644 --- a/tests/integration_tests/extensions/metastore_cache_test.py +++ b/tests/integration_tests/extensions/metastore_cache_test.py @@ -16,17 +16,27 @@ # under the License. from __future__ import annotations +from contextlib import nullcontext from datetime import datetime, timedelta -from typing import TYPE_CHECKING +from typing import Any, TYPE_CHECKING from uuid import UUID import pytest from flask.ctx import AppContext from freezegun import freeze_time +from superset.key_value.exceptions import KeyValueCodecEncodeException +from superset.key_value.types import ( + JsonKeyValueCodec, + KeyValueCodec, + PickleKeyValueCodec, +) + if TYPE_CHECKING: from superset.extensions.metastore_cache import SupersetMetastoreCache +NAMESPACE = UUID("ee173d1b-ccf3-40aa-941c-985c15224496") + FIRST_KEY = "foo" FIRST_KEY_INITIAL_VALUE = {"foo": "bar"} FIRST_KEY_UPDATED_VALUE = "foo" @@ -40,8 +50,9 @@ def cache() -> SupersetMetastoreCache: from superset.extensions.metastore_cache import SupersetMetastoreCache return SupersetMetastoreCache( - namespace=UUID("ee173d1b-ccf3-40aa-941c-985c15224496"), + namespace=NAMESPACE, default_timeout=600, + codec=PickleKeyValueCodec(), ) @@ -75,3 +86,37 @@ def test_expiry(app_context: AppContext, cache: SupersetMetastoreCache) -> None: with freeze_time(dttm + delta + timedelta(seconds=1)): assert cache.has(FIRST_KEY) is False assert cache.get(FIRST_KEY) is None + + [email protected]( + "input_,codec,expected_result", + [ + ({"foo": "bar"}, JsonKeyValueCodec(), {"foo": "bar"}), + (("foo", "bar"), JsonKeyValueCodec(), ["foo", "bar"]), + (complex(1, 1), JsonKeyValueCodec(), KeyValueCodecEncodeException()), + ({"foo": "bar"}, PickleKeyValueCodec(), {"foo": "bar"}), + (("foo", "bar"), PickleKeyValueCodec(), ("foo", "bar")), + (complex(1, 1), PickleKeyValueCodec(), complex(1, 1)), + ], +) +def test_codec( + input_: Any, + codec: KeyValueCodec, + expected_result: Any, + app_context: AppContext, +) -> None: + from superset.extensions.metastore_cache import SupersetMetastoreCache + + cache = SupersetMetastoreCache( + namespace=NAMESPACE, + default_timeout=600, + codec=codec, + ) + cm = ( + pytest.raises(type(expected_result)) + if isinstance(expected_result, Exception) + else nullcontext() + ) + with cm: + cache.set(FIRST_KEY, input_) + assert cache.get(FIRST_KEY) == expected_result
