This is an automated email from the ASF dual-hosted git repository.
johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new a3b41e2 fix: Issue 13956 (#13980)
a3b41e2 is described below
commit a3b41e2bac554a097015cf73e950ca1fc5e43b6e
Author: John Bodley <[email protected]>
AuthorDate: Fri Apr 9 15:21:58 2021 +1200
fix: Issue 13956 (#13980)
Co-authored-by: John Bodley <[email protected]>
---
UPDATING.md | 2 +
superset/config.py | 36 +++++++++++-
superset/connectors/sqla/models.py | 34 ++---------
superset/datasets/dao.py | 4 +-
...ea61c5e7_remove_dataset_health_check_message.py | 68 ++++++++++++++++++++++
superset/views/core.py | 4 --
superset/views/datasource.py | 2 -
tests/datasource_tests.py | 16 +----
8 files changed, 111 insertions(+), 55 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index f8f32f7..77eb720 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -25,6 +25,8 @@ assists people when migrating to a new version.
## Next
- [13772](https://github.com/apache/superset/pull/13772): Row level security
(RLS) is now enabled by default. To activate the feature, please run `superset
init` to expose the RLS menus to Admin users.
+- [13980](https://github.com/apache/superset/pull/13980): Data health checks
no longer use the metadata database as an interim cache. Though non-breaking,
deployments which implement complex logic should likely memoize the callback
function. Refer to documentation in the confg.py file for more detail.
+
### Breaking Changes
### Potential Downtime
### Deprecations
diff --git a/superset/config.py b/superset/config.py
index 1d9da94..e4e892c 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -53,6 +53,9 @@ logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from flask_appbuilder.security.sqla import models # pylint:
disable=unused-import
+ from superset.connectors.sqla.models import ( # pylint:
disable=unused-import
+ SqlaTable,
+ )
from superset.models.core import Database # pylint: disable=unused-import
# Realtime stats logger, a StatsD implementation exists
@@ -1146,9 +1149,36 @@ GLOBAL_ASYNC_QUERIES_TRANSPORT = "polling"
GLOBAL_ASYNC_QUERIES_POLLING_DELAY = 500
GLOBAL_ASYNC_QUERIES_WEBSOCKET_URL = "ws://127.0.0.1:8080/"
-# It's possible to add a dataset health check logic which is specific to your
system.
-# It will get executed each time when user open a chart's explore view.
-DATASET_HEALTH_CHECK = None
+# A SQL dataset health check. Note if enabled it is strongly advised that the
callable
+# be memoized to aid with performance, i.e.,
+#
+# @cache_manager.cache.memoize(timeout=0)
+# def DATASET_HEALTH_CHECK(datasource: SqlaTable) -> Optional[str]:
+# if (
+# datasource.sql and
+# len(sql_parse.ParsedQuery(datasource.sql,
strip_comments=True).tables) == 1
+# ):
+# return (
+# "This virtual dataset queries only one table and therefore
could be "
+# "replaced by querying the table directly."
+# )
+#
+# return None
+#
+# Within the FLASK_APP_MUTATOR callable, i.e., once the application and thus
cache have
+# been initialized it is also necessary to add the following logic to blow the
cache for
+# all datasources if the callback function changed.
+#
+# def FLASK_APP_MUTATOR(app: Flask) -> None:
+# name = "DATASET_HEALTH_CHECK"
+# func = app.config[name]
+# code = func.uncached.__code__.co_code
+#
+# if cache_manager.cache.get(name) != code:
+# cache_manager.cache.delete_memoized(func)
+# cache_manager.cache.set(name, code, timeout=0)
+#
+DATASET_HEALTH_CHECK: Optional[Callable[["SqlaTable"], str]] = None
# SQLalchemy link doc reference
SQLALCHEMY_DOCS_URL = "https://docs.sqlalchemy.org/en/13/core/engines.html"
diff --git a/superset/connectors/sqla/models.py
b/superset/connectors/sqla/models.py
index 004cb23..74b5a7d 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -60,7 +60,6 @@ from superset.connectors.base.models import BaseColumn,
BaseDatasource, BaseMetr
from superset.db_engine_specs.base import TimestampExpression
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import QueryObjectValidationError,
SupersetSecurityException
-from superset.extensions import event_logger
from superset.jinja_context import (
BaseTemplateProcessor,
ExtraCache,
@@ -687,9 +686,10 @@ class SqlaTable( # pylint:
disable=too-many-public-methods,too-many-instance-at
self.table_name, schema=self.schema, show_cols=False,
latest_partition=False
)
- @property
+ @property # type: ignore
def health_check_message(self) -> Optional[str]:
- return self.extra_dict.get("health_check", {}).get("message")
+ check = config["DATASET_HEALTH_CHECK"]
+ return check(self) if check else None
@property
def data(self) -> Dict[str, Any]:
@@ -703,13 +703,7 @@ class SqlaTable( # pylint:
disable=too-many-public-methods,too-many-instance-at
data_["fetch_values_predicate"] = self.fetch_values_predicate
data_["template_params"] = self.template_params
data_["is_sqllab_view"] = self.is_sqllab_view
- # Don't return previously populated health check message in case
- # the health check feature is turned off
- data_["health_check_message"] = (
- self.health_check_message
- if config.get("DATASET_HEALTH_CHECK")
- else None
- )
+ data_["health_check_message"] = self.health_check_message
data_["extra"] = self.extra
return data_
@@ -1608,26 +1602,6 @@ class SqlaTable( # pylint:
disable=too-many-public-methods,too-many-instance-at
extra_cache_keys += sqla_query.extra_cache_keys
return extra_cache_keys
- def health_check(self, commit: bool = False, force: bool = False) -> None:
- check = config.get("DATASET_HEALTH_CHECK")
- if check is None:
- return
-
- extra = self.extra_dict
- # force re-run health check, or health check is updated
- if force or extra.get("health_check", {}).get("version") !=
check.version:
- with event_logger.log_context(action="dataset_health_check"):
- message = check(self)
- extra["health_check"] = {
- "version": check.version,
- "message": message,
- }
- self.extra = json.dumps(extra)
-
- db.session.merge(self)
- if commit:
- db.session.commit()
-
sa.event.listen(SqlaTable, "after_insert", security_manager.set_perm)
sa.event.listen(SqlaTable, "after_update", security_manager.set_perm)
diff --git a/superset/datasets/dao.py b/superset/datasets/dao.py
index 08df867..f2f57d2 100644
--- a/superset/datasets/dao.py
+++ b/superset/datasets/dao.py
@@ -169,9 +169,7 @@ class DatasetDAO(BaseDAO): # pylint:
disable=too-many-public-methods
super().update(model, properties, commit=commit)
properties["columns"] = original_properties
- updated_model = super().update(model, properties, commit=False)
- model.health_check(force=True, commit=False)
- return updated_model
+ return super().update(model, properties, commit=False)
@classmethod
def update_columns(
diff --git
a/superset/migrations/versions/134cea61c5e7_remove_dataset_health_check_message.py
b/superset/migrations/versions/134cea61c5e7_remove_dataset_health_check_message.py
new file mode 100644
index 0000000..b8cbdc3
--- /dev/null
+++
b/superset/migrations/versions/134cea61c5e7_remove_dataset_health_check_message.py
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""remove dataset health check message
+
+Revision ID: 134cea61c5e7
+Revises: 301362411006
+Create Date: 2021-04-07 07:21:27.324983
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = "134cea61c5e7"
+down_revision = "301362411006"
+
+import json
+import logging
+
+from alembic import op
+from sqlalchemy import Column, Integer, Text
+from sqlalchemy.ext.declarative import declarative_base
+
+from superset import db
+
+Base = declarative_base()
+
+
+class SqlaTable(Base):
+ __tablename__ = "tables"
+
+ id = Column(Integer, primary_key=True)
+ extra = Column(Text)
+
+
+def upgrade():
+ bind = op.get_bind()
+ session = db.Session(bind=bind)
+
+ for datasource in session.query(SqlaTable):
+ if datasource.extra:
+ try:
+ extra = json.loads(datasource.extra)
+
+ if extra and "health_check" in extra:
+ del extra["health_check"]
+ datasource.extra = json.dumps(extra) if extra else None
+ except Exception as ex:
+ logging.exception(ex)
+
+ session.commit()
+ session.close()
+
+
+def downgrade():
+ pass
diff --git a/superset/views/core.py b/superset/views/core.py
index 6475375..0093f50 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -732,10 +732,6 @@ class Superset(BaseSupersetView): # pylint:
disable=too-many-public-methods
f"datasource_id={datasource_id}&"
)
- # if feature enabled, run some health check rules for sqla
datasource
- if hasattr(datasource, "health_check"):
- datasource.health_check()
-
viz_type = form_data.get("viz_type")
if not viz_type and datasource and datasource.default_endpoint:
return redirect(datasource.default_endpoint)
diff --git a/superset/views/datasource.py b/superset/views/datasource.py
index be6f73e..10cfc16 100644
--- a/superset/views/datasource.py
+++ b/superset/views/datasource.py
@@ -83,8 +83,6 @@ class Datasource(BaseSupersetView):
status=409,
)
orm_datasource.update_from_object(datasource_dict)
- if hasattr(orm_datasource, "health_check"):
- orm_datasource.health_check(force=True, commit=False)
data = orm_datasource.data
db.session.commit()
diff --git a/tests/datasource_tests.py b/tests/datasource_tests.py
index ef49640..31a0598 100644
--- a/tests/datasource_tests.py
+++ b/tests/datasource_tests.py
@@ -221,21 +221,11 @@ class TestDatasource(SupersetTestCase):
return "Warning message!"
app.config["DATASET_HEALTH_CHECK"] = my_check
- my_check.version = 0.1
-
self.login(username="admin")
tbl = self.get_table_by_name("birth_names")
- self.datasource = ConnectorRegistry.get_datasource("table", tbl.id,
db.session)
-
- for key in self.datasource.export_fields:
- self.original_attrs[key] = getattr(self.datasource, key)
-
- url = f"/datasource/get/{tbl.type}/{tbl.id}/"
- tbl.health_check(commit=True, force=True)
- resp = self.get_json_resp(url)
- self.assertEqual(resp["health_check_message"], "Warning message!")
-
- del app.config["DATASET_HEALTH_CHECK"]
+ datasource = ConnectorRegistry.get_datasource("table", tbl.id,
db.session)
+ assert datasource.health_check_message == "Warning message!"
+ app.config["DATASET_HEALTH_CHECK"] = None
def test_get_datasource_failed(self):
pytest.raises(