This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch chart-telemetry-log
in repository https://gitbox.apache.org/repos/asf/superset.git

commit b6e454666acf3746bac8b806f429082fb0e8227d
Author: Beto Dealmeida <[email protected]>
AuthorDate: Tue Dec 3 18:05:19 2024 -0500

    feat: fine-grain chart data telemetry
---
 superset/charts/data/api.py        | 24 ++++++++++++---------
 superset/charts/post_processing.py | 18 ++++++++++------
 superset/models/core.py            | 43 +++++++++++++++++++++++++++-----------
 3 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py
index 653b09896e..0f6e605c7b 100644
--- a/superset/charts/data/api.py
+++ b/superset/charts/data/api.py
@@ -399,17 +399,19 @@ class ChartDataRestApi(ChartRestApi):
                 for query in queries:
                     with contextlib.suppress(KeyError):
                         del query["query"]
-            response_data = json.dumps(
-                {"result": queries},
-                default=json.json_int_dttm_ser,
-                ignore_nan=True,
-            )
+            with 
event_logger.log_context(f"{self.__class__.__name__}.json_dumps"):
+                response_data = json.dumps(
+                    {"result": queries},
+                    default=json.json_int_dttm_ser,
+                    ignore_nan=True,
+                )
             resp = make_response(response_data, 200)
             resp.headers["Content-Type"] = "application/json; charset=utf-8"
             return resp
 
         return self.response_400(message=f"Unsupported result_format: 
{result_format}")
 
+    @event_logger.log_this
     def _get_data_response(
         self,
         command: ChartDataCommand,
@@ -435,11 +437,13 @@ class ChartDataRestApi(ChartRestApi):
     ) -> dict[str, Any]:
         return {
             "dashboard_id": form_data.get("form_data", {}).get("dashboardId"),
-            "dataset_id": form_data.get("datasource", {}).get("id")
-            if isinstance(form_data.get("datasource"), dict)
-            and form_data.get("datasource", {}).get("type")
-            == DatasourceType.TABLE.value
-            else None,
+            "dataset_id": (
+                form_data.get("datasource", {}).get("id")
+                if isinstance(form_data.get("datasource"), dict)
+                and form_data.get("datasource", {}).get("type")
+                == DatasourceType.TABLE.value
+                else None
+            ),
             "slice_id": form_data.get("form_data", {}).get("slice_id"),
         }
 
diff --git a/superset/charts/post_processing.py 
b/superset/charts/post_processing.py
index 4c5abd8db1..edc3c94474 100644
--- a/superset/charts/post_processing.py
+++ b/superset/charts/post_processing.py
@@ -34,6 +34,7 @@ import pandas as pd
 from flask_babel import gettext as __
 
 from superset.common.chart_data import ChartDataResultFormat
+from superset.extensions import event_logger
 from superset.utils.core import (
     extract_dataframe_dtypes,
     get_column_names,
@@ -296,6 +297,7 @@ post_processors = {
 }
 
 
+@event_logger.log_this
 def apply_post_process(
     result: dict[Any, Any],
     form_data: Optional[dict[str, Any]] = None,
@@ -344,15 +346,19 @@ def apply_post_process(
         # `Tuple[str]`. Otherwise encoding to JSON later will fail because
         # maps cannot have tuples as their keys in JSON.
         processed_df.columns = [
-            " ".join(str(name) for name in column).strip()
-            if isinstance(column, tuple)
-            else column
+            (
+                " ".join(str(name) for name in column).strip()
+                if isinstance(column, tuple)
+                else column
+            )
             for column in processed_df.columns
         ]
         processed_df.index = [
-            " ".join(str(name) for name in index).strip()
-            if isinstance(index, tuple)
-            else index
+            (
+                " ".join(str(name) for name in index).strip()
+                if isinstance(index, tuple)
+                else index
+            )
             for index in processed_df.index
         ]
 
diff --git a/superset/models/core.py b/superset/models/core.py
index 9b06932afc..c923de427c 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -75,7 +75,11 @@ from superset.extensions import (
 from superset.models.helpers import AuditMixinNullable, ImportExportMixin
 from superset.result_set import SupersetResultSet
 from superset.sql_parse import Table
-from superset.superset_typing import OAuth2ClientConfig, ResultSetColumnType
+from superset.superset_typing import (
+    DbapiDescription,
+    OAuth2ClientConfig,
+    ResultSetColumnType,
+)
 from superset.utils import cache as cache_util, core as utils, json
 from superset.utils.backports import StrEnum
 from superset.utils.core import DatasourceName, get_username
@@ -667,7 +671,7 @@ class Database(Model, AuditMixinNullable, 
ImportExportMixin):  # pylint: disable
             )
         return sql_
 
-    def get_df(  # pylint: disable=too-many-locals
+    def get_df(
         self,
         sql: str,
         catalog: str | None = None,
@@ -700,21 +704,36 @@ class Database(Model, AuditMixinNullable, 
ImportExportMixin):  # pylint: disable
                     object_ref=__name__,
                 ):
                     self.db_engine_spec.execute(cursor, sql_, self)
-                    if i < len(sqls) - 1:
-                        # If it's not the last, we don't keep the results
-                        cursor.fetchall()
-                    else:
-                        # Last query, fetch and process the results
-                        data = self.db_engine_spec.fetch_data(cursor)
-                        result_set = SupersetResultSet(
-                            data, cursor.description, self.db_engine_spec
-                        )
-                        df = result_set.to_pandas_df()
+
+                if rows := self.fetch_rows(cursor, i == len(sqls) - 1):
+                    df = self.load_into_dataframe(cursor.description, rows)
+
             if mutator:
                 df = mutator(df)
 
             return self.post_process_df(df)
 
+    @event_logger.log_this
+    def fetch_rows(self, cursor: Any, last: bool) -> list[tuple[Any, ...]] | 
None:
+        if not last:
+            cursor.fetchall()
+            return None
+
+        return self.db_engine_spec.fetch_data(cursor)
+
+    @event_logger.log_this
+    def load_into_dataframe(
+        self,
+        description: DbapiDescription,
+        data: list[tuple[Any, ...]],
+    ) -> pd.DataFrame:
+        result_set = SupersetResultSet(
+            data,
+            description,
+            self.db_engine_spec,
+        )
+        return result_set.to_pandas_df()
+
     def compile_sqla_query(
         self,
         qry: Select,

Reply via email to