This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch explorable
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 837ea2a07f55781a7818a2a772e5840cb3d43b44
Author: Beto Dealmeida <[email protected]>
AuthorDate: Thu Oct 16 11:59:25 2025 -0400

    Improving dataframe method
---
 superset/semantic_layers/snowflake_.py | 140 ++++++++++++++++++++-------------
 superset/semantic_layers/types.py      |  11 ++-
 2 files changed, 96 insertions(+), 55 deletions(-)

diff --git a/superset/semantic_layers/snowflake_.py 
b/superset/semantic_layers/snowflake_.py
index a549bc65b3..c86305934e 100644
--- a/superset/semantic_layers/snowflake_.py
+++ b/superset/semantic_layers/snowflake_.py
@@ -53,6 +53,7 @@ from superset.semantic_layers.types import (
     NUMBER,
     OBJECT,
     Operator,
+    PredicateType,
     STRING,
     TIME,
     Type,
@@ -277,7 +278,7 @@ class SnowflakeSemanticLayer:
         enabling dynamic schema updates (e.g., populating schema dropdown after
         database is selected).
         """
-        fields: dict[str, tuple[type, Field]] = {}
+        fields: dict[str, tuple[Any, Field]] = {}
 
         # update configuration with runtime data, for example, to select a 
schema after
         # the database has been selected
@@ -517,12 +518,15 @@ class SnowflakeExplorable:
 
         return STRING
 
-    def _build_where_clause(
+    def _build_predicates(
         self,
-        filters: set[Filter | NativeFilter] | None,
+        filters: set[Filter | NativeFilter],
     ) -> tuple[str, tuple[FilterValues]]:
         """
-        Convert a set of filters to a SQL WHERE clause.
+        Convert a set of filters to a single `AND`ed predicate.
+
+        Caller should check the types of filters beforehand, as this method 
does not
+        differentiate between `WHERE` and `HAVING` predicates.
         """
         if not filters:
             return "", ()
@@ -536,7 +540,7 @@ class SnowflakeExplorable:
             if isinstance(filter_, NativeFilter):
                 predicates.append(f"({filter_.definition})")
             else:
-                predicates.append(f"({self._build_predicate(filter_)})")
+                predicates.append(f"({self._build_native_filter(filter_)})")
                 if filter_.operator not in unary_operators:
                     parameters.extend(
                         [filter_.value]
@@ -544,7 +548,7 @@ class SnowflakeExplorable:
                         else filter_.value
                     )
 
-        return "WHERE " + " AND ".join(predicates), tuple(parameters)
+        return " AND ".join(predicates), tuple(parameters)
 
     def get_values(
         self,
@@ -554,24 +558,29 @@ class SnowflakeExplorable:
         """
         Return distinct values for a dimension.
         """
-        where_clause, parameters = self._build_where_clause(filters)
+        where_clause, parameters = self._build_predicates(
+            {
+                filter_
+                for filter_ in (filters or [])
+                if filter_.type == PredicateType.WHERE
+            }
+        )
         query = dedent(
             f"""
             SELECT {self._quote(dimension.name)}
             FROM SEMANTIC_VIEW(
                 {self.uid()}
                 DIMENSIONS {dimension.id}
-                {where_clause}
+                {"WHERE " + where_clause if where_clause else ""}
             )
             """  # noqa: S608
         )
-        print(query)
         connection_parameters = get_connection_parameters(self.configuration)
         with connect(**connection_parameters) as connection:
             cursor = connection.cursor()
             return {row[0] for row in cursor.execute(query, parameters)}
 
-    def _build_predicate(self, filter_: Filter) -> str:
+    def _build_native_filter(self, filter_: Filter) -> str:
         """
         Convert a Filter to a NativeFilter.
         """
@@ -594,63 +603,75 @@ class SnowflakeExplorable:
 
         return f"{column_name} {operator.value} ?"
 
-    # TODO (betodealmeida): create a class to manage the explorable request 
and pass it
-    # here, instead of individual parameters
     def get_dataframe(
         self,
         metrics: list[Metric],
         dimensions: list[Dimension],
         filters: set[Filter | NativeFilter] | None = None,
+        limit: int | None = None,
+        offset: int | None = None,
     ) -> DataFrame:
         """
         Execute a query and return the results as a Pandas DataFrame.
         """
-        dimension_arguments = ", ".join(
-            f"{dimension.id} AS {self._quote(dimension.id)}" for dimension in 
dimensions
+        if not metrics and not dimensions:
+            return DataFrame()
+
+        query, parameters = self._get_query(metrics, dimensions, filters, 
limit, offset)
+        connection_parameters = get_connection_parameters(self.configuration)
+        with connect(**connection_parameters) as connection:
+            return connection.cursor().execute(query, 
parameters).fetch_pandas_all()
+
+    def _get_query(
+        self,
+        metrics: list[Metric],
+        dimensions: list[Dimension],
+        filters: set[Filter | NativeFilter] | None = None,
+        limit: int | None = None,
+        offset: int | None = None,
+    ) -> tuple[str, tuple[FilterValues]]:
+        """
+        Build a query to fetch data from the explorable.
+
+        This also returns the parameters need to run `cursor.execute()`, passed
+        separately to prevent SQL injection.
+        """
+
+        def alias(element: Metric | Dimension) -> str:
+            """
+            Alias a metric or dimension.
+            """
+            return f"{element.id} AS {self._quote(element.id)}"
+
+        dimension_arguments = ", ".join(alias(dimension) for dimension in 
dimensions)
+        metric_arguments = ", ".join(alias(metric) for metric in metrics)
+
+        filters = filters or set()
+        where_clause, where_parameters = self._build_predicates(
+            {filter_ for filter_ in filters if filter_.type == 
PredicateType.WHERE}
         )
-        metric_arguments = ", ".join(
-            f"{metric.id} AS {self._quote(metric.id)}"
-            for metric in metrics
-            for metric in metrics
+        having_clause, having_parameters = self._build_predicates(
+            {filter_ for filter_ in filters if filter_.type == 
PredicateType.HAVING}
         )
 
-        where_clause, parameters = self._build_where_clause(filters)
         query = dedent(
             f"""
             SELECT * FROM SEMANTIC_VIEW(
                 {self.uid()}
-                DIMENSIONS
-                    {dimension_arguments}
-                METRICS
-                    {metric_arguments}
-                {where_clause}
+                {"DIMENSIONS " + dimension_arguments if dimension_arguments 
else ""}
+                {"METRICS " + metric_arguments if metric_arguments else ""}
+                {"WHERE " + where_clause if where_clause else ""}
             )
+            {"HAVING " + having_clause if having_clause else ""}
+            {"LIMIT " + str(limit) if limit is not None else ""}
+            {"OFFSET " + str(offset) if offset is not None else ""}
             """  # noqa: S608
         )
-        print(query)
-
-    __repr__ = uid
-
+        parameters = where_parameters + having_parameters
 
-"""
-  query_object = QueryObject(
-      columns=["I_BRAND"],
-      metrics=["TOTALSALESPRICE"],
-      row_limit=10
-  )
+        return query, parameters
 
-  Generated SQL
-
-  SELECT * FROM SEMANTIC_VIEW(
-    TPCDS_SEMANTIC_VIEW_SM
-    DIMENSIONS
-      ITEM.BRAND
-    METRICS
-      STORESALES.TOTALSALESPRICE
-  )
-  ORDER BY TOTALSALESPRICE DESC
-  LIMIT 10
-"""
+    __repr__ = uid
 
 
 if __name__ == "__main__":
@@ -706,12 +727,17 @@ if __name__ == "__main__":
     )
     print(explorable.get_values(dimension))
     filters = {
-        Filter(dimension, Operator.IS_NOT_NULL, None),
-        Filter(dimension, Operator.NOT_EQUALS, "Books"),
+        Filter(PredicateType.WHERE, dimension, Operator.IS_NOT_NULL, None),
+        Filter(PredicateType.WHERE, dimension, Operator.NOT_EQUALS, "Books"),
     }
     print(explorable.get_values(dimension, filters))
     filters = {
-        Filter(dimension, Operator.IN, frozenset({"Children", "Electronics"})),
+        Filter(
+            PredicateType.WHERE,
+            dimension,
+            Operator.IN,
+            frozenset({"Children", "Electronics"}),
+        ),
     }
     print(explorable.get_values(dimension, filters))
     print(
@@ -723,9 +749,17 @@ if __name__ == "__main__":
                     NUMBER,
                     None,
                     None,
-                )
+                ),
             ],
             [
+                Dimension(
+                    id="DATE.YEAR",
+                    name="YEAR",
+                    type=INTEGER,
+                    description=None,
+                    definition=None,
+                    grain=None,
+                ),
                 Dimension(
                     id="ITEM.CATEGORY",
                     name="CATEGORY",
@@ -733,11 +767,11 @@ if __name__ == "__main__":
                     description=None,
                     definition="I_CATEGORY",
                     grain=None,
-                )
+                ),
             ],
             {
-                NativeFilter("Year = '2002'"),
-                NativeFilter("Month = '12'"),
+                NativeFilter(PredicateType.WHERE, "Year = '2002'"),
+                NativeFilter(PredicateType.WHERE, "Month = '12'"),
             },
         )
     )
diff --git a/superset/semantic_layers/types.py 
b/superset/semantic_layers/types.py
index 0e76703920..048fbb6ab3 100644
--- a/superset/semantic_layers/types.py
+++ b/superset/semantic_layers/types.py
@@ -122,7 +122,7 @@ class ComparableEnum(enum.Enum):
             return self.value < other.value
         return NotImplemented
 
-    def __hash__(self):
+    def __hash__(self) -> int:
         return hash((self.__class__, self.name))
 
 
@@ -158,7 +158,7 @@ class Metric:
     type: type[Type]
 
     # Metric definitions could be SQL expressions, SQL queries, or even a DSL
-    definition: str
+    definition: str | None
 
     description: str | None = None
 
@@ -181,8 +181,14 @@ class Operator(enum.Enum):
 FilterValues = str | int | float | bool | datetime | date | time | timedelta | 
None
 
 
+class PredicateType(enum.Enum):
+    WHERE = "WHERE"
+    HAVING = "HAVING"
+
+
 @dataclass(frozen=True)
 class Filter:
+    type: PredicateType
     column: Dimension | Metric
     operator: Operator
     value: FilterValues | set[FilterValues]
@@ -190,4 +196,5 @@ class Filter:
 
 @dataclass(frozen=True)
 class NativeFilter:
+    type: PredicateType
     definition: str

Reply via email to