This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch explorable
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/explorable by this push:
     new 9e16d111fb WIP
9e16d111fb is described below

commit 9e16d111fb03c4b294eebd1eb5a24f21c74727df
Author: Beto Dealmeida <[email protected]>
AuthorDate: Wed Oct 22 16:43:26 2025 -0400

    WIP
---
 superset/semantic_layers/mapper.py     | 188 +++++++++++++++++++++++++++------
 superset/semantic_layers/snowflake_.py |  34 ++++++
 superset/semantic_layers/types.py      |  20 ++--
 3 files changed, 199 insertions(+), 43 deletions(-)

diff --git a/superset/semantic_layers/mapper.py b/superset/semantic_layers/mapper.py
index ccc2a21c5f..45e05aa992 100644
--- a/superset/semantic_layers/mapper.py
+++ b/superset/semantic_layers/mapper.py
@@ -18,62 +18,104 @@
 from superset.common.query_object import QueryObject
 from superset.semantic_layers.types import (
     AdhocExpression,
+    AdhocFilter,
+    DateGrain,
     Dimension,
     Filter,
     GroupLimit,
     Metric,
     OrderDirection,
     OrderTuple,
+    PredicateType,
     SemanticQuery,
     SemanticViewFeature,
     SemanticViewImplementation,
+    TimeGrain,
 )
 
 
-def map_query_object(query_object: QueryObject) -> SemanticQuery:
+def map_query_object(query_object: QueryObject) -> list[SemanticQuery]:
     """
-    Convert a `QueryObject` into a `SemanticQuery`.
+    Convert a `QueryObject` into a list of `SemanticQuery`.
 
-    This function maps the `QueryObject` into a query that is less visualization-centric
-    and more semantic layer-centric. This simplifies the process of adding new semantic
-    layers to Superset, by providing a domain-specific representation of queries.
+    This function maps the `QueryObject` into query objects that are less centered on
+    visualization, simplifying the process of adding new semantic layers to Superset.
     """
     semantic_view = query_object.datasource.implementation
     validate_query_object(query_object, semantic_view)
 
-    all_metrics = {metric.id: metric for metric in semantic_view.metrics}
-    all_dimensions = {dimension.id: dimension for dimension in semantic_view.dimensions}
+    all_metrics = {metric.name: metric for metric in semantic_view.metrics}
+    all_dimensions = {
+        dimension.name: dimension for dimension in semantic_view.dimensions
+    }
 
     metrics = {all_metrics[metric] for metric in query_object.metrics}
-    dimensions = {all_dimensions[dimension] for dimension in query_object.columns}
-    filters = _get_filters_from_query_object(query_object)
+
+    grain = _convert_time_grain(query_object.extras.get("time_grain_sqla"))
+    dimensions = {
+        dimension
+        for dimension in semantic_view.dimensions
+        if dimension.name in query_object.columns
+        and (
+            # if a grain is specified, only include the time dimension if its grain
+            # matches the requested grain
+            grain is None
+            or dimension.name != query_object.granularity
+            or dimension.grain == grain
+        )
+    }
+
     order = _get_order_from_query_object(query_object, all_metrics, all_dimensions)
     limit = query_object.row_limit
     offset = query_object.row_offset
+
     group_limit = _get_group_limit_from_query_object(
         query_object,
         all_metrics,
         all_dimensions,
     )
 
-    return SemanticQuery(
-        metrics=metrics,
-        dimensions=dimensions,
-        filters=filters,
-        order=order,
-        limit=limit,
-        offset=offset,
-        group_limit=group_limit,
-    )
+    queries = []
+    for offset in [None] + query_object.time_offsets:
+        filters = _get_filters_from_query_object(query_object, offset)
+
+        queries.append(
+            SemanticQuery(
+                metrics=metrics,
+                dimensions=dimensions,
+                filters=filters,
+                order=order,
+                limit=limit,
+                offset=offset,
+                group_limit=group_limit,
+            )
+        )
+
+    return queries
 
 
 def _get_filters_from_query_object(
     query_object: QueryObject,
     all_metrics: dict[str, Metric],
     all_dimensions: dict[str, Dimension],
-) -> set[Filter]:
-    # XXX
-    return set()
+) -> set[Filter | AdhocFilter]:
+    filters: set[Filter | AdhocFilter] = set()
+
+    if (
+        query_object.apply_fetch_values_predicate
+        and query_object.datasource.fetch_values_predicate
+    ):
+        filters.add(
+            AdhocFilter(
+                type=PredicateType.WHERE,
+                definition=query_object.datasource.fetch_values_predicate,
+            )
+        )
+
+    for filter_ in query_object.filter:
+        pass
+
+    return filters
 
 
 def _get_order_from_query_object(
@@ -126,6 +168,19 @@ def _get_group_limit_from_query_object(
     )
 
 
+def _convert_time_grain(time_grain: str) -> TimeGrain | DateGrain | None:
+    """
+    Convert a time grain string from the query object to a TimeGrain or DateGrain enum.
+    """
+    if time_grain in TimeGrain.__members__:
+        return TimeGrain[time_grain]
+
+    if time_grain in DateGrain.__members__:
+        return DateGrain[time_grain]
+
+    return None
+
+
 def validate_query_object(
     query_object: QueryObject,
     semantic_view: SemanticViewImplementation,
@@ -136,31 +191,91 @@ def validate_query_object(
     If some semantic view implementation supports these features we should add an
     attribute to the `SemanticViewImplementation` to indicate support for them.
     """
-    metric_ids = {metric.id for metric in semantic_view.metrics}
-    dimension_ids = {dimension.id for dimension in semantic_view.dimensions}
+    _validate_metrics(query_object, semantic_view)
+    _validate_dimensions(query_object, semantic_view)
+    _validate_granularity(query_object, semantic_view)
+    _validate_group_limit(query_object, semantic_view)
+    _validate_orderby(query_object, semantic_view)
+
 
-    # Validate adhoc metrics and non-adhoc metrics
+def _validate_metrics(
+    query_object: QueryObject,
+    semantic_view: SemanticViewImplementation,
+) -> None:
+    """
+    Make sure metrics are defined in the semantic view.
+    """
     if any(not isinstance(metric, str) for metric in query_object.metrics):
         raise ValueError("Adhoc metrics are not supported in Semantic Views.")
 
-    if not set(query_object.metrics) <= metric_ids:
+    metric_names = {metric.name for metric in semantic_view.metrics}
+    if not set(query_object.metrics) <= metric_names:
         raise ValueError("All metrics must be defined in the Semantic View.")
 
-    # Validate adhoc dimensions and non-adhoc dimensions
+
+def _validate_dimensions(
+    query_object: QueryObject,
+    semantic_view: SemanticViewImplementation,
+) -> None:
+    """
+    Make sure all dimensions are defined in the semantic view.
+    """
     if any(not isinstance(column, str) for column in query_object.columns):
         raise ValueError("Adhoc dimensions are not supported in Semantic Views.")
 
-    if not set(query_object.columns) <= dimension_ids:
+    dimension_names = {dimension.name for dimension in semantic_view.dimensions}
+    if not set(query_object.columns) <= dimension_names:
         raise ValueError("All dimensions must be defined in the Semantic View.")
 
-    # Validate group limit features
+
+def _validate_granularity(
+    query_object: QueryObject,
+    semantic_view: SemanticViewImplementation,
+) -> None:
+    """
+    Make sure time column and time grain are valid.
+    """
+    dimension_names = {dimension.name for dimension in semantic_view.dimensions}
+
+    if time_column := query_object.granularity:
+        if time_column not in dimension_names:
+            raise ValueError(
+                "The time column must be defined in the Semantic View dimensions."
+            )
+
+    if time_grain := query_object.extras.get("time_grain_sqla"):
+        if not time_column:
+            raise ValueError(
+                "A time column must be specified when a time grain is provided."
+            )
+
+        supported_time_grains = {
+            dimension.grain
+            for dimension in semantic_view.dimensions
+            if dimension.name == time_column and dimension.grain
+        }
+        if _convert_time_grain(time_grain) not in supported_time_grains:
+            raise ValueError(
+                "The time grain is not supported for the time column in the "
+                "Semantic View."
+            )
+
+
+def _validate_group_limit(
+    query_object: QueryObject,
+    semantic_view: SemanticViewImplementation,
+) -> None:
+    """
+    Validate group limit related features in the query object.
+    """
     if (
         query_object.series_columns
         and SemanticViewFeature.GROUP_LIMIT not in semantic_view.features
     ):
         raise ValueError("Group limit is not supported in this Semantic View.")
 
-    if not set(query_object.series_columns) <= dimension_ids:
+    dimension_names = {dimension.name for dimension in semantic_view.dimensions}
+    if not set(query_object.series_columns) <= dimension_names:
         raise ValueError("All series columns must be defined in the Semantic View.")
 
     if (
@@ -172,7 +287,14 @@ def validate_query_object(
             "View."
         )
 
-    # Validate order by
+
+def _validate_orderby(
+    query_object: QueryObject,
+    semantic_view: SemanticViewImplementation,
+) -> None:
+    """
+    Validate order by elements in the query object.
+    """
     if (
         any(not isinstance(element, str) for element, _ in query_object.orderby)
         and SemanticViewFeature.ADHOC_EXPRESSIONS_IN_ORDERBY
@@ -183,7 +305,9 @@ def validate_query_object(
         )
 
     elements = {
-        element.id for element, _ in query_object.orderby if isinstance(element, str)
+        element.name for element, _ in query_object.orderby if isinstance(element, str)
     }
-    if not elements <= metric_ids | dimension_ids:
+    metric_names = {metric.name for metric in semantic_view.metrics}
+    dimension_names = {dimension.name for dimension in semantic_view.dimensions}
+    if not elements <= metric_names | dimension_names:
         raise ValueError("All order by elements must be defined in the Semantic View.")
diff --git a/superset/semantic_layers/snowflake_.py b/superset/semantic_layers/snowflake_.py
index a195586bb6..da75bcdbbd 100644
--- a/superset/semantic_layers/snowflake_.py
+++ b/superset/semantic_layers/snowflake_.py
@@ -40,6 +40,7 @@ from snowflake.connector import connect, DictCursor
 from snowflake.connector.connection import SnowflakeConnection
 from snowflake.sqlalchemy.snowdialect import SnowflakeDialect
 
+from superset.exceptions import SupersetParseError
 from superset.semantic_layers.types import (
     AdhocExpression,
     AdhocFilter,
@@ -67,6 +68,7 @@ from superset.semantic_layers.types import (
     TIME,
     Type,
 )
+from superset.sql.parse import SQLStatement
 
 REQUEST_TYPE = "snowflake"
 
@@ -94,6 +96,20 @@ def substitute_parameters(query: str, parameters: Sequence[Any] | None) -> str:
     return result
 
 
+def validate_order_by(definition: str) -> None:
+    """
+    Validate that an ORDER BY expression is safe to use.
+
+    Note that `definition` could contain multiple expressions separated by commas.
+    """
+    try:
+        # this ensures that we have a single statement, preventing SQL injection via a
+        # semicolon in the order by clause
+        SQLStatement(f"SELECT 1 ORDER BY {definition}", "snowflake")
+    except SupersetParseError as ex:
+        raise ValueError("Invalid ORDER BY expression") from ex
+
+
 class UserPasswordAuth(BaseModel):
     """
     Username and password authentication.
@@ -799,12 +815,30 @@ class SnowflakeSemanticView:
     ) -> str:
         """
         Build the ORDER BY clause from a list of (element, direction) tuples.
+
+        Note that for adhoc expressions, Superset will still add `ASC` or `DESC` to the
+        end, which means adhoc expressions can contain multiple columns as long as the
+        last one has no direction specified.
+
+        This is fine:
+
+            gender ASC, COUNT(*)
+
+        But this is not
+
+            gender ASC, COUNT(*) DESC
+
+        The latter will produce a query that looks like this:
+
+            ... ORDER BY gender ASC, COUNT(*) DESC DESC
+
         """
         if not order:
             return ""
 
         def build_element(element: Metric | Dimension | AdhocExpression) -> str:
             if isinstance(element, AdhocExpression):
+                validate_order_by(element.definition)
                 return element.definition
             return self._quote(element.id)
 
diff --git a/superset/semantic_layers/types.py b/superset/semantic_layers/types.py
index 12a2ab939d..758d28d244 100644
--- a/superset/semantic_layers/types.py
+++ b/superset/semantic_layers/types.py
@@ -133,17 +133,17 @@ class ComparableEnum(enum.Enum):
 
 
 class TimeGrain(ComparableEnum):
-    second = timedelta(seconds=1)
-    minute = timedelta(minutes=1)
-    hour = timedelta(hours=1)
+    PT1S = timedelta(seconds=1)
+    PT1M = timedelta(minutes=1)
+    PT1H = timedelta(hours=1)
 
 
 class DateGrain(ComparableEnum):
-    day = timedelta(days=1)
-    week = timedelta(weeks=1)
-    month = timedelta(days=30)
-    quarter = timedelta(days=90)
-    year = timedelta(days=365)
+    P1D = timedelta(days=1)
+    P1W = timedelta(weeks=1)
+    P1M = timedelta(days=30)
+    P3M = timedelta(days=90)
+    P1Y = timedelta(days=365)
 
 
 @dataclass(frozen=True)
@@ -152,8 +152,8 @@ class Dimension:
     name: str
     type: type[Type]
 
-    description: str | None = None
     definition: str | None = None
+    description: str | None = None
     grain: DateGrain | TimeGrain | None = None
 
 
@@ -163,9 +163,7 @@ class Metric:
     name: str
     type: type[Type]
 
-    # Metric definitions could be SQL expressions, SQL queries, or even a DSL
     definition: str | None
-
     description: str | None = None
 
 

Reply via email to