This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch explorable
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/explorable by this push:
new 9e16d111fb WIP
9e16d111fb is described below
commit 9e16d111fb03c4b294eebd1eb5a24f21c74727df
Author: Beto Dealmeida <[email protected]>
AuthorDate: Wed Oct 22 16:43:26 2025 -0400
WIP
---
superset/semantic_layers/mapper.py | 188 +++++++++++++++++++++++++++------
superset/semantic_layers/snowflake_.py | 34 ++++++
superset/semantic_layers/types.py | 20 ++--
3 files changed, 199 insertions(+), 43 deletions(-)
diff --git a/superset/semantic_layers/mapper.py
b/superset/semantic_layers/mapper.py
index ccc2a21c5f..45e05aa992 100644
--- a/superset/semantic_layers/mapper.py
+++ b/superset/semantic_layers/mapper.py
@@ -18,62 +18,104 @@
from superset.common.query_object import QueryObject
from superset.semantic_layers.types import (
AdhocExpression,
+ AdhocFilter,
+ DateGrain,
Dimension,
Filter,
GroupLimit,
Metric,
OrderDirection,
OrderTuple,
+ PredicateType,
SemanticQuery,
SemanticViewFeature,
SemanticViewImplementation,
+ TimeGrain,
)
-def map_query_object(query_object: QueryObject) -> SemanticQuery:
+def map_query_object(query_object: QueryObject) -> list[SemanticQuery]:
"""
- Convert a `QueryObject` into a `SemanticQuery`.
+ Convert a `QueryObject` into a list of `SemanticQuery`.
- This function maps the `QueryObject` into a query that is less
visualization-centric
- and more semantic layer-centric. This simplifies the process of adding new
semantic
- layers to Superset, by providing a domain-specific representation of
queries.
+ This function maps the `QueryObject` into query objects that are less
centered on
+ visualization, simplifying the process of adding new semantic layers to
Superset.
"""
semantic_view = query_object.datasource.implementation
validate_query_object(query_object, semantic_view)
- all_metrics = {metric.id: metric for metric in semantic_view.metrics}
- all_dimensions = {dimension.id: dimension for dimension in
semantic_view.dimensions}
+ all_metrics = {metric.name: metric for metric in semantic_view.metrics}
+ all_dimensions = {
+ dimension.name: dimension for dimension in semantic_view.dimensions
+ }
metrics = {all_metrics[metric] for metric in query_object.metrics}
- dimensions = {all_dimensions[dimension] for dimension in
query_object.columns}
- filters = _get_filters_from_query_object(query_object)
+
+ grain = _convert_time_grain(query_object.extras.get("time_grain_sqla"))
+ dimensions = {
+ dimension
+ for dimension in semantic_view.dimensions
+ if dimension.name in query_object.columns
+ and (
+ # if a grain is specified, only include the time dimension if its
grain
+ # matches the requested grain
+ grain is None
+ or dimension.name != query_object.granularity
+ or dimension.grain == grain
+ )
+ }
+
order = _get_order_from_query_object(query_object, all_metrics,
all_dimensions)
limit = query_object.row_limit
offset = query_object.row_offset
+
group_limit = _get_group_limit_from_query_object(
query_object,
all_metrics,
all_dimensions,
)
- return SemanticQuery(
- metrics=metrics,
- dimensions=dimensions,
- filters=filters,
- order=order,
- limit=limit,
- offset=offset,
- group_limit=group_limit,
- )
+ queries = []
+ for offset in [None] + query_object.time_offsets:
+ filters = _get_filters_from_query_object(query_object, offset)
+
+ queries.append(
+ SemanticQuery(
+ metrics=metrics,
+ dimensions=dimensions,
+ filters=filters,
+ order=order,
+ limit=limit,
+ offset=offset,
+ group_limit=group_limit,
+ )
+ )
+
+ return queries
def _get_filters_from_query_object(
query_object: QueryObject,
all_metrics: dict[str, Metric],
all_dimensions: dict[str, Dimension],
-) -> set[Filter]:
- # XXX
- return set()
+) -> set[Filter | AdhocFilter]:
+ filters: set[Filter | AdhocFilter] = set()
+
+ if (
+ query_object.apply_fetch_values_predicate
+ and query_object.datasource.fetch_values_predicate
+ ):
+ filters.add(
+ AdhocFilter(
+ type=PredicateType.WHERE,
+ definition=query_object.datasource.fetch_values_predicate,
+ )
+ )
+
+ for filter_ in query_object.filter:
+ pass
+
+ return filters
def _get_order_from_query_object(
@@ -126,6 +168,19 @@ def _get_group_limit_from_query_object(
)
+def _convert_time_grain(time_grain: str) -> TimeGrain | DateGrain | None:
+ """
+ Convert a time grain string from the query object to a TimeGrain or
DateGrain enum.
+ """
+ if time_grain in TimeGrain.__members__:
+ return TimeGrain[time_grain]
+
+ if time_grain in DateGrain.__members__:
+ return DateGrain[time_grain]
+
+ return None
+
+
def validate_query_object(
query_object: QueryObject,
semantic_view: SemanticViewImplementation,
@@ -136,31 +191,91 @@ def validate_query_object(
If some semantic view implementation supports these features we should add
an
attribute to the `SemanticViewImplementation` to indicate support for them.
"""
- metric_ids = {metric.id for metric in semantic_view.metrics}
- dimension_ids = {dimension.id for dimension in semantic_view.dimensions}
+ _validate_metrics(query_object, semantic_view)
+ _validate_dimensions(query_object, semantic_view)
+ _validate_granularity(query_object, semantic_view)
+ _validate_group_limit(query_object, semantic_view)
+ _validate_orderby(query_object, semantic_view)
+
- # Validate adhoc metrics and non-adhoc metrics
+def _validate_metrics(
+ query_object: QueryObject,
+ semantic_view: SemanticViewImplementation,
+) -> None:
+ """
+ Make sure metrics are defined in the semantic view.
+ """
if any(not isinstance(metric, str) for metric in query_object.metrics):
raise ValueError("Adhoc metrics are not supported in Semantic Views.")
- if not set(query_object.metrics) <= metric_ids:
+ metric_names = {metric.name for metric in semantic_view.metrics}
+ if not set(query_object.metrics) <= metric_names:
raise ValueError("All metrics must be defined in the Semantic View.")
- # Validate adhoc dimensions and non-adhoc dimensions
+
+def _validate_dimensions(
+ query_object: QueryObject,
+ semantic_view: SemanticViewImplementation,
+) -> None:
+ """
+ Make sure all dimensions are defined in the semantic view.
+ """
if any(not isinstance(column, str) for column in query_object.columns):
raise ValueError("Adhoc dimensions are not supported in Semantic
Views.")
- if not set(query_object.columns) <= dimension_ids:
+ dimension_names = {dimension.name for dimension in
semantic_view.dimensions}
+ if not set(query_object.columns) <= dimension_names:
raise ValueError("All dimensions must be defined in the Semantic
View.")
- # Validate group limit features
+
+def _validate_granularity(
+ query_object: QueryObject,
+ semantic_view: SemanticViewImplementation,
+) -> None:
+ """
+ Make sure time column and time grain are valid.
+ """
+ dimension_names = {dimension.name for dimension in
semantic_view.dimensions}
+
+ if time_column := query_object.granularity:
+ if time_column not in dimension_names:
+ raise ValueError(
+ "The time column must be defined in the Semantic View
dimensions."
+ )
+
+ if time_grain := query_object.extras.get("time_grain_sqla"):
+ if not time_column:
+ raise ValueError(
+ "A time column must be specified when a time grain is
provided."
+ )
+
+ supported_time_grains = {
+ dimension.grain
+ for dimension in semantic_view.dimensions
+ if dimension.name == time_column and dimension.grain
+ }
+ if _convert_time_grain(time_grain) not in supported_time_grains:
+ raise ValueError(
+ "The time grain is not supported for the time column in the "
+ "Semantic View."
+ )
+
+
+def _validate_group_limit(
+ query_object: QueryObject,
+ semantic_view: SemanticViewImplementation,
+) -> None:
+ """
+ Validate group limit related features in the query object.
+ """
if (
query_object.series_columns
and SemanticViewFeature.GROUP_LIMIT not in semantic_view.features
):
raise ValueError("Group limit is not supported in this Semantic View.")
- if not set(query_object.series_columns) <= dimension_ids:
+ dimension_names = {dimension.name for dimension in
semantic_view.dimensions}
+ if not set(query_object.series_columns) <= dimension_names:
raise ValueError("All series columns must be defined in the Semantic
View.")
if (
@@ -172,7 +287,14 @@ def validate_query_object(
"View."
)
- # Validate order by
+
+def _validate_orderby(
+ query_object: QueryObject,
+ semantic_view: SemanticViewImplementation,
+) -> None:
+ """
+ Validate order by elements in the query object.
+ """
if (
any(not isinstance(element, str) for element, _ in
query_object.orderby)
and SemanticViewFeature.ADHOC_EXPRESSIONS_IN_ORDERBY
@@ -183,7 +305,9 @@ def validate_query_object(
)
elements = {
- element.id for element, _ in query_object.orderby if
isinstance(element, str)
+ element.name for element, _ in query_object.orderby if
isinstance(element, str)
}
- if not elements <= metric_ids | dimension_ids:
+ metric_names = {metric.name for metric in semantic_view.metrics}
+ dimension_names = {dimension.name for dimension in
semantic_view.dimensions}
+ if not elements <= metric_names | dimension_names:
raise ValueError("All order by elements must be defined in the
Semantic View.")
diff --git a/superset/semantic_layers/snowflake_.py
b/superset/semantic_layers/snowflake_.py
index a195586bb6..da75bcdbbd 100644
--- a/superset/semantic_layers/snowflake_.py
+++ b/superset/semantic_layers/snowflake_.py
@@ -40,6 +40,7 @@ from snowflake.connector import connect, DictCursor
from snowflake.connector.connection import SnowflakeConnection
from snowflake.sqlalchemy.snowdialect import SnowflakeDialect
+from superset.exceptions import SupersetParseError
from superset.semantic_layers.types import (
AdhocExpression,
AdhocFilter,
@@ -67,6 +68,7 @@ from superset.semantic_layers.types import (
TIME,
Type,
)
+from superset.sql.parse import SQLStatement
REQUEST_TYPE = "snowflake"
@@ -94,6 +96,20 @@ def substitute_parameters(query: str, parameters:
Sequence[Any] | None) -> str:
return result
+def validate_order_by(definition: str) -> None:
+ """
+ Validate that an ORDER BY expression is safe to use.
+
+ Note that `definition` could contain multiple expressions separated by
commas.
+ """
+ try:
+ # this ensures that we have a single statement, preventing SQL
injection via a
+ # semicolon in the order by clause
+ SQLStatement(f"SELECT 1 ORDER BY {definition}", "snowflake")
+ except SupersetParseError as ex:
+ raise ValueError("Invalid ORDER BY expression") from ex
+
+
class UserPasswordAuth(BaseModel):
"""
Username and password authentication.
@@ -799,12 +815,30 @@ class SnowflakeSemanticView:
) -> str:
"""
Build the ORDER BY clause from a list of (element, direction) tuples.
+
+ Note that for adhoc expressions, Superset will still add `ASC` or
`DESC` to the
+ end, which means adhoc expressions can contain multiple columns as
long as the
+ last one has no direction specified.
+
+ This is fine:
+
+ gender ASC, COUNT(*)
+
+ But this is not:
+
+ gender ASC, COUNT(*) DESC
+
+ The latter will produce a query that looks like this:
+
+ ... ORDER BY gender ASC, COUNT(*) DESC DESC
+
"""
if not order:
return ""
def build_element(element: Metric | Dimension | AdhocExpression) ->
str:
if isinstance(element, AdhocExpression):
+ validate_order_by(element.definition)
return element.definition
return self._quote(element.id)
diff --git a/superset/semantic_layers/types.py
b/superset/semantic_layers/types.py
index 12a2ab939d..758d28d244 100644
--- a/superset/semantic_layers/types.py
+++ b/superset/semantic_layers/types.py
@@ -133,17 +133,17 @@ class ComparableEnum(enum.Enum):
class TimeGrain(ComparableEnum):
- second = timedelta(seconds=1)
- minute = timedelta(minutes=1)
- hour = timedelta(hours=1)
+ PT1S = timedelta(seconds=1)
+ PT1M = timedelta(minutes=1)
+ PT1H = timedelta(hours=1)
class DateGrain(ComparableEnum):
- day = timedelta(days=1)
- week = timedelta(weeks=1)
- month = timedelta(days=30)
- quarter = timedelta(days=90)
- year = timedelta(days=365)
+ P1D = timedelta(days=1)
+ P1W = timedelta(weeks=1)
+ P1M = timedelta(days=30)
+ P3M = timedelta(days=90)
+ P1Y = timedelta(days=365)
@dataclass(frozen=True)
@@ -152,8 +152,8 @@ class Dimension:
name: str
type: type[Type]
- description: str | None = None
definition: str | None = None
+ description: str | None = None
grain: DateGrain | TimeGrain | None = None
@@ -163,9 +163,7 @@ class Metric:
name: str
type: type[Type]
- # Metric definitions could be SQL expressions, SQL queries, or even a DSL
definition: str | None
-
description: str | None = None