This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch semantic-layer-feature in repository https://gitbox.apache.org/repos/asf/superset.git
commit baec35409eec9a4c7da3db737b0d57fb168417f2 Author: Beto Dealmeida <[email protected]> AuthorDate: Fri Feb 6 15:52:37 2026 -0500 feat: models and DAOs --- .../src/superset_core/semantic_layers/types.py | 2 +- superset/connectors/sqla/models.py | 6 +- superset/daos/semantic_layer.py | 152 +++++++++ superset/explorables/base.py | 128 ++++++- ...6_33d7e0e21daa_add_semantic_layers_and_views.py | 126 +++++++ superset/models/sql_lab.py | 6 +- superset/semantic_layers/models.py | 370 +++++++++++++++++++++ superset/superset_typing.py | 50 ++- superset/utils/core.py | 33 +- tests/integration_tests/charts/api_tests.py | 6 +- 10 files changed, 848 insertions(+), 31 deletions(-) diff --git a/superset-core/src/superset_core/semantic_layers/types.py b/superset-core/src/superset_core/semantic_layers/types.py index 42c0cd49050..46bcf707174 100644 --- a/superset-core/src/superset_core/semantic_layers/types.py +++ b/superset-core/src/superset_core/semantic_layers/types.py @@ -216,7 +216,7 @@ class Metric: name: str type: TypeOf[Type] - definition: str | None + definition: str description: str | None = None diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index be74a199672..6aabe485de5 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -107,6 +107,8 @@ from superset.sql.parse import Table from superset.superset_typing import ( AdhocColumn, AdhocMetric, + DatasetColumnData, + DatasetMetricData, ExplorableData, Metric, QueryObjectDict, @@ -463,8 +465,8 @@ class BaseDatasource( # sqla-specific "sql": self.sql, # one to many - "columns": [o.data for o in self.columns], - "metrics": [o.data for o in self.metrics], + "columns": [cast(DatasetColumnData, o.data) for o in self.columns], + "metrics": [cast(DatasetMetricData, o.data) for o in self.metrics], "folders": self.folders, # TODO deprecate, move logic to JS "order_by_choices": self.order_by_choices, diff --git a/superset/daos/semantic_layer.py b/superset/daos/semantic_layer.py new file mode 100644 index 00000000000..9c591e4a7a4 --- /dev/null +++ b/superset/daos/semantic_layer.py @@ -0,0 +1,152 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""DAOs for semantic layer models.""" + +from __future__ import annotations + +from superset.daos.base import BaseDAO +from superset.extensions import db +from superset.semantic_layers.models import SemanticLayer, SemanticView + + +class SemanticLayerDAO(BaseDAO[SemanticLayer]): + """ + Data Access Object for SemanticLayer model. + """ + + @staticmethod + def validate_uniqueness(name: str) -> bool: + """ + Validate that semantic layer name is unique. + + :param name: Semantic layer name + :return: True if name is unique, False otherwise + """ + query = db.session.query(SemanticLayer).filter(SemanticLayer.name == name) + return not db.session.query(query.exists()).scalar() + + @staticmethod + def validate_update_uniqueness(layer_uuid: str, name: str) -> bool: + """ + Validate that semantic layer name is unique for updates. + + :param layer_uuid: UUID of the semantic layer being updated + :param name: New name to validate + :return: True if name is unique, False otherwise + """ + query = db.session.query(SemanticLayer).filter( + SemanticLayer.name == name, + SemanticLayer.uuid != layer_uuid, + ) + return not db.session.query(query.exists()).scalar() + + @staticmethod + def find_by_name(name: str) -> SemanticLayer | None: + """ + Find semantic layer by name. + + :param name: Semantic layer name + :return: SemanticLayer instance or None + """ + return ( + db.session.query(SemanticLayer) + .filter(SemanticLayer.name == name) + .one_or_none() + ) + + @classmethod + def get_semantic_views(cls, layer_uuid: str) -> list[SemanticView]: + """ + Get all semantic views for a semantic layer. + + :param layer_uuid: UUID of the semantic layer + :return: List of SemanticView instances + """ + return ( + db.session.query(SemanticView) + .filter(SemanticView.semantic_layer_uuid == layer_uuid) + .all() + ) + + +class SemanticViewDAO(BaseDAO[SemanticView]): + """Data Access Object for SemanticView model.""" + + @staticmethod + def find_by_semantic_layer(layer_uuid: str) -> list[SemanticView]: + """ + Find all views for a semantic layer. + + :param layer_uuid: UUID of the semantic layer + :return: List of SemanticView instances + """ + return ( + db.session.query(SemanticView) + .filter(SemanticView.semantic_layer_uuid == layer_uuid) + .all() + ) + + @staticmethod + def validate_uniqueness(name: str, layer_uuid: str) -> bool: + """ + Validate that view name is unique within semantic layer. + + :param name: View name + :param layer_uuid: UUID of the semantic layer + :return: True if name is unique within layer, False otherwise + """ + query = db.session.query(SemanticView).filter( + SemanticView.name == name, + SemanticView.semantic_layer_uuid == layer_uuid, + ) + return not db.session.query(query.exists()).scalar() + + @staticmethod + def validate_update_uniqueness(view_uuid: str, name: str, layer_uuid: str) -> bool: + """ + Validate that view name is unique within semantic layer for updates. + + :param view_uuid: UUID of the view being updated + :param name: New name to validate + :param layer_uuid: UUID of the semantic layer + :return: True if name is unique within layer, False otherwise + """ + query = db.session.query(SemanticView).filter( + SemanticView.name == name, + SemanticView.semantic_layer_uuid == layer_uuid, + SemanticView.uuid != view_uuid, + ) + return not db.session.query(query.exists()).scalar() + + @staticmethod + def find_by_name(name: str, layer_uuid: str) -> SemanticView | None: + """ + Find semantic view by name within a semantic layer. + + :param name: View name + :param layer_uuid: UUID of the semantic layer + :return: SemanticView instance or None + """ + return ( + db.session.query(SemanticView) + .filter( + SemanticView.name == name, + SemanticView.semantic_layer_uuid == layer_uuid, + ) + .one_or_none() + ) diff --git a/superset/explorables/base.py b/superset/explorables/base.py index 2d534b72099..de69257a317 100644 --- a/superset/explorables/base.py +++ b/superset/explorables/base.py @@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict): duration: str | None +@runtime_checkable +class MetricMetadata(Protocol): + """ + Protocol for metric metadata objects. + + Represents a metric that's available on an explorable data source. + Metrics contain SQL expressions or references to semantic layer measures. + + Attributes: + metric_name: Unique identifier for the metric + expression: SQL expression or reference for calculating the metric + verbose_name: Human-readable name for display in the UI + description: Description of what the metric represents + d3format: D3 format string for formatting numeric values + currency: Currency configuration for the metric (JSON object) + warning_text: Warning message to display when using this metric + certified_by: Person or entity that certified this metric + certification_details: Details about the certification + """ + + @property + def metric_name(self) -> str: + """Unique identifier for the metric.""" + + @property + def expression(self) -> str: + """SQL expression or reference for calculating the metric.""" + + @property + def verbose_name(self) -> str | None: + """Human-readable name for display in the UI.""" + + @property + def description(self) -> str | None: + """Description of what the metric represents.""" + + @property + def d3format(self) -> str | None: + """D3 format string for formatting numeric values.""" + + @property + def currency(self) -> dict[str, Any] | None: + """Currency configuration for the metric (JSON object).""" + + @property + def warning_text(self) -> str | None: + """Warning message to display when using this metric.""" + + @property + def certified_by(self) -> str | None: + """Person or entity that certified this metric.""" + + @property + def certification_details(self) -> str | None: + """Details about the certification.""" + + +@runtime_checkable +class ColumnMetadata(Protocol): + """ + Protocol for column metadata objects. + + Represents a column/dimension that's available on an explorable data source. + Used for grouping, filtering, and dimension-based analysis. + + Attributes: + column_name: Unique identifier for the column + type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME') + is_dttm: Whether this column represents a date or time value + verbose_name: Human-readable name for display in the UI + description: Description of what the column represents + groupby: Whether this column is allowed for grouping/aggregation + filterable: Whether this column can be used in filters + expression: SQL expression if this is a calculated column + python_date_format: Python datetime format string for temporal columns + advanced_data_type: Advanced data type classification + extra: Additional metadata stored as JSON + """ + + @property + def column_name(self) -> str: + """Unique identifier for the column.""" + + @property + def type(self) -> str: + """SQL data type of the column.""" + + @property + def is_dttm(self) -> bool: + """Whether this column represents a date or time value.""" + + @property + def verbose_name(self) -> str | None: + """Human-readable name for display in the UI.""" + + @property + def description(self) -> str | None: + """Description of what the column represents.""" + + @property + def groupby(self) -> bool: + """Whether this column is allowed for grouping/aggregation.""" + + @property + def filterable(self) -> bool: + """Whether this column can be used in filters.""" + + @property + def expression(self) -> str | None: + """SQL expression if this is a calculated column.""" + + @property + def python_date_format(self) -> str | None: + """Python datetime format string for temporal columns.""" + + @property + def advanced_data_type(self) -> str | None: + """Advanced data type classification.""" + + @property + def extra(self) -> str | None: + """Additional metadata stored as JSON.""" + + @runtime_checkable class Explorable(Protocol): """ @@ -132,7 +256,7 @@ class Explorable(Protocol): """ @property - def metrics(self) -> list[Any]: + def metrics(self) -> list[MetricMetadata]: """ List of metric metadata objects. @@ -147,7 +271,7 @@ class Explorable(Protocol): # TODO: rename to dimensions @property - def columns(self) -> list[Any]: + def columns(self) -> list[ColumnMetadata]: """ List of column metadata objects. diff --git a/superset/migrations/versions/2025-11-04_11-26_33d7e0e21daa_add_semantic_layers_and_views.py b/superset/migrations/versions/2025-11-04_11-26_33d7e0e21daa_add_semantic_layers_and_views.py new file mode 100644 index 00000000000..1e3b42c5dc3 --- /dev/null +++ b/superset/migrations/versions/2025-11-04_11-26_33d7e0e21daa_add_semantic_layers_and_views.py @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""add_semantic_layers_and_views + +Revision ID: 33d7e0e21daa +Revises: f5b5f88d8526 +Create Date: 2025-11-04 11:26:00.000000 + +""" + +import uuid + +import sqlalchemy as sa +from sqlalchemy_utils import UUIDType +from sqlalchemy_utils.types.json import JSONType + +from superset.extensions import encrypted_field_factory +from superset.migrations.shared.utils import ( + create_fks_for_table, + create_table, + drop_table, +) + +# revision identifiers, used by Alembic. +revision = "33d7e0e21daa" +down_revision = "f5b5f88d8526" + + +def upgrade(): + # Create semantic_layers table + create_table( + "semantic_layers", + sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False), + sa.Column("created_on", sa.DateTime(), nullable=True), + sa.Column("changed_on", sa.DateTime(), nullable=True), + sa.Column("name", sa.String(length=250), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("type", sa.String(length=250), nullable=False), + sa.Column( + "configuration", + encrypted_field_factory.create(JSONType), + nullable=True, + ), + sa.Column("cache_timeout", sa.Integer(), nullable=True), + sa.Column("created_by_fk", sa.Integer(), nullable=True), + sa.Column("changed_by_fk", sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint("uuid"), + ) + + # Create foreign key constraints for semantic_layers + create_fks_for_table( + "fk_semantic_layers_created_by_fk_ab_user", + "semantic_layers", + "ab_user", + ["created_by_fk"], + ["id"], + ) + + create_fks_for_table( + "fk_semantic_layers_changed_by_fk_ab_user", + "semantic_layers", + "ab_user", + ["changed_by_fk"], + ["id"], + ) + + # Create semantic_views table + create_table( + "semantic_views", + sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False), + sa.Column("created_on", sa.DateTime(), nullable=True), + sa.Column("changed_on", sa.DateTime(), nullable=True), + sa.Column("name", sa.String(length=250), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column( + "configuration", + encrypted_field_factory.create(JSONType), + nullable=True, + ), + sa.Column("cache_timeout", sa.Integer(), nullable=True), + sa.Column( + "semantic_layer_uuid", + UUIDType(binary=True), + sa.ForeignKey("semantic_layers.uuid", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("created_by_fk", sa.Integer(), nullable=True), + sa.Column("changed_by_fk", sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint("uuid"), + ) + + # Create foreign key constraints for semantic_views + create_fks_for_table( + "fk_semantic_views_created_by_fk_ab_user", + "semantic_views", + "ab_user", + ["created_by_fk"], + ["id"], + ) + + create_fks_for_table( + "fk_semantic_views_changed_by_fk_ab_user", + "semantic_views", + "ab_user", + ["changed_by_fk"], + ["id"], + ) + + +def downgrade(): + drop_table("semantic_views") + drop_table("semantic_layers") diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py index 956d33053bc..e7d8cecff20 100644 --- a/superset/models/sql_lab.py +++ b/superset/models/sql_lab.py @@ -22,7 +22,7 @@ import logging import re from collections.abc import Hashable from datetime import datetime -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, cast, Optional, TYPE_CHECKING import sqlalchemy as sqla from flask import current_app as app @@ -64,7 +64,7 @@ from superset.sql.parse import ( Table, ) from superset.sqllab.limiting_factor import LimitingFactor -from superset.superset_typing import ExplorableData, QueryObjectDict +from superset.superset_typing import DatasetColumnData, ExplorableData, QueryObjectDict from superset.utils import json from superset.utils.core import ( get_column_name, @@ -258,7 +258,7 @@ class Query( ], "filter_select": True, "name": self.tab_name, - "columns": [o.data for o in self.columns], + "columns": [cast(DatasetColumnData, o.data) for o in self.columns], "metrics": [], "id": self.id, "type": self.type, diff --git a/superset/semantic_layers/models.py b/superset/semantic_layers/models.py new file mode 100644 index 00000000000..4c4fda71b52 --- /dev/null +++ b/superset/semantic_layers/models.py @@ -0,0 +1,370 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Semantic layer models.""" + +from __future__ import annotations + +import uuid +from collections.abc import Hashable +from dataclasses import dataclass +from functools import cached_property +from typing import Any, TYPE_CHECKING + +from flask_appbuilder import Model +from sqlalchemy import Column, ForeignKey, Integer, String, Text +from sqlalchemy.orm import relationship +from sqlalchemy_utils import UUIDType +from sqlalchemy_utils.types.json import JSONType +from superset_core.semantic_layers.mapper import get_results +from superset_core.semantic_layers.semantic_layer import ( + SemanticLayer as SemanticLayerProtocol, +) +from superset_core.semantic_layers.semantic_view import ( + SemanticView as SemanticViewProtocol, +) +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Type, +) + +from superset.common.query_object import QueryObject +from superset.explorables.base import TimeGrainDict +from superset.extensions import encrypted_field_factory +from superset.models.helpers import AuditMixinNullable, QueryResult +from superset.utils import json +from superset.utils.core import GenericDataType + +if TYPE_CHECKING: + from superset.superset_typing import ExplorableData, QueryObjectDict + + +def get_column_type(semantic_type: type[Type]) -> GenericDataType: + """ + Map semantic layer types to generic data types. + """ + if semantic_type in {DATE, DATETIME, TIME}: + return GenericDataType.TEMPORAL + if semantic_type in {INTEGER, NUMBER, DECIMAL, INTERVAL}: + return GenericDataType.NUMERIC + if semantic_type is BOOLEAN: + return GenericDataType.BOOLEAN + if semantic_type in {STRING, OBJECT, BINARY}: + return GenericDataType.STRING + return GenericDataType.STRING + + +@dataclass(frozen=True) +class MetricMetadata: + metric_name: str + expression: str + verbose_name: str | None = None + description: str | None = None + d3format: str | None = None + currency: dict[str, Any] | None = None + warning_text: str | None = None + certified_by: str | None = None + certification_details: str | None = None + + +@dataclass(frozen=True) +class ColumnMetadata: + column_name: str + type: str + is_dttm: bool + verbose_name: str | None = None + description: str | None = None + groupby: bool = True + filterable: bool = True + expression: str | None = None + python_date_format: str | None = None + advanced_data_type: str | None = None + extra: str | None = None + + +class SemanticLayer(AuditMixinNullable, Model): + """ + Semantic layer model. + + A semantic layer provides an abstraction over data sources, + allowing users to query data through a semantic interface. + """ + + __tablename__ = "semantic_layers" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + type = Column(String(250), nullable=False) # snowflake, etc + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic views relationship + semantic_views: list[SemanticView] = relationship( + "SemanticView", + back_populates="semantic_layer", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation( + self, + ) -> SemanticLayerProtocol[Any, SemanticViewProtocol]: + """ + Return semantic layer implementation. + """ + # TODO (betodealmeida): + # return extension_manager.get_contribution("semanticLayers", self.type) + raise NotImplementedError() + + +class SemanticView(AuditMixinNullable, Model): + """ + Semantic view model. + + A semantic view represents a queryable view within a semantic layer. + """ + + __tablename__ = "semantic_views" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic layer relationship + semantic_layer_uuid = Column( + UUIDType(binary=True), + ForeignKey("semantic_layers.uuid", ondelete="CASCADE"), + nullable=False, + ) + semantic_layer: SemanticLayer = relationship( + "SemanticLayer", + back_populates="semantic_views", + foreign_keys=[semantic_layer_uuid], + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation(self) -> SemanticViewProtocol: + """ + Return semantic view implementation. + """ + return self.semantic_layer.implementation.get_semantic_view( + self.name, + json.loads(self.configuration), + ) + + # ========================================================================= + # Explorable protocol implementation + # ========================================================================= + + def get_query_result(self, query_object: QueryObject) -> QueryResult: + return get_results(query_object) + + def get_query_str(self, query_obj: QueryObjectDict) -> str: + return "Not implemented for semantic layers" + + @property + def uid(self) -> str: + return self.implementation.uid() + + @property + def type(self) -> str: + return "semantic_view" + + @property + def metrics(self) -> list[MetricMetadata]: + return [ + MetricMetadata( + metric_name=metric.name, + expression=metric.definition, + description=metric.description, + ) + for metric in self.implementation.get_metrics() + ] + + @property + def columns(self) -> list[ColumnMetadata]: + return [ + ColumnMetadata( + column_name=dimension.name, + type=dimension.type.__name__, + is_dttm=dimension.type in {DATE, TIME, DATETIME}, + description=dimension.description, + expression=dimension.definition, + extra=json.dumps({"grain": dimension.grain}), + ) + for dimension in self.implementation.get_dimensions() + ] + + @property + def column_names(self) -> list[str]: + return [dimension.name for dimension in self.implementation.get_dimensions()] + + @property + def data(self) -> ExplorableData: + return { + # core + "id": self.uuid.hex, + "uid": self.uid, + "type": "semantic_view", + "name": self.name, + "columns": [ + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": dimension.name, + "description": dimension.description, + "expression": dimension.definition, + "filterable": True, + "groupby": True, + "id": None, + "uuid": None, + "is_certified": False, + "is_dttm": dimension.type in {DATE, TIME, DATETIME}, + "python_date_format": None, + "type": dimension.type.__name__, + "type_generic": get_column_type(dimension.type), + "verbose_name": None, + "warning_markdown": None, + } + for dimension in self.implementation.get_dimensions() + ], + "metrics": [ + { + "certification_details": None, + "certified_by": None, + "d3format": None, + "description": metric.description, + "expression": metric.definition, + "id": None, + "uuid": None, + "is_certified": False, + "metric_name": metric.name, + "warning_markdown": None, + "warning_text": None, + "verbose_name": None, + } + for metric in self.implementation.get_metrics() + ], + "database": {}, + # UI features + "verbose_map": {}, + "order_by_choices": [], + "filter_select": True, + "filter_select_enabled": True, + "sql": None, + "select_star": None, + "owners": [], + "description": self.description, + "table_name": self.name, + "column_types": [ + get_column_type(dimension.type) + for dimension in self.implementation.get_dimensions() + ], + "column_names": { + dimension.name for dimension in self.implementation.get_dimensions() + }, + # rare + "column_formats": {}, + "datasource_name": self.name, + "perm": self.perm, + "offset": self.offset, + "cache_timeout": self.cache_timeout, + "params": None, + # sql-specific + "schema": None, + "catalog": None, + "main_dttm_col": None, + "time_grain_sqla": [], + "granularity_sqla": [], + "fetch_values_predicate": None, + "template_params": None, + "is_sqllab_view": False, + "extra": None, + "always_filter_main_dttm": False, + "normalize_columns": False, + # TODO XXX + # "owners": [owner.id for owner in self.owners], + "edit_url": "", + "default_endpoint": None, + "folders": [], + "health_check_message": None, + } + + def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]: + return [] + + @property + def perm(self) -> str: + return self.semantic_layer_uuid.hex + "::" + self.uuid.hex + + @property + def offset(self) -> int: + # always return datetime as UTC + return 0 + + @property + def get_time_grains(self) -> list[TimeGrainDict]: + return [ + { + "name": dimension.grain.name, + "function": "", + "duration": dimension.grain.representation, + } + for dimension in self.implementation.get_dimensions() + if dimension.grain + ] + + def has_drill_by_columns(self, column_names: list[str]) -> bool: + dimension_names = { + dimension.name for dimension in self.implementation.get_dimensions() + } + return all(column_name in dimension_names for column_name in column_names) + + @property + def is_rls_supported(self) -> bool: + return False + + @property + def query_language(self) -> str | None: + return None diff --git a/superset/superset_typing.py b/superset/superset_typing.py index 02e294a08cf..ef002ac86ba 100644 --- a/superset/superset_typing.py +++ b/superset/superset_typing.py @@ -30,6 +30,46 @@ if TYPE_CHECKING: SQLType: TypeAlias = TypeEngine | type[TypeEngine] +class DatasetColumnData(TypedDict, total=False): + """Type for column metadata in ExplorableData datasets.""" + + advanced_data_type: str | None + certification_details: str | None + certified_by: str | None + column_name: str + description: str | None + expression: str | None + filterable: bool + groupby: bool + id: int | None + uuid: str | None + is_certified: bool + is_dttm: bool + python_date_format: str | None + type: str + type_generic: NotRequired["GenericDataType" | None] + verbose_name: str | None + warning_markdown: str | None + + +class DatasetMetricData(TypedDict, total=False): + """Type for metric metadata in ExplorableData datasets.""" + + certification_details: str | None + certified_by: str | None + currency: NotRequired[dict[str, Any]] + d3format: str | None + description: str | None + expression: str | None + id: int | None + uuid: str | None + is_certified: bool + metric_name: str + warning_markdown: str | None + warning_text: str | None + verbose_name: str | None + + class LegacyMetric(TypedDict): label: str | None @@ -254,7 +294,7 @@ class ExplorableData(TypedDict, total=False): """ # Core fields from BaseDatasource.data - id: int + id: int | str # String for UUID-based explorables like SemanticView uid: str column_formats: dict[str, str | None] description: str | None @@ -274,8 +314,8 @@ class ExplorableData(TypedDict, total=False): perm: str | None edit_url: str sql: str | None - columns: list[dict[str, Any]] - metrics: list[dict[str, Any]] + columns: list["DatasetColumnData"] + metrics: list["DatasetMetricData"] folders: Any # JSON field, can be list or dict order_by_choices: list[tuple[str, str]] owners: list[int] | list[dict[str, Any]] # Can be either format @@ -283,8 +323,8 @@ class ExplorableData(TypedDict, total=False): select_star: str | None # Additional fields from SqlaTable and data_for_slices - column_types: list[Any] - column_names: set[str] | set[Any] + column_types: list["GenericDataType"] + column_names: set[str] | list[str] granularity_sqla: list[tuple[Any, Any]] time_grain_sqla: list[tuple[Any, Any]] main_dttm_col: str | None diff --git a/superset/utils/core.py b/superset/utils/core.py index 23a3017bf2c..795be29be69 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -96,7 +96,6 @@ from superset.exceptions import ( SupersetException, SupersetTimeoutException, ) -from superset.explorables.base import Explorable from superset.sql.parse import sanitize_clause from superset.superset_typing import ( AdhocColumn, @@ -115,7 +114,7 @@ from superset.utils.hashing import hash_from_dict, hash_from_str from superset.utils.pandas import detect_datetime_format if TYPE_CHECKING: - from superset.connectors.sqla.models import TableColumn + from superset.explorables.base import ColumnMetadata, Explorable from superset.models.core import Database logging.getLogger("MARKDOWN").setLevel(logging.INFO) @@ -200,6 +199,7 @@ class DatasourceType(StrEnum): QUERY = "query" SAVEDQUERY = "saved_query" VIEW = "view" + SEMANTIC_VIEW = "semantic_view" class LoggerLevel(StrEnum): @@ -1730,15 +1730,12 @@ def get_metric_type_from_column(column: Any, datasource: Explorable) -> str: :return: The inferred metric type as a string, or an empty string if the column is not a metric or no valid operation is found. """ - - from superset.connectors.sqla.models import SqlMetric - - metric: SqlMetric = next( - (metric for metric in datasource.metrics if metric.metric_name == column), - SqlMetric(metric_name=""), + metric = next( + (m for m in datasource.metrics if m.metric_name == column), + None, ) - if metric.metric_name == "": + if metric is None: return "" expression: str = metric.expression @@ -1784,7 +1781,7 @@ def extract_dataframe_dtypes( generic_types: list[GenericDataType] = [] for column in df.columns: - column_object = columns_by_name.get(column) + column_object = columns_by_name.get(str(column)) series = df[column] inferred_type: str = "" if series.isna().all(): @@ -1814,11 +1811,17 @@ def extract_dataframe_dtypes( return generic_types -def extract_column_dtype(col: TableColumn) -> GenericDataType: - if col.is_temporal: +def extract_column_dtype(col: ColumnMetadata) -> GenericDataType: + # Check for temporal type + if hasattr(col, "is_temporal") and col.is_temporal: + return GenericDataType.TEMPORAL + if col.is_dttm: return GenericDataType.TEMPORAL - if col.is_numeric: + + # Check for numeric type + if hasattr(col, "is_numeric") and col.is_numeric: return GenericDataType.NUMERIC + # TODO: add check for boolean data type when proper support is added return GenericDataType.STRING @@ -1832,9 +1835,7 @@ def get_time_filter_status( applied_time_extras: dict[str, str], ) -> tuple[list[dict[str, str]], list[dict[str, str]]]: temporal_columns: set[Any] = { - (col.column_name if hasattr(col, "column_name") else col.get("column_name")) - for col in datasource.columns - if (col.is_dttm if hasattr(col, "is_dttm") else col.get("is_dttm")) + col.column_name for col in datasource.columns if col.is_dttm } applied: list[dict[str, str]] = [] rejected: list[dict[str, str]] = [] diff --git a/tests/integration_tests/charts/api_tests.py b/tests/integration_tests/charts/api_tests.py index b8b60355419..ea35b176449 100644 --- a/tests/integration_tests/charts/api_tests.py +++ b/tests/integration_tests/charts/api_tests.py @@ -626,7 +626,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase): assert response == { "message": { "datasource_type": [ - "Must be one of: table, dataset, query, saved_query, view." + "Must be one of: table, dataset, query, saved_query, view, " + "semantic_view." ] } } @@ -981,7 +982,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase): assert response == { "message": { "datasource_type": [ - "Must be one of: table, dataset, query, saved_query, view." + "Must be one of: table, dataset, query, saved_query, view, " + "semantic_view." ] } }
