codeant-ai-for-open-source[bot] commented on code in PR #37816: URL: https://github.com/apache/superset/pull/37816#discussion_r2875311201
########## superset/semantic_layers/models.py: ########## @@ -0,0 +1,398 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Semantic layer models.""" + +from __future__ import annotations + +import uuid +from collections.abc import Hashable +from dataclasses import dataclass +from functools import cached_property +from typing import Any, TYPE_CHECKING + +from flask_appbuilder import Model +from sqlalchemy import Column, ForeignKey, Identity, Integer, String, Text +from sqlalchemy.orm import relationship +from sqlalchemy_utils import UUIDType +from sqlalchemy_utils.types.json import JSONType +from superset_core.semantic_layers.semantic_layer import ( + SemanticLayer as SemanticLayerABC, +) +from superset_core.semantic_layers.semantic_view import ( + SemanticView as SemanticViewABC, +) +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Type, +) + +from superset.common.query_object import QueryObject +from superset.explorables.base import TimeGrainDict +from superset.extensions import encrypted_field_factory +from superset.models.helpers import AuditMixinNullable, QueryResult +from superset.semantic_layers.mapper import get_results +from superset.semantic_layers.registry import registry +from superset.utils import json +from superset.utils.core import GenericDataType + +if TYPE_CHECKING: + from superset.superset_typing import ExplorableData, QueryObjectDict + + +def get_column_type(semantic_type: type[Type]) -> GenericDataType: + """ + Map semantic layer types to generic data types. + """ + if semantic_type in {DATE, DATETIME, TIME}: + return GenericDataType.TEMPORAL + if semantic_type in {INTEGER, NUMBER, DECIMAL, INTERVAL}: + return GenericDataType.NUMERIC + if semantic_type is BOOLEAN: + return GenericDataType.BOOLEAN + if semantic_type in {STRING, OBJECT, BINARY}: + return GenericDataType.STRING + return GenericDataType.STRING + + +@dataclass(frozen=True) +class MetricMetadata: + metric_name: str + expression: str + verbose_name: str | None = None + description: str | None = None + d3format: str | None = None + currency: dict[str, Any] | None = None + warning_text: str | None = None + certified_by: str | None = None + certification_details: str | None = None + + +@dataclass(frozen=True) +class ColumnMetadata: + column_name: str + type: str + is_dttm: bool + verbose_name: str | None = None + description: str | None = None + groupby: bool = True + filterable: bool = True + expression: str | None = None + python_date_format: str | None = None + advanced_data_type: str | None = None + extra: str | None = None + + +class SemanticLayer(AuditMixinNullable, Model): + """ + Semantic layer model. + + A semantic layer provides an abstraction over data sources, + allowing users to query data through a semantic interface. + """ + + __tablename__ = "semantic_layers" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + type = Column(String(250), nullable=False) # snowflake, etc + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic views relationship + semantic_views: list[SemanticView] = relationship( + "SemanticView", + back_populates="semantic_layer", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation( + self, + ) -> SemanticLayerABC[Any, SemanticViewABC]: + """ + Return semantic layer implementation. + """ + # TODO (betodealmeida): + # return extension_manager.get_contribution("semanticLayers", self.type) + class_ = registry[self.type] + return class_.from_configuration(json.loads(self.configuration)) + + +class SemanticView(AuditMixinNullable, Model): + """ + Semantic view model. + + A semantic view represents a queryable view within a semantic layer. + """ + + __tablename__ = "semantic_views" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + id = Column(Integer, Identity(), unique=True) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic layer relationship + semantic_layer_uuid = Column( + UUIDType(binary=True), + ForeignKey("semantic_layers.uuid", ondelete="CASCADE"), + nullable=False, + ) + semantic_layer: SemanticLayer = relationship( + "SemanticLayer", + back_populates="semantic_views", + foreign_keys=[semantic_layer_uuid], + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation(self) -> SemanticViewABC: + """ + Return semantic view implementation. + """ + return self.semantic_layer.implementation.get_semantic_view( + self.name, + json.loads(self.configuration), + ) Review Comment: **Suggestion:** The `SemanticView.implementation` property also calls `json.loads(self.configuration)` even though the column default is `dict`, so for new views `self.configuration` will be a dict and `json.loads` will raise a `TypeError`; it should normalize configuration values that may already be dicts or strings before use. [type error] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ❌ SemanticView implementation access fails when configuration is dict. - ❌ Breaks semantic view metrics/columns/data resolution paths. - ⚠️ Blocks Explore metadata for semantic_view datasources when enabled. ``` </details> ```suggestion @cached_property def implementation(self) -> SemanticViewABC: """ Return semantic view implementation. """ raw_config = self.configuration if isinstance(raw_config, str): config = json.loads(raw_config) else: config = raw_config or {} return self.semantic_layer.implementation.get_semantic_view( self.name, config, ) ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. `SemanticView.configuration` is declared as `Column(encrypted_field_factory.create(JSONType), default=dict)` in `superset/semantic_layers/models.py:170`, and the migration `superset/migrations/versions/2025-11-04_11-26_33d7e0e21daa_add_semantic_layers_and_views.py:91–95` creates the `configuration` column with the same encrypted JSON type. 2. For such columns SQLAlchemy (and the JSONType/encrypted wrapper) commonly returns a Python dict when loading from the database, and application code can also assign a dict directly to `SemanticView.configuration` prior to commit. 3. The `SemanticView.implementation` cached property at `superset/semantic_layers/models.py:188–196` currently does `json.loads(self.configuration)` and passes the result as the configuration argument into `self.semantic_layer.implementation.get_semantic_view(self.name, ...)`. 4. If `self.configuration` is already a dict (e.g., for a persisted semantic view row whose JSON configuration was deserialized), accessing `view.implementation` causes `json.loads(self.configuration)` to raise `TypeError("the JSON object must be str, bytes or bytearray, not dict")`, which then breaks any downstream property that relies on the implementation such as `view.metrics` (`models.py:216–225`), `view.columns` (`models.py:227–241`), `view.data` (`models.py:247–337`), and thus any Explore/metadata APIs that operate on semantic views. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** superset/semantic_layers/models.py **Line:** 188:196 **Comment:** *Type Error: The `SemanticView.implementation` property also calls `json.loads(self.configuration)` even though the column default is `dict`, so for new views `self.configuration` will be a dict and `json.loads` will raise a `TypeError`; it should normalize configuration values that may already be dicts or strings before use. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=bf0f828b750d2be910dd753944d96e40104037b390465a8366d11a67272895b4&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=bf0f828b750d2be910dd753944d96e40104037b390465a8366d11a67272895b4&reaction=dislike'>👎</a> ########## superset/daos/semantic_layer.py: ########## @@ -0,0 +1,152 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""DAOs for semantic layer models.""" + +from __future__ import annotations + +from superset.daos.base import BaseDAO +from superset.extensions import db +from superset.semantic_layers.models import SemanticLayer, SemanticView + + +class SemanticLayerDAO(BaseDAO[SemanticLayer]): + """ + Data Access Object for SemanticLayer model. + """ + Review Comment: **Suggestion:** The DAO for the semantic layer model does not override the `id_column_name`, so BaseDAO methods like `find_by_id` and `find_by_id_or_uuid` will try to access a non-existent `id` column on the `SemanticLayer` model (which only has a `uuid` primary key), leading to an AttributeError at runtime; explicitly setting `id_column_name = "uuid"` on the DAO fixes this mismatch. [logic error] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ❌ SemanticLayerDAO.find_by_id_or_uuid raises AttributeError on use. - ⚠️ Future semantic-layer APIs using DAO helpers may crash. ``` </details> ```suggestion # SemanticLayer uses 'uuid' as the primary key field id_column_name = "uuid" ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Open a Python shell in the Superset environment with this PR code and import `SemanticLayerDAO` from `superset.daos.semantic_layer` (defined at `superset/daos/semantic_layer.py:27`). 2. Inspect the `SemanticLayer` model in `superset/semantic_layers/models.py:110-123` and note that it defines `uuid = Column(UUIDType(...), primary_key=True, ...)` at line 120 and does NOT define an `id` column anywhere in the class. 3. From the shell, call the inherited helper `SemanticLayerDAO.find_by_id_or_uuid("<some-uuid-string>")`, which is implemented in `BaseDAO.find_by_id_or_uuid` at `superset/daos/base.py:179-205`. 4. During execution, `find_by_id_or_uuid` reaches line 194 in `superset/daos/base.py`, executing `id_column = getattr(cls.model_cls, cls.id_column_name)` with `cls.model_cls` set to `SemanticLayer` and `cls.id_column_name` still the default `"id"`, causing Python to raise `AttributeError: type object 'SemanticLayer' has no attribute 'id'`; setting `id_column_name = "uuid"` on `SemanticLayerDAO` would prevent this by pointing BaseDAO to an existing column. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** superset/daos/semantic_layer.py **Line:** 31:31 **Comment:** *Logic Error: The DAO for the semantic layer model does not override the `id_column_name`, so BaseDAO methods like `find_by_id` and `find_by_id_or_uuid` will try to access a non-existent `id` column on the `SemanticLayer` model (which only has a `uuid` primary key), leading to an AttributeError at runtime; explicitly setting `id_column_name = "uuid"` on the DAO fixes this mismatch. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=8c7948e66db8047c9070da6564d07bef263c056b32e5c140703bca3bc4595267&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=8c7948e66db8047c9070da6564d07bef263c056b32e5c140703bca3bc4595267&reaction=dislike'>👎</a> ########## tests/unit_tests/semantic_layers/models_test.py: ########## @@ -0,0 +1,621 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Tests for semantic layer models.""" + +from __future__ import annotations + +import uuid +from unittest.mock import MagicMock, patch + +import pytest +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Day, + Dimension, + Metric, + Type, +) + +from superset.semantic_layers.models import ( + ColumnMetadata, + MetricMetadata, + SemanticLayer, + SemanticView, + get_column_type, +) +from superset.utils.core import GenericDataType + + +# ============================================================================= +# get_column_type tests +# ============================================================================= + + +def test_get_column_type_temporal_date() -> None: + """Test that DATE maps to TEMPORAL.""" + assert get_column_type(DATE) == GenericDataType.TEMPORAL + + +def test_get_column_type_temporal_datetime() -> None: + """Test that DATETIME maps to TEMPORAL.""" + assert get_column_type(DATETIME) == GenericDataType.TEMPORAL + + +def test_get_column_type_temporal_time() -> None: + """Test that TIME maps to TEMPORAL.""" + assert get_column_type(TIME) == GenericDataType.TEMPORAL + + +def test_get_column_type_numeric_integer() -> None: + """Test that INTEGER maps to NUMERIC.""" + assert get_column_type(INTEGER) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_number() -> None: + """Test that NUMBER maps to NUMERIC.""" + assert get_column_type(NUMBER) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_decimal() -> None: + """Test that DECIMAL maps to NUMERIC.""" + assert get_column_type(DECIMAL) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_interval() -> None: + """Test that INTERVAL maps to NUMERIC.""" + assert get_column_type(INTERVAL) == GenericDataType.NUMERIC + + +def test_get_column_type_boolean() -> None: + """Test that BOOLEAN maps to BOOLEAN.""" + assert get_column_type(BOOLEAN) == GenericDataType.BOOLEAN + + +def test_get_column_type_string() -> None: + """Test that STRING maps to STRING.""" + assert get_column_type(STRING) == GenericDataType.STRING + + +def test_get_column_type_object() -> None: + """Test that OBJECT maps to STRING.""" + assert get_column_type(OBJECT) == GenericDataType.STRING + + +def test_get_column_type_binary() -> None: + """Test that BINARY maps to STRING.""" + assert get_column_type(BINARY) == GenericDataType.STRING + + +def test_get_column_type_unknown() -> None: + """Test that unknown types default to STRING.""" + + class UnknownType(Type): + pass + + assert get_column_type(UnknownType) == GenericDataType.STRING + + +# ============================================================================= +# MetricMetadata tests +# ============================================================================= + + +def test_metric_metadata_required_fields() -> None: + """Test MetricMetadata with required fields only.""" + metadata = MetricMetadata( + metric_name="revenue", + expression="SUM(amount)", + ) + assert metadata.metric_name == "revenue" + assert metadata.expression == "SUM(amount)" + assert metadata.verbose_name is None + assert metadata.description is None + assert metadata.d3format is None + assert metadata.currency is None + assert metadata.warning_text is None + assert metadata.certified_by is None + assert metadata.certification_details is None + + +def test_metric_metadata_all_fields() -> None: + """Test MetricMetadata with all fields.""" + metadata = MetricMetadata( + metric_name="revenue", + expression="SUM(amount)", + verbose_name="Total Revenue", + description="Sum of all revenue", + d3format="$,.2f", + currency={"symbol": "$", "symbolPosition": "prefix"}, + warning_text="Data may be incomplete", + certified_by="Data Team", + certification_details="Verified Q1 2024", + ) + assert metadata.metric_name == "revenue" + assert metadata.expression == "SUM(amount)" + assert metadata.verbose_name == "Total Revenue" + assert metadata.description == "Sum of all revenue" + assert metadata.d3format == "$,.2f" + assert metadata.currency == {"symbol": "$", "symbolPosition": "prefix"} + assert metadata.warning_text == "Data may be incomplete" + assert metadata.certified_by == "Data Team" + assert metadata.certification_details == "Verified Q1 2024" + + +# ============================================================================= +# ColumnMetadata tests +# ============================================================================= + + +def test_column_metadata_required_fields() -> None: + """Test ColumnMetadata with required fields only.""" + metadata = ColumnMetadata( + column_name="order_date", + type="DATE", + is_dttm=True, + ) + assert metadata.column_name == "order_date" + assert metadata.type == "DATE" + assert metadata.is_dttm is True + assert metadata.verbose_name is None + assert metadata.description is None + assert metadata.groupby is True + assert metadata.filterable is True + assert metadata.expression is None + assert metadata.python_date_format is None + assert metadata.advanced_data_type is None + assert metadata.extra is None + + +def test_column_metadata_all_fields() -> None: + """Test ColumnMetadata with all fields.""" + metadata = ColumnMetadata( + column_name="order_date", + type="DATE", + is_dttm=True, + verbose_name="Order Date", + description="Date of the order", + groupby=True, + filterable=True, + expression="DATE(order_timestamp)", + python_date_format="%Y-%m-%d", + advanced_data_type="date", + extra='{"grain": "day"}', + ) + assert metadata.column_name == "order_date" + assert metadata.type == "DATE" + assert metadata.is_dttm is True + assert metadata.verbose_name == "Order Date" + assert metadata.description == "Date of the order" + assert metadata.groupby is True + assert metadata.filterable is True + assert metadata.expression == "DATE(order_timestamp)" + assert metadata.python_date_format == "%Y-%m-%d" + assert metadata.advanced_data_type == "date" + assert metadata.extra == '{"grain": "day"}' + + +# ============================================================================= +# SemanticLayer tests +# ============================================================================= + + +def test_semantic_layer_repr_with_name() -> None: + """Test SemanticLayer __repr__ with name.""" + layer = SemanticLayer() + layer.name = "My Semantic Layer" + layer.uuid = uuid.uuid4() + assert repr(layer) == "My Semantic Layer" + + +def test_semantic_layer_repr_without_name() -> None: + """Test SemanticLayer __repr__ without name (uses uuid).""" + layer = SemanticLayer() + layer.name = None + test_uuid = uuid.uuid4() + layer.uuid = test_uuid + assert repr(layer) == str(test_uuid) + + +def test_semantic_layer_implementation_not_implemented() -> None: + """Test that implementation raises NotImplementedError.""" + layer = SemanticLayer() + with pytest.raises(NotImplementedError): + _ = layer.implementation Review Comment: **Suggestion:** The `test_semantic_layer_implementation_not_implemented` test expects a `NotImplementedError` when accessing `SemanticLayer.implementation`, but the real implementation delegates to a registry and will not raise this error on a blank instance; instead the test should verify that the property correctly pulls an implementation from the registry. [logic error] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ❌ models_test.py fails for SemanticLayer.implementation registry behavior. - ⚠️ Registry-backed semantic layer behavior remains untested and brittle. ``` </details> ```suggestion """Test that implementation delegates to the registry-based semantic layer.""" class DummyLayer: @classmethod def from_configuration(cls, configuration): return "dummy_impl" layer = SemanticLayer() layer.type = "dummy" layer.configuration = "{}" with patch("superset.semantic_layers.models.registry", {"dummy": DummyLayer}): assert layer.implementation == "dummy_impl" ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `pytest tests/unit_tests/semantic_layers/models_test.py::test_semantic_layer_implementation_not_implemented`. 2. The test at `tests/unit_tests/semantic_layers/models_test.py:244-248` constructs a bare `SemanticLayer()` without setting `type` or `configuration`. 3. Accessing `layer.implementation` invokes the cached_property at `superset/semantic_layers/models.py:141-151`, which does `class_ = registry[self.type]` and `class_.from_configuration(json.loads(self.configuration))`. 4. Because `self.type` is unset (`None`) and `registry[None]` is invalid, a `KeyError` is raised instead of the `NotImplementedError` expected by the test, causing the test to fail. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/unit_tests/semantic_layers/models_test.py **Line:** 245:248 **Comment:** *Logic Error: The `test_semantic_layer_implementation_not_implemented` test expects a `NotImplementedError` when accessing `SemanticLayer.implementation`, but the real implementation delegates to a registry and will not raise this error on a blank instance; instead the test should verify that the property correctly pulls an implementation from the registry. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=4605b39c2e62b4c83ba4b00cc6afa07fb44fe8ab554eed1f9601e43659a6ae54&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=4605b39c2e62b4c83ba4b00cc6afa07fb44fe8ab554eed1f9601e43659a6ae54&reaction=dislike'>👎</a> ########## tests/unit_tests/semantic_layers/models_test.py: ########## @@ -0,0 +1,621 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Tests for semantic layer models.""" + +from __future__ import annotations + +import uuid +from unittest.mock import MagicMock, patch + +import pytest +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Day, + Dimension, + Metric, + Type, +) + +from superset.semantic_layers.models import ( + ColumnMetadata, + MetricMetadata, + SemanticLayer, + SemanticView, + get_column_type, +) +from superset.utils.core import GenericDataType + + +# ============================================================================= +# get_column_type tests +# ============================================================================= + + +def test_get_column_type_temporal_date() -> None: + """Test that DATE maps to TEMPORAL.""" + assert get_column_type(DATE) == GenericDataType.TEMPORAL + + +def test_get_column_type_temporal_datetime() -> None: + """Test that DATETIME maps to TEMPORAL.""" + assert get_column_type(DATETIME) == GenericDataType.TEMPORAL + + +def test_get_column_type_temporal_time() -> None: + """Test that TIME maps to TEMPORAL.""" + assert get_column_type(TIME) == GenericDataType.TEMPORAL + + +def test_get_column_type_numeric_integer() -> None: + """Test that INTEGER maps to NUMERIC.""" + assert get_column_type(INTEGER) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_number() -> None: + """Test that NUMBER maps to NUMERIC.""" + assert get_column_type(NUMBER) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_decimal() -> None: + """Test that DECIMAL maps to NUMERIC.""" + assert get_column_type(DECIMAL) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_interval() -> None: + """Test that INTERVAL maps to NUMERIC.""" + assert get_column_type(INTERVAL) == GenericDataType.NUMERIC + + +def test_get_column_type_boolean() -> None: + """Test that BOOLEAN maps to BOOLEAN.""" + assert get_column_type(BOOLEAN) == GenericDataType.BOOLEAN + + +def test_get_column_type_string() -> None: + """Test that STRING maps to STRING.""" + assert get_column_type(STRING) == GenericDataType.STRING + + +def test_get_column_type_object() -> None: + """Test that OBJECT maps to STRING.""" + assert get_column_type(OBJECT) == GenericDataType.STRING + + +def test_get_column_type_binary() -> None: + """Test that BINARY maps to STRING.""" + assert get_column_type(BINARY) == GenericDataType.STRING + + +def test_get_column_type_unknown() -> None: + """Test that unknown types default to STRING.""" + + class UnknownType(Type): + pass + + assert get_column_type(UnknownType) == GenericDataType.STRING + + +# ============================================================================= +# MetricMetadata tests +# ============================================================================= + + +def test_metric_metadata_required_fields() -> None: + """Test MetricMetadata with required fields only.""" + metadata = MetricMetadata( + metric_name="revenue", + expression="SUM(amount)", + ) + assert metadata.metric_name == "revenue" + assert metadata.expression == "SUM(amount)" + assert metadata.verbose_name is None + assert metadata.description is None + assert metadata.d3format is None + assert metadata.currency is None + assert metadata.warning_text is None + assert metadata.certified_by is None + assert metadata.certification_details is None + + +def test_metric_metadata_all_fields() -> None: + """Test MetricMetadata with all fields.""" + metadata = MetricMetadata( + metric_name="revenue", + expression="SUM(amount)", + verbose_name="Total Revenue", + description="Sum of all revenue", + d3format="$,.2f", + currency={"symbol": "$", "symbolPosition": "prefix"}, + warning_text="Data may be incomplete", + certified_by="Data Team", + certification_details="Verified Q1 2024", + ) + assert metadata.metric_name == "revenue" + assert metadata.expression == "SUM(amount)" + assert metadata.verbose_name == "Total Revenue" + assert metadata.description == "Sum of all revenue" + assert metadata.d3format == "$,.2f" + assert metadata.currency == {"symbol": "$", "symbolPosition": "prefix"} + assert metadata.warning_text == "Data may be incomplete" + assert metadata.certified_by == "Data Team" + assert metadata.certification_details == "Verified Q1 2024" + + +# ============================================================================= +# ColumnMetadata tests +# ============================================================================= + + +def test_column_metadata_required_fields() -> None: + """Test ColumnMetadata with required fields only.""" + metadata = ColumnMetadata( + column_name="order_date", + type="DATE", + is_dttm=True, + ) + assert metadata.column_name == "order_date" + assert metadata.type == "DATE" + assert metadata.is_dttm is True + assert metadata.verbose_name is None + assert metadata.description is None + assert metadata.groupby is True + assert metadata.filterable is True + assert metadata.expression is None + assert metadata.python_date_format is None + assert metadata.advanced_data_type is None + assert metadata.extra is None + + +def test_column_metadata_all_fields() -> None: + """Test ColumnMetadata with all fields.""" + metadata = ColumnMetadata( + column_name="order_date", + type="DATE", + is_dttm=True, + verbose_name="Order Date", + description="Date of the order", + groupby=True, + filterable=True, + expression="DATE(order_timestamp)", + python_date_format="%Y-%m-%d", + advanced_data_type="date", + extra='{"grain": "day"}', + ) + assert metadata.column_name == "order_date" + assert metadata.type == "DATE" + assert metadata.is_dttm is True + assert metadata.verbose_name == "Order Date" + assert metadata.description == "Date of the order" + assert metadata.groupby is True + assert metadata.filterable is True + assert metadata.expression == "DATE(order_timestamp)" + assert metadata.python_date_format == "%Y-%m-%d" + assert metadata.advanced_data_type == "date" + assert metadata.extra == '{"grain": "day"}' + + +# ============================================================================= +# SemanticLayer tests +# ============================================================================= + + +def test_semantic_layer_repr_with_name() -> None: + """Test SemanticLayer __repr__ with name.""" + layer = SemanticLayer() + layer.name = "My Semantic Layer" + layer.uuid = uuid.uuid4() + assert repr(layer) == "My Semantic Layer" + + +def test_semantic_layer_repr_without_name() -> None: + """Test SemanticLayer __repr__ without name (uses uuid).""" + layer = SemanticLayer() + layer.name = None + test_uuid = uuid.uuid4() + layer.uuid = test_uuid + assert repr(layer) == str(test_uuid) + + +def test_semantic_layer_implementation_not_implemented() -> None: + """Test that implementation raises NotImplementedError.""" + layer = SemanticLayer() + with pytest.raises(NotImplementedError): + _ = layer.implementation + + +# ============================================================================= +# SemanticView tests +# ============================================================================= + + [email protected] +def mock_dimensions() -> list[Dimension]: + """Create mock dimensions for testing.""" + return [ + Dimension( + id="orders.order_date", + name="order_date", + type=DATE, + definition="orders.order_date", + description="Date of the order", + grain=Day, + ), + Dimension( + id="products.category", + name="category", + type=STRING, + definition="products.category", + description="Product category", + grain=None, + ), + ] + + [email protected] +def mock_metrics() -> list[Metric]: + """Create mock metrics for testing.""" + return [ + Metric( + id="orders.revenue", + name="revenue", + type=NUMBER, + definition="SUM(orders.amount)", + description="Total revenue", + ), + Metric( + id="orders.count", + name="order_count", + type=INTEGER, + definition="COUNT(*)", + description="Number of orders", + ), + ] + + [email protected] +def mock_implementation( + mock_dimensions: list[Dimension], + mock_metrics: list[Metric], +) -> MagicMock: + """Create a mock implementation.""" + impl = MagicMock() + impl.get_dimensions.return_value = mock_dimensions + impl.get_metrics.return_value = mock_metrics + impl.uid.return_value = "semantic_view_uid_123" + return impl + + [email protected] +def semantic_view(mock_implementation: MagicMock) -> SemanticView: + """Create a SemanticView with mocked implementation.""" + view = SemanticView() + view.name = "Orders View" + view.description = "View of order data" + view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678") + view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765") + view.cache_timeout = 3600 + view.configuration = "{}" + + # Mock the implementation property + with patch.object( + SemanticView, + "implementation", + new_callable=lambda: property(lambda self: mock_implementation), + ): + # We need to return the view but the patch won't persist + pass + + return view + + +def test_semantic_view_repr_with_name() -> None: + """Test SemanticView __repr__ with name.""" + view = SemanticView() + view.name = "My View" + view.uuid = uuid.uuid4() + assert repr(view) == "My View" + + +def test_semantic_view_repr_without_name() -> None: + """Test SemanticView __repr__ without name (uses uuid).""" + view = SemanticView() + view.name = None + test_uuid = uuid.uuid4() + view.uuid = test_uuid + assert repr(view) == str(test_uuid) + + +def test_semantic_view_type() -> None: + """Test SemanticView type property.""" + view = SemanticView() + assert view.type == "semantic_view" + + +def test_semantic_view_offset() -> None: + """Test SemanticView offset property.""" + view = SemanticView() + assert view.offset == 0 + + +def test_semantic_view_is_rls_supported() -> None: + """Test SemanticView is_rls_supported property.""" + view = SemanticView() + assert view.is_rls_supported is False + + +def test_semantic_view_query_language() -> None: + """Test SemanticView query_language property.""" + view = SemanticView() + assert view.query_language is None + + +def test_semantic_view_get_query_str() -> None: + """Test SemanticView get_query_str method.""" + view = SemanticView() + result = view.get_query_str({}) + assert result == "Not implemented for semantic layers" + + +def test_semantic_view_get_extra_cache_keys() -> None: + """Test SemanticView get_extra_cache_keys method.""" + view = SemanticView() + result = view.get_extra_cache_keys({}) + assert result == [] + + +def test_semantic_view_perm() -> None: + """Test SemanticView perm property.""" + view = SemanticView() + view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678") + view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765") + assert view.perm == "87654321432187654321876543218765::12345678123456781234567812345678" + + +def test_semantic_view_uid( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], + mock_metrics: list[Metric], +) -> None: + """Test SemanticView uid property.""" + view = SemanticView() + view.name = "Test View" + view.uuid = uuid.uuid4() + view.semantic_layer_uuid = uuid.uuid4() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.uid == "semantic_view_uid_123" + + +def test_semantic_view_metrics( + mock_implementation: MagicMock, + mock_metrics: list[Metric], +) -> None: + """Test SemanticView metrics property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + metrics = view.metrics + assert len(metrics) == 2 + assert metrics[0].metric_name == "revenue" + assert metrics[0].expression == "SUM(orders.amount)" + assert metrics[0].description == "Total revenue" + assert metrics[1].metric_name == "order_count" + + +def test_semantic_view_columns( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test SemanticView columns property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + columns = view.columns + assert len(columns) == 2 + assert columns[0].column_name == "order_date" + assert columns[0].type == "DATE" + assert columns[0].is_dttm is True + assert columns[0].description == "Date of the order" + assert columns[1].column_name == "category" + assert columns[1].type == "STRING" + assert columns[1].is_dttm is False + + +def test_semantic_view_column_names( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test SemanticView column_names property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + column_names = view.column_names + assert column_names == ["order_date", "category"] + + +def test_semantic_view_get_time_grains( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test SemanticView get_time_grains property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + time_grains = view.get_time_grains + assert len(time_grains) == 1 + assert time_grains[0]["name"] == "Day" + assert time_grains[0]["duration"] == "P1D" + + +def test_semantic_view_has_drill_by_columns_all_exist( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test has_drill_by_columns when all columns exist.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.has_drill_by_columns(["order_date", "category"]) is True + + +def test_semantic_view_has_drill_by_columns_some_missing( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test has_drill_by_columns when some columns are missing.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.has_drill_by_columns(["order_date", "nonexistent"]) is False + + +def test_semantic_view_has_drill_by_columns_empty( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test has_drill_by_columns with empty list.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.has_drill_by_columns([]) is True + + +def test_semantic_view_data( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], + mock_metrics: list[Metric], +) -> None: + """Test SemanticView data property.""" + view = SemanticView() + view.name = "Orders View" + view.description = "View of order data" + view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678") + view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765") + view.cache_timeout = 3600 + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + data = view.data + + # Check core fields + assert data["id"] == "12345678123456781234567812345678" + assert data["uid"] == "semantic_view_uid_123" + assert data["type"] == "semantic_view" + assert data["name"] == "Orders View" + assert data["description"] == "View of order data" + assert data["cache_timeout"] == 3600 + + # Check columns + assert len(data["columns"]) == 2 + assert data["columns"][0]["column_name"] == "order_date" + assert data["columns"][0]["type"] == "DATE" + assert data["columns"][0]["is_dttm"] is True + assert data["columns"][0]["type_generic"] == GenericDataType.TEMPORAL + assert data["columns"][1]["column_name"] == "category" + assert data["columns"][1]["type"] == "STRING" + assert data["columns"][1]["type_generic"] == GenericDataType.STRING + + # Check metrics + assert len(data["metrics"]) == 2 + assert data["metrics"][0]["metric_name"] == "revenue" + assert data["metrics"][0]["expression"] == "SUM(orders.amount)" + assert data["metrics"][1]["metric_name"] == "order_count" + + # Check column_types and column_names + assert data["column_types"] == [ + GenericDataType.TEMPORAL, + GenericDataType.STRING, + ] + assert data["column_names"] == {"order_date", "category"} Review Comment: **Suggestion:** In `test_semantic_view_data` the assertion for `column_names` treats the field as a set, but `SemanticView.data` returns `column_names` as a list, so the test will fail even when the implementation is correct. [logic error] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ❌ models_test.py fails validating SemanticView.data column_names field. - ⚠️ CI for semantic_layers feature branch will be red. ``` </details> ```suggestion # Check column_types and column_names assert data["column_types"] == [ GenericDataType.TEMPORAL, GenericDataType.STRING, ] assert data["column_names"] == ["order_date", "category"] ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `pytest tests/unit_tests/semantic_layers/models_test.py::test_semantic_view_data` from the repository root. 2. The test obtains `data = view.data` at `tests/unit_tests/semantic_layers/models_test.py:537-540`. 3. `SemanticView.data` at `superset/semantic_layers/models.py:305-311` builds `"column_names"` as a list `[dimension.name for dimension in self.implementation.get_dimensions()]`. 4. The assertion at `tests/unit_tests/semantic_layers/models_test.py:571` compares this list to the set `{"order_date", "category"}`, causing a failing assertion due to list vs set mismatch. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/unit_tests/semantic_layers/models_test.py **Line:** 566:571 **Comment:** *Logic Error: In `test_semantic_view_data` the assertion for `column_names` treats the field as a set, but `SemanticView.data` returns `column_names` as a list, so the test will fail even when the implementation is correct. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=ea1a4dbd75d409db4381ecddd745af0376820250ba5fe29c252993298b250c9f&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=ea1a4dbd75d409db4381ecddd745af0376820250ba5fe29c252993298b250c9f&reaction=dislike'>👎</a> ########## superset/semantic_layers/models.py: ########## @@ -0,0 +1,398 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Semantic layer models.""" + +from __future__ import annotations + +import uuid +from collections.abc import Hashable +from dataclasses import dataclass +from functools import cached_property +from typing import Any, TYPE_CHECKING + +from flask_appbuilder import Model +from sqlalchemy import Column, ForeignKey, Identity, Integer, String, Text +from sqlalchemy.orm import relationship +from sqlalchemy_utils import UUIDType +from sqlalchemy_utils.types.json import JSONType +from superset_core.semantic_layers.semantic_layer import ( + SemanticLayer as SemanticLayerABC, +) +from superset_core.semantic_layers.semantic_view import ( + SemanticView as SemanticViewABC, +) +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Type, +) + +from superset.common.query_object import QueryObject +from superset.explorables.base import TimeGrainDict +from superset.extensions import encrypted_field_factory +from superset.models.helpers import AuditMixinNullable, QueryResult +from superset.semantic_layers.mapper import get_results +from superset.semantic_layers.registry import registry +from superset.utils import json +from superset.utils.core import GenericDataType + +if TYPE_CHECKING: + from superset.superset_typing import ExplorableData, QueryObjectDict + + +def get_column_type(semantic_type: type[Type]) -> GenericDataType: + """ + Map semantic layer types to generic data types. + """ + if semantic_type in {DATE, DATETIME, TIME}: + return GenericDataType.TEMPORAL + if semantic_type in {INTEGER, NUMBER, DECIMAL, INTERVAL}: + return GenericDataType.NUMERIC + if semantic_type is BOOLEAN: + return GenericDataType.BOOLEAN + if semantic_type in {STRING, OBJECT, BINARY}: + return GenericDataType.STRING + return GenericDataType.STRING + + +@dataclass(frozen=True) +class MetricMetadata: + metric_name: str + expression: str + verbose_name: str | None = None + description: str | None = None + d3format: str | None = None + currency: dict[str, Any] | None = None + warning_text: str | None = None + certified_by: str | None = None + certification_details: str | None = None + + +@dataclass(frozen=True) +class ColumnMetadata: + column_name: str + type: str + is_dttm: bool + verbose_name: str | None = None + description: str | None = None + groupby: bool = True + filterable: bool = True + expression: str | None = None + python_date_format: str | None = None + advanced_data_type: str | None = None + extra: str | None = None + + +class SemanticLayer(AuditMixinNullable, Model): + """ + Semantic layer model. + + A semantic layer provides an abstraction over data sources, + allowing users to query data through a semantic interface. + """ + + __tablename__ = "semantic_layers" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + type = Column(String(250), nullable=False) # snowflake, etc + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic views relationship + semantic_views: list[SemanticView] = relationship( + "SemanticView", + back_populates="semantic_layer", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation( + self, + ) -> SemanticLayerABC[Any, SemanticViewABC]: + """ + Return semantic layer implementation. + """ + # TODO (betodealmeida): + # return extension_manager.get_contribution("semanticLayers", self.type) + class_ = registry[self.type] + return class_.from_configuration(json.loads(self.configuration)) + + +class SemanticView(AuditMixinNullable, Model): + """ + Semantic view model. + + A semantic view represents a queryable view within a semantic layer. + """ + + __tablename__ = "semantic_views" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + id = Column(Integer, Identity(), unique=True) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic layer relationship + semantic_layer_uuid = Column( + UUIDType(binary=True), + ForeignKey("semantic_layers.uuid", ondelete="CASCADE"), + nullable=False, + ) + semantic_layer: SemanticLayer = relationship( + "SemanticLayer", + back_populates="semantic_views", + foreign_keys=[semantic_layer_uuid], + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation(self) -> SemanticViewABC: + """ + Return semantic view implementation. + """ + return self.semantic_layer.implementation.get_semantic_view( + self.name, + json.loads(self.configuration), + ) + + # ========================================================================= + # Explorable protocol implementation + # ========================================================================= + + def get_query_result(self, query_object: QueryObject) -> QueryResult: + return get_results(query_object) + + def get_query_str(self, query_obj: QueryObjectDict) -> str: + return "Not implemented for semantic layers" + + @property + def uid(self) -> str: + return self.implementation.uid() + + @property + def type(self) -> str: + return "semantic_view" + + @property + def metrics(self) -> list[MetricMetadata]: + return [ + MetricMetadata( + metric_name=metric.name, + expression=metric.definition, + description=metric.description, + ) + for metric in self.implementation.get_metrics() + ] + + @property + def columns(self) -> list[ColumnMetadata]: + return [ + ColumnMetadata( + column_name=dimension.name, + type=dimension.type.__name__, + is_dttm=dimension.type in {DATE, TIME, DATETIME}, + description=dimension.description, + expression=dimension.definition, + extra=json.dumps( + {"grain": dimension.grain.name if dimension.grain else None} + ), + ) + for dimension in self.implementation.get_dimensions() + ] + + @property + def column_names(self) -> list[str]: + return [dimension.name for dimension in self.implementation.get_dimensions()] + + @property + def data(self) -> ExplorableData: + return { + # core + "id": self.id, + "uid": self.uid, + "type": "semantic_view", + "name": self.name, + "columns": [ + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": dimension.name, + "description": dimension.description, + "expression": dimension.definition, + "filterable": True, + "groupby": True, + "id": None, + "uuid": None, + "is_certified": False, + "is_dttm": dimension.type in {DATE, TIME, DATETIME}, + "python_date_format": None, + "type": dimension.type.__name__, + "type_generic": get_column_type(dimension.type), + "verbose_name": None, + "warning_markdown": None, + } + for dimension in self.implementation.get_dimensions() + ], + "metrics": [ + { + "certification_details": None, + "certified_by": None, + "d3format": None, + "description": metric.description, + "expression": metric.definition, + "id": None, + "uuid": None, + "is_certified": False, + "metric_name": metric.name, + "warning_markdown": None, + "warning_text": None, + "verbose_name": None, + } + for metric in self.implementation.get_metrics() + ], + "database": {}, + # UI features + "verbose_map": {}, + "order_by_choices": [], + "filter_select": True, + "filter_select_enabled": True, + "sql": None, + "select_star": None, + "owners": [], + "description": self.description, + "table_name": self.name, + "column_types": [ + get_column_type(dimension.type) + for dimension in self.implementation.get_dimensions() + ], + "column_names": [ + dimension.name for dimension in self.implementation.get_dimensions() + ], + # rare + "column_formats": {}, + "datasource_name": self.name, + "perm": self.perm, + "offset": self.offset, + "cache_timeout": self.cache_timeout, + "params": None, + # sql-specific + "schema": None, + "catalog": None, + "main_dttm_col": None, + "time_grain_sqla": [], + "granularity_sqla": [], + "fetch_values_predicate": None, + "template_params": None, + "is_sqllab_view": False, + "extra": None, + "always_filter_main_dttm": False, + "normalize_columns": False, + # TODO XXX + # "owners": [owner.id for owner in self.owners], + "edit_url": "", + "default_endpoint": None, + "folders": [], + "health_check_message": None, + } + + def data_for_slices(self, slices: list[Any]) -> dict[str, Any]: + return self.data + + def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]: + return [] + + @property + def perm(self) -> str: + return self.semantic_layer_uuid.hex + "::" + self.uuid.hex + + @property + def catalog_perm(self) -> str | None: + return None + + @property + def schema_perm(self) -> str | None: + return None + + @property + def schema(self) -> str | None: + return None + + @property + def url(self) -> str: + return f"/semantic_view/{self.uuid}/" + + @property + def explore_url(self) -> str: + return f"/explore/?datasource_type=semantic_view&datasource_id={self.id}" + + @property + def offset(self) -> int: + # always return datetime as UTC + return 0 + + @property + def get_time_grains(self) -> list[TimeGrainDict]: + return [ + { + "name": dimension.grain.name, + "function": "", + "duration": dimension.grain.representation, + } + for dimension in self.implementation.get_dimensions() + if dimension.grain + ] Review Comment: **Suggestion:** The `get_time_grains` member of `SemanticView` is defined as a `@property` but the `Explorable` protocol and other implementations (e.g., SQLA datasources and queries) expose it as a method, so generic code that calls `datasource.get_time_grains()` will fail with a `'list' object is not callable` error when the datasource is a semantic view. [logic error] <details> <summary><b>Severity Level:</b> Critical 🚨</summary> ```mdx - ❌ TIMEGRAINS chart-data requests for semantic views raise TypeError. - ⚠️ Breaks `_get_timegrains` in `superset/common/query_actions.py` for views. - ⚠️ Semantic_view Explore cannot load available time grain options. ``` </details> ```suggestion def get_time_grains(self) -> list[TimeGrainDict]: return [ { "name": dimension.grain.name, "function": "", "duration": dimension.grain.representation, } for dimension in self.implementation.get_dimensions() if dimension.grain ] ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. In `superset/semantic_layers/models.py:374–384`, `SemanticView.get_time_grains` is implemented as a `@property` that returns a `list[TimeGrainDict]`. 2. The `Explorable` protocol in `superset/explorables/base.py:416–462` defines `def get_time_grains(self) -> list[TimeGrainDict]:` as a regular method, not a property. 3. The generic chart data helper `_get_timegrains` in `superset/common/query_actions.py:69–75` retrieves the datasource via `_get_datasource` and then calls `grains = datasource.get_time_grains()` assuming the Explorable method interface. 4. When a `QueryContext` is constructed with a `SemanticView` as its datasource for a chart using `datasource_type='semantic_view'` (supported by the migration updating the slices check constraint in `superset/migrations/versions/2025-11-04_11-26_33d7e0e21daa_add_semantic_layers_and_views.py:126–132`) and `_get_timegrains` is invoked, Python evaluates `datasource.get_time_grains()` where `datasource.get_time_grains` is actually a list, causing `TypeError: 'list' object is not callable` and failing the TIMEGRAINS chart-data request. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** superset/semantic_layers/models.py **Line:** 374:384 **Comment:** *Logic Error: The `get_time_grains` member of `SemanticView` is defined as a `@property` but the `Explorable` protocol and other implementations (e.g., SQLA datasources and queries) expose it as a method, so generic code that calls `datasource.get_time_grains()` will fail with a `'list' object is not callable` error when the datasource is a semantic view. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=06d8e197cd0b2199f63f650339ee209c74e560388a2a8db87e1e234aef295fc8&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=06d8e197cd0b2199f63f650339ee209c74e560388a2a8db87e1e234aef295fc8&reaction=dislike'>👎</a> ########## superset/semantic_layers/models.py: ########## @@ -0,0 +1,398 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Semantic layer models.""" + +from __future__ import annotations + +import uuid +from collections.abc import Hashable +from dataclasses import dataclass +from functools import cached_property +from typing import Any, TYPE_CHECKING + +from flask_appbuilder import Model +from sqlalchemy import Column, ForeignKey, Identity, Integer, String, Text +from sqlalchemy.orm import relationship +from sqlalchemy_utils import UUIDType +from sqlalchemy_utils.types.json import JSONType +from superset_core.semantic_layers.semantic_layer import ( + SemanticLayer as SemanticLayerABC, +) +from superset_core.semantic_layers.semantic_view import ( + SemanticView as SemanticViewABC, +) +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Type, +) + +from superset.common.query_object import QueryObject +from superset.explorables.base import TimeGrainDict +from superset.extensions import encrypted_field_factory +from superset.models.helpers import AuditMixinNullable, QueryResult +from superset.semantic_layers.mapper import get_results +from superset.semantic_layers.registry import registry +from superset.utils import json +from superset.utils.core import GenericDataType + +if TYPE_CHECKING: + from superset.superset_typing import ExplorableData, QueryObjectDict + + +def get_column_type(semantic_type: type[Type]) -> GenericDataType: + """ + Map semantic layer types to generic data types. + """ + if semantic_type in {DATE, DATETIME, TIME}: + return GenericDataType.TEMPORAL + if semantic_type in {INTEGER, NUMBER, DECIMAL, INTERVAL}: + return GenericDataType.NUMERIC + if semantic_type is BOOLEAN: + return GenericDataType.BOOLEAN + if semantic_type in {STRING, OBJECT, BINARY}: + return GenericDataType.STRING + return GenericDataType.STRING + + +@dataclass(frozen=True) +class MetricMetadata: + metric_name: str + expression: str + verbose_name: str | None = None + description: str | None = None + d3format: str | None = None + currency: dict[str, Any] | None = None + warning_text: str | None = None + certified_by: str | None = None + certification_details: str | None = None + + +@dataclass(frozen=True) +class ColumnMetadata: + column_name: str + type: str + is_dttm: bool + verbose_name: str | None = None + description: str | None = None + groupby: bool = True + filterable: bool = True + expression: str | None = None + python_date_format: str | None = None + advanced_data_type: str | None = None + extra: str | None = None + + +class SemanticLayer(AuditMixinNullable, Model): + """ + Semantic layer model. + + A semantic layer provides an abstraction over data sources, + allowing users to query data through a semantic interface. + """ + + __tablename__ = "semantic_layers" + + uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4) + + # Core fields + name = Column(String(250), nullable=False) + description = Column(Text, nullable=True) + type = Column(String(250), nullable=False) # snowflake, etc + + configuration = Column(encrypted_field_factory.create(JSONType), default=dict) + cache_timeout = Column(Integer, nullable=True) + + # Semantic views relationship + semantic_views: list[SemanticView] = relationship( + "SemanticView", + back_populates="semantic_layer", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + def __repr__(self) -> str: + return self.name or str(self.uuid) + + @cached_property + def implementation( + self, + ) -> SemanticLayerABC[Any, SemanticViewABC]: + """ + Return semantic layer implementation. + """ + # TODO (betodealmeida): + # return extension_manager.get_contribution("semanticLayers", self.type) + class_ = registry[self.type] + return class_.from_configuration(json.loads(self.configuration)) Review Comment: **Suggestion:** The `implementation` property calls `json.loads(self.configuration)` even though the ORM column has `default=dict`, so on newly created `SemanticLayer` instances `self.configuration` will be a Python dict, causing `json.loads` to raise a `TypeError`; it should handle both dict and string values safely before passing the configuration to the semantic layer implementation. [type error] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ❌ SemanticLayer implementation resolution crashes when configuration already dict. - ⚠️ Blocks registry-based semantic layer backends from initializing correctly. - ⚠️ Prevents queries using configured semantic layers from executing. ``` </details> ```suggestion @cached_property def implementation( self, ) -> SemanticLayerABC[Any, SemanticViewABC]: """ Return semantic layer implementation. """ # TODO (betodealmeida): # return extension_manager.get_contribution("semanticLayers", self.type) class_ = registry[self.type] raw_config = self.configuration if isinstance(raw_config, str): config = json.loads(raw_config) else: config = raw_config or {} return class_.from_configuration(config) ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Note `SemanticLayer.configuration` is defined as `Column(encrypted_field_factory.create(JSONType), default=dict)` in `superset/semantic_layers/models.py:127`, while the Alembic migration `superset/migrations/versions/2025-11-04_11-26_33d7e0e21daa_add_semantic_layers_and_views.py:55–58` creates the `configuration` column with the same encrypted JSON type. 2. SQLAlchemy JSON/JSONType columns (especially when wrapped by an encrypted type) are typically deserialized to Python objects (dicts) when loaded from the database, and callers can also assign a Python dict directly to `SemanticLayer.configuration` before flushing. 3. The `SemanticLayer.implementation` property at `superset/semantic_layers/models.py:141–151` resolves `class_ = registry[self.type]` and then calls `class_.from_configuration(json.loads(self.configuration))`, unconditionally passing `self.configuration` through `json.loads`. 4. When a `SemanticLayer` instance has `configuration` already as a Python dict (for example, a row loaded from the DB with JSON configuration), any code that accesses `layer.implementation` (to instantiate the concrete `superset_core.semantic_layers.semantic_layer.SemanticLayer` backend from the `registry`) will hit `json.loads(self.configuration)` and raise `TypeError("the JSON object must be str, bytes or bytearray, not dict")`, preventing the semantic layer implementation from being constructed. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** superset/semantic_layers/models.py **Line:** 141:151 **Comment:** *Type Error: The `implementation` property calls `json.loads(self.configuration)` even though the ORM column has `default=dict`, so on newly created `SemanticLayer` instances `self.configuration` will be a Python dict, causing `json.loads` to raise a `TypeError`; it should handle both dict and string values safely before passing the configuration to the semantic layer implementation. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=1b19108f946fe671b604eb25d3e5fe1a3b82facad7587be70c2cab2745d30727&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=1b19108f946fe671b604eb25d3e5fe1a3b82facad7587be70c2cab2745d30727&reaction=dislike'>👎</a> ########## tests/unit_tests/semantic_layers/models_test.py: ########## @@ -0,0 +1,621 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Tests for semantic layer models.""" + +from __future__ import annotations + +import uuid +from unittest.mock import MagicMock, patch + +import pytest +from superset_core.semantic_layers.types import ( + BINARY, + BOOLEAN, + DATE, + DATETIME, + DECIMAL, + INTEGER, + INTERVAL, + NUMBER, + OBJECT, + STRING, + TIME, + Day, + Dimension, + Metric, + Type, +) + +from superset.semantic_layers.models import ( + ColumnMetadata, + MetricMetadata, + SemanticLayer, + SemanticView, + get_column_type, +) +from superset.utils.core import GenericDataType + + +# ============================================================================= +# get_column_type tests +# ============================================================================= + + +def test_get_column_type_temporal_date() -> None: + """Test that DATE maps to TEMPORAL.""" + assert get_column_type(DATE) == GenericDataType.TEMPORAL + + +def test_get_column_type_temporal_datetime() -> None: + """Test that DATETIME maps to TEMPORAL.""" + assert get_column_type(DATETIME) == GenericDataType.TEMPORAL + + +def test_get_column_type_temporal_time() -> None: + """Test that TIME maps to TEMPORAL.""" + assert get_column_type(TIME) == GenericDataType.TEMPORAL + + +def test_get_column_type_numeric_integer() -> None: + """Test that INTEGER maps to NUMERIC.""" + assert get_column_type(INTEGER) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_number() -> None: + """Test that NUMBER maps to NUMERIC.""" + assert get_column_type(NUMBER) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_decimal() -> None: + """Test that DECIMAL maps to NUMERIC.""" + assert get_column_type(DECIMAL) == GenericDataType.NUMERIC + + +def test_get_column_type_numeric_interval() -> None: + """Test that INTERVAL maps to NUMERIC.""" + assert get_column_type(INTERVAL) == GenericDataType.NUMERIC + + +def test_get_column_type_boolean() -> None: + """Test that BOOLEAN maps to BOOLEAN.""" + assert get_column_type(BOOLEAN) == GenericDataType.BOOLEAN + + +def test_get_column_type_string() -> None: + """Test that STRING maps to STRING.""" + assert get_column_type(STRING) == GenericDataType.STRING + + +def test_get_column_type_object() -> None: + """Test that OBJECT maps to STRING.""" + assert get_column_type(OBJECT) == GenericDataType.STRING + + +def test_get_column_type_binary() -> None: + """Test that BINARY maps to STRING.""" + assert get_column_type(BINARY) == GenericDataType.STRING + + +def test_get_column_type_unknown() -> None: + """Test that unknown types default to STRING.""" + + class UnknownType(Type): + pass + + assert get_column_type(UnknownType) == GenericDataType.STRING + + +# ============================================================================= +# MetricMetadata tests +# ============================================================================= + + +def test_metric_metadata_required_fields() -> None: + """Test MetricMetadata with required fields only.""" + metadata = MetricMetadata( + metric_name="revenue", + expression="SUM(amount)", + ) + assert metadata.metric_name == "revenue" + assert metadata.expression == "SUM(amount)" + assert metadata.verbose_name is None + assert metadata.description is None + assert metadata.d3format is None + assert metadata.currency is None + assert metadata.warning_text is None + assert metadata.certified_by is None + assert metadata.certification_details is None + + +def test_metric_metadata_all_fields() -> None: + """Test MetricMetadata with all fields.""" + metadata = MetricMetadata( + metric_name="revenue", + expression="SUM(amount)", + verbose_name="Total Revenue", + description="Sum of all revenue", + d3format="$,.2f", + currency={"symbol": "$", "symbolPosition": "prefix"}, + warning_text="Data may be incomplete", + certified_by="Data Team", + certification_details="Verified Q1 2024", + ) + assert metadata.metric_name == "revenue" + assert metadata.expression == "SUM(amount)" + assert metadata.verbose_name == "Total Revenue" + assert metadata.description == "Sum of all revenue" + assert metadata.d3format == "$,.2f" + assert metadata.currency == {"symbol": "$", "symbolPosition": "prefix"} + assert metadata.warning_text == "Data may be incomplete" + assert metadata.certified_by == "Data Team" + assert metadata.certification_details == "Verified Q1 2024" + + +# ============================================================================= +# ColumnMetadata tests +# ============================================================================= + + +def test_column_metadata_required_fields() -> None: + """Test ColumnMetadata with required fields only.""" + metadata = ColumnMetadata( + column_name="order_date", + type="DATE", + is_dttm=True, + ) + assert metadata.column_name == "order_date" + assert metadata.type == "DATE" + assert metadata.is_dttm is True + assert metadata.verbose_name is None + assert metadata.description is None + assert metadata.groupby is True + assert metadata.filterable is True + assert metadata.expression is None + assert metadata.python_date_format is None + assert metadata.advanced_data_type is None + assert metadata.extra is None + + +def test_column_metadata_all_fields() -> None: + """Test ColumnMetadata with all fields.""" + metadata = ColumnMetadata( + column_name="order_date", + type="DATE", + is_dttm=True, + verbose_name="Order Date", + description="Date of the order", + groupby=True, + filterable=True, + expression="DATE(order_timestamp)", + python_date_format="%Y-%m-%d", + advanced_data_type="date", + extra='{"grain": "day"}', + ) + assert metadata.column_name == "order_date" + assert metadata.type == "DATE" + assert metadata.is_dttm is True + assert metadata.verbose_name == "Order Date" + assert metadata.description == "Date of the order" + assert metadata.groupby is True + assert metadata.filterable is True + assert metadata.expression == "DATE(order_timestamp)" + assert metadata.python_date_format == "%Y-%m-%d" + assert metadata.advanced_data_type == "date" + assert metadata.extra == '{"grain": "day"}' + + +# ============================================================================= +# SemanticLayer tests +# ============================================================================= + + +def test_semantic_layer_repr_with_name() -> None: + """Test SemanticLayer __repr__ with name.""" + layer = SemanticLayer() + layer.name = "My Semantic Layer" + layer.uuid = uuid.uuid4() + assert repr(layer) == "My Semantic Layer" + + +def test_semantic_layer_repr_without_name() -> None: + """Test SemanticLayer __repr__ without name (uses uuid).""" + layer = SemanticLayer() + layer.name = None + test_uuid = uuid.uuid4() + layer.uuid = test_uuid + assert repr(layer) == str(test_uuid) + + +def test_semantic_layer_implementation_not_implemented() -> None: + """Test that implementation raises NotImplementedError.""" + layer = SemanticLayer() + with pytest.raises(NotImplementedError): + _ = layer.implementation + + +# ============================================================================= +# SemanticView tests +# ============================================================================= + + [email protected] +def mock_dimensions() -> list[Dimension]: + """Create mock dimensions for testing.""" + return [ + Dimension( + id="orders.order_date", + name="order_date", + type=DATE, + definition="orders.order_date", + description="Date of the order", + grain=Day, + ), + Dimension( + id="products.category", + name="category", + type=STRING, + definition="products.category", + description="Product category", + grain=None, + ), + ] + + [email protected] +def mock_metrics() -> list[Metric]: + """Create mock metrics for testing.""" + return [ + Metric( + id="orders.revenue", + name="revenue", + type=NUMBER, + definition="SUM(orders.amount)", + description="Total revenue", + ), + Metric( + id="orders.count", + name="order_count", + type=INTEGER, + definition="COUNT(*)", + description="Number of orders", + ), + ] + + [email protected] +def mock_implementation( + mock_dimensions: list[Dimension], + mock_metrics: list[Metric], +) -> MagicMock: + """Create a mock implementation.""" + impl = MagicMock() + impl.get_dimensions.return_value = mock_dimensions + impl.get_metrics.return_value = mock_metrics + impl.uid.return_value = "semantic_view_uid_123" + return impl + + [email protected] +def semantic_view(mock_implementation: MagicMock) -> SemanticView: + """Create a SemanticView with mocked implementation.""" + view = SemanticView() + view.name = "Orders View" + view.description = "View of order data" + view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678") + view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765") + view.cache_timeout = 3600 + view.configuration = "{}" + + # Mock the implementation property + with patch.object( + SemanticView, + "implementation", + new_callable=lambda: property(lambda self: mock_implementation), + ): + # We need to return the view but the patch won't persist + pass + + return view + + +def test_semantic_view_repr_with_name() -> None: + """Test SemanticView __repr__ with name.""" + view = SemanticView() + view.name = "My View" + view.uuid = uuid.uuid4() + assert repr(view) == "My View" + + +def test_semantic_view_repr_without_name() -> None: + """Test SemanticView __repr__ without name (uses uuid).""" + view = SemanticView() + view.name = None + test_uuid = uuid.uuid4() + view.uuid = test_uuid + assert repr(view) == str(test_uuid) + + +def test_semantic_view_type() -> None: + """Test SemanticView type property.""" + view = SemanticView() + assert view.type == "semantic_view" + + +def test_semantic_view_offset() -> None: + """Test SemanticView offset property.""" + view = SemanticView() + assert view.offset == 0 + + +def test_semantic_view_is_rls_supported() -> None: + """Test SemanticView is_rls_supported property.""" + view = SemanticView() + assert view.is_rls_supported is False + + +def test_semantic_view_query_language() -> None: + """Test SemanticView query_language property.""" + view = SemanticView() + assert view.query_language is None + + +def test_semantic_view_get_query_str() -> None: + """Test SemanticView get_query_str method.""" + view = SemanticView() + result = view.get_query_str({}) + assert result == "Not implemented for semantic layers" + + +def test_semantic_view_get_extra_cache_keys() -> None: + """Test SemanticView get_extra_cache_keys method.""" + view = SemanticView() + result = view.get_extra_cache_keys({}) + assert result == [] + + +def test_semantic_view_perm() -> None: + """Test SemanticView perm property.""" + view = SemanticView() + view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678") + view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765") + assert view.perm == "87654321432187654321876543218765::12345678123456781234567812345678" + + +def test_semantic_view_uid( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], + mock_metrics: list[Metric], +) -> None: + """Test SemanticView uid property.""" + view = SemanticView() + view.name = "Test View" + view.uuid = uuid.uuid4() + view.semantic_layer_uuid = uuid.uuid4() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.uid == "semantic_view_uid_123" + + +def test_semantic_view_metrics( + mock_implementation: MagicMock, + mock_metrics: list[Metric], +) -> None: + """Test SemanticView metrics property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + metrics = view.metrics + assert len(metrics) == 2 + assert metrics[0].metric_name == "revenue" + assert metrics[0].expression == "SUM(orders.amount)" + assert metrics[0].description == "Total revenue" + assert metrics[1].metric_name == "order_count" + + +def test_semantic_view_columns( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test SemanticView columns property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + columns = view.columns + assert len(columns) == 2 + assert columns[0].column_name == "order_date" + assert columns[0].type == "DATE" + assert columns[0].is_dttm is True + assert columns[0].description == "Date of the order" + assert columns[1].column_name == "category" + assert columns[1].type == "STRING" + assert columns[1].is_dttm is False + + +def test_semantic_view_column_names( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test SemanticView column_names property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + column_names = view.column_names + assert column_names == ["order_date", "category"] + + +def test_semantic_view_get_time_grains( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test SemanticView get_time_grains property.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + time_grains = view.get_time_grains + assert len(time_grains) == 1 + assert time_grains[0]["name"] == "Day" + assert time_grains[0]["duration"] == "P1D" + + +def test_semantic_view_has_drill_by_columns_all_exist( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test has_drill_by_columns when all columns exist.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.has_drill_by_columns(["order_date", "category"]) is True + + +def test_semantic_view_has_drill_by_columns_some_missing( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test has_drill_by_columns when some columns are missing.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.has_drill_by_columns(["order_date", "nonexistent"]) is False + + +def test_semantic_view_has_drill_by_columns_empty( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], +) -> None: + """Test has_drill_by_columns with empty list.""" + view = SemanticView() + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + assert view.has_drill_by_columns([]) is True + + +def test_semantic_view_data( + mock_implementation: MagicMock, + mock_dimensions: list[Dimension], + mock_metrics: list[Metric], +) -> None: + """Test SemanticView data property.""" + view = SemanticView() + view.name = "Orders View" + view.description = "View of order data" + view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678") + view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765") + view.cache_timeout = 3600 + + with patch.object( + SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) + ): + data = view.data + + # Check core fields + assert data["id"] == "12345678123456781234567812345678" Review Comment: **Suggestion:** The `test_semantic_view_data` test assumes that the `id` field in the serialized data is a UUID-hex string derived from `view.uuid`, but the actual `SemanticView.data` implementation exposes the integer `self.id` (the database identity), so the test will always fail (and currently uses `None`) instead of validating the real contract. [logic error] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ❌ models_test.py fails validating SemanticView.data core id field. - ⚠️ CI for semantic_layers feature branch will be red. ``` </details> ```suggestion view.id = 1 with patch.object( SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation) ): data = view.data # Check core fields assert data["id"] == 1 ``` <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `pytest tests/unit_tests/semantic_layers/models_test.py::test_semantic_view_data` from the repository root. 2. The test at `tests/unit_tests/semantic_layers/models_test.py:530-535` constructs a `SemanticView` and sets `uuid` but never sets `id`. 3. Accessing `view.data` calls `SemanticView.data` at `superset/semantic_layers/models.py:248-252`, which serializes `"id": self.id` (currently `None` on this unsaved instance). 4. The assertion at `tests/unit_tests/semantic_layers/models_test.py:543` compares `data["id"]` to `"12345678123456781234567812345678"`, causing a failing assertion because `None != "12345678123456781234567812345678"`. ``` </details> <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/unit_tests/semantic_layers/models_test.py **Line:** 536:543 **Comment:** *Logic Error: The `test_semantic_view_data` test assumes that the `id` field in the serialized data is a UUID-hex string derived from `view.uuid`, but the actual `SemanticView.data` implementation exposes the integer `self.id` (the database identity), so the test will always fail (and currently uses `None`) instead of validating the real contract. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=01df7fabd9696833a52c3f88a3ba9004e62b7ccf05fe4da4ec49049077ea221c&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37816&comment_hash=01df7fabd9696833a52c3f88a3ba9004e62b7ccf05fe4da4ec49049077ea221c&reaction=dislike'>👎</a> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
