This is an automated email from the ASF dual-hosted git repository.

aminghadersohi pushed a commit to branch mcp-rls-plugins-99978
in repository https://gitbox.apache.org/repos/asf/superset.git

commit e14a92f881d178ff8b939299816d5ec2dd238083
Author: Beto Dealmeida <[email protected]>
AuthorDate: Thu May 21 09:25:27 2026 -0400

    fix(semantic layers): coerce filter types (#40222)
---
 superset/semantic_layers/mapper.py              | 192 ++++++++++--
 tests/unit_tests/semantic_layers/mapper_test.py | 372 +++++++++++++++++++++++-
 2 files changed, 543 insertions(+), 21 deletions(-)

diff --git a/superset/semantic_layers/mapper.py 
b/superset/semantic_layers/mapper.py
index 23dea06124d..b71dfeb44db 100644
--- a/superset/semantic_layers/mapper.py
+++ b/superset/semantic_layers/mapper.py
@@ -24,9 +24,10 @@ single dataframe.
 
 """
 
-from datetime import datetime, timedelta
-from time import time
+from datetime import date, datetime, time, timedelta, tzinfo
+from time import time as current_time
 from typing import Any, cast, Sequence, TypeGuard
+from zoneinfo import ZoneInfo
 
 import isodate
 import numpy as np
@@ -103,7 +104,7 @@ def get_results(query_object: QueryObject) -> QueryResult:
         raise ValueError("QueryObject must have a datasource defined.")
 
     # Track execution time
-    start_time = time()
+    start_time = current_time()
 
     semantic_view = query_object.datasource.implementation
     dispatcher = (
@@ -127,7 +128,7 @@ def get_results(query_object: QueryObject) -> QueryResult:
 
     # If no time offsets, return the main result as-is
     if not query_object.time_offsets or len(queries) <= 1:
-        duration = timedelta(seconds=time() - start_time)
+        duration = timedelta(seconds=current_time() - start_time)
         return map_semantic_result_to_query_result(
             main_result,
             query_object,
@@ -197,7 +198,7 @@ def get_results(query_object: QueryObject) -> QueryResult:
         requests=all_requests,
         results=pa.Table.from_pandas(main_df),
     )
-    duration = timedelta(seconds=time() - start_time)
+    duration = timedelta(seconds=current_time() - start_time)
     return map_semantic_result_to_query_result(
         semantic_result,
         query_object,
@@ -541,21 +542,29 @@ def _convert_query_object_filter(
     if operator_str == FilterOperator.TEMPORAL_RANGE.value:
         if not isinstance(value, str) or value == NO_TIME_RANGE:
             return None
-        start, end = value.split(" : ")
-        return {
-            Filter(
-                type=PredicateType.WHERE,
-                column=dimension,
-                operator=Operator.GREATER_THAN_OR_EQUAL,
-                value=start,
-            ),
-            Filter(
-                type=PredicateType.WHERE,
-                column=dimension,
-                operator=Operator.LESS_THAN,
-                value=end,
-            ),
-        }
+        start, end = (side.strip() for side in value.split(" : "))
+        filters: set[Filter] = set()
+        if start:
+            filters.add(
+                Filter(
+                    type=PredicateType.WHERE,
+                    column=dimension,
+                    operator=Operator.GREATER_THAN_OR_EQUAL,
+                    value=_coerce_scalar_filter_value(start, dimension),
+                )
+            )
+        if end:
+            filters.add(
+                Filter(
+                    type=PredicateType.WHERE,
+                    column=dimension,
+                    operator=Operator.LESS_THAN,
+                    value=_coerce_scalar_filter_value(end, dimension),
+                )
+            )
+        return filters or None
+
+    value = _coerce_filter_value(value, dimension)
 
     # Map QueryObject operators to semantic layer operators
     operator_mapping = {
@@ -588,6 +597,149 @@ def _convert_query_object_filter(
     }
 
 
+def _coerce_filter_value(
+    value: FilterValues | frozenset[FilterValues],
+    dimension: Dimension,
+) -> FilterValues | frozenset[FilterValues]:
+    if isinstance(value, frozenset):
+        return frozenset(_coerce_scalar_filter_value(v, dimension) for v in 
value)
+    return _coerce_scalar_filter_value(value, dimension)
+
+
+def _timestamp_target_tz(dtype: pa.DataType) -> tzinfo | None:
+    tz_name = getattr(dtype, "tz", None)
+    return ZoneInfo(tz_name) if tz_name else None
+
+
+def _align_tz(dt: datetime, target_tz: tzinfo | None) -> datetime:
+    if target_tz is None:
+        return dt
+    if dt.tzinfo is None:
+        return dt.replace(tzinfo=target_tz)
+    return dt.astimezone(target_tz)
+
+
+def _coerce_scalar_filter_value(  # noqa: C901 — type dispatch, complexity is 
inherent
+    value: FilterValues, dimension: Dimension
+) -> FilterValues:
+    if value is None:
+        return None
+
+    dtype = dimension.type
+
+    if pa.types.is_boolean(dtype):
+        if isinstance(value, bool):
+            return value
+        if isinstance(value, (int, float)) and value in (0, 1):
+            return bool(value)
+        if isinstance(value, str):
+            parsed = value.strip().lower()
+            if parsed in {"true", "t", "1", "yes", "y", "on"}:
+                return True
+            if parsed in {"false", "f", "0", "no", "n", "off"}:
+                return False
+        raise ValueError(
+            f"Invalid boolean value {value!r} for filter column 
{dimension.name}"
+        )
+
+    if pa.types.is_integer(dtype):
+        if isinstance(value, bool):
+            raise ValueError(
+                f"Invalid integer value {value!r} for filter column 
{dimension.name}"
+            )
+        if isinstance(value, int):
+            return value
+        if isinstance(value, float) and value.is_integer():
+            return int(value)
+        if isinstance(value, str):
+            try:
+                return int(value.strip())
+            except ValueError as ex:
+                raise ValueError(
+                    f"Invalid integer value {value!r} for filter column "
+                    f"{dimension.name}"
+                ) from ex
+        raise ValueError(
+            f"Invalid integer value {value!r} for filter column 
{dimension.name}"
+        )
+
+    if pa.types.is_floating(dtype) or pa.types.is_decimal(dtype):
+        # Decimal dimensions are coerced through ``float`` because 
``FilterValues``
+        # does not include ``Decimal``. That is lossless for the common case
+        # (≤ ~15 significant digits) and matches how downstream semantic-view
+        # implementations consume numeric filters; high-precision decimals 
would
+        # need a wider ``FilterValues`` union and propagation through the 
cache's
+        # comparability checks.
+        if isinstance(value, bool):
+            raise ValueError(
+                f"Invalid numeric value {value!r} for filter column 
{dimension.name}"
+            )
+        if isinstance(value, (int, float)):
+            return float(value)
+        if isinstance(value, str):
+            try:
+                return float(value.strip())
+            except ValueError as ex:
+                raise ValueError(
+                    f"Invalid numeric value {value!r} for filter column "
+                    f"{dimension.name}"
+                ) from ex
+        raise ValueError(
+            f"Invalid numeric value {value!r} for filter column 
{dimension.name}"
+        )
+
+    if pa.types.is_date(dtype):
+        if isinstance(value, datetime):
+            return value.date()
+        if isinstance(value, date):
+            return value
+        if isinstance(value, str):
+            try:
+                return datetime.fromisoformat(value.strip()).date()
+            except ValueError as ex:
+                raise ValueError(
+                    f"Invalid date value {value!r} for filter column 
{dimension.name}"
+                ) from ex
+        raise ValueError(
+            f"Invalid date value {value!r} for filter column {dimension.name}"
+        )
+
+    if pa.types.is_timestamp(dtype):
+        target_tz = _timestamp_target_tz(dtype)
+        if isinstance(value, datetime):
+            return _align_tz(value, target_tz)
+        if isinstance(value, date):
+            return _align_tz(datetime.combine(value, time.min), target_tz)
+        if isinstance(value, str):
+            normalized = value.strip().replace("Z", "+00:00")
+            try:
+                return _align_tz(datetime.fromisoformat(normalized), target_tz)
+            except ValueError as ex:
+                raise ValueError(
+                    f"Invalid timestamp value {value!r} for filter column "
+                    f"{dimension.name}"
+                ) from ex
+        raise ValueError(
+            f"Invalid timestamp value {value!r} for filter column 
{dimension.name}"
+        )
+
+    if pa.types.is_time(dtype):
+        if isinstance(value, time):
+            return value
+        if isinstance(value, str):
+            try:
+                return time.fromisoformat(value.strip())
+            except ValueError as ex:
+                raise ValueError(
+                    f"Invalid time value {value!r} for filter column 
{dimension.name}"
+                ) from ex
+        raise ValueError(
+            f"Invalid time value {value!r} for filter column {dimension.name}"
+        )
+
+    return value
+
+
 def _get_order_from_query_object(
     query_object: ValidatedQueryObject,
     all_metrics: dict[str, Metric],
diff --git a/tests/unit_tests/semantic_layers/mapper_test.py 
b/tests/unit_tests/semantic_layers/mapper_test.py
index 7abdce91a6e..0cbb59c8430 100644
--- a/tests/unit_tests/semantic_layers/mapper_test.py
+++ b/tests/unit_tests/semantic_layers/mapper_test.py
@@ -15,8 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from datetime import datetime
+from datetime import date, datetime, time, timezone
+from typing import Any
 from unittest.mock import MagicMock
+from zoneinfo import ZoneInfo
 
 import pandas as pd
 import pyarrow as pa
@@ -40,6 +42,7 @@ from superset_core.semantic_layers.types import (
 from superset_core.semantic_layers.view import SemanticViewFeature
 
 from superset.semantic_layers.mapper import (
+    _coerce_scalar_filter_value,
     _convert_query_object_filter,
     _convert_time_grain,
     _get_filters_from_extras,
@@ -1204,6 +1207,91 @@ def test_convert_query_object_filter_like() -> None:
     }
 
 
+def test_convert_query_object_filter_coerces_integer_string_value() -> None:
+    """Test scalar filter values are coerced to dimension type."""
+    all_dimensions = {
+        "birthyear": Dimension(
+            "birthyear",
+            "birthyear",
+            pa.int64(),
+            "birthyear",
+            "Birthyear",
+        )
+    }
+
+    filter_: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.GREATER_THAN_OR_EQUALS.value,
+        "col": "birthyear",
+        "val": "1982",
+    }
+
+    result = _convert_query_object_filter(filter_, all_dimensions)
+
+    assert result == {
+        Filter(
+            type=PredicateType.WHERE,
+            column=all_dimensions["birthyear"],
+            operator=Operator.GREATER_THAN_OR_EQUAL,
+            value=1982,
+        )
+    }
+
+
+def test_convert_query_object_filter_coerces_in_integer_values() -> None:
+    """Test IN filter list values are coerced element-wise."""
+    all_dimensions = {
+        "order_id__amount": Dimension(
+            "order_id__amount",
+            "order_id__amount",
+            pa.int64(),
+            "order_id__amount",
+            "Order amount",
+        )
+    }
+
+    filter_: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.IN.value,
+        "col": "order_id__amount",
+        "val": ["58", "61"],
+    }
+
+    result = _convert_query_object_filter(filter_, all_dimensions)
+
+    assert result == {
+        Filter(
+            type=PredicateType.WHERE,
+            column=all_dimensions["order_id__amount"],
+            operator=Operator.IN,
+            value=frozenset({58, 61}),
+        )
+    }
+
+
+def test_convert_query_object_filter_invalid_integer_value_raises() -> None:
+    """Test invalid integer value raises a clear error."""
+    all_dimensions = {
+        "birthyear": Dimension(
+            "birthyear",
+            "birthyear",
+            pa.int64(),
+            "birthyear",
+            "Birthyear",
+        )
+    }
+
+    filter_: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.GREATER_THAN_OR_EQUALS.value,
+        "col": "birthyear",
+        "val": "nineteen-eighty-two",
+    }
+
+    with pytest.raises(
+        ValueError,
+        match="Invalid integer value 'nineteen-eighty-two' for filter column 
birthyear",
+    ):
+        _convert_query_object_filter(filter_, all_dimensions)
+
+
 def test_get_results_without_time_offsets(
     mock_datasource: MagicMock,
     mocker: MockerFixture,
@@ -1923,6 +2011,86 @@ def 
test_convert_query_object_filter_temporal_range_with_value() -> None:
     }
 
 
+def test_convert_query_object_filter_temporal_range_coerces_date_bounds() -> 
None:
+    """
+    TEMPORAL_RANGE bounds should be coerced against the dimension's dtype so
+    date/timestamp columns are not compared against raw strings.
+    """
+    all_dimensions = {
+        "order_date": Dimension(
+            "order_date", "order_date", pa.date32(), "order_date", "Order date"
+        )
+    }
+    filter_: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.TEMPORAL_RANGE.value,
+        "col": "order_date",
+        "val": "2025-01-01 : 2025-12-31",
+    }
+
+    result = _convert_query_object_filter(filter_, all_dimensions)
+
+    assert result == {
+        Filter(
+            type=PredicateType.WHERE,
+            column=all_dimensions["order_date"],
+            operator=Operator.GREATER_THAN_OR_EQUAL,
+            value=date(2025, 1, 1),
+        ),
+        Filter(
+            type=PredicateType.WHERE,
+            column=all_dimensions["order_date"],
+            operator=Operator.LESS_THAN,
+            value=date(2025, 12, 31),
+        ),
+    }
+
+
+def test_convert_query_object_filter_temporal_range_open_ended() -> None:
+    """
+    Open-ended TEMPORAL_RANGE bounds should emit only the bounded predicate.
+    """
+    all_dimensions = {
+        "order_date": Dimension(
+            "order_date", "order_date", pa.date32(), "order_date", "Order date"
+        )
+    }
+
+    only_start: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.TEMPORAL_RANGE.value,
+        "col": "order_date",
+        "val": "2025-01-01 : ",
+    }
+    assert _convert_query_object_filter(only_start, all_dimensions) == {
+        Filter(
+            type=PredicateType.WHERE,
+            column=all_dimensions["order_date"],
+            operator=Operator.GREATER_THAN_OR_EQUAL,
+            value=date(2025, 1, 1),
+        ),
+    }
+
+    only_end: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.TEMPORAL_RANGE.value,
+        "col": "order_date",
+        "val": " : 2025-12-31",
+    }
+    assert _convert_query_object_filter(only_end, all_dimensions) == {
+        Filter(
+            type=PredicateType.WHERE,
+            column=all_dimensions["order_date"],
+            operator=Operator.LESS_THAN,
+            value=date(2025, 12, 31),
+        ),
+    }
+
+    empty: ValidatedQueryObjectFilterClause = {
+        "op": FilterOperator.TEMPORAL_RANGE.value,
+        "col": "order_date",
+        "val": " : ",
+    }
+    assert _convert_query_object_filter(empty, all_dimensions) is None
+
+
 def test_get_order_adhoc_with_none_sql_expression(mock_datasource: MagicMock) 
-> None:
     """
     Test order extraction skips adhoc expression with None sqlExpression.
@@ -2741,3 +2909,205 @@ def test_get_group_limit_filters_no_granularity(
 
     # Should return None - no granularity means no time filters added
     assert result is None
+
+
+# ---------------------------------------------------------------------------
+# _coerce_scalar_filter_value: per-dtype branches
+# ---------------------------------------------------------------------------
+
+
+def _dim(dtype: pa.DataType, name: str = "d") -> Dimension:
+    return Dimension(name, name, dtype, name, name.capitalize())
+
+
+def test_coerce_none_returns_none() -> None:
+    assert _coerce_scalar_filter_value(None, _dim(pa.int64())) is None
+
+
+def test_coerce_unsupported_dtype_passes_through() -> None:
+    # utf8 (and any dtype not branched in the function) returns the value 
as-is.
+    assert _coerce_scalar_filter_value("abc", _dim(pa.utf8())) == "abc"
+
+
[email protected](
+    "raw,expected",
+    [
+        (True, True),
+        (False, False),
+        (1, True),
+        (0, False),
+        (1.0, True),
+        (0.0, False),
+        ("true", True),
+        ("T", True),
+        (" 1 ", True),
+        ("yes", True),
+        ("Y", True),
+        ("on", True),
+        ("false", False),
+        ("F", False),
+        ("0", False),
+        ("no", False),
+        ("N", False),
+        ("off", False),
+    ],
+)
+def test_coerce_boolean(raw: Any, expected: bool) -> None:
+    assert _coerce_scalar_filter_value(raw, _dim(pa.bool_())) is expected
+
+
[email protected]("raw", ["maybe", 2, 0.5, -1])
+def test_coerce_boolean_invalid_raises(raw: Any) -> None:
+    with pytest.raises(ValueError, match="Invalid boolean value"):
+        _coerce_scalar_filter_value(raw, _dim(pa.bool_()))
+
+
+def test_coerce_integer_passthrough() -> None:
+    assert _coerce_scalar_filter_value(42, _dim(pa.int64())) == 42
+
+
+def test_coerce_integer_accepts_integer_valued_float() -> None:
+    # JSON round-trips can turn an int into ``42.0``; accept losslessly.
+    assert _coerce_scalar_filter_value(42.0, _dim(pa.int64())) == 42
+
+
+def test_coerce_integer_rejects_bool() -> None:
+    # bool is a subclass of int; we explicitly reject it.
+    with pytest.raises(ValueError, match="Invalid integer value"):
+        _coerce_scalar_filter_value(True, _dim(pa.int64()))
+
+
+def test_coerce_integer_rejects_non_integer_float() -> None:
+    with pytest.raises(ValueError, match="Invalid integer value"):
+        _coerce_scalar_filter_value(1.5, _dim(pa.int64()))
+
+
+def test_coerce_integer_rejects_other_types() -> None:
+    with pytest.raises(ValueError, match="Invalid integer value"):
+        _coerce_scalar_filter_value([1], _dim(pa.int64()))
+
+
[email protected](
+    "dtype",
+    [pa.float64(), pa.decimal128(10, 2)],
+)
+def test_coerce_floating_or_decimal(dtype: pa.DataType) -> None:
+    assert _coerce_scalar_filter_value(1, _dim(dtype)) == 1.0
+    assert _coerce_scalar_filter_value(1.5, _dim(dtype)) == 1.5
+    assert _coerce_scalar_filter_value(" 2.5 ", _dim(dtype)) == 2.5
+
+
+def test_coerce_floating_rejects_bool() -> None:
+    with pytest.raises(ValueError, match="Invalid numeric value"):
+        _coerce_scalar_filter_value(True, _dim(pa.float64()))
+
+
+def test_coerce_floating_invalid_string_raises() -> None:
+    with pytest.raises(ValueError, match="Invalid numeric value"):
+        _coerce_scalar_filter_value("not-a-number", _dim(pa.float64()))
+
+
+def test_coerce_floating_rejects_other_types() -> None:
+    with pytest.raises(ValueError, match="Invalid numeric value"):
+        _coerce_scalar_filter_value([1.0], _dim(pa.float64()))
+
+
+def test_coerce_date_from_datetime() -> None:
+    out = _coerce_scalar_filter_value(datetime(2025, 1, 2, 12, 0), 
_dim(pa.date32()))
+    assert out == date(2025, 1, 2)
+
+
+def test_coerce_date_passthrough() -> None:
+    out = _coerce_scalar_filter_value(date(2025, 1, 2), _dim(pa.date32()))
+    assert out == date(2025, 1, 2)
+
+
+def test_coerce_date_from_iso_string() -> None:
+    out = _coerce_scalar_filter_value(" 2025-01-02 ", _dim(pa.date32()))
+    assert out == date(2025, 1, 2)
+
+
+def test_coerce_date_invalid_string_raises() -> None:
+    with pytest.raises(ValueError, match="Invalid date value"):
+        _coerce_scalar_filter_value("not-a-date", _dim(pa.date32()))
+
+
+def test_coerce_date_rejects_other_types() -> None:
+    with pytest.raises(ValueError, match="Invalid date value"):
+        _coerce_scalar_filter_value(20250102, _dim(pa.date32()))
+
+
+def test_coerce_timestamp_from_datetime_passthrough() -> None:
+    dt = datetime(2025, 1, 2, 3, 4, 5)
+    # Naive dtype: returned as-is, still naive.
+    assert _coerce_scalar_filter_value(dt, _dim(pa.timestamp("us"))) == dt
+
+
+def test_coerce_timestamp_from_date() -> None:
+    out = _coerce_scalar_filter_value(date(2025, 1, 2), 
_dim(pa.timestamp("us")))
+    assert out == datetime(2025, 1, 2, 0, 0)
+
+
+def test_coerce_timestamp_from_iso_string_with_z() -> None:
+    out = _coerce_scalar_filter_value("2025-01-02T03:04:05Z", 
_dim(pa.timestamp("us")))
+    assert out == datetime.fromisoformat("2025-01-02T03:04:05+00:00")
+
+
+def test_coerce_timestamp_invalid_string_raises() -> None:
+    with pytest.raises(ValueError, match="Invalid timestamp value"):
+        _coerce_scalar_filter_value("not-a-ts", _dim(pa.timestamp("us")))
+
+
+def test_coerce_timestamp_rejects_other_types() -> None:
+    with pytest.raises(ValueError, match="Invalid timestamp value"):
+        _coerce_scalar_filter_value(1234567890, _dim(pa.timestamp("us")))
+
+
+def test_coerce_timestamp_tz_aware_dtype_attaches_tz_to_naive_datetime() -> 
None:
+    dt = datetime(2025, 1, 2, 3, 4, 5)
+    out = _coerce_scalar_filter_value(dt, _dim(pa.timestamp("us", tz="UTC")))
+    assert out == datetime(2025, 1, 2, 3, 4, 5, tzinfo=ZoneInfo("UTC"))
+
+
+def test_coerce_timestamp_tz_aware_dtype_converts_aware_datetime() -> None:
+    dt = datetime(2025, 1, 2, 12, 0, tzinfo=timezone.utc)
+    out = _coerce_scalar_filter_value(
+        dt, _dim(pa.timestamp("us", tz="America/New_York"))
+    )
+    # 12:00 UTC == 07:00 in New York
+    assert out == datetime(2025, 1, 2, 7, 0, 
tzinfo=ZoneInfo("America/New_York"))
+
+
+def test_coerce_timestamp_tz_aware_dtype_attaches_tz_to_date() -> None:
+    out = _coerce_scalar_filter_value(
+        date(2025, 1, 2), _dim(pa.timestamp("us", tz="UTC"))
+    )
+    assert out == datetime(2025, 1, 2, 0, 0, tzinfo=ZoneInfo("UTC"))
+
+
+def test_coerce_timestamp_tz_aware_dtype_parses_string_with_tz() -> None:
+    out = _coerce_scalar_filter_value(
+        "2025-01-02T03:04:05", _dim(pa.timestamp("us", tz="UTC"))
+    )
+    # Naive string gets UTC attached.
+    assert out == datetime(2025, 1, 2, 3, 4, 5, tzinfo=ZoneInfo("UTC"))
+
+
+def test_coerce_time_passthrough() -> None:
+    out = _coerce_scalar_filter_value(time(3, 4, 5), _dim(pa.time64("us")))
+    assert out == time(3, 4, 5)
+
+
+def test_coerce_time_from_iso_string() -> None:
+    out = _coerce_scalar_filter_value(" 03:04:05 ", _dim(pa.time64("us")))
+    assert out == time(3, 4, 5)
+
+
+def test_coerce_time_invalid_string_raises() -> None:
+    with pytest.raises(ValueError, match="Invalid time value"):
+        _coerce_scalar_filter_value("not-a-time", _dim(pa.time64("us")))
+
+
+def test_coerce_time_rejects_other_types() -> None:
+    with pytest.raises(ValueError, match="Invalid time value"):
+        _coerce_scalar_filter_value(123, _dim(pa.time64("us")))

Reply via email to