This is an automated email from the ASF dual-hosted git repository. rusackas pushed a commit to branch pr-38174 in repository https://gitbox.apache.org/repos/asf/superset.git
commit 925e20bb046d96975c21ca61dcb433c2bf57c821 Author: Claude Code <[email protected]> AuthorDate: Wed Mar 11 14:00:48 2026 -0700 fix(odps): address review feedback - security, recursion, typing, tests - Move security check before ODPS partition detection (auth before backend calls) - Wrap is_odps_partitioned_table in try/except with warning log and fallback - Replace OdpsBaseEngineSpec.get_table_metadata body with NotImplementedError - Fix select_star signature: engine: Engine -> dialect: Dialect (matches base) - Update Optional[X] -> X | None for modern Python typing - Remove broken __eq__ that violated frozen dataclass hash contract - Fix Partition docstring typos and __str__ description - Add warning log when ODPS URI does not match expected pattern - Add tests/unit_tests/db_engine_specs/test_odps.py with 7 unit tests Co-Authored-By: Claude Sonnet 4.6 <[email protected]> --- superset/daos/database.py | 5 + superset/databases/api.py | 15 ++- superset/db_engine_specs/odps.py | 20 +-- superset/sql/parse.py | 9 +- tests/unit_tests/db_engine_specs/test_odps.py | 184 ++++++++++++++++++++++++++ 5 files changed, 214 insertions(+), 19 deletions(-) diff --git a/superset/daos/database.py b/superset/daos/database.py index 3d9fb55c1b2..229a61bfd4a 100644 --- a/superset/daos/database.py +++ b/superset/daos/database.py @@ -286,6 +286,11 @@ class DatabaseDAO(BaseDAO[Database]): partition_fields = [partition.name for partition in partition_spec] return True, partition_fields return False, [] + logger.warning( + "ODPS sqlalchemy_uri did not match the expected pattern; " + "unable to determine partition info for table %r", + table_name, + ) return False, [] diff --git a/superset/databases/api.py b/superset/databases/api.py index a56ddd94cda..929e27734d9 100644 --- a/superset/databases/api.py +++ b/superset/databases/api.py @@ -1081,14 +1081,23 @@ class DatabaseRestApi(BaseSupersetModelRestApi): raise InvalidPayloadSchemaError(ex) from ex table_name = str(parameters["name"]) table = Table(table_name, parameters["schema"], parameters["catalog"]) - is_partitioned_table, partition_fields = DatabaseDAO.is_odps_partitioned_table( - database, table_name - ) try: security_manager.raise_for_access(database=database, table=table) except SupersetSecurityException as ex: # instead of raising 403, raise 404 to hide table existence raise TableNotFoundException("No such table") from ex + try: + is_partitioned_table, partition_fields = ( + DatabaseDAO.is_odps_partitioned_table(database, table_name) + ) + except Exception as ex: # pylint: disable=broad-except + logger.warning( + "Error determining ODPS partition info for table %s: %s; " + "falling back to non-partitioned path", + table_name, + error_msg_from_exception(ex), + ) + is_partitioned_table, partition_fields = False, [] partition = Partition(is_partitioned_table, partition_fields) if is_partitioned_table: from superset.db_engine_specs.odps import OdpsEngineSpec diff --git a/superset/db_engine_specs/odps.py b/superset/db_engine_specs/odps.py index 6a6abe0cc58..8fd5ae2e670 100644 --- a/superset/db_engine_specs/odps.py +++ b/superset/db_engine_specs/odps.py @@ -17,10 +17,10 @@ from __future__ import annotations import logging -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, TYPE_CHECKING from sqlalchemy import select, text -from sqlalchemy.engine.base import Engine +from sqlalchemy.engine import Dialect from superset.databases.schemas import ( TableMetadataColumnsResponse, @@ -47,7 +47,7 @@ class OdpsBaseEngineSpec(BaseEngineSpec): cls, database: Database, table: Table, - partition: Optional[Partition] = None, + partition: Partition | None = None, ) -> TableMetadataResponse: """ Returns basic table metadata @@ -56,7 +56,7 @@ class OdpsBaseEngineSpec(BaseEngineSpec): :param partition: A Table partition info :return: Basic table metadata """ - return cls.get_table_metadata(database, table, partition) + raise NotImplementedError class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): @@ -66,7 +66,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): @classmethod def get_table_metadata( - cls, database: Any, table: Table, partition: Optional[Partition] = None + cls, database: Any, table: Table, partition: Partition | None = None ) -> TableMetadataResponse: """ Get table metadata information, including type, pk, fks. @@ -112,7 +112,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): "selectStar": cls.select_star( database=database, table=table, - engine=engine, + dialect=engine.dialect, limit=100, show_cols=False, indent=True, @@ -131,13 +131,13 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): cls, database: Database, table: Table, - engine: Engine, + dialect: Dialect, limit: int = 100, show_cols: bool = False, indent: bool = True, latest_partition: bool = True, cols: list[ResultSetColumnType] | None = None, - partition: Optional[Partition] = None, + partition: Partition | None = None, ) -> str: """ Generate a "SELECT * from [schema.]table_name" query with appropriate limit. @@ -147,7 +147,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): :param partition: The table's partition info :param database: Database instance :param table: Table instance - :param engine: SqlAlchemy Engine instance + :param dialect: SqlAlchemy Dialect instance :param limit: limit to impose on query :param show_cols: Show columns in query; otherwise use "*" :param indent: Add indentation to query @@ -163,7 +163,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): if show_cols: fields = cls._get_fields(cols) - full_table_name = cls.quote_table(table, engine.dialect) + full_table_name = cls.quote_table(table, dialect) qry = select(fields).select_from(text(full_table_name)) if database.backend == "odps": if ( diff --git a/superset/sql/parse.py b/superset/sql/parse.py index fb3f7c5f113..13f837ff2c5 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -326,10 +326,10 @@ class Table: class Partition: """ Partition object, with two attribute keys: - ispartitioned_table and partition_comlumn, + is_partitioned_table and partition_column, used to provide partition information Here is an example of an object: - {"ispartitioned_table":true,"partition_column":["month","day"]} + {"is_partitioned_table": true, "partition_column": ["month", "day"]} """ is_partitioned_table: bool @@ -337,7 +337,7 @@ class Partition: def __str__(self) -> str: """ - Return the partition columns of table name. + Return a string representation of the Partition object. """ partition_column_str = ( ", ".join(map(str, self.partition_column)) @@ -349,9 +349,6 @@ class Partition: f"partition_column=[{partition_column_str}])" ) - def __eq__(self, other: Any) -> bool: - return str(self) == str(other) - # To avoid unnecessary parsing/formatting of queries, the statement has the concept of # an "internal representation", which is the AST of the SQL statement. For most of the diff --git a/tests/unit_tests/db_engine_specs/test_odps.py b/tests/unit_tests/db_engine_specs/test_odps.py new file mode 100644 index 00000000000..b3eafc91163 --- /dev/null +++ b/tests/unit_tests/db_engine_specs/test_odps.py @@ -0,0 +1,184 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from unittest.mock import MagicMock, patch + +import pytest + +from superset.db_engine_specs.odps import OdpsBaseEngineSpec, OdpsEngineSpec +from superset.sql.parse import Partition, Table + + +def test_odps_base_engine_spec_get_table_metadata_raises() -> None: + """OdpsBaseEngineSpec.get_table_metadata must not be called directly.""" + with pytest.raises(NotImplementedError): + OdpsBaseEngineSpec.get_table_metadata( + database=MagicMock(), + table=Table("my_table", None, None), + ) + + +def test_odps_engine_spec_select_star_no_partition() -> None: + """select_star for a non-partitioned ODPS table produces a plain SELECT *.""" + database = MagicMock() + database.backend = "odps" + database.get_columns.return_value = [] + database.compile_sqla_query.return_value = "SELECT * FROM my_table LIMIT 100" + dialect = MagicMock() + + sql = OdpsEngineSpec.select_star( + database=database, + table=Table("my_table", None, None), + dialect=dialect, + limit=100, + show_cols=False, + indent=False, + latest_partition=False, + partition=None, + ) + + assert "SELECT" in sql + assert "my_table" in sql + + +def test_odps_engine_spec_select_star_with_partition() -> None: + """select_star for a partitioned ODPS table adds a WHERE clause.""" + database = MagicMock() + database.backend = "odps" + database.get_columns.return_value = [] + database.compile_sqla_query.return_value = ( + "SELECT * FROM my_table WHERE CAST(month AS STRING) LIKE '%' LIMIT 100" + ) + dialect = MagicMock() + partition = Partition(is_partitioned_table=True, partition_column=["month"]) + + sql = OdpsEngineSpec.select_star( + database=database, + table=Table("my_table", None, None), + dialect=dialect, + limit=100, + show_cols=False, + indent=False, + latest_partition=False, + partition=partition, + ) + + assert "WHERE" in sql + + +def test_is_odps_partitioned_table_non_odps_backend() -> None: + """Returns (False, []) immediately for non-ODPS databases; no network call made.""" + from superset.daos.database import DatabaseDAO + + database = MagicMock() + database.backend = "postgresql" + + result = DatabaseDAO.is_odps_partitioned_table(database, "some_table") + + assert result == (False, []) + + +def test_is_odps_partitioned_table_missing_pyodps() -> None: + """Returns (False, []) with a warning when pyodps is not installed.""" + from superset.daos.database import DatabaseDAO + + database = MagicMock() + database.backend = "odps" + database.sqlalchemy_uri = ( + "odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test" + ) + database.password = "mysecret" # noqa: S105 + + with patch.dict("sys.modules", {"odps": None}): + result = DatabaseDAO.is_odps_partitioned_table(database, "some_table") + + assert result == (False, []) + + +def test_is_odps_partitioned_table_uri_no_match( + caplog: pytest.LogCaptureFixture, +) -> None: + """Logs a warning and returns (False, []) when the URI doesn't match the pattern.""" + from superset.daos.database import DatabaseDAO + + database = MagicMock() + database.backend = "odps" + database.sqlalchemy_uri = "odps://invalid-uri-format" + database.password = "secret" # noqa: S105 + + with patch("superset.daos.database.DatabaseDAO.is_odps_partitioned_table.__func__"): + pass + + import logging + + with caplog.at_level(logging.WARNING, logger="superset.daos.database"): + result = DatabaseDAO.is_odps_partitioned_table(database, "some_table") + + assert result == (False, []) + assert "did not match" in caplog.text + + +def test_is_odps_partitioned_table_partitioned(monkeypatch: pytest.MonkeyPatch) -> None: + """Returns (True, [field_names]) for a partitioned ODPS table.""" + from superset.daos.database import DatabaseDAO + + database = MagicMock() + database.backend = "odps" + database.sqlalchemy_uri = ( + "odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test" + ) + database.password = "mysecret" # noqa: S105 + + mock_partition = MagicMock() + mock_partition.name = "month" + mock_table = MagicMock() + mock_table.exist_partition = True + mock_table.table_schema.partitions = [mock_partition] + + mock_odps_client = MagicMock() + mock_odps_client.get_table.return_value = mock_table + mock_odps_class = MagicMock(return_value=mock_odps_client) + + with patch.dict("sys.modules", {"odps": MagicMock(ODPS=mock_odps_class)}): + with patch("superset.daos.database.ODPS", mock_odps_class, create=True): + result = DatabaseDAO.is_odps_partitioned_table(database, "my_table") + + assert result == (True, ["month"]) + + +def test_is_odps_partitioned_table_not_partitioned( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Returns (False, []) for a non-partitioned ODPS table.""" + from superset.daos.database import DatabaseDAO + + database = MagicMock() + database.backend = "odps" + database.sqlalchemy_uri = ( + "odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test" + ) + database.password = "mysecret" # noqa: S105 + + mock_table = MagicMock() + mock_table.exist_partition = False + mock_odps_client = MagicMock() + mock_odps_client.get_table.return_value = mock_table + mock_odps_class = MagicMock(return_value=mock_odps_client) + + with patch("superset.daos.database.ODPS", mock_odps_class, create=True): + result = DatabaseDAO.is_odps_partitioned_table(database, "my_table") + + assert result == (False, [])
