korbit-ai[bot] commented on code in PR #32556: URL: https://github.com/apache/superset/pull/32556#discussion_r1986517126
########## superset/db_engine_specs/odps.py: ########## @@ -0,0 +1,192 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import Any, Optional, TYPE_CHECKING + +from sqlalchemy import select, text +from sqlalchemy.engine.base import Engine + +from superset.databases.schemas import ( + TableMetadataColumnsResponse, + TableMetadataResponse, +) +from superset.databases.utils import ( + get_col_type, + get_foreign_keys_metadata, + get_indexes_metadata, +) +from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin +from superset.sql.parse import Partition, SQLScript +from superset.sql_parse import Table +from superset.superset_typing import ResultSetColumnType + +if TYPE_CHECKING: + from superset.models.core import Database + +logger = logging.getLogger(__name__) + + +class OdpsBaseEngineSpec(BaseEngineSpec): + @classmethod + def get_table_metadata( + cls, + database: Database, + table: Table, + partition: Optional[Partition] = None, + ) -> TableMetadataResponse: + """ + Returns basic table metadata + :param database: Database instance + :param table: A Table instance + :param partition: A Table partition info + :return: Basic table metadata + """ + return cls.get_table_metadata(database, table, partition) + + +class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): + default_driver = "odps" + + @classmethod + def get_table_metadata( + cls, database: Any, table: Table, partition: Optional[Partition] = None + ) -> TableMetadataResponse: + """ + Get table metadata information, including type, pk, fks. + This function raises SQLAlchemyError when a schema is not found. + + :param partition: The table's partition info + :param database: The database model + :param table: Table instance + :return: Dict table metadata ready for API response + """ + keys = [] + columns = database.get_columns(table) + primary_key = database.get_pk_constraint(table) + if primary_key and primary_key.get("constrained_columns"): + primary_key["column_names"] = primary_key.pop("constrained_columns") + primary_key["type"] = "pk" + keys += [primary_key] + foreign_keys = get_foreign_keys_metadata(database, table) + indexes = get_indexes_metadata(database, table) + keys += foreign_keys + indexes + payload_columns: list[TableMetadataColumnsResponse] = [] + table_comment = database.get_table_comment(table) + for col in columns: + dtype = get_col_type(col) + payload_columns.append( + { + "name": col["column_name"], + "type": dtype.split("(")[0] if "(" in dtype else dtype, + "longType": dtype, + "keys": [ + k for k in keys if col["column_name"] in k["column_names"] + ], + "comment": col.get("comment"), + } + ) + + with database.get_sqla_engine( + catalog=table.catalog, schema=table.schema + ) as engine: + return { + "name": table.table, + "columns": payload_columns, + "selectStar": cls.select_star( + database=database, + table=table, + engine=engine, + limit=100, + show_cols=False, + indent=True, + latest_partition=True, + cols=columns, + partition=partition, + ), + "primaryKey": primary_key, + "foreignKeys": foreign_keys, + "indexes": keys, + "comment": table_comment, + } + + @classmethod + def select_star( # pylint: disable=too-many-arguments + cls, + database: Database, + table: Table, + engine: Engine, + limit: int = 100, + show_cols: bool = False, + indent: bool = True, + latest_partition: bool = True, + cols: list[ResultSetColumnType] | None = None, + partition: Optional[Partition] = None, + ) -> str: + """ + Generate a "SELECT * from [schema.]table_name" query with appropriate limit. + + WARNING: expects only unquoted table and schema names. + + :param partition: The table's partition info + :param database: Database instance + :param table: Table instance + :param engine: SqlAlchemy Engine instance + :param limit: limit to impose on query + :param show_cols: Show columns in query; otherwise use "*" + :param indent: Add indentation to query + :param latest_partition: Only query the latest partition + :param cols: Columns to include in query + :return: SQL query + """ + # pylint: disable=redefined-outer-name + fields: str | list[Any] = "*" + cols = cols or [] + if (show_cols or latest_partition) and not cols: + cols = database.get_columns(table) Review Comment: ### Redundant Column Metadata Fetch <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Database column metadata is fetched redundantly in select_star() when cols are already available from the get_table_metadata() call ###### Why this matters This causes an unnecessary extra database round-trip to fetch the same column metadata that was already retrieved earlier in the call chain ###### Suggested change ∙ *Feature Preview* Pass the previously fetched columns from get_table_metadata() to select_star() instead of retrieving them again. Modify the code to: ```python cols = cols or columns # Use columns passed from get_table_metadata ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/2e6f02d0-9773-4c14-9cd5-0f13a1db6614?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:7c56619e-2b94-4a9e-b599-2058c9518235 --> ########## superset/databases/api.py: ########## @@ -1055,15 +1057,21 @@ def table_metadata(self, pk: int) -> FlaskResponse: parameters = QualifiedTableSchema().load(request.args) except ValidationError as ex: raise InvalidPayloadSchemaError(ex) from ex - - table = Table(parameters["name"], parameters["schema"], parameters["catalog"]) + table_name = str(parameters["name"]) + table = Table(table_name, parameters["schema"], parameters["catalog"]) + is_partitioned_table, partition_fields = DatabaseDAO.is_odps_partitioned_table( + database, table_name + ) try: security_manager.raise_for_access(database=database, table=table) except SupersetSecurityException as ex: # instead of raising 403, raise 404 to hide table existence raise TableNotFoundException("No such table") from ex - - payload = database.db_engine_spec.get_table_metadata(database, table) + partition = Partition(is_partitioned_table, partition_fields) + if is_partitioned_table: + payload = OdpsEngineSpec.get_table_metadata(database, table, partition) + else: + payload = database.db_engine_spec.get_table_metadata(database, table) Review Comment: ### Tight coupling with concrete engine implementation <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Direct usage of OdpsEngineSpec creates tight coupling and violates the Open-Closed Principle. The code specifically checks for a partition case and directly calls a concrete implementation. ###### Why this matters If new partition handling is needed for other database engines, the code will require modification. This makes the code harder to extend and maintain. ###### Suggested change ∙ *Feature Preview* Move partition handling logic into the engine spec base class: ```python payload = database.db_engine_spec.get_table_metadata(database, table, partition=partition) ``` And let each engine spec implementation handle partitions appropriately. </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/02f013e6-11d8-48d1-811b-1cf88a554771?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:966f872d-64f3-4090-bce5-0829e9205a9c --> ########## superset/daos/database.py: ########## @@ -166,6 +170,37 @@ def get_ssh_tunnel(cls, database_id: int) -> SSHTunnel | None: return ssh_tunnel + @classmethod + def is_odps_partitioned_table( + cls, database: Database, table_name: str + ) -> Tuple[bool, List[str]]: + """ + This function is used to determine and retrieve + partition information of the odsp table. + The return values are whether the partition + table is partitioned and the names of all partition fields. + """ + if not database: + raise ValueError("Database not found") + uri = database.sqlalchemy_uri + access_key = database.password + pattern = re.compile( + r"odps://(?P<username>[^:]+):(?P<password>[^@]+)@(?P<project>[^/]+)/(?:\?endpoint=(?P<endpoint>[^&]+))" + ) + if match := pattern.match(unquote(uri)): Review Comment: ### Overly Strict ODPS URI Pattern Matching <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? The regex pattern only matches ODPS URIs with a specific format, but does not handle variations in the URI structure that might include additional parameters or different orderings. ###### Why this matters This could cause the function to fail for valid ODPS URIs that have different parameter orderings or additional query parameters, leading to false negatives in partition detection. ###### Suggested change ∙ *Feature Preview* Modify the regex pattern to be more flexible and handle different URI formats: ```python pattern = re.compile( r'odps://(?P<username>[^:]+):(?P<password>[^@]+)@(?P<project>[^/?]+).*?[?&]endpoint=(?P<endpoint>[^&]+)' ) ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/eb1367b3-93b1-4494-8ddb-22e926e62814?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:a34aa7df-2ac7-4599-bb57-c6fbb397eb4a --> ########## superset/databases/api.py: ########## @@ -1055,15 +1057,21 @@ parameters = QualifiedTableSchema().load(request.args) except ValidationError as ex: raise InvalidPayloadSchemaError(ex) from ex - - table = Table(parameters["name"], parameters["schema"], parameters["catalog"]) + table_name = str(parameters["name"]) + table = Table(table_name, parameters["schema"], parameters["catalog"]) + is_partitioned_table, partition_fields = DatabaseDAO.is_odps_partitioned_table( + database, table_name + ) Review Comment: ### Database-specific logic in generic DAO <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? The method name suggests it's specific to ODPS but resides in the generic DatabaseDAO class ###### Why this matters Mixing database-specific logic in a generic DAO violates Single Responsibility Principle and makes the code less maintainable. ###### Suggested change ∙ *Feature Preview* Move database-specific logic to appropriate specialized classes: ```python class OdpsDAO(DatabaseDAO): def is_partitioned_table(self, database, table_name): # ODPS specific logic ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/c4f81959-3282-47a9-8a87-b16846be3d95?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:fb190ae4-139f-409b-95c1-42b084c15f84 --> ########## superset/db_engine_specs/odps.py: ########## @@ -0,0 +1,192 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import Any, Optional, TYPE_CHECKING + +from sqlalchemy import select, text +from sqlalchemy.engine.base import Engine + +from superset.databases.schemas import ( + TableMetadataColumnsResponse, + TableMetadataResponse, +) +from superset.databases.utils import ( + get_col_type, + get_foreign_keys_metadata, + get_indexes_metadata, +) +from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin +from superset.sql.parse import Partition, SQLScript +from superset.sql_parse import Table +from superset.superset_typing import ResultSetColumnType + +if TYPE_CHECKING: + from superset.models.core import Database + +logger = logging.getLogger(__name__) + + +class OdpsBaseEngineSpec(BaseEngineSpec): + @classmethod + def get_table_metadata( + cls, + database: Database, + table: Table, + partition: Optional[Partition] = None, + ) -> TableMetadataResponse: + """ + Returns basic table metadata + :param database: Database instance + :param table: A Table instance + :param partition: A Table partition info + :return: Basic table metadata + """ + return cls.get_table_metadata(database, table, partition) + + +class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): + default_driver = "odps" + + @classmethod + def get_table_metadata( + cls, database: Any, table: Table, partition: Optional[Partition] = None + ) -> TableMetadataResponse: + """ + Get table metadata information, including type, pk, fks. + This function raises SQLAlchemyError when a schema is not found. + + :param partition: The table's partition info + :param database: The database model + :param table: Table instance + :return: Dict table metadata ready for API response + """ + keys = [] + columns = database.get_columns(table) + primary_key = database.get_pk_constraint(table) + if primary_key and primary_key.get("constrained_columns"): + primary_key["column_names"] = primary_key.pop("constrained_columns") + primary_key["type"] = "pk" + keys += [primary_key] + foreign_keys = get_foreign_keys_metadata(database, table) + indexes = get_indexes_metadata(database, table) + keys += foreign_keys + indexes + payload_columns: list[TableMetadataColumnsResponse] = [] + table_comment = database.get_table_comment(table) + for col in columns: + dtype = get_col_type(col) + payload_columns.append( + { + "name": col["column_name"], + "type": dtype.split("(")[0] if "(" in dtype else dtype, + "longType": dtype, + "keys": [ + k for k in keys if col["column_name"] in k["column_names"] + ], + "comment": col.get("comment"), + } + ) + + with database.get_sqla_engine( + catalog=table.catalog, schema=table.schema + ) as engine: + return { + "name": table.table, + "columns": payload_columns, + "selectStar": cls.select_star( + database=database, + table=table, + engine=engine, + limit=100, + show_cols=False, + indent=True, + latest_partition=True, + cols=columns, + partition=partition, + ), + "primaryKey": primary_key, + "foreignKeys": foreign_keys, + "indexes": keys, + "comment": table_comment, + } + + @classmethod + def select_star( # pylint: disable=too-many-arguments + cls, + database: Database, + table: Table, + engine: Engine, + limit: int = 100, + show_cols: bool = False, + indent: bool = True, + latest_partition: bool = True, + cols: list[ResultSetColumnType] | None = None, + partition: Optional[Partition] = None, + ) -> str: + """ + Generate a "SELECT * from [schema.]table_name" query with appropriate limit. + + WARNING: expects only unquoted table and schema names. + + :param partition: The table's partition info + :param database: Database instance + :param table: Table instance + :param engine: SqlAlchemy Engine instance + :param limit: limit to impose on query + :param show_cols: Show columns in query; otherwise use "*" + :param indent: Add indentation to query + :param latest_partition: Only query the latest partition + :param cols: Columns to include in query + :return: SQL query + """ + # pylint: disable=redefined-outer-name + fields: str | list[Any] = "*" + cols = cols or [] + if (show_cols or latest_partition) and not cols: + cols = database.get_columns(table) + + if show_cols: + fields = cls._get_fields(cols) + print() + full_table_name = cls.quote_table(table, engine.dialect) + qry = select(fields).select_from(text(full_table_name)) + if database.backend == "odps": + if ( + partition is not None + and partition.is_partitioned_table + and partition.partition_column is not None + and len(partition.partition_column) > 0 + ): + partition_str = partition.partition_column[0] + partition_str_where = f"CAST({partition_str} AS STRING) LIKE '%'" Review Comment: ### Ineffective Partition Filtering <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? The partition filtering logic uses a LIKE '%' condition which will always be true, effectively not filtering the partitions at all. ###### Why this matters This defeats the purpose of partition filtering and may return more data than intended, potentially causing performance issues. ###### Suggested change ∙ *Feature Preview* Implement proper partition filtering logic that actually restricts the data based on the partition value: ```python partition_str = partition.partition_column[0] partition_value = partition.partition_values[0] if partition.partition_values else None if partition_value: partition_str_where = f"{partition_str} = '{partition_value}'" qry = qry.where(text(partition_str_where)) ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/26838762-5fa1-41e9-9343-3b771dfb1ddc?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:dd72d5f1-ac3e-4810-bad2-719bad7a70df --> ########## superset/db_engine_specs/odps.py: ########## @@ -0,0 +1,192 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import Any, Optional, TYPE_CHECKING + +from sqlalchemy import select, text +from sqlalchemy.engine.base import Engine + +from superset.databases.schemas import ( + TableMetadataColumnsResponse, + TableMetadataResponse, +) +from superset.databases.utils import ( + get_col_type, + get_foreign_keys_metadata, + get_indexes_metadata, +) +from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin +from superset.sql.parse import Partition, SQLScript +from superset.sql_parse import Table +from superset.superset_typing import ResultSetColumnType + +if TYPE_CHECKING: + from superset.models.core import Database + +logger = logging.getLogger(__name__) + + +class OdpsBaseEngineSpec(BaseEngineSpec): + @classmethod + def get_table_metadata( + cls, + database: Database, + table: Table, + partition: Optional[Partition] = None, + ) -> TableMetadataResponse: + """ + Returns basic table metadata + :param database: Database instance + :param table: A Table instance + :param partition: A Table partition info + :return: Basic table metadata + """ + return cls.get_table_metadata(database, table, partition) + + +class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): + default_driver = "odps" + + @classmethod + def get_table_metadata( + cls, database: Any, table: Table, partition: Optional[Partition] = None + ) -> TableMetadataResponse: + """ + Get table metadata information, including type, pk, fks. + This function raises SQLAlchemyError when a schema is not found. + + :param partition: The table's partition info + :param database: The database model + :param table: Table instance + :return: Dict table metadata ready for API response + """ + keys = [] + columns = database.get_columns(table) + primary_key = database.get_pk_constraint(table) + if primary_key and primary_key.get("constrained_columns"): + primary_key["column_names"] = primary_key.pop("constrained_columns") + primary_key["type"] = "pk" + keys += [primary_key] + foreign_keys = get_foreign_keys_metadata(database, table) + indexes = get_indexes_metadata(database, table) + keys += foreign_keys + indexes + payload_columns: list[TableMetadataColumnsResponse] = [] + table_comment = database.get_table_comment(table) + for col in columns: + dtype = get_col_type(col) + payload_columns.append( + { + "name": col["column_name"], + "type": dtype.split("(")[0] if "(" in dtype else dtype, + "longType": dtype, + "keys": [ + k for k in keys if col["column_name"] in k["column_names"] + ], + "comment": col.get("comment"), + } + ) + + with database.get_sqla_engine( + catalog=table.catalog, schema=table.schema + ) as engine: + return { + "name": table.table, + "columns": payload_columns, + "selectStar": cls.select_star( + database=database, + table=table, + engine=engine, + limit=100, + show_cols=False, + indent=True, + latest_partition=True, + cols=columns, + partition=partition, + ), + "primaryKey": primary_key, + "foreignKeys": foreign_keys, + "indexes": keys, + "comment": table_comment, + } + + @classmethod + def select_star( # pylint: disable=too-many-arguments + cls, + database: Database, + table: Table, + engine: Engine, + limit: int = 100, + show_cols: bool = False, + indent: bool = True, + latest_partition: bool = True, + cols: list[ResultSetColumnType] | None = None, + partition: Optional[Partition] = None, + ) -> str: + """ + Generate a "SELECT * from [schema.]table_name" query with appropriate limit. + + WARNING: expects only unquoted table and schema names. + + :param partition: The table's partition info + :param database: Database instance + :param table: Table instance + :param engine: SqlAlchemy Engine instance + :param limit: limit to impose on query + :param show_cols: Show columns in query; otherwise use "*" + :param indent: Add indentation to query + :param latest_partition: Only query the latest partition + :param cols: Columns to include in query + :return: SQL query + """ + # pylint: disable=redefined-outer-name + fields: str | list[Any] = "*" + cols = cols or [] + if (show_cols or latest_partition) and not cols: + cols = database.get_columns(table) + + if show_cols: + fields = cls._get_fields(cols) + print() Review Comment: ### Unnecessary Debug Print Statement <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? There is a stray print() statement in the select_star method that serves no purpose. ###### Why this matters This will unnecessarily print a blank line to stdout every time a query is generated, potentially cluttering logs and console output. ###### Suggested change ∙ *Feature Preview* Remove the print() statement: ```python full_table_name = cls.quote_table(table, engine.dialect) ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/1d876a0a-8509-4b9d-bcda-2a79b18ab53e?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:a23b8cfc-ed7a-44ae-96a5-3b53b9ea2fc0 --> ########## superset/db_engine_specs/odps.py: ########## @@ -0,0 +1,192 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import Any, Optional, TYPE_CHECKING + +from sqlalchemy import select, text +from sqlalchemy.engine.base import Engine + +from superset.databases.schemas import ( + TableMetadataColumnsResponse, + TableMetadataResponse, +) +from superset.databases.utils import ( + get_col_type, + get_foreign_keys_metadata, + get_indexes_metadata, +) +from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin +from superset.sql.parse import Partition, SQLScript +from superset.sql_parse import Table +from superset.superset_typing import ResultSetColumnType + +if TYPE_CHECKING: + from superset.models.core import Database + +logger = logging.getLogger(__name__) + + +class OdpsBaseEngineSpec(BaseEngineSpec): + @classmethod + def get_table_metadata( + cls, + database: Database, + table: Table, + partition: Optional[Partition] = None, + ) -> TableMetadataResponse: + """ + Returns basic table metadata + :param database: Database instance + :param table: A Table instance + :param partition: A Table partition info + :return: Basic table metadata + """ + return cls.get_table_metadata(database, table, partition) + + +class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec): + default_driver = "odps" + + @classmethod + def get_table_metadata( + cls, database: Any, table: Table, partition: Optional[Partition] = None + ) -> TableMetadataResponse: + """ + Get table metadata information, including type, pk, fks. + This function raises SQLAlchemyError when a schema is not found. + + :param partition: The table's partition info + :param database: The database model + :param table: Table instance + :return: Dict table metadata ready for API response + """ + keys = [] + columns = database.get_columns(table) + primary_key = database.get_pk_constraint(table) + if primary_key and primary_key.get("constrained_columns"): + primary_key["column_names"] = primary_key.pop("constrained_columns") + primary_key["type"] = "pk" + keys += [primary_key] + foreign_keys = get_foreign_keys_metadata(database, table) + indexes = get_indexes_metadata(database, table) + keys += foreign_keys + indexes + payload_columns: list[TableMetadataColumnsResponse] = [] + table_comment = database.get_table_comment(table) + for col in columns: + dtype = get_col_type(col) + payload_columns.append( + { + "name": col["column_name"], + "type": dtype.split("(")[0] if "(" in dtype else dtype, + "longType": dtype, + "keys": [ + k for k in keys if col["column_name"] in k["column_names"] + ], + "comment": col.get("comment"), + } + ) + + with database.get_sqla_engine( + catalog=table.catalog, schema=table.schema + ) as engine: + return { + "name": table.table, + "columns": payload_columns, + "selectStar": cls.select_star( + database=database, + table=table, + engine=engine, + limit=100, + show_cols=False, + indent=True, + latest_partition=True, + cols=columns, + partition=partition, + ), + "primaryKey": primary_key, + "foreignKeys": foreign_keys, + "indexes": keys, + "comment": table_comment, + } + + @classmethod + def select_star( # pylint: disable=too-many-arguments + cls, + database: Database, + table: Table, + engine: Engine, + limit: int = 100, + show_cols: bool = False, + indent: bool = True, + latest_partition: bool = True, + cols: list[ResultSetColumnType] | None = None, + partition: Optional[Partition] = None, + ) -> str: + """ + Generate a "SELECT * from [schema.]table_name" query with appropriate limit. + + WARNING: expects only unquoted table and schema names. + + :param partition: The table's partition info + :param database: Database instance + :param table: Table instance + :param engine: SqlAlchemy Engine instance + :param limit: limit to impose on query + :param show_cols: Show columns in query; otherwise use "*" + :param indent: Add indentation to query + :param latest_partition: Only query the latest partition + :param cols: Columns to include in query + :return: SQL query + """ + # pylint: disable=redefined-outer-name + fields: str | list[Any] = "*" + cols = cols or [] + if (show_cols or latest_partition) and not cols: + cols = database.get_columns(table) + + if show_cols: + fields = cls._get_fields(cols) + print() + full_table_name = cls.quote_table(table, engine.dialect) + qry = select(fields).select_from(text(full_table_name)) + if database.backend == "odps": + if ( + partition is not None + and partition.is_partitioned_table + and partition.partition_column is not None + and len(partition.partition_column) > 0 + ): + partition_str = partition.partition_column[0] + partition_str_where = f"CAST({partition_str} AS STRING) LIKE '%'" + qry = qry.where(text(partition_str_where)) Review Comment: ### SQL Injection Risk in Partition Query Construction <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Direct string interpolation of partition_str into an SQL query creates a potential SQL injection vulnerability. ###### Why this matters If partition_str contains malicious SQL code, it could be executed since the value is directly interpolated into the query string without proper escaping or parameterization. ###### Suggested change ∙ *Feature Preview* Use SQLAlchemy's parameterized queries: ```python partition_str = partition.partition_column[0] qry = qry.where(text("CAST(:partition_str AS STRING) LIKE '%'").bindparams(partition_str=partition_str)) ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/71e6b494-1f9a-48e1-9159-2caa26f342e6?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:95453f4c-adc4-4270-9287-4ed9758ebb40 --> ########## superset/daos/database.py: ########## @@ -166,6 +170,37 @@ return ssh_tunnel + @classmethod + def is_odps_partitioned_table( + cls, database: Database, table_name: str + ) -> Tuple[bool, List[str]]: + """ + This function is used to determine and retrieve + partition information of the odsp table. + The return values are whether the partition + table is partitioned and the names of all partition fields. + """ + if not database: + raise ValueError("Database not found") + uri = database.sqlalchemy_uri + access_key = database.password + pattern = re.compile( + r"odps://(?P<username>[^:]+):(?P<password>[^@]+)@(?P<project>[^/]+)/(?:\?endpoint=(?P<endpoint>[^&]+))" + ) Review Comment: ### Undocumented Complex Regex Pattern <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Complex regex pattern without inline comments explaining the pattern components and their purpose ###### Why this matters Without explanation, future maintainers will need to decipher the regex pattern, which increases cognitive load and maintenance time ###### Suggested change ∙ *Feature Preview* ```python # Pattern to extract ODPS connection components from URI # Format: odps://username:password@project/?endpoint=endpoint_url pattern = re.compile( r"odps://" # Protocol r"(?P<username>[^:]+)" # Username until ':' r":" r"(?P<password>[^@]+)" # Password until '@' r"@" r"(?P<project>[^/]+)" # Project name until '/' r"/" r"(?:\?endpoint=" # Endpoint parameter r"(?P<endpoint>[^&]+))" # Endpoint value until '&' or end ) ``` </details> <sub> [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/1296b488-4131-4336-b580-a053b89fad4d?suggestedFixEnabled=true) 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:fb3b8162-ce56-43ce-a64b-9e51910621cb --> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
