Copilot commented on code in PR #36136: URL: https://github.com/apache/superset/pull/36136#discussion_r2539250250
########## superset/utils/sql_sanitizer.py: ########## @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import Any + +import sqlglot +from sqlglot import Dialects, exp +from sqlglot.errors import ParseError + +from superset.sql.parse import SQLGLOT_DIALECTS + + +def get_sqlglot_dialect(database_backend: str) -> Any: + """Map Superset database backend name to SQLGlot dialect.""" + return SQLGLOT_DIALECTS.get(database_backend, Dialects.DIALECT) + + +def sanitize_sql_with_sqlglot( + raw_sql: str, + database_backend: str, + validate_structure: bool = True, +) -> str: + """ + Sanitize a raw SQL WHERE/HAVING clause using SQLGlot. + + Parses the SQL into an AST and regenerates it with proper + escaping for the target database dialect. + + Raises ValueError if SQL contains disallowed constructs + (subqueries, DDL/DML commands, set operations). + """ + if not raw_sql or not raw_sql.strip(): + return "" + + dialect = get_sqlglot_dialect(database_backend) + + try: + parsed = sqlglot.parse_one(raw_sql, dialect=dialect) + + if validate_structure: + _validate_sql_structure(parsed) + + return parsed.sql(dialect=dialect) + + except ParseError: Review Comment: When SQLGlot parsing fails with a `ParseError`, the function returns the original, unparsed SQL. This creates a security bypass: malformed SQL that can't be parsed by SQLGlot will skip validation entirely and be returned unchanged, potentially allowing injection attacks through syntax that SQLGlot doesn't understand. If validation is enabled (`validate_structure=True`), the function should raise an error on ParseError rather than returning the original SQL. The graceful fallback should only apply when validation is disabled: ```python except ParseError: if validate_structure: raise ValueError("Unable to parse and validate SQL clause") from None return raw_sql ``` This ensures that when security validation is requested, it either succeeds or fails explicitly, rather than silently bypassing validation. ```suggestion except ParseError: if validate_structure: raise ValueError("Unable to parse and validate SQL clause") from None ``` ########## superset/utils/sql_sanitizer.py: ########## @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import Any + +import sqlglot +from sqlglot import Dialects, exp +from sqlglot.errors import ParseError + +from superset.sql.parse import SQLGLOT_DIALECTS + + +def get_sqlglot_dialect(database_backend: str) -> Any: + """Map Superset database backend name to SQLGlot dialect.""" + return SQLGLOT_DIALECTS.get(database_backend, Dialects.DIALECT) + + +def sanitize_sql_with_sqlglot( + raw_sql: str, + database_backend: str, + validate_structure: bool = True, +) -> str: + """ + Sanitize a raw SQL WHERE/HAVING clause using SQLGlot. + + Parses the SQL into an AST and regenerates it with proper + escaping for the target database dialect. + + Raises ValueError if SQL contains disallowed constructs + (subqueries, DDL/DML commands, set operations). Review Comment: The docstring states "Raises ValueError if SQL contains disallowed constructs" but doesn't document that the function also returns the original SQL unchanged for ParseError exceptions. This fallback behavior is a critical part of the function's contract and should be documented: ```python """ Sanitize a raw SQL WHERE/HAVING clause using SQLGlot. Parses the SQL into an AST and regenerates it with proper escaping for the target database dialect. Args: raw_sql: The SQL clause to sanitize database_backend: The database backend name (e.g., 'postgresql', 'mysql') validate_structure: Whether to validate for dangerous SQL constructs Returns: The sanitized SQL clause. If SQLGlot cannot parse the SQL, returns the original SQL unchanged. Raises: ValueError: If SQL contains disallowed constructs (subqueries, set operations, DDL/DML commands) when validate_structure=True """ ``` ```suggestion Args: raw_sql: The SQL clause to sanitize. database_backend: The database backend name (e.g., 'postgresql', 'mysql'). validate_structure: Whether to validate for dangerous SQL constructs. Returns: The sanitized SQL clause. If SQLGlot cannot parse the SQL, returns the original SQL unchanged. Raises: ValueError: If SQL contains disallowed constructs (subqueries, DDL/DML commands, set operations) when validate_structure=True. ``` ########## superset/utils/sql_sanitizer.py: ########## @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import Any + +import sqlglot +from sqlglot import Dialects, exp +from sqlglot.errors import ParseError + +from superset.sql.parse import SQLGLOT_DIALECTS + + +def get_sqlglot_dialect(database_backend: str) -> Any: + """Map Superset database backend name to SQLGlot dialect.""" + return SQLGLOT_DIALECTS.get(database_backend, Dialects.DIALECT) + + +def sanitize_sql_with_sqlglot( + raw_sql: str, + database_backend: str, + validate_structure: bool = True, +) -> str: + """ + Sanitize a raw SQL WHERE/HAVING clause using SQLGlot. + + Parses the SQL into an AST and regenerates it with proper + escaping for the target database dialect. + + Raises ValueError if SQL contains disallowed constructs + (subqueries, DDL/DML commands, set operations). + """ + if not raw_sql or not raw_sql.strip(): + return "" + + dialect = get_sqlglot_dialect(database_backend) + + try: + parsed = sqlglot.parse_one(raw_sql, dialect=dialect) + + if validate_structure: + _validate_sql_structure(parsed) + + return parsed.sql(dialect=dialect) + + except ParseError: + return raw_sql + except ValueError: + raise + except Exception: # pylint: disable=broad-except Review Comment: The broad `except Exception` clause silently catches and suppresses all unexpected exceptions, returning the original SQL unchanged. This creates a security risk: any unexpected error during validation will cause the function to bypass security checks and return potentially dangerous SQL. If validation is enabled, unexpected exceptions should be raised rather than silently suppressed: ```python except Exception: # pylint: disable=broad-except if validate_structure: raise return raw_sql ``` This ensures that when security validation is requested, it either succeeds or fails explicitly, rather than silently bypassing validation. ```suggestion except Exception: # pylint: disable=broad-except if validate_structure: raise ``` ########## superset/common/query_object.py: ########## @@ -352,6 +355,15 @@ def _sanitize_filters(self) -> None: msg=ex.message, ) ) from ex + + if use_sqlglot_escaping: + database_backend = database.db_engine_spec.engine + clause = sanitize_sql_with_sqlglot( + clause, + database_backend, + validate_structure=True, + ) Review Comment: The `sanitize_sql_with_sqlglot` function can raise `ValueError` for dangerous SQL constructs (subqueries, set operations, etc.), but this exception is not being caught in the surrounding try-except block. This will cause an unhandled `ValueError` to propagate, bypassing the error handling that converts exceptions to `QueryObjectValidationError`. Add a catch for `ValueError` and convert it to `QueryObjectValidationError` like other exceptions in this block: ```python if use_sqlglot_escaping: try: database_backend = database.db_engine_spec.engine clause = sanitize_sql_with_sqlglot( clause, database_backend, validate_structure=True, ) except ValueError as ex: raise QueryObjectValidationError(str(ex)) from ex ``` ```suggestion try: clause = sanitize_sql_with_sqlglot( clause, database_backend, validate_structure=True, ) except ValueError as ex: raise QueryObjectValidationError(str(ex)) from ex ``` ########## superset/utils/sql_sanitizer.py: ########## @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import Any + +import sqlglot +from sqlglot import Dialects, exp +from sqlglot.errors import ParseError + +from superset.sql.parse import SQLGLOT_DIALECTS + + +def get_sqlglot_dialect(database_backend: str) -> Any: + """Map Superset database backend name to SQLGlot dialect.""" + return SQLGLOT_DIALECTS.get(database_backend, Dialects.DIALECT) + + +def sanitize_sql_with_sqlglot( + raw_sql: str, + database_backend: str, + validate_structure: bool = True, +) -> str: + """ + Sanitize a raw SQL WHERE/HAVING clause using SQLGlot. + + Parses the SQL into an AST and regenerates it with proper + escaping for the target database dialect. + + Raises ValueError if SQL contains disallowed constructs + (subqueries, DDL/DML commands, set operations). + """ + if not raw_sql or not raw_sql.strip(): + return "" + + dialect = get_sqlglot_dialect(database_backend) + + try: + parsed = sqlglot.parse_one(raw_sql, dialect=dialect) + + if validate_structure: + _validate_sql_structure(parsed) + + return parsed.sql(dialect=dialect) + + except ParseError: + return raw_sql + except ValueError: + raise + except Exception: # pylint: disable=broad-except + return raw_sql + + +def _validate_sql_structure(parsed: exp.Expression) -> None: + """Validate that parsed SQL doesn't contain dangerous constructs.""" + disallowed_ddl_types = [ + exp.Command, + exp.Create, + exp.Drop, + exp.Insert, + exp.Update, + exp.Delete, + ] + + if hasattr(exp, "AlterTable"): + disallowed_ddl_types.append(exp.AlterTable) + if hasattr(exp, "Truncate"): + disallowed_ddl_types.append(exp.Truncate) + + disallowed_ddl_tuple = tuple(disallowed_ddl_types) + + for node in parsed.walk(): + if isinstance(node, (exp.Subquery, exp.Select)): + raise ValueError("Subqueries are not allowed in filter clauses") + + if isinstance(node, disallowed_ddl_tuple): + raise ValueError("SQL commands are not allowed in filter clauses") Review Comment: The error message "SQL commands are not allowed in filter clauses" is ambiguous because it doesn't specify what kind of SQL commands. Since this catch block handles DDL/DML operations (DROP, INSERT, UPDATE, DELETE, CREATE, ALTER, TRUNCATE), the error message should be more specific: ```python raise ValueError("DDL/DML commands (CREATE, DROP, INSERT, UPDATE, DELETE, etc.) are not allowed in filter clauses") ``` This helps users understand exactly what was blocked. ```suggestion raise ValueError("DDL/DML commands (CREATE, DROP, INSERT, UPDATE, DELETE, etc.) are not allowed in filter clauses") ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
