Copilot commented on code in PR #40662:
URL: https://github.com/apache/superset/pull/40662#discussion_r3338864646
##########
superset/commands/sql_lab/estimate.py:
##########
@@ -69,6 +77,49 @@ def validate(self) -> None:
)
security_manager.raise_for_access(database=self._database)
+ def _apply_sql_security(self, sql: str) -> str:
+ """Run the disallowed-function/table, DML and RLS controls against the
+ SQL to be estimated, mirroring ``sql_lab.execute_sql_statements``.
+
+ Returns the SQL with RLS predicates injected (when ``RLS_IN_SQLLAB`` is
+ enabled), so the cost estimate reflects the same constrained query the
+ user would actually be allowed to run.
+ """
+ db_engine_spec = self._database.db_engine_spec
+ parsed_script = SQLScript(sql, engine=db_engine_spec.engine)
+
+ disallowed_functions = app.config["DISALLOWED_SQL_FUNCTIONS"].get(
+ db_engine_spec.engine,
+ set(),
+ )
+ if disallowed_functions and parsed_script.check_functions_present(
+ disallowed_functions
+ ):
+ raise SupersetDisallowedSQLFunctionException(disallowed_functions)
+
+ disallowed_tables = app.config["DISALLOWED_SQL_TABLES"].get(
+ db_engine_spec.engine,
+ set(),
+ )
+ if disallowed_tables and
parsed_script.check_tables_present(disallowed_tables):
+ found_tables = set()
+ for statement in parsed_script.statements:
+ present = {table.table.lower() for table in statement.tables}
+ for table in disallowed_tables:
+ if table.lower() in present:
+ found_tables.add(table)
+ raise SupersetDisallowedSQLTableException(found_tables or
disallowed_tables)
+
+ if parsed_script.has_mutation() and not self._database.allow_dml:
+ raise SupersetDMLNotAllowedException()
+
+ if is_feature_enabled("RLS_IN_SQLLAB"):
+ for statement in parsed_script.statements:
+ apply_rls(self._database, self._catalog, self._schema,
statement)
+ return parsed_script.format()
Review Comment:
RLS is applied with `self._schema` (which is `""` when the caller didn't
pass a schema) and `self._catalog` (which is `None`). In the execution path
that this is intended to mirror, the default schema is resolved before
injection via `query.database.get_default_schema_for_query(query)` (see
`superset/sql_lab.py:432-435`), and `SQLExecutor` resolves both via
`get_default_catalog()` / `get_default_schema(catalog)`
(`superset/sql/execution/executor.py:440-444`).
Without that resolution, `apply_rls` cannot match unqualified tables against
datasets registered under the database's default schema, so the cost estimate
can still avoid RLS predicates that the actual execution would apply —
defeating the central goal of this change. Consider resolving catalog/default
schema (e.g. `self._database.get_default_catalog()` /
`get_default_schema(catalog)`) before passing them to `apply_rls`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]