sha174n commented on code in PR #41120:
URL: https://github.com/apache/superset/pull/41120#discussion_r3425329723
##########
tests/unit_tests/commands/sql_lab/test_estimate.py:
##########
@@ -181,19 +181,61 @@ def
test_apply_sql_security_allows_dml_when_enabled(mock_app: MagicMock) -> None
assert command._apply_sql_security("INSERT INTO t VALUES (1)")
+@patch("superset.commands.sql_lab.estimate.Query")
+@patch("superset.commands.sql_lab.estimate.db")
+@patch("superset.commands.sql_lab.estimate.is_feature_enabled",
return_value=False)
@patch("superset.commands.sql_lab.estimate.app")
-def test_apply_sql_security_blocks_disallowed_table(mock_app: MagicMock) ->
None:
+def test_apply_sql_security_blocks_disallowed_table(
+ mock_app: MagicMock,
+ mock_is_feature_enabled: MagicMock,
+ mock_db: MagicMock,
+ mock_query: MagicMock,
+) -> None:
Review Comment:
Added a docstring to this test in 102f2ec.
##########
superset/config.py:
##########
@@ -1872,6 +1872,21 @@ def engine_context_manager( # pylint:
disable=unused-argument
"pg_read_file",
"pg_ls_dir",
"pg_read_binary_file",
+ # PostgreSQL large-object functions: writers can plant arbitrary
+ # bytes on the server filesystem (lo_export, lo_from_bytea, lowrite,
+ # lo_put, lo_create, lo_creat, lo_import), readers can pull bytes back
+ # out (lo_get, loread), and lo_unlink deletes large objects outright.
+ # Defense-in-depth on top of is_mutating()'s function-name check.
+ "lo_from_bytea",
+ "lo_export",
+ "lo_import",
+ "lo_put",
+ "lo_create",
+ "lo_creat",
+ "lowrite",
+ "lo_get",
+ "loread",
+ "lo_unlink",
Review Comment:
Added lo_truncate/lo_truncate64 to the PostgreSQL function list and to the
mutating-function detector in 102f2ec.
##########
superset/models/helpers.py:
##########
@@ -1221,24 +1221,25 @@ def _process_sql_expression( # pylint:
disable=too-many-arguments
disallowed_functions
):
raise
SupersetDisallowedSQLFunctionException(disallowed_functions)
- if disallowed_tables and
parsed.check_tables_present(disallowed_tables):
+ if disallowed_tables:
# Report only the tables actually found in the expression,
# mirroring the canonical execution-time gate in
# `superset.sql_lab._validate_query` so the user-facing
- # error doesn't echo the operator's full denylist.
- present_tables = {
- table.table.lower()
- for statement in parsed.statements
- for table in statement.tables
- }
- found_tables = {
- table
- for table in disallowed_tables
- if table.lower() in present_tables
- }
- raise SupersetDisallowedSQLTableException(
- found_tables or disallowed_tables
+ # error doesn't echo the operator's full denylist. Honors
+ # schema-qualified denylist entries (e.g.
+ # ``information_schema.tables``) and resolves unqualified
+ # references against the effective schema, falling back to
+ # the database default when none was supplied (e.g. a
+ # virtual dataset with no schema) so a qualified entry
+ # cannot be bypassed by an unqualified reference.
+ effective_schema = schema or
self.database.get_default_schema(
+ self.catalog
)
Review Comment:
The effective schema is now resolved once per datasource (memoized) instead
of per adhoc expression, so this no longer issues a lookup per
column/metric/order-by. Done in 102f2ec.
##########
superset/models/helpers.py:
##########
@@ -1465,19 +1466,23 @@ def _raise_for_disallowed_sql(self, sql: str) -> None:
disallowed_functions
):
raise SupersetDisallowedSQLFunctionException(disallowed_functions)
- if disallowed_tables and
parsed_script.check_tables_present(disallowed_tables):
+ if disallowed_tables:
# Report only the tables actually found in the query, mirroring the
# canonical execution-time gate so the user-facing error doesn't
- # echo the operator's full denylist.
- present_tables = {
- table.table.lower()
- for statement in parsed_script.statements
- for table in statement.tables
- }
- found_tables = {
- table for table in disallowed_tables if table.lower() in
present_tables
- }
- raise SupersetDisallowedSQLTableException(found_tables or
disallowed_tables)
+ # echo the operator's full denylist. Honors schema-qualified
+ # denylist entries (e.g. ``information_schema.tables``) and
resolves
+ # unqualified references against the effective schema, falling back
+ # to the database default when the datasource has none (e.g. a
+ # virtual dataset) so a qualified entry cannot be bypassed by an
+ # unqualified reference.
+ effective_schema = self.schema or self.database.get_default_schema(
+ self.catalog
+ )
Review Comment:
Switched this path to the query-aware get_default_schema_for_query (same
resolver SQL Lab and the executor use) in 102f2ec.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]