sha174n commented on code in PR #41120:
URL: https://github.com/apache/superset/pull/41120#discussion_r3423400146
##########
UPDATING.md:
##########
@@ -24,6 +24,15 @@ assists people when migrating to a new version.
## Next
+### SQL Lab denies large-object and information_schema access by default
+
+`DISALLOWED_SQL_FUNCTIONS` and `DISALLOWED_SQL_TABLES` now ship with
additional default entries, so SQL Lab and chart-data queries that reference
them are rejected where they were previously allowed:
+
+- PostgreSQL large-object routines (`lo_from_bytea`, `lo_export`, `lo_import`,
`lo_put`, `lo_create`, `lowrite`, `lo_get`, `lo_unlink`), which read and write
bytes on the database server's filesystem.
+- The SQL-standard `information_schema` views (`tables`, `columns`,
`routines`, `views`, the privilege/grant views, etc.), which expose table,
column, privilege, and view-definition metadata across the whole database.
Review Comment:
Added `loread` and `lo_creat` to the migration note's large-object list so
it matches the config defaults, in 22ad47d081.
##########
superset/sql_lab.py:
##########
@@ -433,15 +433,18 @@ def execute_sql_statements( # noqa: C901
db_engine_spec.engine,
set(),
)
- if disallowed_tables and
parsed_script.check_tables_present(disallowed_tables):
- # Report only the tables actually found in the query
- found_tables = set()
- for statement in parsed_script.statements:
- present = {table.table.lower() for table in statement.tables}
- for table in disallowed_tables:
- if table.lower() in present:
- found_tables.add(table)
- raise SupersetDisallowedSQLTableException(found_tables or
disallowed_tables)
+ if disallowed_tables:
+ # Report only the denylisted tables actually referenced in the query,
+ # honoring schema-qualified entries (e.g.
``information_schema.tables``).
+ # Resolve the effective default schema so an unqualified reference that
+ # the connection resolves to it at runtime (e.g. Postgres ``public``)
+ # still matches a qualified entry, even when no schema was selected.
+ effective_schema = query.schema or
database.get_default_schema(query.catalog)
+ found_tables = parsed_script.get_disallowed_tables(
+ disallowed_tables, effective_schema
Review Comment:
Switched to `database.get_default_schema_for_query(query)` so the denylist
resolves the schema the same query-aware way the RLS gate does, in 22ad47d081.
##########
superset/models/helpers.py:
##########
@@ -1221,24 +1221,19 @@ def _process_sql_expression( # pylint:
disable=too-many-arguments
disallowed_functions
):
raise
SupersetDisallowedSQLFunctionException(disallowed_functions)
- if disallowed_tables and
parsed.check_tables_present(disallowed_tables):
+ if disallowed_tables:
# Report only the tables actually found in the expression,
# mirroring the canonical execution-time gate in
# `superset.sql_lab._validate_query` so the user-facing
- # error doesn't echo the operator's full denylist.
- present_tables = {
- table.table.lower()
- for statement in parsed.statements
- for table in statement.tables
- }
- found_tables = {
- table
- for table in disallowed_tables
- if table.lower() in present_tables
- }
- raise SupersetDisallowedSQLTableException(
- found_tables or disallowed_tables
+ # error doesn't echo the operator's full denylist. Honors
+ # schema-qualified denylist entries (e.g.
+ # ``information_schema.tables``) and resolves unqualified
+ # references against the query schema.
+ found_tables = parsed.get_disallowed_tables(
+ disallowed_tables, schema
)
Review Comment:
Now resolves an effective schema (explicit schema, else the database default
via `get_default_schema`) before `get_disallowed_tables`, in 22ad47d081.
##########
superset/models/helpers.py:
##########
@@ -1465,19 +1460,17 @@ def _raise_for_disallowed_sql(self, sql: str) -> None:
disallowed_functions
):
raise SupersetDisallowedSQLFunctionException(disallowed_functions)
- if disallowed_tables and
parsed_script.check_tables_present(disallowed_tables):
+ if disallowed_tables:
# Report only the tables actually found in the query, mirroring the
# canonical execution-time gate so the user-facing error doesn't
- # echo the operator's full denylist.
- present_tables = {
- table.table.lower()
- for statement in parsed_script.statements
- for table in statement.tables
- }
- found_tables = {
- table for table in disallowed_tables if table.lower() in
present_tables
- }
- raise SupersetDisallowedSQLTableException(found_tables or
disallowed_tables)
+ # echo the operator's full denylist. Honors schema-qualified
+ # denylist entries (e.g. ``information_schema.tables``) and
resolves
+ # unqualified references against the query schema.
+ found_tables = parsed_script.get_disallowed_tables(
+ disallowed_tables, self.schema
+ )
Review Comment:
Now falls back to the database default schema when the datasource has none
before `get_disallowed_tables`, in 22ad47d081.
##########
superset/config.py:
##########
@@ -1872,6 +1872,20 @@ def engine_context_manager( # pylint:
disable=unused-argument
"pg_read_file",
"pg_ls_dir",
"pg_read_binary_file",
+ # PostgreSQL large-object functions: writers can plant arbitrary
+ # bytes on the server filesystem (lo_export, lo_from_bytea, lowrite,
+ # lo_put, lo_create, lo_import), readers can pull bytes back out
+ # (lo_get, loread), and lo_unlink deletes large objects outright.
+ # Defense-in-depth on top of is_mutating()'s function-name check.
+ "lo_from_bytea",
+ "lo_export",
+ "lo_import",
+ "lo_put",
+ "lo_create",
+ "lowrite",
+ "lo_get",
+ "loread",
+ "lo_unlink",
Review Comment:
Added `lo_creat` alongside `lo_create` in the PostgreSQL defaults, in
22ad47d081.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]