korbit-ai[bot] commented on code in PR #33542:
URL: https://github.com/apache/superset/pull/33542#discussion_r2098160045
##########
superset/sql_lab.py:
##########
@@ -197,101 +197,119 @@
return handle_query_error(ex, query)
-def execute_sql_statement( # pylint: disable=too-many-statements,
too-many-locals # noqa: C901
- sql_statement: str,
- query: Query,
- cursor: Any,
- log_params: Optional[dict[str, Any]],
- apply_ctas: bool = False,
-) -> SupersetResultSet:
- """Executes a single SQL statement"""
- database: Database = query.database
- db_engine_spec = database.db_engine_spec
-
- parsed_query = ParsedQuery(sql_statement, engine=db_engine_spec.engine)
- if is_feature_enabled("RLS_IN_SQLLAB"):
- # There are two ways to insert RLS: either replacing the table with a
subquery
- # that has the RLS, or appending the RLS to the ``WHERE`` clause. The
former is
- # safer, but not supported in all databases.
- insert_rls = (
- insert_rls_as_subquery
- if database.db_engine_spec.allows_subqueries
- and database.db_engine_spec.allows_alias_in_select
- else insert_rls_in_predicate
- )
+def apply_rls(query: Query, parsed_statement: BaseSQLStatement) -> None:
+ """
+ Modify statement inplace to ensure RLS rules are applied.
+ """
+ # we need the default schema to fully qualify the table names
+ default_schema = query.database.get_default_schema_for_query(query)
+
+ # There are two ways to insert RLS: either replacing the table with a
subquery
+ # that has the RLS, or appending the RLS to the ``WHERE`` clause. The
former is
+ # safer, but not supported in all databases.
+ method = (
+ RLSMethod.AS_SUBQUERY
+ if query.database.db_engine_spec.allows_subqueries
+ and query.database.db_engine_spec.allows_alias_in_select
+ else RLSMethod.AS_PREDICATE
+ )
- # Insert any applicable RLS predicates
- parsed_query = ParsedQuery(
- str(
- insert_rls(
- parsed_query._parsed[0], # pylint:
disable=protected-access
- database.id,
- query.schema,
- )
- ),
- engine=db_engine_spec.engine,
- )
+ # collect all RLS predicates
+ predicates = defaultdict(list)
+ for table in parsed_statement.tables:
+ if predicates := get_predicates_for_table(
+ query.database,
+ table,
+ query.catalog,
+ default_schema,
+ ):
+ predicates[table].extend(
+ parsed_statement.parse_predicate(predicate) for predicate in
predicates
+ )
- sql = parsed_query.stripped()
+ parsed_statement.apply_rls(query.catalog, default_schema, predicates,
method)
- # This is a test to see if the query is being
- # limited by either the dropdown or the sql.
- # We are testing to see if more rows exist than the limit.
- increased_limit = None if query.limit is None else query.limit + 1
- if not database.allow_dml:
- errors = []
- try:
- parsed_statement = SQLStatement(
- statement=sql_statement,
- engine=db_engine_spec.engine,
- )
- disallowed = parsed_statement.is_mutating()
- except SupersetParseError as ex:
- # if we fail to parse the query, disallow by default
- disallowed = True
- errors.append(ex.error)
-
- if disallowed:
- errors.append(
- SupersetError(
- message=__(
- "This database does not allow for DDL/DML, and the
query "
- "could not be parsed to confirm it is a read-only
query. Please " # noqa: E501
- "contact your administrator for more assistance."
- ),
- error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR,
- level=ErrorLevel.ERROR,
- )
+def get_predicates_for_table(
+ database: Database,
+ table: Table,
+ catalog: str,
+ schema: str,
+) -> list[str]:
Review Comment:
### Uncached RLS Predicate Lookups <sub></sub>
<details>
<summary>Tell me more</summary>
###### What is the issue?
Database query for RLS predicates is executed for each table individually
without any caching mechanism.
###### Why this matters
For queries involving multiple tables, this will result in multiple separate
database lookups for RLS predicates, increasing latency and database load.
###### Suggested change ∙ *Feature Preview*
Implement caching for RLS predicates with an appropriate cache invalidation
strategy:
```python
@cache.memoize(timeout=300) # Cache for 5 minutes
def get_predicates_for_table(database, table, catalog, schema):
# Existing logic
```
###### Provide feedback to improve future suggestions
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e/upvote)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e?what_not_true=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e?what_out_of_scope=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e?what_not_in_standard=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e)
</details>
<sub>
💬 Looking for more details? Reply to this comment to chat with Korbit.
</sub>
<!--- korbi internal id:ff3f8758-c4cc-464c-9a9f-67bdb0e42e1e -->
[](ff3f8758-c4cc-464c-9a9f-67bdb0e42e1e)
##########
superset/sql_lab.py:
##########
@@ -197,101 +197,119 @@ def get_sql_results( # pylint:
disable=too-many-arguments
return handle_query_error(ex, query)
-def execute_sql_statement( # pylint: disable=too-many-statements,
too-many-locals # noqa: C901
- sql_statement: str,
- query: Query,
- cursor: Any,
- log_params: Optional[dict[str, Any]],
- apply_ctas: bool = False,
-) -> SupersetResultSet:
- """Executes a single SQL statement"""
- database: Database = query.database
- db_engine_spec = database.db_engine_spec
-
- parsed_query = ParsedQuery(sql_statement, engine=db_engine_spec.engine)
- if is_feature_enabled("RLS_IN_SQLLAB"):
- # There are two ways to insert RLS: either replacing the table with a
subquery
- # that has the RLS, or appending the RLS to the ``WHERE`` clause. The
former is
- # safer, but not supported in all databases.
- insert_rls = (
- insert_rls_as_subquery
- if database.db_engine_spec.allows_subqueries
- and database.db_engine_spec.allows_alias_in_select
- else insert_rls_in_predicate
- )
+def apply_rls(query: Query, parsed_statement: BaseSQLStatement) -> None:
+ """
+ Modify statement inplace to ensure RLS rules are applied.
+ """
+ # we need the default schema to fully qualify the table names
+ default_schema = query.database.get_default_schema_for_query(query)
+
+ # There are two ways to insert RLS: either replacing the table with a
subquery
+ # that has the RLS, or appending the RLS to the ``WHERE`` clause. The
former is
+ # safer, but not supported in all databases.
+ method = (
+ RLSMethod.AS_SUBQUERY
+ if query.database.db_engine_spec.allows_subqueries
+ and query.database.db_engine_spec.allows_alias_in_select
+ else RLSMethod.AS_PREDICATE
+ )
- # Insert any applicable RLS predicates
- parsed_query = ParsedQuery(
- str(
- insert_rls(
- parsed_query._parsed[0], # pylint:
disable=protected-access
- database.id,
- query.schema,
- )
- ),
- engine=db_engine_spec.engine,
- )
+ # collect all RLS predicates
+ predicates = defaultdict(list)
+ for table in parsed_statement.tables:
+ if predicates := get_predicates_for_table(
+ query.database,
+ table,
+ query.catalog,
+ default_schema,
+ ):
+ predicates[table].extend(
+ parsed_statement.parse_predicate(predicate) for predicate in
predicates
+ )
Review Comment:
### Inefficient Repeated Predicate Parsing <sub></sub>
<details>
<summary>Tell me more</summary>
###### What is the issue?
Nested loop with repeated predicate parsing inside apply_rls() function
could be inefficient for large number of predicates.
###### Why this matters
For tables with many RLS predicates, parsing each predicate individually in
a loop creates unnecessary overhead. This could significantly impact
performance when processing queries with multiple tables each having multiple
RLS rules.
###### Suggested change ∙ *Feature Preview*
Parse predicates in bulk or cache parsed predicates to avoid repeated
parsing operations:
```python
# Parse predicates once upfront
parsed_predicates = [parsed_statement.parse_predicate(p) for p in predicates]
predicates[table].extend(parsed_predicates)
```
###### Provide feedback to improve future suggestions
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c/upvote)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c?what_not_true=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c?what_out_of_scope=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c?what_not_in_standard=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c)
</details>
<sub>
💬 Looking for more details? Reply to this comment to chat with Korbit.
</sub>
<!--- korbi internal id:ef5b3490-4620-4f57-aeef-0621a5dad8d2 -->
[](ef5b3490-4620-4f57-aeef-0621a5dad8d2)
##########
superset/sql/parse.py:
##########
@@ -1128,6 +1149,15 @@ def set_limit_value(
self._parsed = "".join(val for _, val in tokens)
+ def parse_predicate(self, predicate: str) -> str:
+ """
+ Parse a predicate string into an AST.
+
+ :param predicate: The predicate to parse.
+ :return: The parsed predicate.
+ """
+ return predicate
Review Comment:
### Missing Predicate Parsing for Kusto KQL <sub></sub>
<details>
<summary>Tell me more</summary>
###### What is the issue?
The KustoKQLStatement.parse_predicate method returns the predicate string
without any actual parsing, which contradicts the method's purpose of parsing a
predicate into an AST.
###### Why this matters
Without proper predicate parsing, the RLS (Row Level Security) functionality
won't work correctly for Kusto KQL, as predicates need to be properly parsed to
be integrated into queries.
###### Suggested change ∙ *Feature Preview*
Implement proper predicate parsing for Kusto KQL predicates or raise
NotImplementedError if KQL predicate parsing is not supported. Example:
```python
def parse_predicate(self, predicate: str) -> str:
"""Parse a predicate string into an AST.
:param predicate: The predicate to parse.
:raises NotImplementedError: Kusto KQL predicate parsing is not
supported.
"""
raise NotImplementedError("Predicate parsing is not supported for Kusto
KQL")
```
###### Provide feedback to improve future suggestions
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93/upvote)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93?what_not_true=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93?what_out_of_scope=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93?what_not_in_standard=true)
[](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93)
</details>
<sub>
💬 Looking for more details? Reply to this comment to chat with Korbit.
</sub>
<!--- korbi internal id:86c29942-3626-4eff-aca4-1e7b02a89685 -->
[](86c29942-3626-4eff-aca4-1e7b02a89685)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]