Re: [PR] WIP [superset]

via GitHub Tue, 20 May 2025 07:40:13 -0700


korbit-ai[bot] commented on code in PR #33542:
URL: https://github.com/apache/superset/pull/33542#discussion_r2098160045



##########
superset/sql_lab.py:
##########
@@ -197,101 +197,119 @@
                 return handle_query_error(ex, query)
 
 
-def execute_sql_statement(  # pylint: disable=too-many-statements, 
too-many-locals  # noqa: C901
-    sql_statement: str,
-    query: Query,
-    cursor: Any,
-    log_params: Optional[dict[str, Any]],
-    apply_ctas: bool = False,
-) -> SupersetResultSet:
-    """Executes a single SQL statement"""
-    database: Database = query.database
-    db_engine_spec = database.db_engine_spec
-
-    parsed_query = ParsedQuery(sql_statement, engine=db_engine_spec.engine)
-    if is_feature_enabled("RLS_IN_SQLLAB"):
-        # There are two ways to insert RLS: either replacing the table with a 
subquery
-        # that has the RLS, or appending the RLS to the ``WHERE`` clause. The 
former is
-        # safer, but not supported in all databases.
-        insert_rls = (
-            insert_rls_as_subquery
-            if database.db_engine_spec.allows_subqueries
-            and database.db_engine_spec.allows_alias_in_select
-            else insert_rls_in_predicate
-        )
+def apply_rls(query: Query, parsed_statement: BaseSQLStatement) -> None:
+    """
+    Modify statement inplace to ensure RLS rules are applied.
+    """
+    # we need the default schema to fully qualify the table names
+    default_schema = query.database.get_default_schema_for_query(query)
+
+    # There are two ways to insert RLS: either replacing the table with a 
subquery
+    # that has the RLS, or appending the RLS to the ``WHERE`` clause. The 
former is
+    # safer, but not supported in all databases.
+    method = (
+        RLSMethod.AS_SUBQUERY
+        if query.database.db_engine_spec.allows_subqueries
+        and query.database.db_engine_spec.allows_alias_in_select
+        else RLSMethod.AS_PREDICATE
+    )
 
-        # Insert any applicable RLS predicates
-        parsed_query = ParsedQuery(
-            str(
-                insert_rls(
-                    parsed_query._parsed[0],  # pylint: 
disable=protected-access
-                    database.id,
-                    query.schema,
-                )
-            ),
-            engine=db_engine_spec.engine,
-        )
+    # collect all RLS predicates
+    predicates = defaultdict(list)
+    for table in parsed_statement.tables:
+        if predicates := get_predicates_for_table(
+            query.database,
+            table,
+            query.catalog,
+            default_schema,
+        ):
+            predicates[table].extend(
+                parsed_statement.parse_predicate(predicate) for predicate in 
predicates
+            )
 
-    sql = parsed_query.stripped()
+    parsed_statement.apply_rls(query.catalog, default_schema, predicates, 
method)
 
-    # This is a test to see if the query is being
-    # limited by either the dropdown or the sql.
-    # We are testing to see if more rows exist than the limit.
-    increased_limit = None if query.limit is None else query.limit + 1
 
-    if not database.allow_dml:
-        errors = []
-        try:
-            parsed_statement = SQLStatement(
-                statement=sql_statement,
-                engine=db_engine_spec.engine,
-            )
-            disallowed = parsed_statement.is_mutating()
-        except SupersetParseError as ex:
-            # if we fail to parse the query, disallow by default
-            disallowed = True
-            errors.append(ex.error)
-
-        if disallowed:
-            errors.append(
-                SupersetError(
-                    message=__(
-                        "This database does not allow for DDL/DML, and the 
query "
-                        "could not be parsed to confirm it is a read-only 
query. Please "  # noqa: E501
-                        "contact your administrator for more assistance."
-                    ),
-                    error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR,
-                    level=ErrorLevel.ERROR,
-                )
+def get_predicates_for_table(
+    database: Database,
+    table: Table,
+    catalog: str,
+    schema: str,
+) -> list[str]:

Review Comment:
   ### Uncached RLS Predicate Lookups <sub>![category 
Performance](https://img.shields.io/badge/Performance-4f46e5)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   Database query for RLS predicates is executed for each table individually 
without any caching mechanism.
   
   
   ###### Why this matters
   For queries involving multiple tables, this will result in multiple separate 
database lookups for RLS predicates, increasing latency and database load.
   
   ###### Suggested change ∙ *Feature Preview*
   Implement caching for RLS predicates with an appropriate cache invalidation 
strategy:
   ```python
   @cache.memoize(timeout=300)  # Cache for 5 minutes
   def get_predicates_for_table(database, table, catalog, schema):
       # Existing logic
   ```
   
   
   ###### Provide feedback to improve future suggestions
   [![Nice 
Catch](https://img.shields.io/badge/👍%20Nice%20Catch-71BC78)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e/upvote)
 
[![Incorrect](https://img.shields.io/badge/👎%20Incorrect-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e?what_not_true=true)
  [![Not in 
Scope](https://img.shields.io/badge/👎%20Out%20of%20PR%20scope-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e?what_out_of_scope=true)
 [![Not in coding 
standard](https://img.shields.io/badge/👎%20Not%20in%20our%20standards-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e?what_not_in_standard=true)
 
[![Other](https://img.shields.io/badge/👎%20Other-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/09bc03db-a9b5-40be-aedd-16795b3f3e1e)
   </details>
   
   <sub>
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:ff3f8758-c4cc-464c-9a9f-67bdb0e42e1e -->
   
   
   [](ff3f8758-c4cc-464c-9a9f-67bdb0e42e1e)



##########
superset/sql_lab.py:
##########
@@ -197,101 +197,119 @@ def get_sql_results(  # pylint: 
disable=too-many-arguments
                 return handle_query_error(ex, query)
 
 
-def execute_sql_statement(  # pylint: disable=too-many-statements, 
too-many-locals  # noqa: C901
-    sql_statement: str,
-    query: Query,
-    cursor: Any,
-    log_params: Optional[dict[str, Any]],
-    apply_ctas: bool = False,
-) -> SupersetResultSet:
-    """Executes a single SQL statement"""
-    database: Database = query.database
-    db_engine_spec = database.db_engine_spec
-
-    parsed_query = ParsedQuery(sql_statement, engine=db_engine_spec.engine)
-    if is_feature_enabled("RLS_IN_SQLLAB"):
-        # There are two ways to insert RLS: either replacing the table with a 
subquery
-        # that has the RLS, or appending the RLS to the ``WHERE`` clause. The 
former is
-        # safer, but not supported in all databases.
-        insert_rls = (
-            insert_rls_as_subquery
-            if database.db_engine_spec.allows_subqueries
-            and database.db_engine_spec.allows_alias_in_select
-            else insert_rls_in_predicate
-        )
+def apply_rls(query: Query, parsed_statement: BaseSQLStatement) -> None:
+    """
+    Modify statement inplace to ensure RLS rules are applied.
+    """
+    # we need the default schema to fully qualify the table names
+    default_schema = query.database.get_default_schema_for_query(query)
+
+    # There are two ways to insert RLS: either replacing the table with a 
subquery
+    # that has the RLS, or appending the RLS to the ``WHERE`` clause. The 
former is
+    # safer, but not supported in all databases.
+    method = (
+        RLSMethod.AS_SUBQUERY
+        if query.database.db_engine_spec.allows_subqueries
+        and query.database.db_engine_spec.allows_alias_in_select
+        else RLSMethod.AS_PREDICATE
+    )
 
-        # Insert any applicable RLS predicates
-        parsed_query = ParsedQuery(
-            str(
-                insert_rls(
-                    parsed_query._parsed[0],  # pylint: 
disable=protected-access
-                    database.id,
-                    query.schema,
-                )
-            ),
-            engine=db_engine_spec.engine,
-        )
+    # collect all RLS predicates
+    predicates = defaultdict(list)
+    for table in parsed_statement.tables:
+        if predicates := get_predicates_for_table(
+            query.database,
+            table,
+            query.catalog,
+            default_schema,
+        ):
+            predicates[table].extend(
+                parsed_statement.parse_predicate(predicate) for predicate in 
predicates
+            )

Review Comment:
   ### Inefficient Repeated Predicate Parsing <sub>![category 
Performance](https://img.shields.io/badge/Performance-4f46e5)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   Nested loop with repeated predicate parsing inside apply_rls() function 
could be inefficient for large number of predicates.
   
   
   ###### Why this matters
   For tables with many RLS predicates, parsing each predicate individually in 
a loop creates unnecessary overhead. This could significantly impact 
performance when processing queries with multiple tables each having multiple 
RLS rules.
   
   ###### Suggested change ∙ *Feature Preview*
   Parse predicates in bulk or cache parsed predicates to avoid repeated 
parsing operations:
   ```python
   # Parse predicates once upfront
   parsed_predicates = [parsed_statement.parse_predicate(p) for p in predicates]
   predicates[table].extend(parsed_predicates)
   ```
   
   
   ###### Provide feedback to improve future suggestions
   [![Nice 
Catch](https://img.shields.io/badge/👍%20Nice%20Catch-71BC78)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c/upvote)
 
[![Incorrect](https://img.shields.io/badge/👎%20Incorrect-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c?what_not_true=true)
  [![Not in 
Scope](https://img.shields.io/badge/👎%20Out%20of%20PR%20scope-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c?what_out_of_scope=true)
 [![Not in coding 
standard](https://img.shields.io/badge/👎%20Not%20in%20our%20standards-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c?what_not_in_standard=true)
 
[![Other](https://img.shields.io/badge/👎%20Other-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/a84c26ca-67ab-4f7f-8951-993e3e52f52c)
   </details>
   
   <sub>
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:ef5b3490-4620-4f57-aeef-0621a5dad8d2 -->
   
   
   [](ef5b3490-4620-4f57-aeef-0621a5dad8d2)



##########
superset/sql/parse.py:
##########
@@ -1128,6 +1149,15 @@ def set_limit_value(
 
         self._parsed = "".join(val for _, val in tokens)
 
+    def parse_predicate(self, predicate: str) -> str:
+        """
+        Parse a predicate string into an AST.
+
+        :param predicate: The predicate to parse.
+        :return: The parsed predicate.
+        """
+        return predicate

Review Comment:
   ### Missing Predicate Parsing for Kusto KQL <sub>![category 
Functionality](https://img.shields.io/badge/Functionality-0284c7)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   The KustoKQLStatement.parse_predicate method returns the predicate string 
without any actual parsing, which contradicts the method's purpose of parsing a 
predicate into an AST.
   
   
   ###### Why this matters
   Without proper predicate parsing, the RLS (Row Level Security) functionality 
won't work correctly for Kusto KQL, as predicates need to be properly parsed to 
be integrated into queries.
   
   ###### Suggested change ∙ *Feature Preview*
   Implement proper predicate parsing for Kusto KQL predicates or raise 
NotImplementedError if KQL predicate parsing is not supported. Example:
   ```python
   def parse_predicate(self, predicate: str) -> str:
       """Parse a predicate string into an AST.
   
       :param predicate: The predicate to parse.
       :raises NotImplementedError: Kusto KQL predicate parsing is not 
supported.
       """
       raise NotImplementedError("Predicate parsing is not supported for Kusto 
KQL")
   ```
   
   
   ###### Provide feedback to improve future suggestions
   [![Nice 
Catch](https://img.shields.io/badge/👍%20Nice%20Catch-71BC78)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93/upvote)
 
[![Incorrect](https://img.shields.io/badge/👎%20Incorrect-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93?what_not_true=true)
  [![Not in 
Scope](https://img.shields.io/badge/👎%20Out%20of%20PR%20scope-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93?what_out_of_scope=true)
 [![Not in coding 
standard](https://img.shields.io/badge/👎%20Not%20in%20our%20standards-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93?what_not_in_standard=true)
 
[![Other](https://img.shields.io/badge/👎%20Other-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/4953f8ed-884a-46e1-920c-4de255f07d93)
   </details>
   
   <sub>
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:86c29942-3626-4eff-aca4-1e7b02a89685 -->
   
   
   [](86c29942-3626-4eff-aca4-1e7b02a89685)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] WIP [superset]

Reply via email to