This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch hackathon-12-2025
in repository https://gitbox.apache.org/repos/asf/superset.git

commit c07382d606584b697db21fa1b6b1edeb7ea08eb3
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Dec 19 10:56:43 2025 -0500

    Snowflake fixes
---
 superset/data_access_rules/utils.py |   8 +--
 superset/sql/parse.py               |  41 +++++++++++---
 tests/unit_tests/sql/parse_tests.py | 103 ++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/superset/data_access_rules/utils.py 
b/superset/data_access_rules/utils.py
index b36c82fe6b..14acdb727b 100644
--- a/superset/data_access_rules/utils.py
+++ b/superset/data_access_rules/utils.py
@@ -551,7 +551,7 @@ def apply_data_access_rules(
         if table_cls:
             cls_rules[qualified_table] = table_cls
 
-    # Apply CLS first (before RLS) so that hidden columns are removed
+    # Apply CLS first (before RLS) so that column transformations happen
     # before RLS wraps the query in a subquery
     if cls_rules:
         # Build schema dict for sqlglot's qualify() to expand SELECT *
@@ -591,8 +591,10 @@ def apply_data_access_rules(
 
         parsed_statement.apply_cls(cls_rules, schema=table_schemas if 
table_schemas else None)
 
-    # Apply RLS after CLS - RLS wraps the query in a subquery with SELECT *
-    # which will pick up the already-transformed columns from CLS
+    # Apply RLS after CLS
+    # Note: CLS runs qualify() which may change identifier case for some 
databases
+    # (e.g., Snowflake uppercases identifiers). The RLSTransformer normalizes
+    # table names to lowercase for case-insensitive matching.
     if rls_predicates:
         parsed_statement.apply_rls(catalog, schema, rls_predicates, method)
 
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index c674342820..709085d728 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -295,16 +295,33 @@ class RLSTransformer:
     ) -> None:
         self.catalog = catalog
         self.schema = schema
-        self.rules = rules
+        # Normalize table keys to lowercase for case-insensitive matching
+        # This is needed because apply_cls calls qualify() which may change
+        # identifier case (e.g., Snowflake uppercases identifiers)
+        self.rules = {
+            self._normalize_table(table): predicates
+            for table, predicates in rules.items()
+        }
+
+    @staticmethod
+    def _normalize_table(table: Table) -> Table:
+        """Normalize table to lowercase for case-insensitive matching."""
+        return Table(
+            table=table.table.lower() if table.table else table.table,
+            schema=table.schema.lower() if table.schema else table.schema,
+            catalog=table.catalog.lower() if table.catalog else table.catalog,
+        )
 
     def get_predicate(self, table_node: exp.Table) -> exp.Expression | None:
         """
         Get the combined RLS predicate for a table.
         """
-        table = Table(
-            table_node.name,
-            table_node.db if table_node.db else self.schema,
-            table_node.catalog if table_node.catalog else self.catalog,
+        table = self._normalize_table(
+            Table(
+                table_node.name,
+                table_node.db if table_node.db else self.schema,
+                table_node.catalog if table_node.catalog else self.catalog,
+            )
         )
         if predicates := self.rules.get(table):
             return sqlglot.and_(*predicates)
@@ -396,7 +413,19 @@ class RLSAsSubqueryTransformer(RLSTransformer):
 
         if predicate := self.get_predicate(node):
             if node.alias:
-                alias = node.alias
+                # After qualify(), alias might be a string instead of 
TableAlias
+                if isinstance(node.alias, str):
+                    # Check if the original alias was quoted by looking at the 
SQL
+                    # If the alias contains special chars or is a reserved 
word, quote it
+                    needs_quoting = (
+                        not node.alias.isidentifier()
+                        or "." in node.alias
+                    )
+                    alias = exp.TableAlias(
+                        this=exp.Identifier(this=node.alias, 
quoted=needs_quoting)
+                    )
+                else:
+                    alias = node.alias
             else:
                 name = ".".join(
                     part
diff --git a/tests/unit_tests/sql/parse_tests.py 
b/tests/unit_tests/sql/parse_tests.py
index 61063f8f88..5273fda42b 100644
--- a/tests/unit_tests/sql/parse_tests.py
+++ b/tests/unit_tests/sql/parse_tests.py
@@ -4690,3 +4690,106 @@ def test_merge_cls_rules_complex_scenario() -> None:
             "credit_card": CLSAction.HIDE,
         },
     }
+
+
+def test_combined_rls_and_cls_snowflake() -> None:
+    """
+    Test combined RLS and CLS application with Snowflake dialect.
+
+    This tests a real-world scenario where:
+    - A table has both RLS (row-level security) and CLS (column-level 
security) rules
+    - The column names in the rule are lowercase but Snowflake uppercases 
identifiers
+    - Both transformations should be applied correctly
+
+    Rule configuration:
+    {
+      "allowed": [
+        {"database": "Snowflake", "catalog": "SAMPLE_DATA", "schema": 
"tpcds_sf10tcl"},
+        {
+          "database": "Snowflake",
+          "table": "customer",
+          "catalog": "SAMPLE_DATA",
+          "schema": "tpcds_sf10tcl",
+          "rls": {"predicate": "C_BIRTH_COUNTRY = 'BRAZIL'"},
+          "cls": {"c_email_address": "hash"}
+        }
+      ]
+    }
+    """
+    sql = "SELECT C_BIRTH_COUNTRY, C_EMAIL_ADDRESS FROM customer LIMIT 10"
+    statement = SQLStatement(sql, engine="snowflake")
+
+    # CLS rules with lowercase column name (as stored in the rule)
+    cls_rules = {
+        Table("customer", "tpcds_sf10tcl", "SAMPLE_DATA"): {
+            "c_email_address": CLSAction.HASH
+        }
+    }
+
+    # RLS predicate
+    rls_predicate = statement.parse_predicate("C_BIRTH_COUNTRY = 'BRAZIL'")
+    rls_predicates = {
+        Table("customer", "tpcds_sf10tcl", "SAMPLE_DATA"): [rls_predicate]
+    }
+
+    # Schema for qualify() to expand SELECT *
+    table_schema = {
+        "SAMPLE_DATA": {
+            "tpcds_sf10tcl": {
+                "customer": {
+                    "C_BIRTH_COUNTRY": "VARCHAR",
+                    "C_EMAIL_ADDRESS": "VARCHAR",
+                }
+            }
+        }
+    }
+
+    # Apply CLS first (before RLS)
+    statement.apply_cls(cls_rules, schema=table_schema)
+
+    # Apply RLS after CLS
+    statement.apply_rls(
+        "SAMPLE_DATA", "tpcds_sf10tcl", rls_predicates, RLSMethod.AS_SUBQUERY
+    )
+
+    result = statement.format()
+
+    # Verify CLS is applied: c_email_address should be hashed with MD5
+    assert "MD5(TO_CHAR(" in result
+    assert "C_EMAIL_ADDRESS" in result
+
+    # Verify RLS is applied: should have subquery with WHERE clause
+    assert "WHERE" in result
+    assert "C_BIRTH_COUNTRY = 'BRAZIL'" in result
+
+    # Verify the structure: should be SELECT ... FROM (SELECT * FROM ... WHERE 
...) AS ...
+    assert "SELECT" in result
+    assert "FROM (" in result
+
+
+def test_combined_rls_and_cls_case_insensitive_matching() -> None:
+    """
+    Test that RLS and CLS matching is case-insensitive.
+
+    Snowflake uppercases identifiers, so the rule column names (lowercase)
+    must match the query column names (uppercase after qualify()).
+    """
+    sql = "SELECT email FROM users"
+    statement = SQLStatement(sql, engine="snowflake")
+
+    # Rules with lowercase names
+    cls_rules = {Table("users"): {"email": CLSAction.HASH}}
+    rls_predicate = statement.parse_predicate("active = TRUE")
+    rls_predicates = {Table("users"): [rls_predicate]}
+
+    table_schema = {"users": {"email": "VARCHAR", "active": "BOOLEAN"}}
+
+    # Apply CLS then RLS
+    statement.apply_cls(cls_rules, schema=table_schema)
+    statement.apply_rls(None, None, rls_predicates, RLSMethod.AS_SUBQUERY)
+
+    result = statement.format()
+
+    # Both should be applied despite case differences
+    assert "MD5" in result  # CLS hash applied
+    assert "active = TRUE" in result  # RLS predicate applied

Reply via email to