bito-code-review[bot] commented on code in PR #37920:
URL: https://github.com/apache/superset/pull/37920#discussion_r2796319304


##########
superset/db_engine_specs/base.py:
##########
@@ -1280,6 +1280,109 @@ def handle_comparison_filter(
         else:
             raise ValueError(f"Invalid comparison filter operator: {op}")
 
+    # DSPM: SO-82
+    @classmethod
+    def handle_array_filter_big_query(
+        cls, sqla_col: Any, op: utils.FilterOperator, value: list[Any]
+    ) -> BinaryExpression:
+        """
+        Handle array filter operations (CONTAINS, NOT_CONTAINS, EQUALS, 
NOT_EQUALS)
+        specifically for BigQuery, with SQL injection protection.
+        """
+        # Only allow safe column names: letters, numbers, underscores
+        col_key = (
+            getattr(sqla_col, "key", None)
+            or getattr(sqla_col, "name", None)
+            or str(sqla_col)
+        )
+        if not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", col_key):
+            raise ValueError(f"Unsafe column name: {col_key}")
+        col_expr = f"`{col_key}`"
+
+        if op == utils.FilterOperator.CONTAINS:
+            clauses = []
+            params = {}
+            for i, v in enumerate(value):
+                clause = (
+                    f"EXISTS (SELECT 1 FROM UNNEST({col_expr}) AS x WHERE x = 
:val_{i})"  # nosec B608  # noqa: S608
+                )
+                clauses.append(clause)
+                params[f"val_{i}"] = v
+            sql = " AND ".join(clauses)
+            return text(sql).bindparams(*[bindparam(k, v) for k, v in 
params.items()])
+
+        elif op == utils.FilterOperator.NOT_CONTAINS:
+            clauses = []
+            params = {}
+            for i, v in enumerate(value):
+                clause = (
+                    f"NOT EXISTS (SELECT 1 FROM UNNEST({col_expr}) AS x "  # 
nosec B608  # noqa: S608
+                    f"WHERE x = :val_{i})"
+                )
+                clauses.append(clause)
+                params[f"val_{i}"] = v
+            sql = " AND ".join(clauses)
+            return text(sql).bindparams(*[bindparam(k, v) for k, v in 
params.items()])
+
+        elif op == utils.FilterOperator.EQUALS:
+            arr = value
+            if not isinstance(arr, (list, tuple)):
+                arr = [arr]
+            length_clause = f"ARRAY_LENGTH({col_expr}) = {len(arr)}"
+            element_clauses = [
+                (f"{col_expr}[OFFSET({i})] = {repr(arr[i])}") for i in 
range(len(arr))
+            ]
+            sql = f"({length_clause} AND " + " AND ".join(element_clauses) + 
")"
+            return text(sql)
+        elif op == utils.FilterOperator.NOT_EQUALS:
+            arr = value
+            if not isinstance(arr, (list, tuple)):
+                arr = [arr]
+            length_clause = f"ARRAY_LENGTH({col_expr}) != {len(arr)}"
+            element_clauses = [
+                (f"{col_expr}[OFFSET({i})] != {repr(arr[i])}") for i in 
range(len(arr))
+            ]
+            sql = f"({length_clause} OR " + " OR ".join(element_clauses) + ")"
+            return text(sql)
+        else:
+            raise Exception(f"Unsupported array filter operator: {op}")
+
+    # DSPM: SO-82
+    @classmethod
+    def handle_array_filter(
+        cls, sqla_col: Any, op: utils.FilterOperator, value: list[Any]
+    ) -> BinaryExpression:
+        """
+        Handle array filter operations (CONTAINS, NOT_CONTAINS, EQUALS, 
NOT_EQUALS).
+
+        BigQuery:
+            - CONTAINS uses 'value IN UNNEST(column)'
+            - NOT_CONTAINS uses 'NOT (value IN UNNEST(column))'
+            - EQUALS/NOT_EQUALS use array equality.
+        Fallback to previous logic for other engines.
+        """
+        from sqlalchemy import func
+
+        engine_name = cls.engine_name
+
+        val = (
+            value[0] if isinstance(value, (list, tuple)) and len(value) == 1 
else value
+        )
+
+        if engine_name == "Google BigQuery":
+            return cls.handle_array_filter_big_query(sqla_col, op, value)
+
+        if op == utils.FilterOperator.CONTAINS:
+            return func.array_contains(sqla_col, val)
+        elif op == utils.FilterOperator.NOT_CONTAINS:
+            return ~func.array_contains(sqla_col, val)

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Incorrect handling of multiple values in 
CONTAINS/NOT_CONTAINS</b></div>
   <div id="fix">
   
   The current logic assumes single values but the type hint indicates a list; 
for multiple values, it passes the entire list to func.array_contains, which 
may not work. Consider handling each value separately.
   </div>
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #2757f6</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



##########
tests/unit_tests/db_engine_specs/test_bigquery.py:
##########
@@ -529,3 +529,81 @@ def test_get_view_names_excludes_materialized_views() -> 
None:
     assert "table_type = 'VIEW'" in executed_query
     # Ensure it's not querying for materialized views
     assert "MATERIALIZED VIEW" not in executed_query
+
+
+# DSPM: SO-82
+def test_handle_array_filter_bigquery() -> None:
+    """
+    Test handle_array_filter for BigQuery array columns and all supported 
operators.
+    """
+    from superset.db_engine_specs.bigquery import BigQueryEngineSpec
+    from superset.utils.core import FilterOperator
+
+    # Simulate a SQLAlchemy column with .key attribute
+    class FakeCol:
+        def __init__(self, key):
+            self.key = key
+
+    col = FakeCol("arr_col")
+
+    # CONTAINS (single value)
+    expr = BigQueryEngineSpec.handle_array_filter(col, 
FilterOperator.CONTAINS, [1])
+    compiled = str(
+        expr.compile(dialect=BigQueryDialect(), 
compile_kwargs={"literal_binds": True})
+    )
+    assert compiled == "EXISTS (SELECT 1 FROM UNNEST(`arr_col`) AS x WHERE x = 
1)"
+
+    # CONTAINS (multiple values)
+    expr = BigQueryEngineSpec.handle_array_filter(col, 
FilterOperator.CONTAINS, [1, 2])
+    compiled = str(
+        expr.compile(dialect=BigQueryDialect(), 
compile_kwargs={"literal_binds": True})
+    )
+    assert compiled == (
+        "EXISTS (SELECT 1 FROM UNNEST(`arr_col`) AS x WHERE x = 1) "
+        "AND EXISTS (SELECT 1 FROM UNNEST(`arr_col`) AS x WHERE x = 2)"
+    )
+
+    # NOT_CONTAINS (single value)
+    expr = BigQueryEngineSpec.handle_array_filter(col, 
FilterOperator.NOT_CONTAINS, [1])
+    compiled = str(
+        expr.compile(dialect=BigQueryDialect(), 
compile_kwargs={"literal_binds": True})
+    )
+    assert compiled == "NOT EXISTS (SELECT 1 FROM UNNEST(`arr_col`) AS x WHERE 
x = 1)"
+
+    # NOT_CONTAINS (multiple values)
+    expr = BigQueryEngineSpec.handle_array_filter(
+        col, FilterOperator.NOT_CONTAINS, [1, 2]
+    )
+    compiled = str(
+        expr.compile(dialect=BigQueryDialect(), 
compile_kwargs={"literal_binds": True})
+    )
+    assert compiled == (
+        "NOT EXISTS (SELECT 1 FROM UNNEST(`arr_col`) AS x WHERE x = 1) "
+        "AND NOT EXISTS (SELECT 1 FROM UNNEST(`arr_col`) AS x WHERE x = 2)"
+    )
+
+    # EQUALS (exact array match)
+    expr = BigQueryEngineSpec.handle_array_filter(col, FilterOperator.EQUALS, 
[1, 2, 3])
+    compiled = str(
+        expr.compile(dialect=BigQueryDialect(), 
compile_kwargs={"literal_binds": True})
+    )
+    assert compiled == (
+        "(ARRAY_LENGTH(`arr_col`) = 3 AND "
+        "`arr_col`[OFFSET(0)] = 1 AND "
+        "`arr_col`[OFFSET(1)] = 2 AND "
+        "`arr_col`[OFFSET(2)] = 3)"
+    )
+
+    # NOT_EQUALS (exact array mismatch)
+    expr = BigQueryEngineSpec.handle_array_filter(
+        col, FilterOperator.NOT_EQUALS, [1, 2, 3]
+    )
+    compiled = str(
+        expr.compile(dialect=BigQueryDialect(), 
compile_kwargs={"literal_binds": True})
+    )
+    assert compiled == (
+        "(ARRAY_LENGTH(`arr_col`) != 3 OR "
+        "`arr_col`[OFFSET(0)] != 1 OR "
+        "`arr_col`[OFFSET(1)] != 2 OR "
+        "`arr_col`[OFFSET(2)] != 3)"
+    )

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Test fails due to SQL literal bug</b></div>
   <div id="fix">
   
   The test assertions for EQUALS and NOT_EQUALS expect correctly formatted SQL 
literals (e.g., = 1 for integers), but the current implementation in base.py 
uses repr() which incorrectly quotes numeric values as strings (= '1'). This 
causes the test to fail. Additionally, the implementation generates invalid SQL 
for empty value lists (trailing AND/OR). The test itself is correct and 
comprehensive for the operators, but the PR should include the implementation 
fix to make the test pass.
   </div>
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #2757f6</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to