Copilot commented on code in PR #20830:
URL: https://github.com/apache/datafusion/pull/20830#discussion_r2907955425
##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -7003,8 +7003,76 @@ select count(*) from arrays where 'X'=any(column3);
----
0
-query error DataFusion error: Error during planning: Unsupported AnyOp: '>',
only '=' is supported
-select count(*) from arrays where 'X'>any(column3);
+# any operator with comparison operators
+# Use inline arrays so the test data is visible and the needle (5)
+# falls within the range of some arrays but not others.
+statement ok
+CREATE TABLE any_op_test AS VALUES
+ (1, make_array(1, 2, 3)),
+ (2, make_array(4, 5, 6)),
+ (3, make_array(7, 8, 9)),
+ (4, make_array(3, 5, 7));
+
+# 5 > ANY(arr): true when array_min < 5
+# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓
+query I?
+select column1, column2 from any_op_test where 5 > any(column2) order by
column1;
+----
+1 [1, 2, 3]
+2 [4, 5, 6]
+4 [3, 5, 7]
+
+# 5 >= ANY(arr): true when array_min <= 5
+# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5
✓
+query I?
+select column1, column2 from any_op_test where 5 >= any(column2) order by
column1;
+----
+1 [1, 2, 3]
+2 [4, 5, 6]
+4 [3, 5, 7]
+
+# 5 < ANY(arr): true when array_max > 5
+# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓
+query I?
+select column1, column2 from any_op_test where 5 < any(column2) order by
column1;
+----
+2 [4, 5, 6]
+3 [7, 8, 9]
+4 [3, 5, 7]
+
+# 5 <= ANY(arr): true when array_max >= 5
+# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5
✓
+query I?
+select column1, column2 from any_op_test where 5 <= any(column2) order by
column1;
+----
+2 [4, 5, 6]
+3 [7, 8, 9]
+4 [3, 5, 7]
+
+# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5
+# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5
✓, row4: [3,5,7] min=3!=5 ✓
+query I?
+select column1, column2 from any_op_test where 5 <> any(column2) order by
column1;
+----
+1 [1, 2, 3]
+2 [4, 5, 6]
+3 [7, 8, 9]
+4 [3, 5, 7]
+
+# For a single-element array where the element equals the needle, <> should
return false
+query B
+select 5 <> any(make_array(5));
+----
+false
+
+# For a uniform array [5,5,5], <> should also return false
+query B
+select 5 <> any(make_array(5, 5, 5));
+----
+false
Review Comment:
The new SLT coverage doesn't currently exercise edge cases like `ANY` over
an empty array (`make_array()`) or an array with only NULL elements. These
cases are important because the implementation uses array_min/array_max, which
return NULL for empty/all-null arrays, and could change the truth value of `x
op ANY(arr)` compared to expected existential semantics. Please add a couple of
scalar queries asserting the expected boolean result for each operator on
empty/all-null arrays.
##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -612,16 +612,24 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context,
),
_ => {
- if compare_op != BinaryOperator::Eq {
- plan_err!(
- "Unsupported AnyOp: '{compare_op}', only '=' is
supported"
- )
- } else {
- let left_expr =
- self.sql_to_expr(*left, schema, planner_context)?;
- let right_expr =
- self.sql_to_expr(*right, schema, planner_context)?;
- Ok(array_has(right_expr, left_expr))
+ let left_expr = self.sql_to_expr(*left, schema,
planner_context)?;
+ let right_expr = self.sql_to_expr(*right, schema,
planner_context)?;
+ match compare_op {
+ BinaryOperator::Eq => Ok(array_has(right_expr,
left_expr)),
+ BinaryOperator::NotEq =>
Ok(array_min(right_expr.clone())
+ .not_eq(left_expr.clone())
+ .or(array_max(right_expr).not_eq(left_expr))),
+ BinaryOperator::Gt =>
Ok(array_min(right_expr).lt(left_expr)),
Review Comment:
The ANY comparison rewrite using array_min/array_max will return NULL
(unknown) for empty arrays and for arrays whose elements are all NULL (because
array_min/array_max return NULL in those cases). For existential ANY semantics
(and consistent with the existing `=` ANY implementation via `array_has`, which
returns false for empty/all-null arrays), these cases should evaluate to false
(while still returning NULL when the array itself is NULL). Consider guarding
with a list-null check and treating NULL results from array_min/array_max as
false (e.g., via CASE/COALESCE) across these match arms.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]