buraksenn commented on code in PR #21416:
URL: https://github.com/apache/datafusion/pull/21416#discussion_r3104983851
##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -1297,6 +1303,64 @@ fn plan_any_op(
}
}
+/// Plans `left_expr <compare_op> ALL(right_expr)` with proper SQL NULL
semantics.
+///
+/// CASE/WHEN structure:
+/// WHEN arr IS NULL → NULL
+/// WHEN empty → TRUE
+/// WHEN lhs IS NULL → NULL
+/// WHEN decisive_condition → FALSE
+/// WHEN has_nulls → NULL
+/// ELSE → TRUE
+fn plan_all_op(
+ left_expr: &Expr,
+ right_expr: &Expr,
+ compare_op: &BinaryOperator,
+) -> Result<Expr> {
+ let null_arr_check = right_expr.clone().is_null();
+ let empty_check = cardinality(right_expr.clone()).eq(lit(0u64));
+ let null_lhs_check = left_expr.clone().is_null();
+ // DataFusion's array_position uses is_null() checks internally (not
equality),
+ // so it can locate NULL elements even though NULL = NULL is NULL in
standard SQL.
+ let has_nulls = array_position(right_expr.clone(), lit(ScalarValue::Null),
lit(1i64))
+ .is_not_null();
+
+ let decisive_condition = match compare_op {
+ BinaryOperator::NotEq => array_has(right_expr.clone(),
left_expr.clone()),
+ BinaryOperator::Eq => {
+ let all_equal = array_min(right_expr.clone())
+ .eq(left_expr.clone())
+ .and(array_max(right_expr.clone()).eq(left_expr.clone()));
+ Expr::Not(Box::new(all_equal))
+ }
+ BinaryOperator::Gt => Expr::Not(Box::new(
+ left_expr.clone().gt(array_max(right_expr.clone())),
+ )),
+ BinaryOperator::Lt => Expr::Not(Box::new(
+ left_expr.clone().lt(array_min(right_expr.clone())),
+ )),
+ BinaryOperator::GtEq => Expr::Not(Box::new(
+ left_expr.clone().gt_eq(array_max(right_expr.clone())),
+ )),
+ BinaryOperator::LtEq => Expr::Not(Box::new(
+ left_expr.clone().lt_eq(array_min(right_expr.clone())),
+ )),
+ _ => {
+ return plan_err!(
+ "Unsupported AllOp: '{compare_op}', only '=', '<>', '>', '<',
'>=', '<=' are supported"
+ );
+ }
+ };
+
+ let null_bool = lit(ScalarValue::Boolean(None));
+ when(null_arr_check, null_bool.clone())
+ .when(empty_check, lit(true))
+ .when(null_lhs_check, null_bool.clone())
+ .when(decisive_condition, lit(false))
+ .when(has_nulls, null_bool)
+ .otherwise(lit(true))
Review Comment:
I was thinking about this as well, especially with regard to performance. As
you said, I thought maybe I could do a follow-up UDF implementation and
compare the results.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]