pepijnve commented on code in PR #17813:
URL: https://github.com/apache/datafusion/pull/17813#discussion_r2545958309


##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -282,14 +284,80 @@ impl ExprSchemable for Expr {
             Expr::OuterReferenceColumn(field, _) => Ok(field.is_nullable()),
             Expr::Literal(value, _) => Ok(value.is_null()),
             Expr::Case(case) => {
-                // This expression is nullable if any of the input expressions 
are nullable
-                let then_nullable = case
+                let nullable_then = case
                     .when_then_expr
                     .iter()
-                    .map(|(_, t)| t.nullable(input_schema))
-                    .collect::<Result<Vec<_>>>()?;
-                if then_nullable.contains(&true) {
-                    Ok(true)
+                    .filter_map(|(w, t)| {
+                        let is_nullable = match t.nullable(input_schema) {
+                            Err(e) => return Some(Err(e)),
+                            Ok(n) => n,
+                        };
+
+                        // Branches with a then expression that is not 
nullable do not impact the
+                        // nullability of the case expression.
+                        if !is_nullable {
+                            return None;
+                        }
+
+                        // For case-with-expression assume all 'then' 
expressions are reachable
+                        if case.expr.is_some() {
+                            return Some(Ok(()));
+                        }
+
+                        // For branches with a nullable 'then' expression, try 
to determine
+                        // if the 'then' expression is ever reachable in the 
situation where
+                        // it would evaluate to null.
+
+                        // First, derive a variant of the 'when' expression, 
where all occurrences
+                        // of the 'then' expression have been replaced by 
'NULL'.
+                        let certainly_null_expr = 
unwrap_certainly_null_expr(t).clone();
+                        let certainly_null_type =
+                            match certainly_null_expr.get_type(input_schema) {
+                                Err(e) => return Some(Err(e)),
+                                Ok(datatype) => datatype,
+                            };
+                        let null_interval = NullableInterval::Null {
+                            datatype: certainly_null_type,
+                        };
+                        let guarantees = vec![(certainly_null_expr, 
null_interval)];
+                        let when_with_null =
+                            match rewrite_with_guarantees(*w.clone(), 
&guarantees) {
+                                Err(e) => return Some(Err(e)),
+                                Ok(e) => e.data,
+                            };
+
+                        // Next, determine the bounds of the derived 'when' 
expression to see if it
+                        // can ever evaluate to true.
+                        let bounds = match predicate_bounds::evaluate_bounds(
+                            &when_with_null,
+                            input_schema,
+                        ) {
+                            Err(e) => return Some(Err(e)),
+                            Ok(b) => b,
+                        };
+
+                        let can_be_true = match bounds
+                            .contains_value(ScalarValue::Boolean(Some(true)))
+                        {
+                            Err(e) => return Some(Err(e)),
+                            Ok(b) => b,
+                        };
+
+                        if !can_be_true {
+                            // If the derived 'when' expression can never 
evaluate to true, the
+                            // 'then' expression is not reachable when it 
would evaluate to NULL.
+                            // The most common pattern for this is `WHEN x IS 
NOT NULL THEN x`.
+                            None
+                        } else {
+                            // The branch might be taken
+                            Some(Ok(()))
+                        }
+                    })
+                    .next();

Review Comment:
   The change from `collect` to `next().is_some()` does mitigate the 
performance overhead a little bit. As soon as one nullable branch is found the 
iteration will stop.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to