rluvaton commented on code in PR #18152:
URL: https://github.com/apache/datafusion/pull/18152#discussion_r2466053067
##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -196,82 +577,135 @@ impl CaseExpr {
/// END
fn case_when_with_expr(&self, batch: &RecordBatch) ->
Result<ColumnarValue> {
let return_type = self.data_type(&batch.schema())?;
- let expr = self.expr.as_ref().unwrap();
- let base_value = expr.evaluate(batch)?;
- let base_value = base_value.into_array(batch.num_rows())?;
+ let mut result_builder = ResultBuilder::new(&return_type,
batch.num_rows());
+
+ // `remainder_rows` contains the indices of the rows that need to be
evaluated
+ let mut remainder_rows: ArrayRef =
+ Arc::new(UInt32Array::from_iter_values(0..batch.num_rows() as
u32));
+ // `remainder_batch` contains the rows themselves that need to be
evaluated
+ let mut remainder_batch = Cow::Borrowed(batch);
+
+ // evaluate the base expression
+ let mut base_value = self
+ .expr
+ .as_ref()
+ .unwrap()
+ .evaluate(batch)?
+ .into_array(batch.num_rows())?;
+
+ // Fill in a result value already for rows where the base expression
value is null
+ // Since each when expression is tested against the base expression
using the equality
+ // operator, null base values can never match any when expression. `x
= NULL` is falsy,
+ // for all possible values of `x`.
let base_nulls = is_null(base_value.as_ref())?;
+ if base_nulls.true_count() > 0 {
+ // If there is an else expression, use that as the default value
for the null rows
+ // Otherwise the default `null` value from the result builder will
be used.
+ if let Some(e) = self.else_expr() {
+ let expr = try_cast(Arc::clone(e), &batch.schema(),
return_type.clone())?;
- // start with nulls as default output
- let mut current_value = new_null_array(&return_type, batch.num_rows());
- // We only consider non-null values while comparing with whens
- let mut remainder = not(&base_nulls)?;
- let mut non_null_remainder_count = remainder.true_count();
- for i in 0..self.when_then_expr.len() {
- // If there are no rows left to process, break out of the loop
early
- if non_null_remainder_count == 0 {
- break;
+ if base_nulls.true_count() == remainder_batch.num_rows() {
+ // All base values were null, so no need to filter
+ let nulls_value = expr.evaluate(&remainder_batch)?;
+ result_builder.add_branch_result(&remainder_rows,
nulls_value)?;
+ } else {
+ let nulls_filter = create_filter(&base_nulls);
+ let nulls_batch =
+ filter_record_batch(&remainder_batch, &nulls_filter)?;
+ let nulls_rows = filter_array(&remainder_rows,
&nulls_filter)?;
+ let nulls_value = expr.evaluate(&nulls_batch)?;
+ result_builder.add_branch_result(&nulls_rows,
nulls_value)?;
+ }
}
- let when_predicate = &self.when_then_expr[i].0;
- let when_value = when_predicate.evaluate_selection(batch,
&remainder)?;
- let when_value = when_value.into_array(batch.num_rows())?;
- // build boolean array representing which rows match the "when"
value
- let when_match = compare_with_eq(
- &when_value,
- &base_value,
- // The types of case and when expressions will be coerced to
match.
- // We only need to check if the base_value is nested.
- base_value.data_type().is_nested(),
- )?;
- // Treat nulls as false
- let when_match = match when_match.null_count() {
- 0 => Cow::Borrowed(&when_match),
- _ => Cow::Owned(prep_null_mask_filter(&when_match)),
- };
- // Make sure we only consider rows that have not been matched yet
- let when_value = and(&when_match, &remainder)?;
+ // All base values were null, so we can return early
+ if base_nulls.true_count() == remainder_batch.num_rows() {
Review Comment:
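Same unnecessary computation as in
https://github.com/apache/datafusion/pull/18152/files#r2466038692 —
`base_nulls.true_count()` is evaluated again here even though it was already
computed for the checks above; the count could be computed once and reused.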
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]