pepijnve commented on code in PR #18152:
URL: https://github.com/apache/datafusion/pull/18152#discussion_r2462742063
##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -122,6 +123,276 @@ fn is_cheap_and_infallible(expr: &Arc<dyn PhysicalExpr>) -> bool {
expr.as_any().is::<Column>()
}
+/// Creates a [FilterPredicate] from a boolean array.
+fn create_filter(predicate: &BooleanArray) -> FilterPredicate {
+ let mut filter_builder = FilterBuilder::new(predicate);
+ // Always optimize the filter since we use it multiple times.
+ filter_builder = filter_builder.optimize();
+ filter_builder.build()
+}
+
+// This should be removed when https://github.com/apache/arrow-rs/pull/8693
+// is merged and becomes available.
+fn filter_record_batch(
+ record_batch: &RecordBatch,
+ filter: &FilterPredicate,
+) -> std::result::Result<RecordBatch, ArrowError> {
+ let filtered_columns = record_batch
+ .columns()
+ .iter()
+ .map(|a| filter_array(a, filter))
+ .collect::<std::result::Result<Vec<_>, _>>()?;
+ // SAFETY: since we start from a valid RecordBatch, there's no need to revalidate the schema
+ // because the set of columns has not changed.
+ // The input column arrays all had the same length (since they come from a valid RecordBatch),
+ // and filtering them with the same filter produces a new set of arrays with identical lengths.
+ unsafe {
+ Ok(RecordBatch::new_unchecked(
+ record_batch.schema(),
+ filtered_columns,
+ filter.count(),
+ ))
+ }
+}
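As a rough usage sketch of the two helpers above (the schema, column names, and test harness are made up for illustration, and it assumes a test in the same module), the point is to build the optimized `FilterPredicate` once and then apply it to every column of a batch:

```rust
#[test]
fn filter_helpers_sketch() -> std::result::Result<(), Box<dyn std::error::Error>> {
    use std::sync::Arc;
    use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;

    // A small two-column batch; the schema is purely illustrative.
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
    ]));
    let columns: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![1, 2, 3])),
        Arc::new(StringArray::from(vec!["a", "b", "c"])),
    ];
    let batch = RecordBatch::try_new(schema, columns)?;

    // Build the optimized predicate once ...
    let predicate = create_filter(&BooleanArray::from(vec![true, false, true]));
    // ... and reuse it for every column; the output row count is predicate.count().
    let filtered = filter_record_batch(&batch, &predicate)?;
    assert_eq!(filtered.num_rows(), 2);
    Ok(())
}
```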
+
+#[inline(always)]
+fn filter_array(
+ array: &dyn Array,
+ filter: &FilterPredicate,
+) -> std::result::Result<ArrayRef, ArrowError> {
+ filter.filter(array)
+}
+
+///
+/// Merges elements by index from a list of [`ArrayData`], creating a new [`ArrayRef`] from
+/// those values.
+///
+/// Each element in `indices` is the index of an array in `values` offset by 1. `indices` is
+/// processed sequentially. The first occurrence of index value `n` will be mapped to the first
+/// value of array `n - 1`. The second occurrence to the second value, and so on.
+///
+/// The index value `0` is used to indicate null values.
+///
+/// ```text
+/// ┌─────────────────┐   ┌─────────┐                                  ┌─────────────────┐
+/// │        A        │   │    0    │   merge(                         │      NULL       │
+/// ├─────────────────┤   ├─────────┤     [values0, values1],          ├─────────────────┤
+/// │        D        │   │    2    │     indices                      │        B        │
+/// └─────────────────┘   ├─────────┤   )                              ├─────────────────┤
+///   values array 0      │    2    │   ─────────────────────────▶     │        C        │
+///                       ├─────────┤                                  ├─────────────────┤
+///                       │    1    │                                  │        A        │
+///                       ├─────────┤                                  ├─────────────────┤
+///                       │    1    │                                  │        D        │
+/// ┌─────────────────┐   ├─────────┤                                  ├─────────────────┤
+/// │        B        │   │    2    │                                  │        E        │
+/// ├─────────────────┤   └─────────┘                                  └─────────────────┘
+/// │        C        │
+/// ├─────────────────┤     indices
+/// │        E        │      array                                           result
+/// └─────────────────┘
+///   values array 1
+/// ```
+fn merge(values: &[ArrayData], indices: &[usize]) -> Result<ArrayRef> {
+ let data_refs = values.iter().collect();
+ let mut mutable = MutableArrayData::new(data_refs, true, indices.len());
+
+ // This loop extends the mutable array by taking slices from the partial results.
+ //
+ // take_offsets keeps track of how many values have been taken from each array.
+ let mut take_offsets = vec![0; values.len() + 1];
+ let mut start_row_ix = 0;
+ loop {
+ let array_ix = indices[start_row_ix];
+
+ // Determine the length of the slice to take.
+ let mut end_row_ix = start_row_ix + 1;
+ while end_row_ix < indices.len() && indices[end_row_ix] == array_ix {
+ end_row_ix += 1;
+ }
+ let slice_length = end_row_ix - start_row_ix;
+
+ // Extend mutable with either nulls or with values from the array.
+ let start_offset = take_offsets[array_ix];
+ let end_offset = start_offset + slice_length;
+ if array_ix == 0 {
+ mutable.extend_nulls(slice_length);
+ } else {
+ mutable.extend(array_ix - 1, start_offset, end_offset);
+ }
+
+ if end_row_ix == indices.len() {
+ break;
+ } else {
+ // Update the take_offsets array.
+ take_offsets[array_ix] = end_offset;
+ // Set the start_row_ix for the next slice.
+ start_row_ix = end_row_ix;
+ }
+ }
+
+ Ok(make_array(mutable.freeze()))
+}
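For what it's worth, a minimal sketch of the behaviour shown in the diagram above (string arrays assumed; it would have to live in this module since `merge` is private):

```rust
#[test]
fn merge_sketch() {
    use arrow::array::{Array, StringArray};

    // values0 = [A, D], values1 = [B, C, E]; index 0 selects a null,
    // index n > 0 selects the next unconsumed value from values[n - 1].
    let values0 = StringArray::from(vec!["A", "D"]).to_data();
    let values1 = StringArray::from(vec!["B", "C", "E"]).to_data();
    let merged = merge(&[values0, values1], &[0, 2, 2, 1, 1, 2]).unwrap();

    let merged = merged.as_any().downcast_ref::<StringArray>().unwrap();
    assert!(merged.is_null(0));
    assert_eq!(merged.value(1), "B");
    assert_eq!(merged.value(2), "C");
    assert_eq!(merged.value(3), "A");
    assert_eq!(merged.value(4), "D");
    assert_eq!(merged.value(5), "E");
}
```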
+
+/// A builder for constructing result arrays for CASE expressions.
+///
+/// Rather than building a monolithic array containing all results, it maintains a set of
+/// partial result arrays and a mapping that indicates for each row which partial array
+/// contains the result value for that row.
+///
+/// On finish(), the builder will merge all partial results into a single array if necessary.
+/// If all rows evaluated to the same array, that array can be returned directly without
+/// any merging overhead.
+struct ResultBuilder {
+ data_type: DataType,
+ // A Vec of partial results that should be merged. `partial_result_indices` contains
+ // indexes into this vec.
+ partial_results: Vec<ArrayData>,
+ // Indicates per result row from which array in `partial_results` a value should be taken.
+ // The indexes in this array are offset by +1. The special value 0 indicates null values.
+ partial_result_indices: Vec<usize>,
+ // An optional result that is the covering result for all rows.
+ // This is used as an optimisation to avoid the cost of merging when all rows
+ // evaluate to the same case branch.
+ covering_result: Option<ColumnarValue>,
+}
+
+impl ResultBuilder {
+ /// Creates a new ResultBuilder that will produce arrays of the given data type.
+ ///
+ /// The capacity parameter indicates the number of rows in the result.
+ fn new(data_type: &DataType, capacity: usize) -> Self {
+ Self {
+ data_type: data_type.clone(),
+ partial_result_indices: vec![0; capacity],
+ partial_results: vec![],
+ covering_result: None,
+ }
+ }
+
+ /// Adds a result for one branch of the case expression.
+ ///
+ /// `row_indices` should be a [UInt32Array] containing [RecordBatch] relative row indices
+ /// for which `value` contains result values.
+ ///
+ /// If `value` is a scalar, the scalar value will be used as the value for each row in `row_indices`.
+ ///
+ /// If `value` is an array, the values from the array and the indices from `row_indices` will be
+ /// processed pairwise. The lengths of `value` and `row_indices` must match.
+ ///
+ /// The diagram below shows a situation where a when expression matched rows 1 and 4 of the
+ /// record batch. The then expression produced the value array `[A, D]`.
+ /// After adding this result, the result array will have been added to `partial_results` and
+ /// `partial_indices` will have been updated at indexes 1 and 4.
+ ///
+ /// ```text
+ /// ┌─────────┐     ┌─────────┐┌───────────┐                                ┌─────────┐┌───────────┐
+ /// │    A    │     │    0    ││           │                                │    0    ││┌─────────┐│
+ /// ├─────────┤     ├─────────┤│           │                                ├─────────┤││    A    ││
+ /// │    D    │     │    0    ││           │                                │    1    ││├─────────┤│
+ /// └─────────┘     ├─────────┤│           │   add_branch_result(           ├─────────┤││    D    ││
+ ///   value         │    0    ││           │     row indices,               │    0    ││└─────────┘│
+ ///                 ├─────────┤│           │     value                      ├─────────┤│           │
+ ///                 │    0    ││           │   )                            │    0    ││           │
+ /// ┌─────────┐     ├─────────┤│           │   ─────────────────────────▶   ├─────────┤│           │
+ /// │    1    │     │    0    ││           │                                │    1    ││           │
+ /// ├─────────┤     ├─────────┤│           │                                ├─────────┤│           │
+ /// │    4    │     │    0    ││           │                                │    0    ││           │
+ /// └─────────┘     └─────────┘└───────────┘                                └─────────┘└───────────┘
+ /// row indices
+ ///                  partial      partial                                    partial     partial
+ ///                  indices      results                                    indices     results
+ /// ```
+ fn add_branch_result(
+ &mut self,
+ row_indices: &ArrayRef,
+ value: ColumnarValue,
+ ) -> Result<()> {
+ match value {
+ ColumnarValue::Array(a) => {
+ assert_eq!(a.len(), row_indices.len());
+ if row_indices.len() == self.partial_result_indices.len() {
Review Comment:
Yes, the code very much makes that assumption. It's tightly coupled to the
way `case_when_with_expr` and `case_when_no_expr` are implemented. We could add
validation checks here, but you end up spending quite some time verifying
invariants. I couldn't figure out a way to check this without introducing
overhead.
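For anyone reading along, the implied calling contract looks roughly like this (a made-up sketch, not code from the PR; it assumes it runs inside this module since `ResultBuilder` is private, and `finish` is elided):

```rust
use std::sync::Arc;
use arrow::array::{ArrayRef, StringArray, UInt32Array};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::ColumnarValue;

// Hypothetical caller: one when-branch matched rows 1 and 4 of a 6-row batch,
// and its then-expression was evaluated only on those rows, so `row_indices`
// and `value` line up by construction.
fn add_one_branch() -> Result<()> {
    let mut builder = ResultBuilder::new(&DataType::Utf8, 6);

    let row_indices: ArrayRef = Arc::new(UInt32Array::from(vec![1u32, 4]));
    let value = ColumnarValue::Array(Arc::new(StringArray::from(vec!["A", "D"])));

    // The assert_eq! on the lengths is the only guard; the rest is upheld by the callers.
    builder.add_branch_result(&row_indices, value)
}
```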
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]