Jefffrey commented on code in PR #20620:
URL: https://github.com/apache/datafusion/pull/20620#discussion_r2872440160
##########
datafusion/functions-nested/src/concat.rs:
##########
@@ -396,58 +396,55 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef])
-> Result<ArrayRef> {
.iter()
.map(|arg| as_generic_list_array::<O>(arg))
.collect::<Result<Vec<_>>>()?;
- // Assume number of rows is the same for all arrays
let row_count = list_arrays[0].len();
- let mut array_lengths = vec![];
- let mut arrays = vec![];
+ // Extract underlying values ArrayData from each list array for
MutableArrayData.
+ let values_data: Vec<ArrayData> =
+ list_arrays.iter().map(|la| la.values().to_data()).collect();
+ let values_data_refs: Vec<&ArrayData> = values_data.iter().collect();
+
+ // Estimate capacity as the sum of all values arrays' lengths.
+ let total_capacity: usize = values_data.iter().map(|d| d.len()).sum();
+
+ let mut mutable = MutableArrayData::with_capacities(
+ values_data_refs,
+ false,
+ Capacities::Array(total_capacity),
+ );
+ let mut offsets: Vec<O> = Vec::with_capacity(row_count + 1);
+ offsets.push(O::zero());
let mut valid = NullBufferBuilder::new(row_count);
- for i in 0..row_count {
- let nulls = list_arrays
- .iter()
- .map(|arr| arr.is_null(i))
- .collect::<Vec<_>>();
- // If all the arrays are null, the concatenated array is null
- let is_null = nulls.iter().all(|&x| x);
- if is_null {
- array_lengths.push(0);
- valid.append_null();
- } else {
- // Get all the arrays on i-th row
- let values = list_arrays
- .iter()
- .map(|arr| arr.value(i))
- .collect::<Vec<_>>();
-
- let elements = values
- .iter()
- .map(|a| a.as_ref())
- .collect::<Vec<&dyn Array>>();
-
- // Concatenated array on i-th row
- let concatenated_array =
arrow::compute::concat(elements.as_slice())?;
- array_lengths.push(concatenated_array.len());
- arrays.push(concatenated_array);
+ for row_idx in 0..row_count {
+ let mut row_has_values = false;
Review Comment:
We could also create the valid buffer once upfront outside this loop by
using
[`NullBuffer::union`](https://docs.rs/arrow/latest/arrow/buffer/struct.NullBuffer.html#method.union),
which should remove the need for this variable?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]