Jefffrey commented on code in PR #9975:
URL: https://github.com/apache/arrow-datafusion/pull/9975#discussion_r1554728440
##########
datafusion/physical-plan/src/unnest.rs:
##########
@@ -364,32 +364,31 @@ fn unnest_generic_list<T: OffsetSizeTrait, P:
ArrowPrimitiveType<Native = T>>(
options: &UnnestOptions,
) -> Result<Arc<dyn Array + 'static>> {
let values = list_array.values();
- if list_array.null_count() == 0 || !options.preserve_nulls {
- Ok(values.clone())
- } else {
- let mut take_indicies_builder =
- PrimitiveArray::<P>::builder(values.len() +
list_array.null_count());
- let mut take_offset = 0;
+ if list_array.null_count() == 0 {
+ return Ok(values.clone());
+ }
- list_array.iter().for_each(|elem| match elem {
- Some(array) => {
- for i in 0..array.len() {
- // take_offset + i is always positive
- let take_index = P::Native::from_usize(take_offset +
i).unwrap();
- take_indicies_builder.append_value(take_index);
- }
- take_offset += array.len();
- }
- None => {
+ let mut take_indicies_builder =
+ PrimitiveArray::<P>::builder(values.len() + list_array.null_count());
+ let offsets = list_array.value_offsets();
+ for row in 0..list_array.len() {
+ if list_array.is_null(row) {
+ if options.preserve_nulls {
Review Comment:
Bit surprised that clippy didn't suggest collapsing these together? Either
way, would it be slightly more efficient to put the `options.preserve_nulls`
check first since this would never change per iteration?
##########
datafusion/physical-plan/src/unnest.rs:
##########
@@ -596,3 +595,99 @@ where
Ok(RecordBatch::try_new(schema.clone(), arrays.to_vec())?)
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::{
+ array::AsArray,
+ datatypes::{DataType, Field},
+ };
+ use arrow_array::StringArray;
+ use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+
+ // Create a ListArray with the following list values:
+ // [A, B, C], [], NULL, [D], NULL, [NULL, F]
Review Comment:
Nice tests :+1:
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]