This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 98e894240c refactor: switch BooleanBufferBuilder to NullBufferBuilder
in functions-nested functions (#14201)
98e894240c is described below
commit 98e894240cc621a790864567b079c82cd4ec3b95
Author: Ian Lai <[email protected]>
AuthorDate: Mon Jan 20 09:10:31 2025 +0800
refactor: switch BooleanBufferBuilder to NullBufferBuilder in
functions-nested functions (#14201)
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
datafusion/functions-nested/src/concat.rs | 11 +++++------
datafusion/functions-nested/src/range.rs | 10 +++++-----
datafusion/functions-nested/src/replace.rs | 12 ++++++------
datafusion/functions-nested/src/resize.rs | 11 +++++------
4 files changed, 21 insertions(+), 23 deletions(-)
diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs
index 934c5a5fec..a6557e36da 100644
--- a/datafusion/functions-nested/src/concat.rs
+++ b/datafusion/functions-nested/src/concat.rs
@@ -22,7 +22,7 @@ use std::{any::Any, cmp::Ordering};
use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow_schema::{DataType, Field};
use datafusion_common::Result;
use datafusion_common::{
@@ -354,7 +354,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let mut array_lengths = vec![];
let mut arrays = vec![];
- let mut valid = BooleanBufferBuilder::new(row_count);
+ let mut valid = NullBufferBuilder::new(row_count);
for i in 0..row_count {
let nulls = list_arrays
.iter()
@@ -365,7 +365,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let is_null = nulls.iter().all(|&x| x);
if is_null {
array_lengths.push(0);
- valid.append(false);
+ valid.append_null();
} else {
// Get all the arrays on i-th row
let values = list_arrays
@@ -382,12 +382,11 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let concatenated_array =
arrow::compute::concat(elements.as_slice())?;
array_lengths.push(concatenated_array.len());
arrays.push(concatenated_array);
- valid.append(true);
+ valid.append_non_null();
}
}
// Assume all arrays have the same data type
let data_type = list_arrays[0].value_type();
- let buffer = valid.finish();
let elements = arrays
.iter()
@@ -398,7 +397,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
Arc::new(Field::new_list_field(data_type, true)),
OffsetBuffer::from_lengths(array_lengths),
Arc::new(arrow::compute::concat(elements.as_slice())?),
- Some(NullBuffer::new(buffer)),
+ valid.finish(),
);
Ok(Arc::new(list_arr))
diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs
index 4f8132f59e..ff148f04ac 100644
--- a/datafusion/functions-nested/src/range.rs
+++ b/datafusion/functions-nested/src/range.rs
@@ -27,7 +27,7 @@ use arrow_array::types::{
Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT,
};
use arrow_array::{NullArray, TimestampNanosecondArray};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow_schema::DataType::*;
use arrow_schema::IntervalUnit::MonthDayNano;
use arrow_schema::TimeUnit::Nanosecond;
@@ -345,7 +345,7 @@ pub(super) fn gen_range_inner(
let mut values = vec![];
let mut offsets = vec![0];
- let mut valid = BooleanBufferBuilder::new(stop_array.len());
+ let mut valid = NullBufferBuilder::new(stop_array.len());
for (idx, stop) in stop_array.iter().enumerate() {
match retrieve_range_args(start_array, stop, step_array, idx) {
Some((_, _, 0)) => {
@@ -369,12 +369,12 @@ pub(super) fn gen_range_inner(
.step_by(step_abs),
);
offsets.push(values.len() as i32);
- valid.append(true);
+ valid.append_non_null();
}
// If any of the arguments is NULL, append a NULL value to the result.
None => {
offsets.push(values.len() as i32);
- valid.append(false);
+ valid.append_null();
}
};
}
@@ -382,7 +382,7 @@ pub(super) fn gen_range_inner(
Arc::new(Field::new_list_field(Int64, true)),
OffsetBuffer::new(offsets.into()),
Arc::new(Int64Array::from(values)),
- Some(NullBuffer::new(valid.finish())),
+ valid.finish(),
)?);
Ok(arr)
}
diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs
index 0d3db07c64..106887c513 100644
--- a/datafusion/functions-nested/src/replace.rs
+++ b/datafusion/functions-nested/src/replace.rs
@@ -23,7 +23,7 @@ use arrow::array::{
use arrow::datatypes::DataType;
use arrow_array::GenericListArray;
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow_schema::Field;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
@@ -306,12 +306,12 @@ fn general_replace<O: OffsetSizeTrait>(
capacity,
);
- let mut valid = BooleanBufferBuilder::new(list_array.len());
+ let mut valid = NullBufferBuilder::new(list_array.len());
for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() {
if list_array.is_null(row_index) {
offsets.push(offsets[row_index]);
- valid.append(false);
+ valid.append_null();
continue;
}
@@ -338,7 +338,7 @@ fn general_replace<O: OffsetSizeTrait>(
end.to_usize().unwrap(),
);
offsets.push(offsets[row_index] + (end - start));
- valid.append(true);
+ valid.append_non_null();
continue;
}
@@ -367,7 +367,7 @@ fn general_replace<O: OffsetSizeTrait>(
}
offsets.push(offsets[row_index] + (end - start));
- valid.append(true);
+ valid.append_non_null();
}
let data = mutable.freeze();
@@ -376,7 +376,7 @@ fn general_replace<O: OffsetSizeTrait>(
Arc::new(Field::new_list_field(list_array.value_type(), true)),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
- Some(NullBuffer::new(valid.finish())),
+ valid.finish(),
)?))
}
diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs
index a2b95debd2..441f44e47f 100644
--- a/datafusion/functions-nested/src/resize.rs
+++ b/datafusion/functions-nested/src/resize.rs
@@ -22,7 +22,7 @@ use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, Int64Array,
OffsetSizeTrait,
};
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{ArrowNativeType, NullBufferBuilder, OffsetBuffer};
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
@@ -198,15 +198,15 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
capacity,
);
- let mut null_builder = BooleanBufferBuilder::new(array.len());
+ let mut null_builder = NullBufferBuilder::new(array.len());
for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
if array.is_null(row_index) {
- null_builder.append(false);
+ null_builder.append_null();
offsets.push(offsets[row_index]);
continue;
}
- null_builder.append(true);
+ null_builder.append_non_null();
let count = count_array.value(row_index).to_usize().ok_or_else(|| {
internal_datafusion_err!("array_resize: failed to convert size to usize")
@@ -234,12 +234,11 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
}
let data = mutable.freeze();
- let null_bit_buffer: NullBuffer = null_builder.finish().into();
Ok(Arc::new(GenericListArray::<O>::try_new(
Arc::clone(field),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
- Some(null_bit_buffer),
+ null_builder.finish(),
)?))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]