This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 98e894240c refactor: switch BooleanBufferBuilder to NullBufferBuilder in functions-nested functions (#14201)
98e894240c is described below

commit 98e894240cc621a790864567b079c82cd4ec3b95
Author: Ian Lai <[email protected]>
AuthorDate: Mon Jan 20 09:10:31 2025 +0800

    refactor: switch BooleanBufferBuilder to NullBufferBuilder in functions-nested functions (#14201)
    
    Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
 datafusion/functions-nested/src/concat.rs  | 11 +++++------
 datafusion/functions-nested/src/range.rs   | 10 +++++-----
 datafusion/functions-nested/src/replace.rs | 12 ++++++------
 datafusion/functions-nested/src/resize.rs  | 11 +++++------
 4 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/datafusion/functions-nested/src/concat.rs 
b/datafusion/functions-nested/src/concat.rs
index 934c5a5fec..a6557e36da 100644
--- a/datafusion/functions-nested/src/concat.rs
+++ b/datafusion/functions-nested/src/concat.rs
@@ -22,7 +22,7 @@ use std::{any::Any, cmp::Ordering};
 
 use arrow::array::{Capacities, MutableArrayData};
 use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
 use arrow_schema::{DataType, Field};
 use datafusion_common::Result;
 use datafusion_common::{
@@ -354,7 +354,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) 
-> Result<ArrayRef> {
 
     let mut array_lengths = vec![];
     let mut arrays = vec![];
-    let mut valid = BooleanBufferBuilder::new(row_count);
+    let mut valid = NullBufferBuilder::new(row_count);
     for i in 0..row_count {
         let nulls = list_arrays
             .iter()
@@ -365,7 +365,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) 
-> Result<ArrayRef> {
         let is_null = nulls.iter().all(|&x| x);
         if is_null {
             array_lengths.push(0);
-            valid.append(false);
+            valid.append_null();
         } else {
             // Get all the arrays on i-th row
             let values = list_arrays
@@ -382,12 +382,11 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) 
-> Result<ArrayRef> {
             let concatenated_array = 
arrow::compute::concat(elements.as_slice())?;
             array_lengths.push(concatenated_array.len());
             arrays.push(concatenated_array);
-            valid.append(true);
+            valid.append_non_null();
         }
     }
     // Assume all arrays have the same data type
     let data_type = list_arrays[0].value_type();
-    let buffer = valid.finish();
 
     let elements = arrays
         .iter()
@@ -398,7 +397,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) 
-> Result<ArrayRef> {
         Arc::new(Field::new_list_field(data_type, true)),
         OffsetBuffer::from_lengths(array_lengths),
         Arc::new(arrow::compute::concat(elements.as_slice())?),
-        Some(NullBuffer::new(buffer)),
+        valid.finish(),
     );
 
     Ok(Arc::new(list_arr))
diff --git a/datafusion/functions-nested/src/range.rs 
b/datafusion/functions-nested/src/range.rs
index 4f8132f59e..ff148f04ac 100644
--- a/datafusion/functions-nested/src/range.rs
+++ b/datafusion/functions-nested/src/range.rs
@@ -27,7 +27,7 @@ use arrow_array::types::{
     Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT,
 };
 use arrow_array::{NullArray, TimestampNanosecondArray};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
 use arrow_schema::DataType::*;
 use arrow_schema::IntervalUnit::MonthDayNano;
 use arrow_schema::TimeUnit::Nanosecond;
@@ -345,7 +345,7 @@ pub(super) fn gen_range_inner(
 
     let mut values = vec![];
     let mut offsets = vec![0];
-    let mut valid = BooleanBufferBuilder::new(stop_array.len());
+    let mut valid = NullBufferBuilder::new(stop_array.len());
     for (idx, stop) in stop_array.iter().enumerate() {
         match retrieve_range_args(start_array, stop, step_array, idx) {
             Some((_, _, 0)) => {
@@ -369,12 +369,12 @@ pub(super) fn gen_range_inner(
                         .step_by(step_abs),
                 );
                 offsets.push(values.len() as i32);
-                valid.append(true);
+                valid.append_non_null();
             }
             // If any of the arguments is NULL, append a NULL value to the 
result.
             None => {
                 offsets.push(values.len() as i32);
-                valid.append(false);
+                valid.append_null();
             }
         };
     }
@@ -382,7 +382,7 @@ pub(super) fn gen_range_inner(
         Arc::new(Field::new_list_field(Int64, true)),
         OffsetBuffer::new(offsets.into()),
         Arc::new(Int64Array::from(values)),
-        Some(NullBuffer::new(valid.finish())),
+        valid.finish(),
     )?);
     Ok(arr)
 }
diff --git a/datafusion/functions-nested/src/replace.rs 
b/datafusion/functions-nested/src/replace.rs
index 0d3db07c64..106887c513 100644
--- a/datafusion/functions-nested/src/replace.rs
+++ b/datafusion/functions-nested/src/replace.rs
@@ -23,7 +23,7 @@ use arrow::array::{
 use arrow::datatypes::DataType;
 
 use arrow_array::GenericListArray;
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
+use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
 use arrow_schema::Field;
 use datafusion_common::cast::as_int64_array;
 use datafusion_common::{exec_err, Result};
@@ -306,12 +306,12 @@ fn general_replace<O: OffsetSizeTrait>(
         capacity,
     );
 
-    let mut valid = BooleanBufferBuilder::new(list_array.len());
+    let mut valid = NullBufferBuilder::new(list_array.len());
 
     for (row_index, offset_window) in 
list_array.offsets().windows(2).enumerate() {
         if list_array.is_null(row_index) {
             offsets.push(offsets[row_index]);
-            valid.append(false);
+            valid.append_null();
             continue;
         }
 
@@ -338,7 +338,7 @@ fn general_replace<O: OffsetSizeTrait>(
                 end.to_usize().unwrap(),
             );
             offsets.push(offsets[row_index] + (end - start));
-            valid.append(true);
+            valid.append_non_null();
             continue;
         }
 
@@ -367,7 +367,7 @@ fn general_replace<O: OffsetSizeTrait>(
         }
 
         offsets.push(offsets[row_index] + (end - start));
-        valid.append(true);
+        valid.append_non_null();
     }
 
     let data = mutable.freeze();
@@ -376,7 +376,7 @@ fn general_replace<O: OffsetSizeTrait>(
         Arc::new(Field::new_list_field(list_array.value_type(), true)),
         OffsetBuffer::<O>::new(offsets.into()),
         arrow_array::make_array(data),
-        Some(NullBuffer::new(valid.finish())),
+        valid.finish(),
     )?))
 }
 
diff --git a/datafusion/functions-nested/src/resize.rs 
b/datafusion/functions-nested/src/resize.rs
index a2b95debd2..441f44e47f 100644
--- a/datafusion/functions-nested/src/resize.rs
+++ b/datafusion/functions-nested/src/resize.rs
@@ -22,7 +22,7 @@ use arrow::array::{Capacities, MutableArrayData};
 use arrow_array::{
     new_null_array, Array, ArrayRef, GenericListArray, Int64Array, 
OffsetSizeTrait,
 };
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, 
OffsetBuffer};
+use arrow_buffer::{ArrowNativeType, NullBufferBuilder, OffsetBuffer};
 use arrow_schema::DataType::{FixedSizeList, LargeList, List};
 use arrow_schema::{DataType, FieldRef};
 use datafusion_common::cast::{as_int64_array, as_large_list_array, 
as_list_array};
@@ -198,15 +198,15 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
         capacity,
     );
 
-    let mut null_builder = BooleanBufferBuilder::new(array.len());
+    let mut null_builder = NullBufferBuilder::new(array.len());
 
     for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
         if array.is_null(row_index) {
-            null_builder.append(false);
+            null_builder.append_null();
             offsets.push(offsets[row_index]);
             continue;
         }
-        null_builder.append(true);
+        null_builder.append_non_null();
 
         let count = count_array.value(row_index).to_usize().ok_or_else(|| {
             internal_datafusion_err!("array_resize: failed to convert size to 
usize")
@@ -234,12 +234,11 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
     }
 
     let data = mutable.freeze();
-    let null_bit_buffer: NullBuffer = null_builder.finish().into();
 
     Ok(Arc::new(GenericListArray::<O>::try_new(
         Arc::clone(field),
         OffsetBuffer::<O>::new(offsets.into()),
         arrow_array::make_array(data),
-        Some(null_bit_buffer),
+        null_builder.finish(),
     )?))
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to