This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 721150286b feat: support append_nulls on additional builders (#7606)
721150286b is described below
commit 721150286b00bece40ffcc6f5ac14ebb5d64785b
Author: albertlockett <[email protected]>
AuthorDate: Tue Jun 10 12:29:23 2025 -0400
feat: support append_nulls on additional builders (#7606)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/7605
# Rationale for this change
I thought it would be nice if `append_nulls` was supported for
additional types of array builders. Currently it is available on some
builder types, but not all.
# What changes are included in this PR?
Add an `append_nulls` method to:
- FixedSizeBinaryDictionaryBuilder
- FixedSizeBinaryBuilder
- GenericByteBuilder
- GenericListBuilder
- StructBuilder
# Are there any user-facing changes?
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
.../src/builder/fixed_size_binary_builder.rs | 19 +++++++++--
.../fixed_size_binary_dictionary_builder.rs | 19 ++++++++++-
arrow-array/src/builder/generic_bytes_builder.rs | 37 ++++++++++++++++++----
arrow-array/src/builder/generic_list_builder.rs | 20 ++++++++++--
arrow-array/src/builder/struct_builder.rs | 36 +++++++++++++++------
5 files changed, 107 insertions(+), 24 deletions(-)
diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs
b/arrow-array/src/builder/fixed_size_binary_builder.rs
index b7fc461559..b5f268917c 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -93,6 +93,14 @@ impl FixedSizeBinaryBuilder {
self.null_buffer_builder.append_null();
}
+ /// Appends `n` `null`s into the builder, zero-filling `n * value_length` bytes of value data.
+ #[inline]
+ pub fn append_nulls(&mut self, n: usize) {
+ // Zero-fill in place instead of allocating a temporary `Vec` only to copy it.
+ self.values_builder.append_n_zeroed(self.value_length as usize * n);
+ self.null_buffer_builder.append_n_nulls(n);
+ }
+
/// Returns the current values buffer as a slice
pub fn values_slice(&self) -> &[u8] {
self.values_builder.as_slice()
@@ -169,17 +177,22 @@ mod tests {
fn test_fixed_size_binary_builder() {
let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
- // [b"hello", null, "arrow"]
+ // [b"hello", null, "arrow", null, null, "world"]
builder.append_value(b"hello").unwrap();
builder.append_null();
builder.append_value(b"arrow").unwrap();
+ builder.append_nulls(2);
+ builder.append_value(b"world").unwrap();
let array: FixedSizeBinaryArray = builder.finish();
assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
- assert_eq!(3, array.len());
- assert_eq!(1, array.null_count());
+ assert_eq!(6, array.len());
+ assert_eq!(3, array.null_count());
assert_eq!(10, array.value_offset(2));
+ assert_eq!(15, array.value_offset(3));
assert_eq!(5, array.value_length());
+ assert!(array.is_null(3));
+ assert!(array.is_null(4));
}
#[test]
diff --git a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
index 007f3de0a2..f3460353b1 100644
--- a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
@@ -192,6 +192,12 @@ where
self.keys_builder.append_null()
}
+ /// Appends `n` `null`s into the builder by appending `n` null keys; no dictionary value is added.
+ #[inline]
+ pub fn append_nulls(&mut self, n: usize) {
+ self.keys_builder.append_nulls(n);
+ }
+
/// Infallibly append a value to this builder
///
/// # Panics
@@ -265,11 +271,22 @@ mod tests {
assert_eq!(b.append(values[1]).unwrap(), 1);
assert_eq!(b.append(values[1]).unwrap(), 1);
assert_eq!(b.append(values[0]).unwrap(), 0);
+ b.append_nulls(2);
+ assert_eq!(b.append(values[0]).unwrap(), 0);
let array = b.finish();
assert_eq!(
array.keys(),
- &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]),
+ &Int8Array::from(vec![
+ Some(0),
+ None,
+ Some(1),
+ Some(1),
+ Some(0),
+ None,
+ None,
+ Some(0)
+ ]),
);
// Values are polymorphic and so require a downcast.
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs
b/arrow-array/src/builder/generic_bytes_builder.rs
index ae82921b0b..91ac2a483e 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -129,6 +129,14 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
self.offsets_builder.append(self.next_offset());
}
+ /// Appends `n` `null`s into the builder.
+ #[inline]
+ pub fn append_nulls(&mut self, n: usize) {
+ self.null_buffer_builder.append_n_nulls(n);
+ // No value bytes are written for nulls: repeat the current end offset once per slot.
+ self.offsets_builder.append_n(n, self.next_offset());
+ }
+
/// Appends array values and null to this builder as is
/// (this means that underlying null values are copied as is).
#[inline]
@@ -439,15 +447,18 @@ mod tests {
builder.append_null();
builder.append_null();
builder.append_null();
- assert_eq!(3, builder.len());
+ builder.append_nulls(2);
+ assert_eq!(5, builder.len());
assert!(!builder.is_empty());
let array = builder.finish();
- assert_eq!(3, array.null_count());
- assert_eq!(3, array.len());
+ assert_eq!(5, array.null_count());
+ assert_eq!(5, array.len());
assert!(array.is_null(0));
assert!(array.is_null(1));
assert!(array.is_null(2));
+ assert!(array.is_null(3));
+ assert!(array.is_null(4));
}
#[test]
@@ -475,16 +486,23 @@ mod tests {
builder.append_null();
builder.append_value(b"arrow");
builder.append_value(b"");
+ builder.append_nulls(2);
+ builder.append_value(b"hi");
let array = builder.finish();
- assert_eq!(4, array.len());
- assert_eq!(1, array.null_count());
+ assert_eq!(7, array.len());
+ assert_eq!(3, array.null_count());
assert_eq!(b"parquet", array.value(0));
assert!(array.is_null(1));
+ assert!(array.is_null(4));
+ assert!(array.is_null(5));
assert_eq!(b"arrow", array.value(2));
assert_eq!(b"", array.value(1));
+ assert_eq!(b"hi", array.value(6));
+
assert_eq!(O::zero(), array.value_offsets()[0]);
assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
+ assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
}
@@ -509,7 +527,9 @@ mod tests {
builder.append_option(Some("rust"));
builder.append_option(None::<&str>);
builder.append_option(None::<String>);
- assert_eq!(7, builder.len());
+ builder.append_nulls(2);
+ builder.append_value("parquet");
+ assert_eq!(10, builder.len());
assert_eq!(
GenericStringArray::<O>::from(vec![
@@ -519,7 +539,10 @@ mod tests {
None,
Some("rust"),
None,
- None
+ None,
+ None,
+ None,
+ Some("parquet")
]),
builder.finish()
);
diff --git a/arrow-array/src/builder/generic_list_builder.rs
b/arrow-array/src/builder/generic_list_builder.rs
index a9c88ec6c5..463b498c55 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -270,6 +270,14 @@ where
self.null_buffer_builder.append_null();
}
+ /// Appends `n` `null`s into the builder.
+ #[inline]
+ pub fn append_nulls(&mut self, n: usize) {
+ // Every null slot gets the same offset, taken before anything else is modified.
+ self.offsets_builder.append_n(n, self.next_offset());
+ self.null_buffer_builder.append_n_nulls(n);
+ }
+
/// Appends an optional value into this [`GenericListBuilder`]
///
/// If `Some` calls [`Self::append_value`] otherwise calls
[`Self::append_null`]
@@ -406,7 +414,7 @@ mod tests {
let values_builder = Int32Builder::with_capacity(10);
let mut builder = GenericListBuilder::<O, _>::new(values_builder);
- // [[0, 1, 2], null, [3, null, 5], [6, 7]]
+ // [[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]]
builder.values().append_value(0);
builder.values().append_value(1);
builder.values().append_value(2);
@@ -419,14 +427,20 @@ mod tests {
builder.values().append_value(6);
builder.values().append_value(7);
builder.append(true);
+ builder.append_nulls(2);
+ builder.values().append_value(8);
+ builder.append(true);
let list_array = builder.finish();
assert_eq!(DataType::Int32, list_array.value_type());
- assert_eq!(4, list_array.len());
- assert_eq!(1, list_array.null_count());
+ assert_eq!(7, list_array.len());
+ assert_eq!(3, list_array.null_count());
assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
+ assert_eq!(O::from_usize(9).unwrap(), list_array.value_offsets()[7]);
assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
+ assert!(list_array.is_null(4));
+ assert!(list_array.is_null(5));
}
#[test]
diff --git a/arrow-array/src/builder/struct_builder.rs
b/arrow-array/src/builder/struct_builder.rs
index 245e9df41e..3afee5863f 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -214,6 +214,12 @@ impl StructBuilder {
self.append(false)
}
+ /// Appends `n` `null`s into the builder; only struct-level validity is updated, field builders are not touched.
+ #[inline]
+ pub fn append_nulls(&mut self, n: usize) {
+ self.null_buffer_builder.append_n_nulls(n);
+ }
+
/// Builds the `StructArray` and reset this builder.
pub fn finish(&mut self) -> StructArray {
self.validate_content();
@@ -313,6 +319,8 @@ mod tests {
string_builder.append_null();
string_builder.append_null();
string_builder.append_value("mark");
+ string_builder.append_nulls(2);
+ string_builder.append_value("terry");
let int_builder = builder
.field_builder::<Int32Builder>(1)
@@ -321,35 +329,43 @@ mod tests {
int_builder.append_value(2);
int_builder.append_null();
int_builder.append_value(4);
+ int_builder.append_nulls(2);
+ int_builder.append_value(3);
builder.append(true);
builder.append(true);
builder.append_null();
builder.append(true);
+ builder.append_nulls(2);
+ builder.append(true);
+
let struct_data = builder.finish().into_data();
- assert_eq!(4, struct_data.len());
- assert_eq!(1, struct_data.null_count());
- assert_eq!(&[11_u8], struct_data.nulls().unwrap().validity());
+ assert_eq!(7, struct_data.len());
+ assert_eq!(3, struct_data.null_count());
+ assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity());
let expected_string_data = ArrayData::builder(DataType::Utf8)
- .len(4)
- .null_bit_buffer(Some(Buffer::from(&[9_u8])))
- .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7]))
- .add_buffer(Buffer::from_slice_ref(b"joemark"))
+ .len(7)
+ .null_bit_buffer(Some(Buffer::from(&[73_u8])))
+ .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12]))
+ .add_buffer(Buffer::from_slice_ref(b"joemarkterry"))
.build()
.unwrap();
let expected_int_data = ArrayData::builder(DataType::Int32)
- .len(4)
- .null_bit_buffer(Some(Buffer::from_slice_ref([11_u8])))
- .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4]))
+ .len(7)
+ .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8])))
+ .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3]))
.build()
.unwrap();
assert_eq!(expected_string_data, struct_data.child_data()[0]);
assert_eq!(expected_int_data, struct_data.child_data()[1]);
+
+ assert!(struct_data.is_null(4));
+ assert!(struct_data.is_null(5));
}
#[test]