This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8fff5e4a07 Add ListBuilder::with_field to support non nullable list
fields (#5330) (#5331)
8fff5e4a07 is described below
commit 8fff5e4a075c20619690c967e217163cd74e0656
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Jan 25 12:13:43 2024 +0000
Add ListBuilder::with_field to support non nullable list fields (#5330)
(#5331)
* Add ListBuilder::with_field (#5330)
* Tweak docs
* Review feedback
---
arrow-array/src/builder/generic_list_builder.rs | 111 +++++++++++++++---------
1 file changed, 72 insertions(+), 39 deletions(-)
diff --git a/arrow-array/src/builder/generic_list_builder.rs
b/arrow-array/src/builder/generic_list_builder.rs
index 116e2553cf..b857224c5d 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -17,10 +17,9 @@
use crate::builder::{ArrayBuilder, BufferBuilder};
use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
-use arrow_buffer::Buffer;
use arrow_buffer::NullBufferBuilder;
-use arrow_data::ArrayData;
-use arrow_schema::Field;
+use arrow_buffer::{Buffer, OffsetBuffer};
+use arrow_schema::{Field, FieldRef};
use std::any::Any;
use std::sync::Arc;
@@ -92,6 +91,7 @@ pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T:
ArrayBuilder> {
offsets_builder: BufferBuilder<OffsetSize>,
null_buffer_builder: NullBufferBuilder,
values_builder: T,
+ field: Option<FieldRef>,
}
impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for
GenericListBuilder<O, T> {
@@ -116,6 +116,20 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder>
GenericListBuilder<OffsetSize
offsets_builder,
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
+ field: None,
+ }
+ }
+
+ /// Override the field passed to [`GenericListArray::new`]
+ ///
+ /// By default a nullable field is created with the name `item`
+ ///
+ /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
+ /// field's data type does not match that of `T`
+ pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
+ Self {
+ field: Some(field.into()),
+ ..self
}
}
}
@@ -275,53 +289,37 @@ where
/// Builds the [`GenericListArray`] and reset this builder.
pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
- let len = self.len();
- let values_arr = self.values_builder.finish();
- let values_data = values_arr.to_data();
+ let values = self.values_builder.finish();
+ let nulls = self.null_buffer_builder.finish();
- let offset_buffer = self.offsets_builder.finish();
- let null_bit_buffer = self.null_buffer_builder.finish();
+ let offsets = self.offsets_builder.finish();
+ // Safety: Safe by construction
+ let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
self.offsets_builder.append(OffsetSize::zero());
- let field = Arc::new(Field::new(
- "item",
- values_data.data_type().clone(),
- true, // TODO: find a consistent way of getting this
- ));
- let data_type =
GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field);
- let array_data_builder = ArrayData::builder(data_type)
- .len(len)
- .add_buffer(offset_buffer)
- .add_child_data(values_data)
- .nulls(null_bit_buffer);
- let array_data = unsafe { array_data_builder.build_unchecked() };
+ let field = match &self.field {
+ Some(f) => f.clone(),
+ None => Arc::new(Field::new("item", values.data_type().clone(),
true)),
+ };
- GenericListArray::<OffsetSize>::from(array_data)
+ GenericListArray::new(field, offsets, values, nulls)
}
/// Builds the [`GenericListArray`] without resetting the builder.
pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
- let len = self.len();
- let values_arr = self.values_builder.finish_cloned();
- let values_data = values_arr.to_data();
-
- let offset_buffer =
Buffer::from_slice_ref(self.offsets_builder.as_slice());
+ let values = self.values_builder.finish_cloned();
let nulls = self.null_buffer_builder.finish_cloned();
- let field = Arc::new(Field::new(
- "item",
- values_data.data_type().clone(),
- true, // TODO: find a consistent way of getting this
- ));
- let data_type =
GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field);
- let array_data_builder = ArrayData::builder(data_type)
- .len(len)
- .add_buffer(offset_buffer)
- .add_child_data(values_data)
- .nulls(nulls);
- let array_data = unsafe { array_data_builder.build_unchecked() };
+ let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+ // Safety: safe by construction
+ let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
+
+ let field = match &self.field {
+ Some(f) => f.clone(),
+ None => Arc::new(Field::new("item", values.data_type().clone(),
true)),
+ };
- GenericListArray::<OffsetSize>::from(array_data)
+ GenericListArray::new(field, offsets, values, nulls)
}
/// Returns the current offsets buffer as a slice
@@ -765,4 +763,39 @@ mod tests {
assert_eq!(0, i1.null_count());
assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
}
+
+ #[test]
+ fn test_with_field() {
+ let field = Arc::new(Field::new("bar", DataType::Int32, false));
+ let mut builder =
ListBuilder::new(Int32Builder::new()).with_field(field.clone());
+ builder.append_value([Some(1), Some(2), Some(3)]);
+ builder.append_null(); // This is fine as nullability refers to
nullability of values
+ builder.append_value([Some(4)]);
+ let array = builder.finish();
+ assert_eq!(array.len(), 3);
+ assert_eq!(array.data_type(), &DataType::List(field.clone()));
+
+ builder.append_value([Some(4), Some(5)]);
+ let array = builder.finish();
+ assert_eq!(array.data_type(), &DataType::List(field));
+ assert_eq!(array.len(), 1);
+ }
+
+ #[test]
+ #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\"
cannot contain nulls")]
+ fn test_checks_nullability() {
+ let field = Arc::new(Field::new("item", DataType::Int32, false));
+ let mut builder =
ListBuilder::new(Int32Builder::new()).with_field(field.clone());
+ builder.append_value([Some(1), None]);
+ builder.finish();
+ }
+
+ #[test]
+ #[should_panic(expected = "ListArray expected data type Int64 got Int32")]
+ fn test_checks_data_type() {
+ let field = Arc::new(Field::new("item", DataType::Int64, false));
+ let mut builder =
ListBuilder::new(Int32Builder::new()).with_field(field.clone());
+ builder.append_value([Some(1)]);
+ builder.finish();
+ }
}