This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 200e8c8008 Implementing `ArrayBuilder` for `Box<dyn ArrayBuilder>` (#5109)
200e8c8008 is described below
commit 200e8c80084442d9579e00967e407cd83191565d
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Wed Nov 22 06:09:00 2023 -0800
Implementing `ArrayBuilder` for `Box<dyn ArrayBuilder>` (#5109)
* Implementing ArrayBuilder for Box<dyn ArrayBuilder>
* Update existing test
---
arrow-array/src/builder/generic_list_builder.rs | 202 +++++++++++++++++++++++-
arrow-array/src/builder/mod.rs | 30 ++++
arrow-array/src/builder/struct_builder.rs | 15 +-
3 files changed, 243 insertions(+), 4 deletions(-)
diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs
index 5cc7f7b04e..21eaadd520 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -353,7 +353,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
- use crate::builder::{Int32Builder, ListBuilder};
+ use crate::builder::{make_builder, Int32Builder, ListBuilder};
use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::{Array, Int32Array};
@@ -548,4 +548,204 @@ mod tests {
assert_eq!(elements.null_count(), 1);
assert!(elements.is_null(3));
}
+
+ #[test]
+ fn test_boxed_primitive_aray_builder() {
+ let values_builder = make_builder(&DataType::Int32, 5);
+ let mut builder = ListBuilder::new(values_builder);
+
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_slice(&[1, 2, 3]);
+ builder.append(true);
+
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_slice(&[4, 5, 6]);
+ builder.append(true);
+
+ let arr = builder.finish();
+ assert_eq!(2, arr.len());
+
+ let elements = arr.values().as_primitive::<Int32Type>();
+ assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
+ }
+
+ #[test]
+ fn test_boxed_list_list_array_builder() {
+ // This test is same as `test_list_list_array_builder` but uses boxed builders.
+ let values_builder = make_builder(
+ &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
+ 10,
+ );
+ let mut builder = ListBuilder::new(values_builder);
+
+ // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(1);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(2);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .append(true);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(3);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(4);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .append(true);
+ builder.append(true);
+
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(5);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(6);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(7);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .append(true);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .append(false);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(8);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .append(true);
+ builder.append(true);
+
+ builder.append(false);
+
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(9);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .values()
+ .as_any_mut()
+ .downcast_mut::<Int32Builder>()
+ .expect("should be an Int32Builder")
+ .append_value(10);
+ builder
+ .values()
+ .as_any_mut()
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
+ .expect("should be an ListBuilder")
+ .append(true);
+ builder.append(true);
+
+ let l1 = builder.finish();
+
+ assert_eq!(4, l1.len());
+ assert_eq!(1, l1.null_count());
+
+ assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);
+ let l2 = l1.values().as_list::<i32>();
+
+ assert_eq!(6, l2.len());
+ assert_eq!(1, l2.null_count());
+ assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);
+
+ let i1 = l2.values().as_primitive::<Int32Type>();
+ assert_eq!(10, i1.len());
+ assert_eq!(0, i1.null_count());
+ assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+ }
}
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index 38a7500dd5..8382f7af87 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -265,6 +265,36 @@ pub trait ArrayBuilder: Any + Send {
fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
}
+impl ArrayBuilder for Box<dyn ArrayBuilder> {
+ fn len(&self) -> usize {
+ (**self).len()
+ }
+
+ fn is_empty(&self) -> bool {
+ (**self).is_empty()
+ }
+
+ fn finish(&mut self) -> ArrayRef {
+ (**self).finish()
+ }
+
+ fn finish_cloned(&self) -> ArrayRef {
+ (**self).finish_cloned()
+ }
+
+ fn as_any(&self) -> &dyn Any {
+ (**self).as_any()
+ }
+
+ fn as_any_mut(&mut self) -> &mut dyn Any {
+ (**self).as_any_mut()
+ }
+
+ fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+ self
+ }
+}
+
/// Builder for [`ListArray`](crate::array::ListArray)
pub type ListBuilder<T> = GenericListBuilder<i32, T>;
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 0f40b8a487..06b8385b31 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -169,6 +169,10 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Duration(TimeUnit::Nanosecond) => {
Box::new(DurationNanosecondBuilder::with_capacity(capacity))
}
+ DataType::List(field) => {
+ let builder = make_builder(field.data_type(), capacity);
+ Box::new(ListBuilder::with_capacity(builder, capacity))
+ }
DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
t => panic!("Data type {t:?} is not currently supported"),
}
@@ -507,13 +511,18 @@ mod tests {
#[test]
#[should_panic(
- expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported"
+ expected = "Data type Map(Field { name: \"entries\", data_type: Struct([Field { name: \"keys\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"values\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
- let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
+ let keys = Arc::new(Field::new("keys", DataType::Int32, false));
+ let values = Arc::new(Field::new("values", DataType::UInt32, false));
+ let struct_type = DataType::Struct(Fields::from(vec![keys, values]));
+ let map_data_type =
+ DataType::Map(Arc::new(Field::new("entries", struct_type, false)), false);
+
let fields = vec![
Field::new("f1", DataType::Int16, false),
- Field::new("f2", list_type, false),
+ Field::new("f2", map_data_type, false),
];
let _ = StructBuilder::from_fields(fields, 5);