This is an automated email from the ASF dual-hosted git repository.
jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new fc927bcc12 Implement a more generic from_nested_iter method for list
arrays (#9268)
fc927bcc12 is described below
commit fc927bcc12e956496dc0cb91419b85e0aec02b88
Author: Jörn Horstmann <[email protected]>
AuthorDate: Fri Jan 30 01:02:25 2026 +0100
Implement a more generic from_nested_iter method for list arrays (#9268)
# Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
-->
- Closes #9267.
# Rationale for this change
Implement a convenient function to create list arrays of many more types
from nested iterators.
# What changes are included in this PR?
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
# Are these changes tested?
Yes, new tests were added, and the existing `from_iter_primitive` now delegates
to the new method for additional coverage.
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
# Are there any user-facing changes?
No breaking changes. The existing `from_iter_primitive` method could potentially
be deprecated in favor of `from_nested_iter`.
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
If there are any breaking changes to public APIs, please call them out.
-->
---------
Co-authored-by: Jeffrey Vo <[email protected]>
---
arrow-array/src/array/list_array.rs | 104 ++++++++++++++++++++++++++++++++----
1 file changed, 95 insertions(+), 9 deletions(-)
diff --git a/arrow-array/src/array/list_array.rs
b/arrow-array/src/array/list_array.rs
index 53e1db1e15..e4c603e0d9 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
-use crate::builder::{GenericListBuilder, PrimitiveBuilder};
+use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder};
use crate::{
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
iterator::GenericListArrayIter, new_empty_array,
@@ -418,18 +418,43 @@ impl<OffsetSize: OffsetSizeTrait>
GenericListArray<OffsetSize> {
T: ArrowPrimitiveType,
P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
I: IntoIterator<Item = Option<P>>,
+ {
+ Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter)
+ }
+
+ /// Creates a [`GenericListArray`] from a nested iterator of values.
+ /// This method works for any values type that has a corresponding builder
that implements the
+ /// `Extend` trait. That includes all numeric types, booleans, binary and
string types and also
+ /// dictionary encoded binary and strings.
+ ///
+ /// # Example
+ /// ```
+ /// # use arrow_array::ListArray;
+ /// # use arrow_array::types::Int32Type;
+ /// # use arrow_array::builder::StringDictionaryBuilder;
+ /// let data = vec![
+ /// Some(vec![Some("foo"), Some("bar"), Some("baz")]),
+ /// None,
+ /// Some(vec![Some("bar"), None, Some("foo")]),
+ /// Some(vec![]),
+ /// ];
+ /// let list_array =
ListArray::from_nested_iter::<StringDictionaryBuilder<Int32Type>, _, _,
_>(data);
+ /// println!("{:?}", list_array);
+ /// ```
+ pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self
+ where
+ B: ArrayBuilder + Default + Extend<Option<T>>,
+ P: IntoIterator<Item = Option<T>>,
+ I: IntoIterator<Item = Option<P>>,
{
let iter = iter.into_iter();
let size_hint = iter.size_hint().0;
- let mut builder =
- GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(),
size_hint);
+ let mut builder = GenericListBuilder::with_capacity(B::default(),
size_hint);
for i in iter {
match i {
Some(p) => {
- for t in p {
- builder.values().append_option(t);
- }
+ builder.values().extend(p);
builder.append(true);
}
None => builder.append(false),
@@ -634,10 +659,15 @@ pub type LargeListArray = GenericListArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
- use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder,
UnionBuilder};
+ use crate::builder::{
+ BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder,
StringBuilder,
+ StringDictionaryBuilder, UnionBuilder,
+ };
use crate::cast::AsArray;
- use crate::types::Int32Type;
- use crate::{Int32Array, Int64Array};
+ use crate::types::{Int8Type, Int32Type};
+ use crate::{
+ BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array,
StringArray,
+ };
use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
use arrow_schema::Field;
@@ -1294,4 +1324,60 @@ mod tests {
let array = ListArray::new_null(field, 5);
assert_eq!(array.len(), 5);
}
+
+ #[test]
+ fn test_list_from_iter_i32() {
+ let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(vec![
+ None,
+ Some(vec![Some(1), None, Some(2)]),
+ ]);
+ let expected_offsets = &[0, 0, 3];
+ let expected_values: ArrayRef =
Arc::new(Int32Array::from(vec![Some(1), None, Some(2)]));
+ assert_eq!(array.value_offsets(), expected_offsets);
+ assert_eq!(array.values(), &expected_values);
+ }
+
+ #[test]
+ fn test_list_from_iter_bool() {
+ let array = ListArray::from_nested_iter::<BooleanBuilder, _, _,
_>(vec![
+ Some(vec![None, Some(false), Some(true)]),
+ None,
+ ]);
+ let expected_offsets = &[0, 3, 3];
+ let expected_values: ArrayRef =
+ Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)]));
+ assert_eq!(array.value_offsets(), expected_offsets);
+ assert_eq!(array.values(), &expected_values);
+ }
+
+ #[test]
+ fn test_list_from_iter_str() {
+ let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![
+ Some(vec![Some("foo"), None, Some("bar")]),
+ None,
+ ]);
+ let expected_offsets = &[0, 3, 3];
+ let expected_values: ArrayRef =
+ Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")]));
+ assert_eq!(array.value_offsets(), expected_offsets);
+ assert_eq!(array.values(), &expected_values);
+ }
+
+ #[test]
+ fn test_list_from_iter_dict_str() {
+ let array =
+ ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>,
_, _, _>(vec![
+ Some(vec![Some("foo"), None, Some("bar"), Some("foo")]),
+ None,
+ ]);
+ let expected_offsets = &[0, 4, 4];
+ let expected_dict_values: ArrayRef =
+ Arc::new(StringArray::from(vec![Some("foo"), Some("bar")]));
+ let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1),
Some(0)]);
+ let expected_values: ArrayRef = Arc::new(
+ Int8DictionaryArray::try_new(expected_dict_keys,
expected_dict_values).unwrap(),
+ );
+ assert_eq!(array.value_offsets(), expected_offsets);
+ assert_eq!(array.values(), &expected_values);
+ }
}