This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 52f7bf1ac3 [Variant] Make `VariantArray` iterable (#8613)
52f7bf1ac3 is described below
commit 52f7bf1ac3546e107b701814eb25a0c02ae58c0d
Author: Matthew Kim <[email protected]>
AuthorDate: Wed Oct 15 06:42:58 2025 -0400
[Variant] Make `VariantArray` iterable (#8613)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/8609
- Closes https://github.com/apache/arrow-rs/issues/8612
This PR introduces an iterator over `VariantArray`. Since `VariantArray`
does not implement `Array`, we can't make use of `ArrayIter`.
---
parquet-variant-compute/src/variant_array.rs | 175 +++++++++++++++++++++++++++
1 file changed, 175 insertions(+)
diff --git a/parquet-variant-compute/src/variant_array.rs
b/parquet-variant-compute/src/variant_array.rs
index 522c5a7546..5f7f826819 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -420,6 +420,11 @@ impl VariantArray {
pub fn is_valid(&self, index: usize) -> bool {
!self.is_null(index)
}
+
+ /// Returns an iterator over the values in this array
+ pub fn iter(&self) -> VariantArrayIter<'_> {
+ VariantArrayIter::new(self)
+ }
}
impl From<VariantArray> for StructArray {
@@ -434,6 +439,89 @@ impl From<VariantArray> for ArrayRef {
}
}
+/// An iterator over [`VariantArray`]
+///
+/// This iterator returns `Option<Option<Variant<'a, 'a>>>` where:
+/// - `None` indicates the end of iteration
+/// - `Some(None)` indicates a null value at this position
+/// - `Some(Some(variant))` indicates a valid variant value
+///
+/// # Example
+///
+/// ```
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::VariantArrayBuilder;
+/// let mut builder = VariantArrayBuilder::new(10);
+/// builder.append_variant(Variant::from(42));
+/// builder.append_null();
+/// builder.append_variant(Variant::from("hello"));
+/// let array = builder.build();
+///
+/// let values = array.iter().collect::<Vec<_>>();
+/// assert_eq!(values.len(), 3);
+/// assert_eq!(values[0], Some(Variant::from(42)));
+/// assert_eq!(values[1], None);
+/// assert_eq!(values[2], Some(Variant::from("hello")));
+/// ```
+#[derive(Debug)]
+pub struct VariantArrayIter<'a> {
+ array: &'a VariantArray,
+ head_i: usize,
+ tail_i: usize,
+}
+
+impl<'a> VariantArrayIter<'a> {
+ /// Creates a new iterator over the given [`VariantArray`]
+ pub fn new(array: &'a VariantArray) -> Self {
+ Self {
+ array,
+ head_i: 0,
+ tail_i: array.len(),
+ }
+ }
+
+ fn value_opt(&self, i: usize) -> Option<Variant<'a, 'a>> {
+ self.array.is_valid(i).then(|| self.array.value(i))
+ }
+}
+
+impl<'a> Iterator for VariantArrayIter<'a> {
+ type Item = Option<Variant<'a, 'a>>;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.head_i == self.tail_i {
+ return None;
+ }
+
+ let out = self.value_opt(self.head_i);
+
+ self.head_i += 1;
+
+ Some(out)
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let remainder = self.tail_i - self.head_i;
+
+ (remainder, Some(remainder))
+ }
+}
+
+impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
+ fn next_back(&mut self) -> Option<Self::Item> {
+ if self.head_i == self.tail_i {
+ return None;
+ }
+
+ self.tail_i -= 1;
+
+ Some(self.value_opt(self.tail_i))
+ }
+}
+
+impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
+
/// One shredded field of a partially or perfectly shredded variant. For
example, suppose the
/// shredding schema for variant `v` treats it as an object with a single
field `a`, where `a` is
/// itself a struct with the single field `b` of type INT. Then the physical
layout of the column
@@ -1048,6 +1136,8 @@ fn canonicalize_and_verify_field(field: &Arc<Field>) ->
Result<Cow<'_, Arc<Field
#[cfg(test)]
mod test {
+ use crate::VariantArrayBuilder;
+
use super::*;
use arrow::array::{BinaryViewArray, Int32Array};
use arrow_schema::{Field, Fields};
@@ -1230,4 +1320,89 @@ mod test {
}
));
}
+
+ #[test]
+ fn test_variant_array_iterable() {
+ let mut b = VariantArrayBuilder::new(6);
+
+ b.append_null();
+ b.append_variant(Variant::from(1_i8));
+ b.append_variant(Variant::Null);
+ b.append_variant(Variant::from(2_i32));
+ b.append_variant(Variant::from(3_i64));
+ b.append_null();
+
+ let v = b.build();
+
+ let variants = v.iter().collect::<Vec<_>>();
+
+ assert_eq!(
+ variants,
+ vec![
+ None,
+ Some(Variant::Int8(1)),
+ Some(Variant::Null),
+ Some(Variant::Int32(2)),
+ Some(Variant::Int64(3)),
+ None,
+ ]
+ );
+ }
+
+ #[test]
+ fn test_variant_array_iter_double_ended() {
+ let mut b = VariantArrayBuilder::new(5);
+
+ b.append_variant(Variant::from(0_i32));
+ b.append_null();
+ b.append_variant(Variant::from(2_i32));
+ b.append_null();
+ b.append_variant(Variant::from(4_i32));
+
+ let array = b.build();
+ let mut iter = array.iter();
+
+ assert_eq!(iter.next(), Some(Some(Variant::from(0_i32))));
+ assert_eq!(iter.next(), Some(None));
+
+ assert_eq!(iter.next_back(), Some(Some(Variant::from(4_i32))));
+ assert_eq!(iter.next_back(), Some(None));
+ assert_eq!(iter.next_back(), Some(Some(Variant::from(2_i32))));
+
+ assert_eq!(iter.next_back(), None);
+ assert_eq!(iter.next(), None);
+ }
+
+ #[test]
+ fn test_variant_array_iter_reverse() {
+ let mut b = VariantArrayBuilder::new(5);
+
+ b.append_variant(Variant::from("a"));
+ b.append_null();
+ b.append_variant(Variant::from("aaa"));
+ b.append_null();
+ b.append_variant(Variant::from("aaaaa"));
+
+ let array = b.build();
+
+ let result: Vec<_> = array.iter().rev().collect();
+ assert_eq!(
+ result,
+ vec![
+ Some(Variant::from("aaaaa")),
+ None,
+ Some(Variant::from("aaa")),
+ None,
+ Some(Variant::from("a")),
+ ]
+ );
+ }
+
+ #[test]
+ fn test_variant_array_iter_empty() {
+ let v = VariantArrayBuilder::new(0).build();
+ let mut i = v.iter();
+ assert!(i.next().is_none());
+ assert!(i.next_back().is_none());
+ }
}