This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 19e3e8c83 Implement Extend for ArrayBuilder (#1841) (#3563)
19e3e8c83 is described below

commit 19e3e8c8314f87d8c2acf3a7b69538fdec6f793c
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Jan 20 17:09:44 2023 +0000

    Implement Extend for ArrayBuilder (#1841) (#3563)
    
    * Implement Extend for ArrayBuilder (#1841)
    
    * Add dictionaries
    
    * Add tests
---
 arrow-array/src/array/dictionary_array.rs          | 13 +-----
 arrow-array/src/builder/boolean_builder.rs         | 22 ++++++++++
 arrow-array/src/builder/generic_bytes_builder.rs   | 23 +++++++++++
 .../builder/generic_bytes_dictionary_builder.rs    | 47 ++++++++++++++++++++-
 arrow-array/src/builder/generic_list_builder.rs    | 47 +++++++++++++++++++++
 arrow-array/src/builder/primitive_builder.rs       | 22 ++++++++++
 .../src/builder/primitive_dictionary_builder.rs    | 48 +++++++++++++++++++++-
 7 files changed, 207 insertions(+), 15 deletions(-)

diff --git a/arrow-array/src/array/dictionary_array.rs 
b/arrow-array/src/array/dictionary_array.rs
index 6cff5bfdc..fb2868c27 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -481,18 +481,7 @@ impl<'a, T: ArrowDictionaryKeyType> 
FromIterator<Option<&'a str>> for Dictionary
         let it = iter.into_iter();
         let (lower, _) = it.size_hint();
         let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 
1024);
-        it.for_each(|i| {
-            if let Some(i) = i {
-                // Note: impl ... for Result<DictionaryArray<T>> fails with
-                // error[E0117]: only traits defined in the current crate can 
be implemented for arbitrary types
-                builder
-                    .append(i)
-                    .expect("Unable to append a value to a dictionary array.");
-            } else {
-                builder.append_null();
-            }
-        });
-
+        builder.extend(it);
         builder.finish()
     }
 }
diff --git a/arrow-array/src/builder/boolean_builder.rs 
b/arrow-array/src/builder/boolean_builder.rs
index 96f436253..06709e5f3 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -211,6 +211,15 @@ impl ArrayBuilder for BooleanBuilder {
     }
 }
 
+impl Extend<Option<bool>> for BooleanBuilder {
+    #[inline]
+    fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
+        for v in iter {
+            self.append_option(v)
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -304,4 +313,17 @@ mod tests {
         assert_eq!(0, array.null_count());
         assert!(array.data().null_buffer().is_none());
     }
+
+    #[test]
+    fn test_extend() {
+        let mut builder = BooleanBuilder::new();
+        builder.extend([false, false, true, false, 
false].into_iter().map(Some));
+        builder.extend([true, true, false].into_iter().map(Some));
+        let array = builder.finish();
+        let values = array.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
+        assert_eq!(
+            &values,
+            &[false, false, true, false, false, true, true, false]
+        )
+    }
 }
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs 
b/arrow-array/src/builder/generic_bytes_builder.rs
index 8be3ac7f4..73600d9e0 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -88,6 +88,10 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
     }
 
     /// Appends a value into the builder.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of [`Self::values_slice`] would exceed 
`T::Offset::MAX`
     #[inline]
     pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
         self.value_builder.append_slice(value.as_ref().as_ref());
@@ -219,6 +223,15 @@ impl<T: ByteArrayType> ArrayBuilder for 
GenericByteBuilder<T> {
     }
 }
 
+impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for 
GenericByteBuilder<T> {
+    #[inline]
+    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
+        for v in iter {
+            self.append_option(v)
+        }
+    }
+}
+
 ///  Array builder for [`GenericStringArray`][crate::GenericStringArray]
 pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
 
@@ -420,4 +433,14 @@ mod tests {
     fn test_large_string_array_builder_finish_cloned() {
         _test_generic_string_array_builder_finish_cloned::<i64>()
     }
+
+    #[test]
+    fn test_extend() {
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.extend(["a", "b", "c", "", "a", "b", 
"c"].into_iter().map(Some));
+        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
+        let array = builder.finish();
+        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 
20]);
+        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
+    }
 }
diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs 
b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index 4a920f3ee..449100da1 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -214,7 +214,7 @@ where
     K: ArrowDictionaryKeyType,
     T: ByteArrayType,
 {
-    /// Append a primitive value to the array. Return an existing index
+    /// Append a value to the array. Return an existing index
     /// if already present in the values array or a new index if the
     /// value is appended to the values array.
     ///
@@ -255,12 +255,34 @@ where
         Ok(key)
     }
 
+    /// Infallibly append a value to this builder
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
+        self.append(value).expect("dictionary key overflow");
+    }
+
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
         self.keys_builder.append_null()
     }
 
+    /// Append an `Option` value into the builder
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    #[inline]
+    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
+        match value {
+            None => self.append_null(),
+            Some(v) => self.append_value(v),
+        };
+    }
+
     /// Builds the `DictionaryArray` and reset this builder.
     pub fn finish(&mut self) -> DictionaryArray<K> {
         self.dedup.clear();
@@ -297,6 +319,17 @@ where
     }
 }
 
+impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>> 
Extend<Option<V>>
+    for GenericByteDictionaryBuilder<K, T>
+{
+    #[inline]
+    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
+        for v in iter {
+            self.append_option(v)
+        }
+    }
+}
+
 fn get_bytes<'a, K: ArrowNativeType, T: ByteArrayType>(
     values: &'a GenericByteBuilder<T>,
     key: &K,
@@ -405,7 +438,7 @@ mod tests {
 
     use crate::array::Array;
     use crate::array::Int8Array;
-    use crate::types::{Int16Type, Int8Type};
+    use crate::types::{Int16Type, Int32Type, Int8Type, Utf8Type};
     use crate::{BinaryArray, StringArray};
 
     fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
@@ -622,4 +655,14 @@ mod tests {
             vec![b"abc", b"def"],
         );
     }
+
+    #[test]
+    fn test_extend() {
+        let mut builder = GenericByteDictionaryBuilder::<Int32Type, 
Utf8Type>::new();
+        builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
+        builder.extend(["c", "d", "a"].into_iter().map(Some));
+        let dict = builder.finish();
+        assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
+        assert_eq!(dict.values().len(), 4);
+    }
 }
diff --git a/arrow-array/src/builder/generic_list_builder.rs 
b/arrow-array/src/builder/generic_list_builder.rs
index 8f3f881c4..622847554 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -111,6 +111,10 @@ where
     }
 
     /// Finish the current variable-length list array slot
+    ///
+    /// # Panics
+    ///
+    /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX`
     #[inline]
     pub fn append(&mut self, is_valid: bool) {
         self.offsets_builder
@@ -178,10 +182,32 @@ where
     }
 }
 
+impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
+where
+    O: OffsetSizeTrait,
+    B: ArrayBuilder + Extend<E>,
+    V: IntoIterator<Item = E>,
+{
+    #[inline]
+    fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
+        for v in iter {
+            match v {
+                Some(elements) => {
+                    self.values_builder.extend(elements);
+                    self.append(true);
+                }
+                None => self.append(false),
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::builder::{Int32Builder, ListBuilder};
+    use crate::cast::as_primitive_array;
+    use crate::types::Int32Type;
     use crate::{Array, Int32Array};
     use arrow_buffer::Buffer;
     use arrow_schema::DataType;
@@ -364,4 +390,25 @@ mod tests {
             list_array.values().data().child_data()[0].buffers()[0].clone()
         );
     }
+
+    #[test]
+    fn test_extend() {
+        let mut builder = ListBuilder::new(Int32Builder::new());
+        builder.extend([
+            Some(vec![Some(1), Some(2), Some(7), None]),
+            Some(vec![]),
+            Some(vec![Some(4), Some(5)]),
+            None,
+        ]);
+
+        let array = builder.finish();
+        assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
+        assert_eq!(array.null_count(), 1);
+        assert!(array.is_null(3));
+        let a_values = array.values();
+        let elements = as_primitive_array::<Int32Type>(a_values.as_ref());
+        assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
+        assert_eq!(elements.null_count(), 1);
+        assert!(elements.is_null(3));
+    }
 }
diff --git a/arrow-array/src/builder/primitive_builder.rs 
b/arrow-array/src/builder/primitive_builder.rs
index a969e1218..2d88ea50f 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -238,6 +238,10 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     }
 
     /// Appends values from a slice of type `T` and a validity boolean slice
+    ///
+    /// # Panics
+    ///
+    /// Panics if `values` and `is_valid` have different lengths
     #[inline]
     pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
         assert_eq!(
@@ -328,6 +332,15 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     }
 }
 
+impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
+    #[inline]
+    fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
+        for v in iter {
+            self.append_option(v)
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -578,4 +591,13 @@ mod tests {
     fn test_invalid_with_data_type() {
         Int32Builder::new().with_data_type(DataType::Int64);
     }
+
+    #[test]
+    fn test_extend() {
+        let mut builder = PrimitiveBuilder::<Int16Type>::new();
+        builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
+        builder.extend([2, 4, 6, 2].into_iter().map(Some));
+        let array = builder.finish();
+        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
+    }
 }
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs 
b/arrow-array/src/builder/primitive_dictionary_builder.rs
index 4640902d8..f44f0e306 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -193,12 +193,34 @@ where
         Ok(key)
     }
 
+    /// Infallibly append a value to this builder
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    pub fn append_value(&mut self, value: V::Native) {
+        self.append(value).expect("dictionary key overflow");
+    }
+
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
         self.keys_builder.append_null()
     }
 
+    /// Append an `Option` value into the builder
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    #[inline]
+    pub fn append_option(&mut self, value: Option<V::Native>) {
+        match value {
+            None => self.append_null(),
+            Some(v) => self.append_value(v),
+        };
+    }
+
     /// Builds the `DictionaryArray` and reset this builder.
     pub fn finish(&mut self) -> DictionaryArray<K> {
         self.map.clear();
@@ -235,6 +257,17 @@ where
     }
 }
 
+impl<K: ArrowPrimitiveType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
+    for PrimitiveDictionaryBuilder<K, P>
+{
+    #[inline]
+    fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
+        for v in iter {
+            self.append_option(v)
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -242,7 +275,7 @@ mod tests {
     use crate::array::Array;
     use crate::array::UInt32Array;
     use crate::array::UInt8Array;
-    use crate::types::{UInt32Type, UInt8Type};
+    use crate::types::{Int32Type, UInt32Type, UInt8Type};
 
     #[test]
     fn test_primitive_dictionary_builder() {
@@ -270,6 +303,19 @@ mod tests {
         assert_eq!(avs, &[12345678, 22345678]);
     }
 
+    #[test]
+    fn test_extend() {
+        let mut builder = PrimitiveDictionaryBuilder::<Int32Type, 
Int32Type>::new();
+        builder.extend([1, 2, 3, 1, 2, 3, 1, 2, 3].into_iter().map(Some));
+        builder.extend([4, 5, 1, 3, 1].into_iter().map(Some));
+        let dict = builder.finish();
+        assert_eq!(
+            dict.keys().values(),
+            &[0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 4, 0, 2, 0]
+        );
+        assert_eq!(dict.values().len(), 5);
+    }
+
     #[test]
     #[should_panic(expected = "DictionaryKeyOverflowError")]
     fn test_primitive_dictionary_overflow() {

Reply via email to