This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 19e3e8c83 Implement Extend for ArrayBuilder (#1841) (#3563)
19e3e8c83 is described below
commit 19e3e8c8314f87d8c2acf3a7b69538fdec6f793c
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Jan 20 17:09:44 2023 +0000
Implement Extend for ArrayBuilder (#1841) (#3563)
* Implement Extend for ArrayBuilder (#1841)
* Add dictionaries
* Add tests
---
arrow-array/src/array/dictionary_array.rs | 13 +-----
arrow-array/src/builder/boolean_builder.rs | 22 ++++++++++
arrow-array/src/builder/generic_bytes_builder.rs | 23 +++++++++++
.../builder/generic_bytes_dictionary_builder.rs | 47 ++++++++++++++++++++-
arrow-array/src/builder/generic_list_builder.rs | 47 +++++++++++++++++++++
arrow-array/src/builder/primitive_builder.rs | 22 ++++++++++
.../src/builder/primitive_dictionary_builder.rs | 48 +++++++++++++++++++++-
7 files changed, 207 insertions(+), 15 deletions(-)
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 6cff5bfdc..fb2868c27 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -481,18 +481,7 @@ impl<'a, T: ArrowDictionaryKeyType>
FromIterator<Option<&'a str>> for Dictionary
let it = iter.into_iter();
let (lower, _) = it.size_hint();
let mut builder = StringDictionaryBuilder::with_capacity(lower, 256,
1024);
- it.for_each(|i| {
- if let Some(i) = i {
- // Note: impl ... for Result<DictionaryArray<T>> fails with
- // error[E0117]: only traits defined in the current crate can
be implemented for arbitrary types
- builder
- .append(i)
- .expect("Unable to append a value to a dictionary array.");
- } else {
- builder.append_null();
- }
- });
-
+ builder.extend(it);
builder.finish()
}
}
diff --git a/arrow-array/src/builder/boolean_builder.rs
b/arrow-array/src/builder/boolean_builder.rs
index 96f436253..06709e5f3 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -211,6 +211,15 @@ impl ArrayBuilder for BooleanBuilder {
}
}
+impl Extend<Option<bool>> for BooleanBuilder {
+ #[inline]
+ fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
+ for v in iter {
+ self.append_option(v)
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -304,4 +313,17 @@ mod tests {
assert_eq!(0, array.null_count());
assert!(array.data().null_buffer().is_none());
}
+
+ #[test]
+ fn test_extend() {
+ let mut builder = BooleanBuilder::new();
+ builder.extend([false, false, true, false,
false].into_iter().map(Some));
+ builder.extend([true, true, false].into_iter().map(Some));
+ let array = builder.finish();
+ let values = array.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
+ assert_eq!(
+ &values,
+ &[false, false, true, false, false, true, true, false]
+ )
+ }
}
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs
b/arrow-array/src/builder/generic_bytes_builder.rs
index 8be3ac7f4..73600d9e0 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -88,6 +88,10 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
}
/// Appends a value into the builder.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the resulting length of [`Self::values_slice`] would exceed
`T::Offset::MAX`
#[inline]
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
self.value_builder.append_slice(value.as_ref().as_ref());
@@ -219,6 +223,15 @@ impl<T: ByteArrayType> ArrayBuilder for
GenericByteBuilder<T> {
}
}
+impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for
GenericByteBuilder<T> {
+ #[inline]
+ fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
+ for v in iter {
+ self.append_option(v)
+ }
+ }
+}
+
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
@@ -420,4 +433,14 @@ mod tests {
fn test_large_string_array_builder_finish_cloned() {
_test_generic_string_array_builder_finish_cloned::<i64>()
}
+
+ #[test]
+ fn test_extend() {
+ let mut builder = GenericStringBuilder::<i32>::new();
+ builder.extend(["a", "b", "c", "", "a", "b",
"c"].into_iter().map(Some));
+ builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
+ let array = builder.finish();
+ assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15,
20]);
+ assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
+ }
}
diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index 4a920f3ee..449100da1 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -214,7 +214,7 @@ where
K: ArrowDictionaryKeyType,
T: ByteArrayType,
{
- /// Append a primitive value to the array. Return an existing index
+ /// Append a value to the array. Return an existing index
/// if already present in the values array or a new index if the
/// value is appended to the values array.
///
@@ -255,12 +255,34 @@ where
Ok(key)
}
+ /// Infallibly append a value to this builder
+ ///
+ /// # Panics
+ ///
+ /// Panics if the resulting length of the dictionary values array would
exceed `K::Native::MAX`
+ pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
+ self.append(value).expect("dictionary key overflow");
+ }
+
/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.keys_builder.append_null()
}
+ /// Append an `Option` value into the builder
+ ///
+ /// # Panics
+ ///
+ /// Panics if the resulting length of the dictionary values array would
exceed `K::Native::MAX`
+ #[inline]
+ pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
+ match value {
+ None => self.append_null(),
+ Some(v) => self.append_value(v),
+ };
+ }
+
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.dedup.clear();
@@ -297,6 +319,17 @@ where
}
}
+impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>>
Extend<Option<V>>
+ for GenericByteDictionaryBuilder<K, T>
+{
+ #[inline]
+ fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
+ for v in iter {
+ self.append_option(v)
+ }
+ }
+}
+
fn get_bytes<'a, K: ArrowNativeType, T: ByteArrayType>(
values: &'a GenericByteBuilder<T>,
key: &K,
@@ -405,7 +438,7 @@ mod tests {
use crate::array::Array;
use crate::array::Int8Array;
- use crate::types::{Int16Type, Int8Type};
+ use crate::types::{Int16Type, Int32Type, Int8Type, Utf8Type};
use crate::{BinaryArray, StringArray};
fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
@@ -622,4 +655,14 @@ mod tests {
vec![b"abc", b"def"],
);
}
+
+ #[test]
+ fn test_extend() {
+ let mut builder = GenericByteDictionaryBuilder::<Int32Type,
Utf8Type>::new();
+ builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
+ builder.extend(["c", "d", "a"].into_iter().map(Some));
+ let dict = builder.finish();
+ assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
+ assert_eq!(dict.values().len(), 4);
+ }
}
diff --git a/arrow-array/src/builder/generic_list_builder.rs
b/arrow-array/src/builder/generic_list_builder.rs
index 8f3f881c4..622847554 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -111,6 +111,10 @@ where
}
/// Finish the current variable-length list array slot
+ ///
+ /// # Panics
+ ///
+ /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX`
#[inline]
pub fn append(&mut self, is_valid: bool) {
self.offsets_builder
@@ -178,10 +182,32 @@ where
}
}
+impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
+where
+ O: OffsetSizeTrait,
+ B: ArrayBuilder + Extend<E>,
+ V: IntoIterator<Item = E>,
+{
+ #[inline]
+ fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
+ for v in iter {
+ match v {
+ Some(elements) => {
+ self.values_builder.extend(elements);
+ self.append(true);
+ }
+ None => self.append(false),
+ }
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::{Int32Builder, ListBuilder};
+ use crate::cast::as_primitive_array;
+ use crate::types::Int32Type;
use crate::{Array, Int32Array};
use arrow_buffer::Buffer;
use arrow_schema::DataType;
@@ -364,4 +390,25 @@ mod tests {
list_array.values().data().child_data()[0].buffers()[0].clone()
);
}
+
+ #[test]
+ fn test_extend() {
+ let mut builder = ListBuilder::new(Int32Builder::new());
+ builder.extend([
+ Some(vec![Some(1), Some(2), Some(7), None]),
+ Some(vec![]),
+ Some(vec![Some(4), Some(5)]),
+ None,
+ ]);
+
+ let array = builder.finish();
+ assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
+ assert_eq!(array.null_count(), 1);
+ assert!(array.is_null(3));
+ let a_values = array.values();
+ let elements = as_primitive_array::<Int32Type>(a_values.as_ref());
+ assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
+ assert_eq!(elements.null_count(), 1);
+ assert!(elements.is_null(3));
+ }
}
diff --git a/arrow-array/src/builder/primitive_builder.rs
b/arrow-array/src/builder/primitive_builder.rs
index a969e1218..2d88ea50f 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -238,6 +238,10 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
}
/// Appends values from a slice of type `T` and a validity boolean slice
+ ///
+ /// # Panics
+ ///
+ /// Panics if `values` and `is_valid` have different lengths
#[inline]
pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
assert_eq!(
@@ -328,6 +332,15 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
}
}
+impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
+ #[inline]
+ fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
+ for v in iter {
+ self.append_option(v)
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -578,4 +591,13 @@ mod tests {
fn test_invalid_with_data_type() {
Int32Builder::new().with_data_type(DataType::Int64);
}
+
+ #[test]
+ fn test_extend() {
+ let mut builder = PrimitiveBuilder::<Int16Type>::new();
+ builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
+ builder.extend([2, 4, 6, 2].into_iter().map(Some));
+ let array = builder.finish();
+ assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
+ }
}
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs
b/arrow-array/src/builder/primitive_dictionary_builder.rs
index 4640902d8..f44f0e306 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -193,12 +193,34 @@ where
Ok(key)
}
+ /// Infallibly append a value to this builder
+ ///
+ /// # Panics
+ ///
+ /// Panics if the resulting length of the dictionary values array would
exceed `K::Native::MAX`
+ pub fn append_value(&mut self, value: V::Native) {
+ self.append(value).expect("dictionary key overflow");
+ }
+
/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.keys_builder.append_null()
}
+ /// Append an `Option` value into the builder
+ ///
+ /// # Panics
+ ///
+ /// Panics if the resulting length of the dictionary values array would
exceed `K::Native::MAX`
+ #[inline]
+ pub fn append_option(&mut self, value: Option<V::Native>) {
+ match value {
+ None => self.append_null(),
+ Some(v) => self.append_value(v),
+ };
+ }
+
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.map.clear();
@@ -235,6 +257,17 @@ where
}
}
+impl<K: ArrowPrimitiveType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
+ for PrimitiveDictionaryBuilder<K, P>
+{
+ #[inline]
+ fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
+ for v in iter {
+ self.append_option(v)
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -242,7 +275,7 @@ mod tests {
use crate::array::Array;
use crate::array::UInt32Array;
use crate::array::UInt8Array;
- use crate::types::{UInt32Type, UInt8Type};
+ use crate::types::{Int32Type, UInt32Type, UInt8Type};
#[test]
fn test_primitive_dictionary_builder() {
@@ -270,6 +303,19 @@ mod tests {
assert_eq!(avs, &[12345678, 22345678]);
}
+ #[test]
+ fn test_extend() {
+ let mut builder = PrimitiveDictionaryBuilder::<Int32Type,
Int32Type>::new();
+ builder.extend([1, 2, 3, 1, 2, 3, 1, 2, 3].into_iter().map(Some));
+ builder.extend([4, 5, 1, 3, 1].into_iter().map(Some));
+ let dict = builder.finish();
+ assert_eq!(
+ dict.keys().values(),
+ &[0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 4, 0, 2, 0]
+ );
+ assert_eq!(dict.values().len(), 5);
+ }
+
#[test]
#[should_panic(expected = "DictionaryKeyOverflowError")]
fn test_primitive_dictionary_overflow() {