This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 4adbeee14c Add append_many to dictionary arrays to allow adding 
repeated values (#6534)
4adbeee14c is described below

commit 4adbeee14c40c080f8fca3bd42c0c3856bbb151c
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Thu Oct 10 15:55:00 2024 -0500

    Add append_many to dictionary arrays to allow adding repeated values (#6534)
    
    * Add append_many to dictionary arrays to allow adding repeated values
    
    * fix merge
    
    * rename func
---
 arrow-array/src/array/dictionary_array.rs          | 68 ++++++++++++++++++++++
 .../builder/generic_bytes_dictionary_builder.rs    | 58 +++++++++++++++---
 arrow-array/src/builder/primitive_builder.rs       |  7 +++
 .../src/builder/primitive_dictionary_builder.rs    | 62 ++++++++++++++++----
 4 files changed, 175 insertions(+), 20 deletions(-)

diff --git a/arrow-array/src/array/dictionary_array.rs 
b/arrow-array/src/array/dictionary_array.rs
index d6c5dd4c3e..bdb6f0d4b5 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -1072,6 +1072,74 @@ mod tests {
         assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
     }
 
+    #[test]
+    fn test_dictionary_builder_append_many() {
+        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, 
UInt32Type>::new();
+
+        builder.append(1).unwrap();
+        builder.append_n(2, 2).unwrap();
+        builder.append_options(None, 2);
+        builder.append_options(Some(3), 3);
+
+        let array = builder.finish();
+
+        let values = array
+            .values()
+            .as_primitive::<UInt32Type>()
+            .iter()
+            .map(Option::unwrap)
+            .collect::<Vec<_>>();
+        assert_eq!(values, &[1, 2, 3]);
+        let keys = array.keys().iter().collect::<Vec<_>>();
+        assert_eq!(
+            keys,
+            &[
+                Some(0),
+                Some(1),
+                Some(1),
+                None,
+                None,
+                Some(2),
+                Some(2),
+                Some(2)
+            ]
+        );
+    }
+
+    #[test]
+    fn test_string_dictionary_builder_append_many() {
+        let mut builder = StringDictionaryBuilder::<Int8Type>::new();
+
+        builder.append("a").unwrap();
+        builder.append_n("b", 2).unwrap();
+        builder.append_options(None::<&str>, 2);
+        builder.append_options(Some("c"), 3);
+
+        let array = builder.finish();
+
+        let values = array
+            .values()
+            .as_string::<i32>()
+            .iter()
+            .map(Option::unwrap)
+            .collect::<Vec<_>>();
+        assert_eq!(values, &["a", "b", "c"]);
+        let keys = array.keys().iter().collect::<Vec<_>>();
+        assert_eq!(
+            keys,
+            &[
+                Some(0),
+                Some(1),
+                Some(1),
+                None,
+                None,
+                Some(2),
+                Some(2),
+                Some(2)
+            ]
+        );
+    }
+
     #[test]
     fn test_dictionary_array_fmt_debug() {
         let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, 
UInt32Type>::with_capacity(3, 2);
diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs 
b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index 128a4f8206..a327c622a7 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -195,12 +195,7 @@ where
     K: ArrowDictionaryKeyType,
     T: ByteArrayType,
 {
-    /// Append a value to the array. Return an existing index
-    /// if already present in the values array or a new index if the
-    /// value is appended to the values array.
-    ///
-    /// Returns an error if the new index would overflow the key type.
-    pub fn append(&mut self, value: impl AsRef<T::Native>) -> 
Result<K::Native, ArrowError> {
+    fn get_or_insert_key(&mut self, value: impl AsRef<T::Native>) -> 
Result<K::Native, ArrowError> {
         let value_native: &T::Native = value.as_ref();
         let value_bytes: &[u8] = value_native.as_ref();
 
@@ -223,8 +218,32 @@ where
             .get();
 
         let key = 
K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)?;
+
+        Ok(key)
+    }
+
+    /// Append a value to the array. Return an existing index
+    /// if already present in the values array or a new index if the
+    /// value is appended to the values array.
+    ///
+    /// Returns an error if the new index would overflow the key type.
+    pub fn append(&mut self, value: impl AsRef<T::Native>) -> 
Result<K::Native, ArrowError> {
+        let key = self.get_or_insert_key(value)?;
         self.keys_builder.append_value(key);
+        Ok(key)
+    }
 
+    /// Append a value multiple times to the array.
+    /// This is the same as `append` but allows to append the same value 
multiple times without doing multiple lookups.
+    ///
+    /// Returns an error if the new index would overflow the key type.
+    pub fn append_n(
+        &mut self,
+        value: impl AsRef<T::Native>,
+        count: usize,
+    ) -> Result<K::Native, ArrowError> {
+        let key = self.get_or_insert_key(value)?;
+        self.keys_builder.append_value_n(key, count);
         Ok(key)
     }
 
@@ -237,6 +256,17 @@ where
         self.append(value).expect("dictionary key overflow");
     }
 
+    /// Infallibly append a value to this builder repeatedly `count` times.
+    /// This is the same as `append_value` but allows to append the same value 
multiple times without doing multiple lookups.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    pub fn append_values(&mut self, value: impl AsRef<T::Native>, count: 
usize) {
+        self.append_n(value, count)
+            .expect("dictionary key overflow");
+    }
+
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
@@ -256,6 +286,19 @@ where
         };
     }
 
+    /// Append an `Option` value into the builder repeatedly `count` times.
+    /// This is the same as `append_option` but allows to append the same 
value multiple times without doing multiple lookups.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    pub fn append_options(&mut self, value: Option<impl AsRef<T::Native>>, 
count: usize) {
+        match value {
+            None => self.keys_builder.append_nulls(count),
+            Some(v) => self.append_values(v, count),
+        };
+    }
+
     /// Builds the `DictionaryArray` and reset this builder.
     pub fn finish(&mut self) -> DictionaryArray<K> {
         self.dedup.clear();
@@ -331,8 +374,7 @@ fn get_bytes<T: ByteArrayType>(values: 
&GenericByteBuilder<T>, idx: usize) -> &[
 /// // The builder builds the dictionary value by value
 /// builder.append("abc").unwrap();
 /// builder.append_null();
-/// builder.append("def").unwrap();
-/// builder.append("def").unwrap();
+/// builder.append_n("def", 2).unwrap();  // appends "def" twice with a single 
lookup
 /// builder.append("abc").unwrap();
 /// let array = builder.finish();
 ///
diff --git a/arrow-array/src/builder/primitive_builder.rs 
b/arrow-array/src/builder/primitive_builder.rs
index 39b27bfca8..3191fea6e4 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -202,6 +202,13 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         self.values_builder.append(v);
     }
 
+    /// Appends a value of type `T` into the builder `n` times
+    #[inline]
+    pub fn append_value_n(&mut self, v: T::Native, n: usize) {
+        self.null_buffer_builder.append_n_non_nulls(n);
+        self.values_builder.append_n(n, v);
+    }
+
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs 
b/arrow-array/src/builder/primitive_dictionary_builder.rs
index a764fa4c29..35abe5ba5f 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -21,7 +21,6 @@ use crate::{Array, ArrayRef, ArrowPrimitiveType, 
DictionaryArray};
 use arrow_buffer::{ArrowNativeType, ToByteSlice};
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
-use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::sync::Arc;
 
@@ -210,26 +209,41 @@ where
     K: ArrowDictionaryKeyType,
     V: ArrowPrimitiveType,
 {
-    /// Append a primitive value to the array. Return an existing index
-    /// if already present in the values array or a new index if the
-    /// value is appended to the values array.
     #[inline]
-    pub fn append(&mut self, value: V::Native) -> Result<K::Native, 
ArrowError> {
-        let key = match self.map.entry(Value(value)) {
-            Entry::Vacant(vacant) => {
-                // Append new value.
+    fn get_or_insert_key(&mut self, value: V::Native) -> Result<K::Native, 
ArrowError> {
+        match self.map.get(&Value(value)) {
+            Some(&key) => {
+                
Ok(K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)?)
+            }
+            None => {
                 let key = self.values_builder.len();
                 self.values_builder.append_value(value);
-                vacant.insert(key);
-                
K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)?
+                self.map.insert(Value(value), key);
+                
Ok(K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)?)
             }
-            Entry::Occupied(o) => K::Native::usize_as(*o.get()),
-        };
+        }
+    }
 
+    /// Append a primitive value to the array. Return an existing index
+    /// if already present in the values array or a new index if the
+    /// value is appended to the values array.
+    #[inline]
+    pub fn append(&mut self, value: V::Native) -> Result<K::Native, 
ArrowError> {
+        let key = self.get_or_insert_key(value)?;
         self.keys_builder.append_value(key);
         Ok(key)
     }
 
+    /// Append a value multiple times to the array.
+    /// This is the same as `append` but allows to append the same value 
multiple times without doing multiple lookups.
+    ///
+    /// Returns an error if the new index would overflow the key type.
+    pub fn append_n(&mut self, value: V::Native, count: usize) -> 
Result<K::Native, ArrowError> {
+        let key = self.get_or_insert_key(value)?;
+        self.keys_builder.append_value_n(key, count);
+        Ok(key)
+    }
+
     /// Infallibly append a value to this builder
     ///
     /// # Panics
@@ -240,6 +254,17 @@ where
         self.append(value).expect("dictionary key overflow");
     }
 
+    /// Infallibly append a value to this builder repeatedly `count` times.
+    /// This is the same as `append_value` but allows to append the same value 
multiple times without doing multiple lookups.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    pub fn append_values(&mut self, value: V::Native, count: usize) {
+        self.append_n(value, count)
+            .expect("dictionary key overflow");
+    }
+
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
@@ -259,6 +284,19 @@ where
         };
     }
 
+    /// Append an `Option` value into the builder repeatedly `count` times.
+    /// This is the same as `append_option` but allows to append the same 
value multiple times without doing multiple lookups.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting length of the dictionary values array would 
exceed `T::Native::MAX`
+    pub fn append_options(&mut self, value: Option<V::Native>, count: usize) {
+        match value {
+            None => self.keys_builder.append_nulls(count),
+            Some(v) => self.append_values(v, count),
+        };
+    }
+
     /// Builds the `DictionaryArray` and reset this builder.
     pub fn finish(&mut self) -> DictionaryArray<K> {
         self.map.clear();

Reply via email to