This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 4350b8c84 Remove PrimitiveBuilder::finish_dict (#1978) (#1980)
4350b8c84 is described below

commit 4350b8c84bc4715301f950373f0bb71c5fb3f63d
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Jul 1 06:25:36 2022 +0100

    Remove PrimitiveBuilder::finish_dict (#1978) (#1980)
---
 arrow/src/array/array_primitive.rs                 | 11 +++++++++
 arrow/src/array/array_string.rs                    | 11 +++++++++
 arrow/src/array/builder/primitive_builder.rs       | 26 ----------------------
 .../array/builder/primitive_dictionary_builder.rs  | 22 ++++++++++++------
 .../src/array/builder/string_dictionary_builder.rs | 17 +++++++++++---
 arrow/src/array/data.rs                            | 25 +++++++++++++++++++++
 6 files changed, 76 insertions(+), 36 deletions(-)

diff --git a/arrow/src/array/array_primitive.rs 
b/arrow/src/array/array_primitive.rs
index 427bda636..d36caaca8 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -166,6 +166,17 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
     ) -> impl Iterator<Item = Option<T::Native>> + 'a {
         indexes.map(|opt_index| opt_index.map(|index| 
self.value_unchecked(index)))
     }
+
+    /// Returns the backing [`ArrayData`] of this [`PrimitiveArray`]
+    pub fn into_data(self) -> ArrayData {
+        self.into()
+    }
+}
+
+impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData {
+    fn from(a: PrimitiveArray<T>) -> Self {
+        a.data
+    }
 }
 
 impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index dba983b01..d00758293 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -195,6 +195,11 @@ impl<OffsetSize: OffsetSizeTrait> 
GenericStringArray<OffsetSize> {
     ) -> impl Iterator<Item = Option<&str>> + 'a {
         indexes.map(|opt_index| opt_index.map(|index| 
self.value_unchecked(index)))
     }
+
+    /// Returns the backing [`ArrayData`] of this [`GenericStringArray`]
+    pub fn into_data(self) -> ArrayData {
+        self.into()
+    }
 }
 
 impl<'a, Ptr, OffsetSize: OffsetSizeTrait> FromIterator<&'a Option<Ptr>>
@@ -336,6 +341,12 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for 
GenericStringArray<Offse
     }
 }
 
+impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>> for 
ArrayData {
+    fn from(a: GenericStringArray<OffsetSize>) -> Self {
+        a.data
+    }
+}
+
 /// An array where each element is a variable-sized sequence of bytes 
representing a string
 /// whose maximum length (in bytes) is represented by a i32.
 ///
diff --git a/arrow/src/array/builder/primitive_builder.rs 
b/arrow/src/array/builder/primitive_builder.rs
index b18239b75..2f5eeac73 100644
--- a/arrow/src/array/builder/primitive_builder.rs
+++ b/arrow/src/array/builder/primitive_builder.rs
@@ -20,10 +20,8 @@ use std::sync::Arc;
 
 use crate::array::ArrayData;
 use crate::array::ArrayRef;
-use crate::array::DictionaryArray;
 use crate::array::PrimitiveArray;
 use crate::datatypes::ArrowPrimitiveType;
-use crate::datatypes::DataType;
 use crate::error::{ArrowError, Result};
 
 use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
@@ -197,30 +195,6 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         PrimitiveArray::<T>::from(array_data)
     }
 
-    /// Builds the `DictionaryArray` and reset this builder.
-    pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
-        let len = self.len();
-        let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
-        let null_count = len
-            - null_bit_buffer
-                .as_ref()
-                .map(|b| b.count_set_bits())
-                .unwrap_or(len);
-        let data_type = DataType::Dictionary(
-            Box::new(T::DATA_TYPE),
-            Box::new(values.data_type().clone()),
-        );
-        let mut builder = ArrayData::builder(data_type)
-            .len(len)
-            .add_buffer(self.values_builder.finish());
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(null_bit_buffer);
-        }
-        builder = builder.add_child_data(values.data().clone());
-        let array_data = unsafe { builder.build_unchecked() };
-        DictionaryArray::<T>::from(array_data)
-    }
-
     fn materialize_bitmap_builder(&mut self) {
         if self.bitmap_builder.is_some() {
             return;
diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs 
b/arrow/src/array/builder/primitive_dictionary_builder.rs
index 93695e0b7..5e989f3cc 100644
--- a/arrow/src/array/builder/primitive_dictionary_builder.rs
+++ b/arrow/src/array/builder/primitive_dictionary_builder.rs
@@ -19,11 +19,8 @@ use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
 
-use crate::array::ArrayRef;
-use crate::array::ArrowPrimitiveType;
-use crate::array::DictionaryArray;
-use crate::datatypes::ArrowNativeType;
-use crate::datatypes::ToByteSlice;
+use crate::array::{ArrayRef, ArrowPrimitiveType, DictionaryArray};
+use crate::datatypes::{ArrowNativeType, DataType, ToByteSlice};
 use crate::error::{ArrowError, Result};
 
 use super::ArrayBuilder;
@@ -164,8 +161,19 @@ where
     /// Builds the `DictionaryArray` and reset this builder.
     pub fn finish(&mut self) -> DictionaryArray<K> {
         self.map.clear();
-        let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
-        self.keys_builder.finish_dict(value_ref)
+        let values = self.values_builder.finish();
+        let keys = self.keys_builder.finish();
+
+        let data_type =
+            DataType::Dictionary(Box::new(K::DATA_TYPE), 
Box::new(V::DATA_TYPE));
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
 }
 
diff --git a/arrow/src/array/builder/string_dictionary_builder.rs 
b/arrow/src/array/builder/string_dictionary_builder.rs
index 918bf9756..77b2b2316 100644
--- a/arrow/src/array/builder/string_dictionary_builder.rs
+++ b/arrow/src/array/builder/string_dictionary_builder.rs
@@ -19,7 +19,7 @@ use super::PrimitiveBuilder;
 use crate::array::{
     Array, ArrayBuilder, ArrayRef, DictionaryArray, StringArray, StringBuilder,
 };
-use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType};
+use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType};
 use crate::error::{ArrowError, Result};
 use hashbrown::hash_map::RawEntryMut;
 use hashbrown::HashMap;
@@ -250,8 +250,19 @@ where
     /// Builds the `DictionaryArray` and reset this builder.
     pub fn finish(&mut self) -> DictionaryArray<K> {
         self.dedup.clear();
-        let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
-        self.keys_builder.finish_dict(value_ref)
+        let values = self.values_builder.finish();
+        let keys = self.keys_builder.finish();
+
+        let data_type =
+            DataType::Dictionary(Box::new(K::DATA_TYPE), 
Box::new(DataType::Utf8));
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
 }
 
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 52920827d..c1d608228 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -1217,6 +1217,11 @@ impl ArrayData {
             .zip(other.child_data.iter())
             .all(|(a, b)| a.ptr_eq(b))
     }
+
+    /// Converts this [`ArrayData`] into an [`ArrayDataBuilder`]
+    pub fn into_builder(self) -> ArrayDataBuilder {
+        self.into()
+    }
 }
 
 /// Return the expected [`DataTypeLayout`] Arrays of this data
@@ -1408,6 +1413,10 @@ impl ArrayDataBuilder {
         }
     }
 
+    pub fn data_type(self, data_type: DataType) -> Self {
+        Self { data_type, ..self }
+    }
+
     #[inline]
     #[allow(clippy::len_without_is_empty)]
     pub const fn len(mut self, n: usize) -> Self {
@@ -1482,6 +1491,22 @@ impl ArrayDataBuilder {
     }
 }
 
+impl From<ArrayData> for ArrayDataBuilder {
+    fn from(d: ArrayData) -> Self {
+        // TODO: Store Bitmap on ArrayData (#1799)
+        let null_bit_buffer = d.null_buffer().cloned();
+        Self {
+            null_bit_buffer,
+            data_type: d.data_type,
+            len: d.len,
+            null_count: Some(d.null_count),
+            offset: d.offset,
+            buffers: d.buffers,
+            child_data: d.child_data,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

Reply via email to