This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4350b8c84 Remove PrimitiveBuilder::finish_dict (#1978) (#1980)
4350b8c84 is described below
commit 4350b8c84bc4715301f950373f0bb71c5fb3f63d
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Jul 1 06:25:36 2022 +0100
Remove PrimitiveBuilder::finish_dict (#1978) (#1980)
---
arrow/src/array/array_primitive.rs | 11 +++++++++
arrow/src/array/array_string.rs | 11 +++++++++
arrow/src/array/builder/primitive_builder.rs | 26 ----------------------
.../array/builder/primitive_dictionary_builder.rs | 22 ++++++++++++------
.../src/array/builder/string_dictionary_builder.rs | 17 +++++++++++---
arrow/src/array/data.rs | 25 +++++++++++++++++++++
6 files changed, 76 insertions(+), 36 deletions(-)
diff --git a/arrow/src/array/array_primitive.rs
b/arrow/src/array/array_primitive.rs
index 427bda636..d36caaca8 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -166,6 +166,17 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
) -> impl Iterator<Item = Option<T::Native>> + 'a {
indexes.map(|opt_index| opt_index.map(|index|
self.value_unchecked(index)))
}
+
+ /// Returns the backing [`ArrayData`] of this [`PrimitiveArray`]
+ pub fn into_data(self) -> ArrayData {
+ self.into()
+ }
+}
+
+impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData {
+ fn from(a: PrimitiveArray<T>) -> Self {
+ a.data
+ }
}
impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index dba983b01..d00758293 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -195,6 +195,11 @@ impl<OffsetSize: OffsetSizeTrait>
GenericStringArray<OffsetSize> {
) -> impl Iterator<Item = Option<&str>> + 'a {
indexes.map(|opt_index| opt_index.map(|index|
self.value_unchecked(index)))
}
+
+ /// Returns the backing [`ArrayData`] of this [`GenericStringArray`]
+ pub fn into_data(self) -> ArrayData {
+ self.into()
+ }
}
impl<'a, Ptr, OffsetSize: OffsetSizeTrait> FromIterator<&'a Option<Ptr>>
@@ -336,6 +341,12 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for
GenericStringArray<Offse
}
}
+impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>> for
ArrayData {
+ fn from(a: GenericStringArray<OffsetSize>) -> Self {
+ a.data
+ }
+}
+
/// An array where each element is a variable-sized sequence of bytes
representing a string
/// whose maximum length (in bytes) is represented by a i32.
///
diff --git a/arrow/src/array/builder/primitive_builder.rs
b/arrow/src/array/builder/primitive_builder.rs
index b18239b75..2f5eeac73 100644
--- a/arrow/src/array/builder/primitive_builder.rs
+++ b/arrow/src/array/builder/primitive_builder.rs
@@ -20,10 +20,8 @@ use std::sync::Arc;
use crate::array::ArrayData;
use crate::array::ArrayRef;
-use crate::array::DictionaryArray;
use crate::array::PrimitiveArray;
use crate::datatypes::ArrowPrimitiveType;
-use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
@@ -197,30 +195,6 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
PrimitiveArray::<T>::from(array_data)
}
- /// Builds the `DictionaryArray` and reset this builder.
- pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
- let len = self.len();
- let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
- let null_count = len
- - null_bit_buffer
- .as_ref()
- .map(|b| b.count_set_bits())
- .unwrap_or(len);
- let data_type = DataType::Dictionary(
- Box::new(T::DATA_TYPE),
- Box::new(values.data_type().clone()),
- );
- let mut builder = ArrayData::builder(data_type)
- .len(len)
- .add_buffer(self.values_builder.finish());
- if null_count > 0 {
- builder = builder.null_bit_buffer(null_bit_buffer);
- }
- builder = builder.add_child_data(values.data().clone());
- let array_data = unsafe { builder.build_unchecked() };
- DictionaryArray::<T>::from(array_data)
- }
-
fn materialize_bitmap_builder(&mut self) {
if self.bitmap_builder.is_some() {
return;
diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs
b/arrow/src/array/builder/primitive_dictionary_builder.rs
index 93695e0b7..5e989f3cc 100644
--- a/arrow/src/array/builder/primitive_dictionary_builder.rs
+++ b/arrow/src/array/builder/primitive_dictionary_builder.rs
@@ -19,11 +19,8 @@ use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
-use crate::array::ArrayRef;
-use crate::array::ArrowPrimitiveType;
-use crate::array::DictionaryArray;
-use crate::datatypes::ArrowNativeType;
-use crate::datatypes::ToByteSlice;
+use crate::array::{ArrayRef, ArrowPrimitiveType, DictionaryArray};
+use crate::datatypes::{ArrowNativeType, DataType, ToByteSlice};
use crate::error::{ArrowError, Result};
use super::ArrayBuilder;
@@ -164,8 +161,19 @@ where
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.map.clear();
- let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
- self.keys_builder.finish_dict(value_ref)
+ let values = self.values_builder.finish();
+ let keys = self.keys_builder.finish();
+
+ let data_type =
+ DataType::Dictionary(Box::new(K::DATA_TYPE),
Box::new(V::DATA_TYPE));
+
+ let builder = keys
+ .into_data()
+ .into_builder()
+ .data_type(data_type)
+ .child_data(vec![values.into_data()]);
+
+ DictionaryArray::from(unsafe { builder.build_unchecked() })
}
}
diff --git a/arrow/src/array/builder/string_dictionary_builder.rs
b/arrow/src/array/builder/string_dictionary_builder.rs
index 918bf9756..77b2b2316 100644
--- a/arrow/src/array/builder/string_dictionary_builder.rs
+++ b/arrow/src/array/builder/string_dictionary_builder.rs
@@ -19,7 +19,7 @@ use super::PrimitiveBuilder;
use crate::array::{
Array, ArrayBuilder, ArrayRef, DictionaryArray, StringArray, StringBuilder,
};
-use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType};
+use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType};
use crate::error::{ArrowError, Result};
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
@@ -250,8 +250,19 @@ where
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.dedup.clear();
- let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
- self.keys_builder.finish_dict(value_ref)
+ let values = self.values_builder.finish();
+ let keys = self.keys_builder.finish();
+
+ let data_type =
+ DataType::Dictionary(Box::new(K::DATA_TYPE),
Box::new(DataType::Utf8));
+
+ let builder = keys
+ .into_data()
+ .into_builder()
+ .data_type(data_type)
+ .child_data(vec![values.into_data()]);
+
+ DictionaryArray::from(unsafe { builder.build_unchecked() })
}
}
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 52920827d..c1d608228 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -1217,6 +1217,11 @@ impl ArrayData {
.zip(other.child_data.iter())
.all(|(a, b)| a.ptr_eq(b))
}
+
+ /// Converts this [`ArrayData`] into an [`ArrayDataBuilder`]
+ pub fn into_builder(self) -> ArrayDataBuilder {
+ self.into()
+ }
}
/// Return the expected [`DataTypeLayout`] Arrays of this data
@@ -1408,6 +1413,10 @@ impl ArrayDataBuilder {
}
}
+ pub fn data_type(self, data_type: DataType) -> Self {
+ Self { data_type, ..self }
+ }
+
#[inline]
#[allow(clippy::len_without_is_empty)]
pub const fn len(mut self, n: usize) -> Self {
@@ -1482,6 +1491,22 @@ impl ArrayDataBuilder {
}
}
+impl From<ArrayData> for ArrayDataBuilder {
+ fn from(d: ArrayData) -> Self {
+ // TODO: Store Bitmap on ArrayData (#1799)
+ let null_bit_buffer = d.null_buffer().cloned();
+ Self {
+ null_bit_buffer,
+ data_type: d.data_type,
+ len: d.len,
+ null_count: Some(d.null_count),
+ offset: d.offset,
+ buffers: d.buffers,
+ child_data: d.child_data,
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;