This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 72474a674 Add `into_primitive_dict_builder` to `DictionaryArray`
(#3715)
72474a674 is described below
commit 72474a674270685d6ea2d631760da4cd19dfeeea
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sat Mar 4 00:03:23 2023 -0800
Add `into_primitive_dict_builder` to `DictionaryArray` (#3715)
* Add into_primitive_dict_builder
* For review
---
arrow-array/src/array/dictionary_array.rs | 97 +++++++++++++++++++++-
.../src/builder/primitive_dictionary_builder.rs | 38 ++++++++-
2 files changed, 132 insertions(+), 3 deletions(-)
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 60426e5b3..22e99a44c 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -15,7 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use crate::builder::StringDictionaryBuilder;
+use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
+use crate::cast::as_primitive_array;
use crate::iterator::ArrayIter;
use crate::types::*;
use crate::{
@@ -394,6 +395,44 @@ impl<K: ArrowPrimitiveType> DictionaryArray<K> {
// Offsets were valid before and verified length is greater than or
equal
Self::from(unsafe { builder.build_unchecked() })
}
+
+ /// Returns `PrimitiveDictionaryBuilder` of this dictionary array for
mutating
+ /// its keys and values if the underlying data buffer is not shared by
others.
+ pub fn into_primitive_dict_builder<V>(
+ self,
+ ) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
+ where
+ V: ArrowPrimitiveType,
+ {
+ if !self.value_type().is_primitive() {
+ return Err(self);
+ }
+
+ let key_array = as_primitive_array::<K>(self.keys()).clone();
+ let value_array = as_primitive_array::<V>(self.values()).clone();
+
+ drop(self.data);
+ drop(self.keys);
+ drop(self.values);
+
+ let key_builder = key_array.into_builder();
+ let value_builder = value_array.into_builder();
+
+ match (key_builder, value_builder) {
+ (Ok(key_builder), Ok(value_builder)) => Ok(unsafe {
+ PrimitiveDictionaryBuilder::new_from_builders(key_builder,
value_builder)
+ }),
+ (Err(key_array), Ok(mut value_builder)) => {
+ Err(Self::try_new(&key_array,
&value_builder.finish()).unwrap())
+ }
+ (Ok(mut key_builder), Err(value_array)) => {
+ Err(Self::try_new(&key_builder.finish(),
&value_array).unwrap())
+ }
+ (Err(key_array), Err(value_array)) => {
+ Err(Self::try_new(&key_array, &value_array).unwrap())
+ }
+ }
+ }
}
/// Constructs a `DictionaryArray` from an array data reference.
@@ -644,11 +683,13 @@ where
mod tests {
use super::*;
use crate::builder::PrimitiveDictionaryBuilder;
+ use crate::cast::as_dictionary_array;
use crate::types::{
Float32Type, Int16Type, Int32Type, Int8Type, UInt32Type, UInt8Type,
};
use crate::{Float32Array, Int16Array, Int32Array, Int8Array};
use arrow_buffer::{Buffer, ToByteSlice};
+ use std::sync::Arc;
#[test]
fn test_dictionary_array() {
@@ -930,4 +971,58 @@ mod tests {
let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
let _ = DictionaryArray::<Int64Type>::from(a.into_data());
}
+
+ #[test]
+ fn test_into_primitive_dict_builder() {
+ let values = Int32Array::from_iter_values([10_i32, 12, 15]);
+ let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);
+
+ let dict_array = DictionaryArray::<Int8Type>::try_new(&keys,
&values).unwrap();
+
+ let boxed: ArrayRef = Arc::new(dict_array);
+ let col: DictionaryArray<Int8Type> =
as_dictionary_array(&boxed).clone();
+
+ drop(boxed);
+ drop(keys);
+ drop(values);
+
+ let mut builder =
col.into_primitive_dict_builder::<Int32Type>().unwrap();
+
+ let slice = builder.values_slice_mut();
+ assert_eq!(slice, &[10, 12, 15]);
+
+ slice[0] = 4;
+ slice[1] = 2;
+ slice[2] = 1;
+
+ let values = Int32Array::from_iter_values([4_i32, 2, 1]);
+ let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);
+
+ let expected = DictionaryArray::<Int8Type>::try_new(&keys,
&values).unwrap();
+
+ let new_array = builder.finish();
+ assert_eq!(expected, new_array);
+ }
+
+ #[test]
+ fn test_into_primitive_dict_builder_cloned_array() {
+ let values = Int32Array::from_iter_values([10_i32, 12, 15]);
+ let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);
+
+ let dict_array = DictionaryArray::<Int8Type>::try_new(&keys,
&values).unwrap();
+
+ let boxed: ArrayRef = Arc::new(dict_array);
+
+ let col: DictionaryArray<Int8Type> =
+ DictionaryArray::<Int8Type>::from(boxed.data().clone());
+ let err = col.into_primitive_dict_builder::<Int32Type>();
+
+ let returned = err.unwrap_err();
+
+ let values = Int32Array::from_iter_values([10_i32, 12, 15]);
+ let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);
+
+ let expected = DictionaryArray::<Int8Type>::try_new(&keys,
&values).unwrap();
+ assert_eq!(expected, returned);
+ }
}
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs
b/arrow-array/src/builder/primitive_dictionary_builder.rs
index 742c09d8c..9f4109941 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -118,7 +118,7 @@ where
/// # Panics
///
/// This method panics if `keys_builder` or `values_builder` is not empty.
- pub fn new_from_builders(
+ pub fn new_from_empty_builders(
keys_builder: PrimitiveBuilder<K>,
values_builder: PrimitiveBuilder<V>,
) -> Self {
@@ -133,6 +133,30 @@ where
}
}
+ /// Creates a new `PrimitiveDictionaryBuilder` from existing
`PrimitiveBuilder`s of keys and values.
+ ///
+ /// # Safety
+ ///
+ /// caller must ensure that the passed in builders are valid for
DictionaryArray.
+ pub unsafe fn new_from_builders(
+ keys_builder: PrimitiveBuilder<K>,
+ values_builder: PrimitiveBuilder<V>,
+ ) -> Self {
+ let keys = keys_builder.values_slice();
+ let values = values_builder.values_slice();
+ let mut map = HashMap::with_capacity(values.len());
+
+ keys.iter().zip(values.iter()).for_each(|(key, value)| {
+ map.insert(Value(*value), K::Native::to_usize(*key).unwrap());
+ });
+
+ Self {
+ keys_builder,
+ values_builder,
+ map,
+ }
+ }
+
/// Creates a new `PrimitiveDictionaryBuilder` with the provided capacities
///
/// `keys_capacity`: the number of keys, i.e. length of array to build
@@ -276,6 +300,16 @@ where
DictionaryArray::from(unsafe { builder.build_unchecked() })
}
+
+ /// Returns the current dictionary values buffer as a slice
+ pub fn values_slice(&self) -> &[V::Native] {
+ self.values_builder.values_slice()
+ }
+
+ /// Returns the current dictionary values buffer as a mutable slice
+ pub fn values_slice_mut(&mut self) -> &mut [V::Native] {
+ self.values_builder.values_slice_mut()
+ }
}
impl<K: ArrowPrimitiveType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
@@ -357,7 +391,7 @@ mod tests {
let values_builder =
Decimal128Builder::new().with_data_type(DataType::Decimal128(1,
2));
let mut builder =
- PrimitiveDictionaryBuilder::<Int32Type,
Decimal128Type>::new_from_builders(
+ PrimitiveDictionaryBuilder::<Int32Type,
Decimal128Type>::new_from_empty_builders(
keys_builder,
values_builder,
);