This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 1db1a8869c Avoid a clone when creating `DictionaryArray` from
ArrayData (#9185)
1db1a8869c is described below
commit 1db1a8869cceb179aa885ed58da9f0b49c03eafe
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Jan 15 15:43:56 2026 -0500
Avoid a clone when creating `DictionaryArray` from ArrayData (#9185)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/issues/9061
- broken out of https://github.com/apache/arrow-rs/pull/9058
# Rationale for this change
Let's make arrow-rs as fast as we can; the fewer allocations, the
better.
# What changes are included in this PR?
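Apply the pattern from https://github.com/apache/arrow-rs/pull/9114: take the `ArrayData` apart with `into_parts()` and build the keys and values from the owned parts, instead of cloning the child data and data type and rebuilding the `ArrayData` through its builder.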
# Are these changes tested?
Yes, they are covered by existing tests.
# Are there any user-facing changes?
No
---
arrow-array/src/array/dictionary_array.rs | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index be7703b13c..190a5038a0 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -25,7 +25,7 @@ use crate::{
};
use arrow_buffer::bit_util::set_bit;
use arrow_buffer::buffer::NullBuffer;
-use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
+use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder,
ScalarBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
@@ -580,21 +580,25 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
}
}
-/// Constructs a `DictionaryArray` from an array data reference.
+/// Constructs a `DictionaryArray` from an `ArrayData`
impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
fn from(data: ArrayData) -> Self {
+ let (data_type, len, nulls, offset, mut buffers, mut child_data) =
data.into_parts();
+
assert_eq!(
- data.buffers().len(),
+ buffers.len(),
1,
"DictionaryArray data should contain a single buffer only (keys)."
);
+ let buffer = buffers.pop().expect("checked above");
assert_eq!(
- data.child_data().len(),
+ child_data.len(),
1,
"DictionaryArray should contain a single child array (values)."
);
+ let cd = child_data.pop().expect("checked above");
- if let DataType::Dictionary(key_data_type, _) = data.data_type() {
+ if let DataType::Dictionary(key_data_type, _) = &data_type {
assert_eq!(
&T::DATA_TYPE,
key_data_type.as_ref(),
@@ -603,19 +607,10 @@ impl<T: ArrowDictionaryKeyType> From<ArrayData> for
DictionaryArray<T> {
key_data_type
);
- let values = make_array(data.child_data()[0].clone());
- let data_type = data.data_type().clone();
+ let values = make_array(cd);
// create a zero-copy of the keys' data
- // SAFETY:
- // ArrayData is valid and verified type above
-
- let keys = PrimitiveArray::<T>::from(unsafe {
- data.into_builder()
- .data_type(T::DATA_TYPE)
- .child_data(vec![])
- .build_unchecked()
- });
+ let keys = PrimitiveArray::<T>::new(ScalarBuffer::new(buffer,
offset, len), nulls);
Self {
data_type,