This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 1db1a8869c Avoid a clone when creating `DictionaryArray` from 
ArrayData (#9185)
1db1a8869c is described below

commit 1db1a8869cceb179aa885ed58da9f0b49c03eafe
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Jan 15 15:43:56 2026 -0500

    Avoid a clone when creating `DictionaryArray` from ArrayData (#9185)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/9061
    - broken out of https://github.com/apache/arrow-rs/pull/9058
    
    # Rationale for this change
    
    Let's make arrow-rs as fast as we can; the fewer allocations, the
    better.
    
    # What changes are included in this PR?
    
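    Apply the pattern from https://github.com/apache/arrow-rs/pull/9114:
    take the `ArrayData` apart with `into_parts()` and move the pieces
    into the new array instead of cloning them.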
    
    # Are these changes tested?
    
    Existing tests
    
    # Are there any user-facing changes?
    
    No
---
 arrow-array/src/array/dictionary_array.rs | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/arrow-array/src/array/dictionary_array.rs 
b/arrow-array/src/array/dictionary_array.rs
index be7703b13c..190a5038a0 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -25,7 +25,7 @@ use crate::{
 };
 use arrow_buffer::bit_util::set_bit;
 use arrow_buffer::buffer::NullBuffer;
-use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
+use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder, 
ScalarBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
@@ -580,21 +580,25 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
     }
 }
 
-/// Constructs a `DictionaryArray` from an array data reference.
+/// Constructs a `DictionaryArray` from an `ArrayData`
 impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
     fn from(data: ArrayData) -> Self {
+        let (data_type, len, nulls, offset, mut buffers, mut child_data) = 
data.into_parts();
+
         assert_eq!(
-            data.buffers().len(),
+            buffers.len(),
             1,
             "DictionaryArray data should contain a single buffer only (keys)."
         );
+        let buffer = buffers.pop().expect("checked above");
         assert_eq!(
-            data.child_data().len(),
+            child_data.len(),
             1,
             "DictionaryArray should contain a single child array (values)."
         );
+        let cd = child_data.pop().expect("checked above");
 
-        if let DataType::Dictionary(key_data_type, _) = data.data_type() {
+        if let DataType::Dictionary(key_data_type, _) = &data_type {
             assert_eq!(
                 &T::DATA_TYPE,
                 key_data_type.as_ref(),
@@ -603,19 +607,10 @@ impl<T: ArrowDictionaryKeyType> From<ArrayData> for 
DictionaryArray<T> {
                 key_data_type
             );
 
-            let values = make_array(data.child_data()[0].clone());
-            let data_type = data.data_type().clone();
+            let values = make_array(cd);
 
             // create a zero-copy of the keys' data
-            // SAFETY:
-            // ArrayData is valid and verified type above
-
-            let keys = PrimitiveArray::<T>::from(unsafe {
-                data.into_builder()
-                    .data_type(T::DATA_TYPE)
-                    .child_data(vec![])
-                    .build_unchecked()
-            });
+            let keys = PrimitiveArray::<T>::new(ScalarBuffer::new(buffer, 
offset, len), nulls);
 
             Self {
                 data_type,

Reply via email to