This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 93ebd3a2a5 Avoid a clone when creating `RunEndArray` from ArrayData (#9189)
93ebd3a2a5 is described below

commit 93ebd3a2a5a55a654ce198b8d18d1f288ab13cd0
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 21 08:53:11 2026 -0500

    Avoid a clone when creating `RunEndArray` from ArrayData (#9189)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/9061
    - broken out of https://github.com/apache/arrow-rs/pull/9058
    
    # Rationale for this change
    
    Let's make arrow-rs as fast as we can; the fewer allocations, the
    better.
    
    # What changes are included in this PR?
    
    Apply pattern from https://github.com/apache/arrow-rs/pull/9114
    
    # Are these changes tested?
    
    Existing tests
    
    # Are there any user-facing changes?
    
    No
---
 arrow-array/src/array/run_array.rs | 41 +++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/arrow-array/src/array/run_array.rs 
b/arrow-array/src/array/run_array.rs
index 9ca1af943d..517e84f1d7 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -18,7 +18,7 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, 
RunEndBuffer};
+use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, 
RunEndBuffer, ScalarBuffer};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, Field};
 
@@ -223,27 +223,40 @@ impl<R: RunEndIndexType> RunArray<R> {
 impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
     // The method assumes the caller already validated the data using 
`ArrayData::validate_data()`
     fn from(data: ArrayData) -> Self {
-        match data.data_type() {
+        let (data_type, len, _nulls, offset, _buffers, child_data) = 
data.into_parts();
+
+        match &data_type {
             DataType::RunEndEncoded(_, _) => {}
             _ => {
                 panic!(
-                    "Invalid data type for RunArray. The data type should be 
DataType::RunEndEncoded"
+                    "Invalid data type {data_type:?} for RunArray. Should be 
DataType::RunEndEncoded"
                 );
             }
         }
 
-        // Safety
-        // ArrayData is valid
-        let child = &data.child_data()[0];
-        assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends 
type");
-        let run_ends = unsafe {
-            let scalar = child.buffers()[0].clone().into();
-            RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
-        };
-
-        let values = make_array(data.child_data()[1].clone());
+        let [run_end_child, values_child]: [ArrayData; 2] = child_data
+            .try_into()
+            .expect("RunArray data should have exactly two child arrays");
+
+        // deconstruct the run ends child array
+        let (
+            run_end_data_type,
+            _run_end_len,
+            _run_end_nulls,
+            _run_end_offset,
+            run_end_buffers,
+            _run_end_child_data,
+        ) = run_end_child.into_parts();
+        assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
+        let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
+            .try_into()
+            .expect("Run ends should have exactly one buffer");
+        let scalar = ScalarBuffer::from(run_end_buffer);
+        let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, 
len) };
+
+        let values = make_array(values_child);
         Self {
-            data_type: data.data_type().clone(),
+            data_type,
             run_ends,
             values,
         }

Reply via email to