This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 556942c8f9 Avoid a clone when creating `ListArray` from ArrayData (#9194)
556942c8f9 is described below

commit 556942c8f923ac17034ab68b8659e6c69e5d5db0
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Jan 18 07:56:27 2026 -0500

    Avoid a clone when creating `ListArray` from ArrayData (#9194)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/9061
    - Broken out of https://github.com/apache/arrow-rs/pull/9058
    
    # Rationale for this change
    
    Let's make arrow-rs as fast as we can; the fewer allocations, the
    better.
    
    # What changes are included in this PR?
    
    Apply the pattern from https://github.com/apache/arrow-rs/pull/9114
    (sketched below).
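
    The pattern, roughly: take the `ArrayData` by value and move its pieces
    out rather than borrowing and cloning them. A minimal sketch, assuming
    the `into_parts` tuple layout shown in the diff below; the helper name
    `take_child` is hypothetical:

    ```rust
    use arrow_data::ArrayData;
    use arrow_schema::ArrowError;

    // Hypothetical helper showing the pattern: take the ArrayData by value
    // so the single child array can be moved out instead of cloned.
    fn take_child(data: ArrayData) -> Result<ArrayData, ArrowError> {
        // into_parts() yields (data_type, len, nulls, offset, buffers,
        // child_data); only child_data is needed here.
        let (_, _, _, _, _, mut child_data) = data.into_parts();
        if child_data.len() != 1 {
            return Err(ArrowError::InvalidArgumentError(format!(
                "expected a single child array, had {}",
                child_data.len()
            )));
        }
        // pop() moves the child out of the Vec; no buffer data is copied.
        Ok(child_data.pop().expect("checked above"))
    }
    ```

    In the diff below, the same move replaces
    `data.child_data()[0].clone()` with `child_data.pop()`.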
    
    # Are these changes tested?
    
    Existing tests
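
    For reference, a hedged sketch of the kind of round trip those tests
    exercise; the names used are standard arrow-array APIs, but the example
    itself is illustrative only:

    ```rust
    use arrow_array::types::Int32Type;
    use arrow_array::{Array, ListArray};

    fn main() {
        // Build a small ListArray: [[1, 2], NULL, [3]]
        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(1), Some(2)]),
            None,
            Some(vec![Some(3)]),
        ]);
        // Round-trip through ArrayData; From<ArrayData> calls the
        // constructor touched by this patch.
        let data = list.to_data();
        let roundtrip = ListArray::from(data);
        assert_eq!(roundtrip.len(), 3);
        assert!(roundtrip.is_null(1));
    }
    ```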
    
    # Are there any user-facing changes?
    
    No
---
 arrow-array/src/array/list_array.rs | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index 225be14ae3..ae1b77895d 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::array::{get_offsets, make_array, print_long_array};
+use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
 use crate::builder::{GenericListBuilder, PrimitiveBuilder};
 use crate::{
     Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
@@ -479,23 +479,26 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<
 
 impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
     fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
-        if data.buffers().len() != 1 {
+        let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
+
+        if buffers.len() != 1 {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "ListArray data should contain a single buffer only (value 
offsets), had {}",
-                data.buffers().len()
+                buffers.len()
             )));
         }
+        let buffer = buffers.pop().expect("checked above");
 
-        if data.child_data().len() != 1 {
+        if child_data.len() != 1 {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "ListArray should contain a single child array (values array), 
had {}",
-                data.child_data().len()
+                child_data.len()
             )));
         }
 
-        let values = data.child_data()[0].clone();
+        let values = child_data.pop().expect("checked above");
 
-        if let Some(child_data_type) = Self::get_type(data.data_type()) {
+        if let Some(child_data_type) = Self::get_type(&data_type) {
             if values.data_type() != child_data_type {
                 return Err(ArrowError::InvalidArgumentError(format!(
                     "[Large]ListArray's child datatype {:?} does not \
@@ -506,19 +509,18 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
             }
         } else {
             return Err(ArrowError::InvalidArgumentError(format!(
-                "[Large]ListArray's datatype must be [Large]ListArray(). It is 
{:?}",
-                data.data_type()
+                "[Large]ListArray's datatype must be [Large]ListArray(). It is 
{data_type:?}",
             )));
         }
 
         let values = make_array(values);
         // SAFETY:
         // ArrayData is valid, and verified type above
-        let value_offsets = unsafe { get_offsets(&data) };
+        let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
 
         Ok(Self {
-            data_type: data.data_type().clone(),
-            nulls: data.nulls().cloned(),
+            data_type,
+            nulls,
             values,
             value_offsets,
         })
