This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 93ebd3a2a5 Avoid a clone when creating `RunEndArray` from ArrayData
(#9189)
93ebd3a2a5 is described below
commit 93ebd3a2a5a55a654ce198b8d18d1f288ab13cd0
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 21 08:53:11 2026 -0500
Avoid a clone when creating `RunEndArray` from ArrayData (#9189)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/issues/9061
- broken out of https://github.com/apache/arrow-rs/pull/9058
# Rationale for this change
Let's make arrow-rs as fast as we can; the fewer allocations, the
better.
# What changes are included in this PR?
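Apply the pattern from https://github.com/apache/arrow-rs/pull/9114: here,
the `ArrayData` is deconstructed with `into_parts()` so that the child data
and the run-ends buffer are moved into the `RunArray` instead of being cloned.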
# Are these changes tested?
Yes, they are covered by existing tests.
# Are there any user-facing changes?
No
---
arrow-array/src/array/run_array.rs | 41 +++++++++++++++++++++++++-------------
1 file changed, 27 insertions(+), 14 deletions(-)
diff --git a/arrow-array/src/array/run_array.rs
b/arrow-array/src/array/run_array.rs
index 9ca1af943d..517e84f1d7 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -18,7 +18,7 @@
use std::any::Any;
use std::sync::Arc;
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer,
RunEndBuffer};
+use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer,
RunEndBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field};
@@ -223,27 +223,40 @@ impl<R: RunEndIndexType> RunArray<R> {
impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
// The method assumes the caller already validated the data using
`ArrayData::validate_data()`
fn from(data: ArrayData) -> Self {
- match data.data_type() {
+ let (data_type, len, _nulls, offset, _buffers, child_data) =
data.into_parts();
+
+ match &data_type {
DataType::RunEndEncoded(_, _) => {}
_ => {
panic!(
- "Invalid data type for RunArray. The data type should be
DataType::RunEndEncoded"
+ "Invalid data type {data_type:?} for RunArray. Should be
DataType::RunEndEncoded"
);
}
}
- // Safety
- // ArrayData is valid
- let child = &data.child_data()[0];
- assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends
type");
- let run_ends = unsafe {
- let scalar = child.buffers()[0].clone().into();
- RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
- };
-
- let values = make_array(data.child_data()[1].clone());
+ let [run_end_child, values_child]: [ArrayData; 2] = child_data
+ .try_into()
+ .expect("RunArray data should have exactly two child arrays");
+
+ // deconstruct the run ends child array
+ let (
+ run_end_data_type,
+ _run_end_len,
+ _run_end_nulls,
+ _run_end_offset,
+ run_end_buffers,
+ _run_end_child_data,
+ ) = run_end_child.into_parts();
+ assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
+ let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
+ .try_into()
+ .expect("Run ends should have exactly one buffer");
+ let scalar = ScalarBuffer::from(run_end_buffer);
+ let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset,
len) };
+
+ let values = make_array(values_child);
Self {
- data_type: data.data_type().clone(),
+ data_type,
run_ends,
values,
}