This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 556942c8f9 Avoid a clone when creating `ListArray` from ArrayData
(#9194)
556942c8f9 is described below
commit 556942c8f923ac17034ab68b8659e6c69e5d5db0
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Jan 18 07:56:27 2026 -0500
Avoid a clone when creating `ListArray` from ArrayData (#9194)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/issues/9061
- broken out of https://github.com/apache/arrow-rs/pull/9058
# Rationale for this change
Let's make arrow-rs as fast as we can; the fewer allocations, the better.
# What changes are included in this PR?
Apply pattern from https://github.com/apache/arrow-rs/pull/9114
# Are these changes tested?
Existing tests
# Are there any user-facing changes?
No
---
arrow-array/src/array/list_array.rs | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index 225be14ae3..ae1b77895d 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::array::{get_offsets, make_array, print_long_array};
+use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
use crate::{
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
@@ -479,23 +479,26 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
- if data.buffers().len() != 1 {
+ let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
+
+ if buffers.len() != 1 {
return Err(ArrowError::InvalidArgumentError(format!(
"ListArray data should contain a single buffer only (value
offsets), had {}",
- data.buffers().len()
+ buffers.len()
)));
}
+ let buffer = buffers.pop().expect("checked above");
- if data.child_data().len() != 1 {
+ if child_data.len() != 1 {
return Err(ArrowError::InvalidArgumentError(format!(
"ListArray should contain a single child array (values array),
had {}",
- data.child_data().len()
+ child_data.len()
)));
}
- let values = data.child_data()[0].clone();
+ let values = child_data.pop().expect("checked above");
- if let Some(child_data_type) = Self::get_type(data.data_type()) {
+ if let Some(child_data_type) = Self::get_type(&data_type) {
if values.data_type() != child_data_type {
return Err(ArrowError::InvalidArgumentError(format!(
"[Large]ListArray's child datatype {:?} does not \
@@ -506,19 +509,18 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
}
} else {
return Err(ArrowError::InvalidArgumentError(format!(
- "[Large]ListArray's datatype must be [Large]ListArray(). It is
{:?}",
- data.data_type()
+ "[Large]ListArray's datatype must be [Large]ListArray(). It is
{data_type:?}",
)));
}
let values = make_array(values);
// SAFETY:
// ArrayData is valid, and verified type above
- let value_offsets = unsafe { get_offsets(&data) };
+ let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
Ok(Self {
- data_type: data.data_type().clone(),
- nulls: data.nulls().cloned(),
+ data_type,
+ nulls,
values,
value_offsets,
})