This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new d4b293d33c Avoid a clone when creating `ListViewArray` from ArrayData
(#9193)
d4b293d33c is described below
commit d4b293d33c790aaae1dc065fd5627a9c73a7806e
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Jan 18 07:57:22 2026 -0500
Avoid a clone when creating `ListViewArray` from ArrayData (#9193)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/issues/9061
- broken out of https://github.com/apache/arrow-rs/pull/9058
# Rationale for this change
Let's make arrow-rs as fast as we can; the fewer allocations, the
better.
# What changes are included in this PR?
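Apply the pattern from https://github.com/apache/arrow-rs/pull/9114:
destructure the `ArrayData` with `into_parts()` and move the buffers and
child data out of it instead of cloning them.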
# Are these changes tested?
Existing tests
# Are there any user-facing changes?
No
---
arrow-array/src/array/list_view_array.rs | 43 ++++++++++++++++----------------
1 file changed, 22 insertions(+), 21 deletions(-)
diff --git a/arrow-array/src/array/list_view_array.rs
b/arrow-array/src/array/list_view_array.rs
index 52c88d581d..4c50a32de5 100644
--- a/arrow-array/src/array/list_view_array.rs
+++ b/arrow-array/src/array/list_view_array.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow_buffer::{NullBuffer, ScalarBuffer};
+use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, FieldRef};
use std::any::Any;
@@ -576,23 +576,25 @@ impl<OffsetSize: OffsetSizeTrait>
From<FixedSizeListArray> for GenericListViewAr
impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
- if data.buffers().len() != 2 {
- return Err(ArrowError::InvalidArgumentError(format!(
- "ListViewArray data should contain two buffers (value offsets
& value sizes), had {}",
- data.buffers().len()
- )));
- }
+ let (data_type, len, nulls, offset, buffers, child_data) =
data.into_parts();
- if data.child_data().len() != 1 {
- return Err(ArrowError::InvalidArgumentError(format!(
- "ListViewArray should contain a single child array (values
array), had {}",
- data.child_data().len()
- )));
- }
+ // ArrayData is valid, and verified type above
+ // buffer[0] is offsets, buffer[1] is sizes
+ let num_buffers = buffers.len();
+ let [offsets_buffer, sizes_buffer] : [Buffer; 2] =
buffers.try_into().map_err(|_| {
+ ArrowError::InvalidArgumentError(format!(
+ "ListViewArray data should contain two buffers (value offsets
& value sizes), had {num_buffers}",
+ ))
+ })?;
- let values = data.child_data()[0].clone();
+ let num_child = child_data.len();
+ let [values]: [ArrayData; 1] = child_data.try_into().map_err(|_| {
+ ArrowError::InvalidArgumentError(format!(
+ "ListViewArray should contain a single child array (values
array), had {num_child}",
+ ))
+ })?;
- if let Some(child_data_type) = Self::get_type(data.data_type()) {
+ if let Some(child_data_type) = Self::get_type(&data_type) {
if values.data_type() != child_data_type {
return Err(ArrowError::InvalidArgumentError(format!(
"{}ListViewArray's child datatype {:?} does not \
@@ -607,18 +609,17 @@ impl<OffsetSize: OffsetSizeTrait>
GenericListViewArray<OffsetSize> {
"{}ListViewArray's datatype must be {}ListViewArray(). It is
{:?}",
OffsetSize::PREFIX,
OffsetSize::PREFIX,
- data.data_type()
+ data_type
)));
}
let values = make_array(values);
- // ArrayData is valid, and verified type above
- let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(),
data.offset(), data.len());
- let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(),
data.offset(), data.len());
+ let value_offsets = ScalarBuffer::new(offsets_buffer, offset, len);
+ let value_sizes = ScalarBuffer::new(sizes_buffer, offset, len);
Ok(Self {
- data_type: data.data_type().clone(),
- nulls: data.nulls().cloned(),
+ data_type,
+ nulls,
values,
value_offsets,
value_sizes,