This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 96b30527e5 [Parquet] perf: Create Utf8/BinaryViewArray directly rather 
than via `ArrayData` (#9121)
96b30527e5 is described below

commit 96b30527e53ebca73e81b9ffbbd02a99da8fc11f
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Jan 17 11:20:10 2026 -0500

    [Parquet] perf: Create Utf8/BinaryViewArray directly rather than via 
`ArrayData` (#9121)
    
    # Which issue does this PR close?
    - part of https://github.com/apache/arrow-rs/issues/9061
    - part of https://github.com/apache/arrow-rs/issues/9128
    
    
    
    # Rationale for this change
    
    - similarly to https://github.com/apache/arrow-rs/pull/9120
    
    Creating Arrays via ArrayData / `make_array` has overhead (at least 2
    Vec allocations) compared to simply creating the arrays directly.
    
    ViewArrays also have an extra Vec allocation (to hold their buffers).
    
    # What changes are included in this PR?
    
    Update the parquet reader to create ViewArrays directly
    
    # Are these changes tested?
    By CI
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
---
 parquet/src/arrow/buffer/view_buffer.rs | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/parquet/src/arrow/buffer/view_buffer.rs 
b/parquet/src/arrow/buffer/view_buffer.rs
index 2802f97f8f..4ff34bf701 100644
--- a/parquet/src/arrow/buffer/view_buffer.rs
+++ b/parquet/src/arrow/buffer/view_buffer.rs
@@ -16,10 +16,10 @@
 // under the License.
 
 use crate::arrow::record_reader::buffer::ValuesBuffer;
-use arrow_array::{ArrayRef, builder::make_view, make_array};
-use arrow_buffer::Buffer;
-use arrow_data::ArrayDataBuilder;
+use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray, 
builder::make_view};
+use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, ScalarBuffer};
 use arrow_schema::DataType as ArrowType;
+use std::sync::Arc;
 
 /// A buffer of view type byte arrays that can be converted into
 /// `GenericByteViewArray`
@@ -70,26 +70,18 @@ impl ViewBuffer {
     /// Converts this into an [`ArrayRef`] with the provided `data_type` and 
`null_buffer`
     pub fn into_array(self, null_buffer: Option<Buffer>, data_type: 
&ArrowType) -> ArrayRef {
         let len = self.views.len();
-        let views = Buffer::from_vec(self.views);
+        let views = ScalarBuffer::from(self.views);
+        let nulls = null_buffer
+            .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len)))
+            .filter(|n| n.null_count() != 0);
         match data_type {
             ArrowType::Utf8View => {
-                let builder = ArrayDataBuilder::new(ArrowType::Utf8View)
-                    .len(len)
-                    .add_buffer(views)
-                    .add_buffers(self.buffers)
-                    .null_bit_buffer(null_buffer);
-                // We have checked that the data is utf8 when building the 
buffer, so it is safe
-                let array = unsafe { builder.build_unchecked() };
-                make_array(array)
+                // Safety: views were created correctly, and checked that the 
data is utf8 when building the buffer
+                unsafe { Arc::new(StringViewArray::new_unchecked(views, 
self.buffers, nulls)) }
             }
             ArrowType::BinaryView => {
-                let builder = ArrayDataBuilder::new(ArrowType::BinaryView)
-                    .len(len)
-                    .add_buffer(views)
-                    .add_buffers(self.buffers)
-                    .null_bit_buffer(null_buffer);
-                let array = unsafe { builder.build_unchecked() };
-                make_array(array)
+                // Safety: views were created correctly
+                unsafe { Arc::new(BinaryViewArray::new_unchecked(views, 
self.buffers, nulls)) }
             }
             _ => panic!("Unsupported data type: {data_type}"),
         }

Reply via email to