This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6c54276454 Improve `PrimitiveArray::from_iter` perf (#9294)
6c54276454 is described below

commit 6c5427645471907908ba43f5816b203040b28472
Author: Daniël Heres <[email protected]>
AuthorDate: Thu Jan 29 14:22:30 2026 +0100

    Improve `PrimitiveArray::from_iter` perf (#9294)
    
    # Which issue does this PR close?
    
    - Closes #NNN.
    
    # Rationale for this change
    Speeds up `from_iter`.
    
    This speeds up array creation for statistics extraction when all values
    are present (the common case):
    
    ```
    Extract row group statistics for Int64/extract_statistics/Int64
                            time:   [392.26 ns 394.25 ns 397.06 ns]
                            change: [−44.865% −44.674% −44.456%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 5 outliers among 100 measurements (5.00%)
      3 (3.00%) high mild
      2 (2.00%) high severe
    
    Extract data page statistics for Int64/extract_statistics/Int64
                            time:   [8.8307 µs 8.8472 µs 8.8641 µs]
                            change: [−22.701% −22.399% −22.099%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 1 outliers among 100 measurements (1.00%)
      1 (1.00%) high mild
    
    Extract row group statistics for UInt64/extract_statistics/UInt64
                            time:   [391.21 ns 393.46 ns 396.43 ns]
                            change: [−44.227% −43.085% −41.444%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 9 outliers among 100 measurements (9.00%)
      4 (4.00%) high mild
      5 (5.00%) high severe
    
    Extract data page statistics for UInt64/extract_statistics/UInt64
                            time:   [7.9090 µs 8.0075 µs 8.1958 µs]
                            change: [−48.323% −46.584% −44.593%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 5 outliers among 100 measurements (5.00%)
      1 (1.00%) high mild
      4 (4.00%) high severe
    
    Extract row group statistics for F64/extract_statistics/F64
                            time:   [395.12 ns 395.86 ns 396.64 ns]
                            change: [−58.982% −57.663% −56.236%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 5 outliers among 100 measurements (5.00%)
      5 (5.00%) high mild
    
    Extract data page statistics for F64/extract_statistics/F64
                            time:   [8.9134 µs 8.9925 µs 9.1393 µs]
                            change: [−29.078% −25.866% −22.853%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 9 outliers among 100 measurements (9.00%)
      5 (5.00%) high mild
      4 (4.00%) high severe
    ```
    
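    # What changes are included in this PR?
    
    `PrimitiveArray::from_iter` now tracks validity with `NullBufferBuilder`
    instead of `BooleanBufferBuilder`, and constructs the array with
    `PrimitiveArray::new` instead of the `unsafe` `ArrayData::new_unchecked`
    call.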
---
 arrow-array/src/array/primitive_array.rs | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 573677dc60..186bfca2d4 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -25,7 +25,7 @@ use crate::timezone::Tz;
 use crate::trusted_len::trusted_len_unzip;
 use crate::types::*;
 use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
-use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, ScalarBuffer, i256};
+use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, NullBufferBuilder, 
ScalarBuffer, i256};
 use arrow_data::bit_iterator::try_for_each_valid_idx;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
@@ -1449,15 +1449,15 @@ impl<T: ArrowPrimitiveType, Ptr: 
Into<NativeAdapter<T>>> FromIterator<Ptr> for P
         let iter = iter.into_iter();
         let (lower, _) = iter.size_hint();
 
-        let mut null_builder = BooleanBufferBuilder::new(lower);
+        let mut null_builder = NullBufferBuilder::new(lower);
 
         let buffer: Buffer = iter
             .map(|item| {
                 if let Some(a) = item.into().native {
-                    null_builder.append(true);
+                    null_builder.append_non_null();
                     a
                 } else {
-                    null_builder.append(false);
+                    null_builder.append_null();
                     // this ensures that null items on the buffer are not 
arbitrary.
                     // This is important because fallible operations can use 
null values (e.g. a vectorized "add")
                     // which may panic (e.g. overflow if the number on the 
slots happen to be very large).
@@ -1466,20 +1466,8 @@ impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> 
FromIterator<Ptr> for P
             })
             .collect();
 
-        let len = null_builder.len();
-
-        let data = unsafe {
-            ArrayData::new_unchecked(
-                T::DATA_TYPE,
-                len,
-                None,
-                Some(null_builder.into()),
-                0,
-                vec![buffer],
-                vec![],
-            )
-        };
-        PrimitiveArray::from(data)
+        let maybe_nulls = null_builder.finish();
+        PrimitiveArray::new(ScalarBuffer::from(buffer), maybe_nulls)
     }
 }
 

Reply via email to