This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 537541142b Perf: Vectorize check_bounds(2x speedup) (#8966)
537541142b is described below

commit 537541142b3c9a24741308076156c8033f8b9f49
Author: gstvg <[email protected]>
AuthorDate: Wed Dec 10 09:39:39 2025 -0300

    Perf: Vectorize check_bounds(2x speedup) (#8966)
    
    # Which issue does this PR close?
    
    Part of #279,  related to #8879
    
    # Rationale for this change
    
    ```
    take check bounds i32 512
                            time:   [338.84 ns 339.71 ns 340.89 ns]
                            change: [−55.056% −54.843% −54.650%] (p = 0.00 < 
0.05)
                            Performance has improved.
    
    take check bounds i32 1024
                            time:   [548.47 ns 551.23 ns 554.47 ns]
                            change: [−63.999% −63.813% −63.608%] (p = 0.00 < 
0.05)
                            Performance has improved.
    
    take check bounds i32 8192
                            time:   [3.4934 µs 3.5004 µs 3.5116 µs]
                            change: [−68.252% −68.158% −68.050%] (p = 0.00 < 
0.05)
                            Performance has improved.
    ```
    
    # What changes are included in this PR?
    
    Instead of checking index by index, and error-branching individually on
    them, check all indices for the presence of *any* out-of-bounds index,
    and only then, in the slow-path, find the exact out-of-bounds index
    
    Note how LLVM vectorizes the happy path into a very tight loop:
    <img width="634" height="155" alt="image"
    
src="https://github.com/user-attachments/assets/64006323-ea7c-4fce-8c74-acc32fc5e7d4";
    />
    
    https://rust.godbolt.org/z/MhY4cP6WG
    
    
    # Are these changes tested?
    
    Existing tests already cover the changed function
    
    # Are there any user-facing changes?
    
    No
---
 arrow-select/src/take.rs | 49 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index a42e28410c..fb3b559945 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -17,6 +17,7 @@
 
 //! Defines take kernel for [Array]
 
+use std::fmt::Display;
 use std::sync::Arc;
 
 use arrow_array::builder::{BufferBuilder, UInt32Builder};
@@ -164,31 +165,47 @@ pub fn take_arrays(
 fn check_bounds<T: ArrowPrimitiveType>(
     len: usize,
     indices: &PrimitiveArray<T>,
-) -> Result<(), ArrowError> {
+) -> Result<(), ArrowError>
+where
+    T::Native: Display,
+{
+    let len = match T::Native::from_usize(len) {
+        Some(len) => len,
+        None => {
+            if T::DATA_TYPE.is_integer() {
+                // the biggest representable value for T::Native is lower than 
len, e.g: u8::MAX < 512, no need to check bounds
+                return Ok(());
+            } else {
+                return Err(ArrowError::ComputeError("Cast to usize 
failed".to_string()));
+            }
+        }
+    };
+
     if indices.null_count() > 0 {
         indices.iter().flatten().try_for_each(|index| {
-            let ix = index
-                .to_usize()
-                .ok_or_else(|| ArrowError::ComputeError("Cast to usize 
failed".to_string()))?;
-            if ix >= len {
+            if index >= len {
                 return Err(ArrowError::ComputeError(format!(
-                    "Array index out of bounds, cannot get item at index {ix} 
from {len} entries"
+                    "Array index out of bounds, cannot get item at index 
{index} from {len} entries"
                 )));
             }
             Ok(())
         })
     } else {
-        indices.values().iter().try_for_each(|index| {
-            let ix = index
-                .to_usize()
-                .ok_or_else(|| ArrowError::ComputeError("Cast to usize 
failed".to_string()))?;
-            if ix >= len {
-                return Err(ArrowError::ComputeError(format!(
-                    "Array index out of bounds, cannot get item at index {ix} 
from {len} entries"
-                )));
+        let in_bounds = indices.values().iter().fold(true, |in_bounds, &i| {
+            in_bounds & (i >= T::Native::ZERO) & (i < len)
+        });
+
+        if !in_bounds {
+            for &index in indices.values() {
+                if index < T::Native::ZERO || index >= len {
+                    return Err(ArrowError::ComputeError(format!(
+                        "Array index out of bounds, cannot get item at index 
{index} from {len} entries"
+                    )));
+                }
             }
-            Ok(())
-        })
+        }
+
+        Ok(())
     }
 }
 

Reply via email to