gstvg commented on code in PR #8966:
URL: https://github.com/apache/arrow-rs/pull/8966#discussion_r2597111130


##########
arrow-select/src/take.rs:
##########
@@ -164,31 +165,47 @@ pub fn take_arrays(
 fn check_bounds<T: ArrowPrimitiveType>(
     len: usize,
     indices: &PrimitiveArray<T>,
-) -> Result<(), ArrowError> {
+) -> Result<(), ArrowError>
+where
+    T::Native: Display,
+{
+    let len = match T::Native::from_usize(len) {
+        Some(len) => len,
+        None => {
+            if T::DATA_TYPE.is_integer() {
+                // the biggest representable value for T::Native is lower than 
len, e.g: u8::MAX < 512, no need to check bounds
+                return Ok(());
+            } else {
+                return Err(ArrowError::ComputeError("Cast to usize 
failed".to_string()));
+            }
+        }
+    };
+
     if indices.null_count() > 0 {
         indices.iter().flatten().try_for_each(|index| {
-            let ix = index
-                .to_usize()
-                .ok_or_else(|| ArrowError::ComputeError("Cast to usize 
failed".to_string()))?;
-            if ix >= len {
+            if index >= len {
                 return Err(ArrowError::ComputeError(format!(
-                    "Array index out of bounds, cannot get item at index {ix} 
from {len} entries"
+                    "Array index out of bounds, cannot get item at index 
{index} from {len} entries"
                 )));
             }
             Ok(())
         })
     } else {
-        indices.values().iter().try_for_each(|index| {
-            let ix = index
-                .to_usize()
-                .ok_or_else(|| ArrowError::ComputeError("Cast to usize 
failed".to_string()))?;
-            if ix >= len {
-                return Err(ArrowError::ComputeError(format!(
-                    "Array index out of bounds, cannot get item at index {ix} 
from {len} entries"
-                )));
+        let in_bounds = indices.values().iter().fold(true, |in_bounds, &i| {
+            in_bounds & (i >= T::Native::ZERO) & (i < len)

Review Comment:
   Even if today check_bounds is only called with unsigned indices, 
ArrowPrimitiveType includes signed types, which can be used here in the future, 
so we have to check this too. Happily, LLVM will optimize this out. If this 
function get's restricted to a hypothetical ArrowUnsignedInteger or 
ArrowIndexType, this can be removed 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to