alamb commented on a change in pull request #9301:
URL: https://github.com/apache/arrow/pull/9301#discussion_r571424877



##########
File path: rust/arrow/src/compute/kernels/take.rs
##########
@@ -254,6 +254,143 @@ impl Default for TakeOptions {
     }
 }
 
+#[inline(always)]
+fn maybe_usize<I: ArrowPrimitiveType>(index: I::Native) -> Result<usize> {
+    index
+        .to_usize()
+        .ok_or_else(|| ArrowError::ComputeError("Cast to usize 
failed".to_string()))
+}
+
+// take implementation when neither values nor indices contain nulls
+fn take_no_nulls<T, I>(
+    values: &[T::Native],
+    indices: &[I::Native],
+) -> Result<(Buffer, Option<Buffer>)>
+where
+    T: ArrowPrimitiveType,
+    I: ArrowNumericType,
+{
+    let values = indices
+        .iter()
+        .map(|index| Result::Ok(values[maybe_usize::<I>(*index)?]));
+    // Soundness: `slice.map` is `TrustedLen`.
+    let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? };
+
+    Ok((buffer, None))
+}
+
+// take implementation when only values contain nulls
+fn take_values_nulls<T, I>(
+    values: &PrimitiveArray<T>,
+    indices: &[I::Native],
+) -> Result<(Buffer, Option<Buffer>)>
+where
+    T: ArrowPrimitiveType,
+    I: ArrowNumericType,
+    I::Native: ToPrimitive,
+{
+    let num_bytes = bit_util::ceil(indices.len(), 8);
+    let mut nulls = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
+    let null_slice = nulls.as_slice_mut();
+    let mut null_count = 0;
+
+    let values_values = values.values();
+
+    let values = indices.iter().enumerate().map(|(i, index)| {
+        let index = maybe_usize::<I>(*index)?;
+        if values.is_null(index) {
+            null_count += 1;
+            bit_util::unset_bit(null_slice, i);
+        }
+        Result::Ok(values_values[index])
+    });
+    // Soundness: `slice.map` is `TrustedLen`.
+    let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? };
+
+    let nulls = if null_count == 0 {
+        // if only non-null values were taken
+        None
+    } else {
+        Some(nulls.into())
+    };
+
+    Ok((buffer, nulls))
+}
+
+// take implementation when only indices contain nulls
+fn take_indices_nulls<T, I>(
+    values: &[T::Native],
+    indices: &PrimitiveArray<I>,
+) -> Result<(Buffer, Option<Buffer>)>
+where
+    T: ArrowPrimitiveType,
+    I: ArrowNumericType,
+    I::Native: ToPrimitive,
+{
+    let values = indices.values().iter().map(|index| {
+        let index = maybe_usize::<I>(*index)?;
+        Result::Ok(match values.get(index) {
+            Some(value) => *value,
+            None => {
+                if indices.is_null(index) {
+                    T::Native::default()
+                } else {
+                    panic!("Out-of-bounds index {}", index)
+                }
+            }
+        })
+    });

Review comment:
       I think I am missing some subtlety here --  given that `indicies` may 
contain nulls, shouldn't the code be checking if the current index value was 
null *before* looking at the value of `values.get(index)`?  Something like the 
following (untested):
   
   
   ```suggestion
       let values = indices.iter().map(|index| {
          Result::Ok(index.map(|index| 
values.get(index)).unwrap_or(T::Native::default()))
       });
   ```
   
   I don't see how if `values.get()` returns None can be used to tell if the 
corresponding index was null.

##########
File path: rust/arrow/benches/take_kernels.rs
##########
@@ -75,7 +75,7 @@ fn create_random_index(size: usize, null_density: f32) -> 
UInt32Array {
     let mut rng = seedable_rng();
     let mut builder = UInt32Builder::new(size);
     for _ in 0..size {
-        if rng.gen::<f32>() < null_density {

Review comment:
       I think this was fixed independently in 
https://github.com/apache/arrow/pull/9415 by @ritchie46  -- I am sorry I hadn't 
gotten to reviewing this PR previously. I think you can simply revert changes 
to this file when you rebase. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to