tyrelr commented on a change in pull request #9301: URL: https://github.com/apache/arrow/pull/9301#discussion_r563502344
########## File path: rust/arrow/src/compute/kernels/take.rs ########## @@ -254,6 +254,137 @@ impl Default for TakeOptions { } } +#[inline(always)] +fn maybe_usize<I: ArrowPrimitiveType>(index: I::Native) -> Result<usize> { + index + .to_usize() + .ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string())) +} + +// take implementation when neither values nor indices contain nulls +fn take_no_nulls<T, I>( + values: &[T::Native], + indices: &[I::Native], +) -> Result<(Buffer, Option<Buffer>)> +where + T: ArrowPrimitiveType, + I: ArrowNumericType, +{ + let values = indices + .iter() + .map(|index| Result::Ok(values[maybe_usize::<I>(*index)?])); + // Soundness: `slice.map` is `TrustedLen`. + let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? }; + + Ok((buffer, None)) +} + +// take implementation when only values contain nulls +fn take_values_nulls<T, I>( + values: &PrimitiveArray<T>, + indices: &[I::Native], +) -> Result<(Buffer, Option<Buffer>)> +where + T: ArrowPrimitiveType, + I: ArrowNumericType, + I::Native: ToPrimitive, +{ + let num_bytes = bit_util::ceil(indices.len(), 8); + let mut nulls = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); + let null_slice = nulls.as_slice_mut(); + let mut null_count = 0; + + let values_values = values.values(); + + let values = indices.iter().enumerate().map(|(i, index)| { + let index = maybe_usize::<I>(*index)?; + if values.is_null(index) { + null_count += 1; + bit_util::unset_bit(null_slice, i); + } + Result::Ok(values_values[index]) + }); + // Soundness: `slice.map` is `TrustedLen`. + let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? }; + + let nulls = if null_count == 0 { + // if only non-null values were taken + None + } else { + Some(nulls.into()) + }; + + Ok((buffer, nulls)) +} + +// take implementation when only indices contain nulls +fn take_indices_nulls<T, I>( + values: &[T::Native], + indices: &PrimitiveArray<I>, +) -> Result<(Buffer, Option<Buffer>)> +where + T: ArrowPrimitiveType, + I: ArrowNumericType, + I::Native: ToPrimitive, +{ + let values = indices.iter().map(|index| { + match index { + // valid index => try to take using it + Some(index) => Result::Ok(values[maybe_usize::<I>(index)?]), + // null index => do not + None => Ok(T::Native::default()), + } + }); + // Soundness: `slice.map` is `TrustedLen`. + let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? }; Review comment: Ah, I had missed the question mark so thought the Result wrapping was unnecessary. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org