tustvold commented on code in PR #2666:
URL: https://github.com/apache/arrow-rs/pull/2666#discussion_r964067774
##########
arrow/src/compute/kernels/arity.rs:
##########
@@ -71,16 +94,54 @@ where
O: ArrowPrimitiveType,
F: Fn(I::Native) -> O::Native,
{
+ let data = array.data();
+ let len = data.len();
+ let null_count = data.null_count();
+
+ let null_buffer = data
+ .null_buffer()
+ .map(|b| b.bit_slice(data.offset(), data.len()));
+
let values = array.values().iter().map(|v| op(*v));
// JUSTIFICATION
// Benefit
// ~60% speedup
// Soundness
// `values` is an iterator with a known size because arrays are sized.
let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
+ unsafe { build_primitive_array(len, buffer, null_count, null_buffer) }
+}
+
+/// Applies a unary and fallible function to all valid values in a primitive
array
+///
+/// This is unlike [`unary`] which will apply an infallible function to all
rows regardless
+/// of validity.
+///
+/// Note: LLVM is currently unable to effectively vectorize fallible operations
+pub fn try_unary<I, F, O>(array: &PrimitiveArray<I>, op: F) ->
Result<PrimitiveArray<O>>
+where
+ I: ArrowPrimitiveType,
+ O: ArrowPrimitiveType,
+ F: Fn(I::Native) -> Result<O::Native>,
+{
+ let len = array.len();
+ let null_count = array.null_count();
+
+ let mut buffer = BufferBuilder::<O::Native>::new(len);
+ buffer.append_n_zeroed(array.len());
Review Comment:
It is undefined behavior (UB) if we don't initialize all values in the buffer,
even the null slots. We must therefore zero out the nulls. In the past I have
found it is faster to zero-initialize everything and then overwrite the valid
indexes than to interleave appending nulls and values.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]