abreis commented on a change in pull request #9454:
URL: https://github.com/apache/arrow/pull/9454#discussion_r579719800



##########
File path: rust/arrow/src/compute/kernels/arithmetic.rs
##########
@@ -256,6 +256,49 @@ where
     Ok(PrimitiveArray::<T>::from(Arc::new(data)))
 }
 
+/// Scalar-divisor version of `math_divide`.
+fn math_divide_scalar<T>(
+    array: &PrimitiveArray<T>,
+    divisor: T::Native,
+) -> Result<PrimitiveArray<T>>
+where
+    T: ArrowNumericType,
+    T::Native: Div<Output = T::Native> + Zero,
+{
+    if divisor.is_zero() {
+        return Err(ArrowError::DivideByZero);
+    }
+
+    let null_bit_buffer = array.data_ref().null_buffer().cloned();
+
+    let buffer = if let Some(b) = &null_bit_buffer {
+        let values = array.values().iter().enumerate().map(|(i, value)| {
+            let is_valid = unsafe { bit_util::get_bit_raw(b.as_ptr(), i) };
+            if is_valid {
+                *value / divisor
+            } else {
+                T::default_value()
+            }
+        });
+        unsafe { Buffer::from_trusted_len_iter(values) }
+    } else {
+        // no value is null
+        let values = array.values().iter().map(|value| *value / divisor);
+        unsafe { Buffer::from_trusted_len_iter(values) }
+    };

Review comment:
       @jorgecarleitao thanks for pointing this out. I copied this code over 
assuming that it was just standard practice to zero out the value under a null 
bit. 
   
   I now see that the reason the mask is used in the array/array operation is 
to jump over nulled divisors that might be zero underneath, triggering a 
floating-point exception.
   
   This does make for interesting benchmarks:
   ```rust
   # features = []
   divide 512              time:   [2.2900 us 2.2973 us 2.3044 us]
   divide_scalar 512       time:   [451.85 ns 457.37 ns 463.95 ns]
   
   divide_nulls_512        time:   [2.2143 us 2.2255 us 2.2372 us]
   divide_scalar_nulls_512 time:   [456.75 ns 460.49 ns 464.20 ns]
   
   
   # features = ["simd"]
   divide 512              time:   [1.0485 us 1.0568 us 1.0657 us]
   divide_scalar 512       time:   [473.13 ns 477.13 ns 481.07 ns]
   
   divide_nulls_512        time:   [981.42 ns 988.21 ns 996.23 ns]
   divide_scalar_nulls_512 time:   [484.54 ns 488.57 ns 492.06 ns]
   ```
   
   Same performance between `non-simd` and `simd` versions of `divide_scalar`, 
which I suppose is due to auto-vectorization now that the null checks are out 
of the way.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to