jorgecarleitao commented on a change in pull request #8688:
URL: https://github.com/apache/arrow/pull/8688#discussion_r525066482



##########
File path: rust/datafusion/src/physical_plan/expressions.rs
##########
@@ -1385,6 +1385,80 @@ pub fn binary(
     Ok(Arc::new(BinaryExpr::new(l, op, r)))
 }
 
+/// Invoke a compute kernel on a primitive array and a Boolean Array
+macro_rules! compute_bool_array_op {
+    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
+        let ll = $LEFT
+            .as_any()
+            .downcast_ref::<$DT>()
+            .expect("compute_op failed to downcast array");
+        let rr = $RIGHT
+            .as_any()
+            .downcast_ref::<BooleanArray>()
+            .expect("compute_op failed to downcast array");
+        Ok(Arc::new($OP(&ll, &rr)?))
+    }};
+}
+
+/// Binary op between primitive and boolean arrays
+macro_rules! primitive_bool_array_op {
+    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
+        match $LEFT.data_type() {
+            DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
Int8Array),
+            DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
Int16Array),
+            DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
Int32Array),
+            DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
Int64Array),
+            DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
UInt8Array),
+            DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
UInt16Array),
+            DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
UInt32Array),
+            DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
UInt64Array),
+            DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
Float32Array),
+            DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, 
Float64Array),
+            other => Err(DataFusionError::Internal(format!(
+                "Unsupported data type {:?} for NULLIF/primitive/boolean 
operator",
+                other
+            ))),
+        }
+    }};
+}
+
+///
+/// Implements NULLIF(expr1, expr2)
+/// Args: 0 - left expr is any array
+///       1 - if the left is equal to this expr2, then the result is NULL, 
otherwise left value is passed.
+///
+pub fn nullif_func(args: &[ArrayRef]) -> Result<ArrayRef> {
+    if args.len() != 2 {
+        return Err(DataFusionError::Internal(format!(
+            "{:?} args were supplied but NULLIF takes exactly two args",
+            args.len(),
+        )));
+    }
+
+    // Get args0 == args1 evaluated and produce a boolean array
+    let cond_array = binary_array_op!(args[0], args[1], eq)?;
+
+    // Now, invoke nullif on the result
+    primitive_bool_array_op!(args[0], *cond_array, nullif)
+}
+
+/// Currently supported types by the nullif function.
+/// The order of these types correspond to the order on which coercion applies
+/// This should thus be from least informative to most informative
+pub static SUPPORTED_NULLIF_TYPES: &'static [DataType] = &[
+    DataType::Boolean,
+    DataType::UInt8,
+    DataType::UInt16,
+    DataType::UInt32,
+    DataType::UInt64,
+    DataType::Int8,
+    DataType::Int16,
+    DataType::Int32,
+    DataType::Int64,
+    DataType::Float32,
+    DataType::Float64,
+];

Review comment:
       > Better to make these trait bounds with a good comment about how these 
are selected
   
   AFAIK these cannot be trait bounds because logical and physical planning are 
dynamically typed. 
   
   In this case, this is enumerating all valid types that can be (dynamically) 
passed to the function. If someone tries to call this function with e.g. a 
`ListArray`, the logical planner will error with a description that this 
function does not support that type.
   
   The order here matters because when a function is planned to be called with 
type `X` that is not supported by the function, the physical planner will try 
to (losslessly) cast that type to a valid type for that function, and it does so 
in the order of this array. In general these should be ordered from fastest to 
slowest (in the eyes of the implementation), so that the cast chooses the type 
with the fastest implementation.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to