jorgecarleitao commented on a change in pull request #8688: URL: https://github.com/apache/arrow/pull/8688#discussion_r525066482
########## File path: rust/datafusion/src/physical_plan/expressions.rs ########## @@ -1385,6 +1385,80 @@ pub fn binary( Ok(Arc::new(BinaryExpr::new(l, op, r))) } +/// Invoke a compute kernel on a primitive array and a Boolean Array +macro_rules! compute_bool_array_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + let ll = $LEFT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + let rr = $RIGHT + .as_any() + .downcast_ref::<BooleanArray>() + .expect("compute_op failed to downcast array"); + Ok(Arc::new($OP(&ll, &rr)?)) + }}; +} + +/// Binary op between primitive and boolean arrays +macro_rules! primitive_bool_array_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ + match $LEFT.data_type() { + DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int8Array), + DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int16Array), + DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int32Array), + DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int64Array), + DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt8Array), + DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt16Array), + DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt32Array), + DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt64Array), + DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float32Array), + DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float64Array), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for NULLIF/primitive/boolean operator", + other + ))), + } + }}; +} + +/// +/// Implements NULLIF(expr1, expr2) +/// Args: 0 - left expr is any array +/// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed. 
+/// +pub fn nullif_func(args: &[ArrayRef]) -> Result<ArrayRef> { + if args.len() != 2 { + return Err(DataFusionError::Internal(format!( + "{:?} args were supplied but NULLIF takes exactly two args", + args.len(), + ))); + } + + // Get args0 == args1 evaluated and produce a boolean array + let cond_array = binary_array_op!(args[0], args[1], eq)?; + + // Now, invoke nullif on the result + primitive_bool_array_op!(args[0], *cond_array, nullif) +} + +/// Currently supported types by the nullif function. +/// The order of these types correspond to the order on which coercion applies +/// This should thus be from least informative to most informative +pub static SUPPORTED_NULLIF_TYPES: &'static [DataType] = &[ + DataType::Boolean, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::Float32, + DataType::Float64, +]; Review comment: > Better to make these trait bounds with a good comment about how these are selected AFAIK these cannot be trait bounds because logical and physical planning are dynamically typed. In this case, this is enumerating all valid types that can be (dynamically) passed to the function. If someone tries to call this function with e.g. a `ListArray`, the logical planner will error with a description that this function does not support that type. The order here matters because when a function is planned to be called with type `X` that is not supported by the function, the physical planner will try to (losslessly) cast that type to a valid type for that function, and it does so in the order of this array. In general these should be ordered from fastest to slowest (in the eyes of the implementation), so that the cast chooses the type with the fastest implementation. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org