jorgecarleitao commented on a change in pull request #8688: URL: https://github.com/apache/arrow/pull/8688#discussion_r525084383
########## File path: rust/datafusion/src/physical_plan/expressions.rs ########## @@ -1385,6 +1385,80 @@ pub fn binary( Ok(Arc::new(BinaryExpr::new(l, op, r))) } +/// Invoke a compute kernel on a primitive array and a Boolean Array +macro_rules! compute_bool_array_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + let ll = $LEFT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + let rr = $RIGHT + .as_any() + .downcast_ref::<BooleanArray>() + .expect("compute_op failed to downcast array"); + Ok(Arc::new($OP(&ll, &rr)?)) + }}; +} + +/// Binary op between primitive and boolean arrays +macro_rules! primitive_bool_array_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ + match $LEFT.data_type() { + DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int8Array), + DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int16Array), + DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int32Array), + DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int64Array), + DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt8Array), + DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt16Array), + DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt32Array), + DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt64Array), + DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float32Array), + DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float64Array), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for NULLIF/primitive/boolean operator", + other + ))), + } + }}; +} + +/// +/// Implements NULLIF(expr1, expr2) +/// Args: 0 - left expr is any array +/// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed. +/// +pub fn nullif_func(args: &[ArrayRef]) -> Result<ArrayRef> { + if args.len() != 2 { + return Err(DataFusionError::Internal(format!( + "{:?} args were supplied but NULLIF takes exactly two args", + args.len(), + ))); + } + + // Get args0 == args1 evaluated and produce a boolean array + let cond_array = binary_array_op!(args[0], args[1], eq)?; + + // Now, invoke nullif on the result + primitive_bool_array_op!(args[0], *cond_array, nullif) +} + +/// Currently supported types by the nullif function. +/// The order of these types correspond to the order on which coercion applies +/// This should thus be from least informative to most informative +pub static SUPPORTED_NULLIF_TYPES: &'static [DataType] = &[ + DataType::Boolean, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::Float32, + DataType::Float64, +]; Review comment: That is interesting. I would be interested in knowing what is the issue with the current implementation and why type algebra definitions should be used instead. Could you first introduce a proposal with the design e.g. on a google docs, before the PR? In DataFusion we have been doing that for larger changes to avoiding committing to an implementation before some general agreement. ########## File path: rust/datafusion/src/physical_plan/expressions.rs ########## @@ -1385,6 +1385,80 @@ pub fn binary( Ok(Arc::new(BinaryExpr::new(l, op, r))) } +/// Invoke a compute kernel on a primitive array and a Boolean Array +macro_rules! compute_bool_array_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + let ll = $LEFT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + let rr = $RIGHT + .as_any() + .downcast_ref::<BooleanArray>() + .expect("compute_op failed to downcast array"); + Ok(Arc::new($OP(&ll, &rr)?)) + }}; +} + +/// Binary op between primitive and boolean arrays +macro_rules! primitive_bool_array_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ + match $LEFT.data_type() { + DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int8Array), + DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int16Array), + DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int32Array), + DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int64Array), + DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt8Array), + DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt16Array), + DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt32Array), + DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt64Array), + DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float32Array), + DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float64Array), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for NULLIF/primitive/boolean operator", + other + ))), + } + }}; +} + +/// +/// Implements NULLIF(expr1, expr2) +/// Args: 0 - left expr is any array +/// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed. +/// +pub fn nullif_func(args: &[ArrayRef]) -> Result<ArrayRef> { + if args.len() != 2 { + return Err(DataFusionError::Internal(format!( + "{:?} args were supplied but NULLIF takes exactly two args", + args.len(), + ))); + } + + // Get args0 == args1 evaluated and produce a boolean array + let cond_array = binary_array_op!(args[0], args[1], eq)?; + + // Now, invoke nullif on the result + primitive_bool_array_op!(args[0], *cond_array, nullif) +} + +/// Currently supported types by the nullif function. +/// The order of these types correspond to the order on which coercion applies +/// This should thus be from least informative to most informative +pub static SUPPORTED_NULLIF_TYPES: &'static [DataType] = &[ + DataType::Boolean, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::Float32, + DataType::Float64, +]; Review comment: That is interesting. I would be interested in knowing what is the issue with the current implementation and why type algebra definitions should be used instead. Could you first introduce a proposal with the design e.g. on a google docs, before the PR? In DataFusion we have been doing that for larger changes to avoid committing to an implementation before some general agreement. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org