This is an automated email from the ASF dual-hosted git repository. dheres pushed a commit to branch support-is-distinct-from-for-binaryarray in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
commit 44abd6a0d590167335a07956c328968f9463af07 Author: Daniël Heres <[email protected]> AuthorDate: Fri May 19 17:21:03 2023 +0200 Support is [not] distinct from for binaryarray types --- .../core/tests/sqllogictests/test_files/select.slt | 15 ++++++++++++++ datafusion/physical-expr/src/expressions/binary.rs | 23 ++++++++++++++++++++-- .../src/expressions/binary/kernels_arrow.rs | 22 +++++++++++++++++++++ testing | 2 +- 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/select.slt b/datafusion/core/tests/sqllogictests/test_files/select.slt index 03f96bad95..8ab7cd1b13 100644 --- a/datafusion/core/tests/sqllogictests/test_files/select.slt +++ b/datafusion/core/tests/sqllogictests/test_files/select.slt @@ -298,6 +298,21 @@ select column1 is not distinct from column2 from t; false +# Binary Expression for Binary +# issue: https://github.com/apache/arrow-datafusion/issues/5893 +statement ok +CREATE TABLE t as select arrow_cast('Bar', 'Binary') as column1, 'B%' as column2; + +query B +select column1 is distinct from column2 from t; +---- +true + +query B +select column1 is not distinct from column2 from t; +---- +false + # select all # these two queries should return the same result query R diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 7bdbba88a8..a93c377112 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -71,10 +71,10 @@ use kernels_arrow::{ add_decimal_dyn_scalar, add_dyn_decimal, add_dyn_temporal, add_dyn_temporal_scalar, divide_decimal_dyn_scalar, divide_dyn_opt_decimal, is_distinct_from, is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_f32, - is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8, + is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8, is_distinct_from_binary, is_not_distinct_from, 
is_not_distinct_from_bool, is_not_distinct_from_decimal, is_not_distinct_from_f32, is_not_distinct_from_f64, is_not_distinct_from_null, - is_not_distinct_from_utf8, modulus_decimal_dyn_scalar, modulus_dyn_decimal, + is_not_distinct_from_utf8, is_not_distinct_from_binary, modulus_decimal_dyn_scalar, modulus_dyn_decimal, multiply_decimal_dyn_scalar, multiply_dyn_decimal, subtract_decimal_dyn_scalar, subtract_dyn_decimal, subtract_dyn_temporal, subtract_dyn_temporal_scalar, }; @@ -245,6 +245,22 @@ macro_rules! compute_utf8_op { }}; } +/// Invoke a compute kernel on a pair of binary data arrays +macro_rules! compute_binary_op { + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + let ll = $LEFT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast left side array"); + let rr = $RIGHT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast right side array"); + Ok(Arc::new(paste::expr! {[<$OP _binary>]}(&ll, &rr)?)) + }}; +} + + /// Invoke a compute kernel on a data array and a scalar value macro_rules! compute_utf8_op_scalar { ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident, $OP_TYPE:expr) => {{ @@ -510,7 +526,10 @@ macro_rules! 
binary_array_op { DataType::Float32 => compute_f32_op!($LEFT, $RIGHT, $OP, Float32Array), DataType::Float64 => compute_f64_op!($LEFT, $RIGHT, $OP, Float64Array), DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray), + DataType::Binary => compute_binary_op!($LEFT, $RIGHT, $OP, BinaryArray), + DataType::LargeBinary => compute_binary_op!($LEFT, $RIGHT, $OP, LargeBinaryArray), DataType::LargeUtf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, LargeStringArray), + DataType::Timestamp(TimeUnit::Nanosecond, _) => { compute_op!($LEFT, $RIGHT, $OP, TimestampNanosecondArray) } diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs index 90fca17157..50a9f86c06 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs @@ -210,6 +210,17 @@ pub(crate) fn is_distinct_from_utf8<OffsetSize: OffsetSizeTrait>( .collect()) } +pub(crate) fn is_distinct_from_binary<OffsetSize: OffsetSizeTrait>( + left: &GenericBinaryArray<OffsetSize>, + right: &GenericBinaryArray<OffsetSize>, +) -> Result<BooleanArray> { + Ok(left + .iter() + .zip(right.iter()) + .map(|(x, y)| Some(x != y)) + .collect()) +} + pub(crate) fn is_distinct_from_null( left: &NullArray, _right: &NullArray, @@ -241,6 +252,17 @@ pub(crate) fn is_not_distinct_from_utf8<OffsetSize: OffsetSizeTrait>( .collect()) } +pub(crate) fn is_not_distinct_from_binary<OffsetSize: OffsetSizeTrait>( + left: &GenericBinaryArray<OffsetSize>, + right: &GenericBinaryArray<OffsetSize>, +) -> Result<BooleanArray> { + Ok(left + .iter() + .zip(right.iter()) + .map(|(x, y)| Some(x == y)) + .collect()) +} + pub(crate) fn is_distinct_from_decimal( left: &Decimal128Array, right: &Decimal128Array, diff --git a/testing b/testing index e81d0c6de3..5bab2f264a 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit e81d0c6de35948b3be7984af8e00413b314cde6e +Subproject 
commit 5bab2f264a23f5af68f69ea93d24ef1e8e77fc88
