This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch support-is-distinct-from-for-binaryarray
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 44abd6a0d590167335a07956c328968f9463af07
Author: Daniël Heres <[email protected]>
AuthorDate: Fri May 19 17:21:03 2023 +0200

    Support is [not] distinct from for binaryarray types
---
 .../core/tests/sqllogictests/test_files/select.slt | 15 ++++++++++++++
 datafusion/physical-expr/src/expressions/binary.rs | 23 ++++++++++++++++++++--
 .../src/expressions/binary/kernels_arrow.rs        | 22 +++++++++++++++++++++
 testing                                            |  2 +-
 4 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/select.slt b/datafusion/core/tests/sqllogictests/test_files/select.slt
index 03f96bad95..8ab7cd1b13 100644
--- a/datafusion/core/tests/sqllogictests/test_files/select.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/select.slt
@@ -298,6 +298,21 @@ select column1 is not distinct from column2 from t;
 false
 
 
+# Binary Expression for Binary
+# issue: https://github.com/apache/arrow-datafusion/issues/5893
+statement ok
+CREATE TABLE t as select arrow_cast('Bar', 'Binary') as column1, 'B%' as column2;
+
+query B
+select column1 is distinct from column2 from t;
+----
+true
+
+query B
+select column1 is not distinct from column2 from t;
+----
+false
+
 # select all
 # these two queries should return the same result
 query R
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index 7bdbba88a8..a93c377112 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -71,10 +71,10 @@ use kernels_arrow::{
     add_decimal_dyn_scalar, add_dyn_decimal, add_dyn_temporal, add_dyn_temporal_scalar,
     divide_decimal_dyn_scalar, divide_dyn_opt_decimal, is_distinct_from,
     is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_f32,
-    is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8,
+    is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8, is_distinct_from_binary,
     is_not_distinct_from, is_not_distinct_from_bool, is_not_distinct_from_decimal,
     is_not_distinct_from_f32, is_not_distinct_from_f64, is_not_distinct_from_null,
-    is_not_distinct_from_utf8, modulus_decimal_dyn_scalar, modulus_dyn_decimal,
+    is_not_distinct_from_utf8, is_not_distinct_from_binary, modulus_decimal_dyn_scalar, modulus_dyn_decimal,
     multiply_decimal_dyn_scalar, multiply_dyn_decimal, subtract_decimal_dyn_scalar,
     subtract_dyn_decimal, subtract_dyn_temporal, subtract_dyn_temporal_scalar,
 };
@@ -245,6 +245,22 @@ macro_rules! compute_utf8_op {
     }};
 }
 
+/// Invoke a compute kernel on a pair of binary data arrays
+macro_rules! compute_binary_op {
+    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
+        let ll = $LEFT
+            .as_any()
+            .downcast_ref::<$DT>()
+            .expect("compute_op failed to downcast left side array");
+        let rr = $RIGHT
+            .as_any()
+            .downcast_ref::<$DT>()
+            .expect("compute_op failed to downcast right side array");
+        Ok(Arc::new(paste::expr! {[<$OP _binary>]}(&ll, &rr)?))
+    }};
+}
+
+
 /// Invoke a compute kernel on a data array and a scalar value
 macro_rules! compute_utf8_op_scalar {
     ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident, $OP_TYPE:expr) => {{
@@ -510,7 +526,10 @@ macro_rules! binary_array_op {
             DataType::Float32 => compute_f32_op!($LEFT, $RIGHT, $OP, Float32Array),
             DataType::Float64 => compute_f64_op!($LEFT, $RIGHT, $OP, Float64Array),
             DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray),
+            DataType::Binary => compute_binary_op!($LEFT, $RIGHT, $OP, BinaryArray),
+            DataType::LargeBinary => compute_binary_op!($LEFT, $RIGHT, $OP, LargeBinaryArray),
             DataType::LargeUtf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, LargeStringArray),
+
             DataType::Timestamp(TimeUnit::Nanosecond, _) => {
                 compute_op!($LEFT, $RIGHT, $OP, TimestampNanosecondArray)
             }
diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs
index 90fca17157..50a9f86c06 100644
--- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs
+++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs
@@ -210,6 +210,17 @@ pub(crate) fn is_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
         .collect())
 }
 
+pub(crate) fn is_distinct_from_binary<OffsetSize: OffsetSizeTrait>(
+    left: &GenericBinaryArray<OffsetSize>,
+    right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+    Ok(left
+        .iter()
+        .zip(right.iter())
+        .map(|(x, y)| Some(x != y))
+        .collect())
+}
+
 pub(crate) fn is_distinct_from_null(
     left: &NullArray,
     _right: &NullArray,
@@ -241,6 +252,17 @@ pub(crate) fn is_not_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
         .collect())
 }
 
+pub(crate) fn is_not_distinct_from_binary<OffsetSize: OffsetSizeTrait>(
+    left: &GenericBinaryArray<OffsetSize>,
+    right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+    Ok(left
+        .iter()
+        .zip(right.iter())
+        .map(|(x, y)| Some(x == y))
+        .collect())
+}
+
 pub(crate) fn is_distinct_from_decimal(
     left: &Decimal128Array,
     right: &Decimal128Array,
diff --git a/testing b/testing
index e81d0c6de3..5bab2f264a 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit e81d0c6de35948b3be7984af8e00413b314cde6e
+Subproject commit 5bab2f264a23f5af68f69ea93d24ef1e8e77fc88

Reply via email to