This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 96a9a54 Add comparison support for fully qualified BinaryArray (#1195)
96a9a54 is described below
commit 96a9a541efed0ca79118fb9c4493ea1addd07df0
Author: Remzi Yang <[email protected]>
AuthorDate: Thu Jan 20 02:06:47 2022 +0800
Add comparison support for fully qualified BinaryArray (#1195)
* add eq_dyn for BinaryArray
Signed-off-by: remzi <[email protected]>
* correct the code formatting
Signed-off-by: remzi <[email protected]>
* add comparison support for fully qualified binary array
delete dyn comparison which will be added in successive PRs
Signed-off-by: remzi <[email protected]>
* add tests for comparison of fully qualified BinaryArray
Signed-off-by: remzi <[email protected]>
* add 2 missed tests
Signed-off-by: remzi <[email protected]>
* move 2 functions
Signed-off-by: remzi <[email protected]>
* fix reference error
Signed-off-by: remzi <[email protected]>
---
arrow/src/compute/kernels/comparison.rs | 267 ++++++++++++++++++++++++++++++++
1 file changed, 267 insertions(+)
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 9390d84..acfa47c 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -810,6 +810,102 @@ pub fn neq_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray>
eq_bool_scalar(left, !right)
}
+/// Perform `left == right` operation on [`BinaryArray`] /
[`LargeBinaryArray`].
+pub fn eq_binary<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+ compare_op!(left, right, |a, b| a == b)
+}
+
+/// Perform `left == right` operation on [`BinaryArray`] /
[`LargeBinaryArray`] and a scalar
+pub fn eq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &[u8],
+) -> Result<BooleanArray> {
+ compare_op_scalar!(left, right, |a, b| a == b)
+}
+
+/// Perform `left != right` operation on [`BinaryArray`] /
[`LargeBinaryArray`].
+pub fn neq_binary<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+ compare_op!(left, right, |a, b| a != b)
+}
+
+/// Perform `left != right` operation on [`BinaryArray`] /
[`LargeBinaryArray`] and a scalar.
+pub fn neq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &[u8],
+) -> Result<BooleanArray> {
+ compare_op_scalar!(left, right, |a, b| a != b)
+}
+
+/// Element-wise `left < right` for [`BinaryArray`] / [`LargeBinaryArray`].
+///
+/// The values at each index of the two arrays are handed to `<`, and the outcomes
+/// come back as a [`BooleanArray`]. Null handling and error conditions are decided
+/// by the `compare_op!` macro.
+pub fn lt_binary<OffsetSize: BinaryOffsetSizeTrait>(
+    left: &GenericBinaryArray<OffsetSize>,
+    right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+    compare_op!(left, right, |lhs, rhs| lhs < rhs)
+}
+
+/// Perform `left < right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
+///
+/// Every element of `left` is compared against the byte slice `right` with `<` and
+/// the outcomes are returned as a [`BooleanArray`]. Null handling and error
+/// conditions are whatever the `compare_op_scalar!` macro implements.
+pub fn lt_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
+    left: &GenericBinaryArray<OffsetSize>,
+    right: &[u8],
+) -> Result<BooleanArray> {
+    compare_op_scalar!(left, right, |a, b| a < b)
+}
+
+/// Perform `left <= right` operation on [`BinaryArray`] /
[`LargeBinaryArray`].
+pub fn lt_eq_binary<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+ compare_op!(left, right, |a, b| a <= b)
+}
+
+/// Perform `left <= right` operation on [`BinaryArray`] /
[`LargeBinaryArray`] and a scalar.
+pub fn lt_eq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &[u8],
+) -> Result<BooleanArray> {
+ compare_op_scalar!(left, right, |a, b| a <= b)
+}
+
+/// Element-wise `left > right` for [`BinaryArray`] / [`LargeBinaryArray`].
+///
+/// The values at each index of the two arrays are handed to `>`, and the outcomes
+/// come back as a [`BooleanArray`]. Null handling and error conditions are decided
+/// by the `compare_op!` macro.
+pub fn gt_binary<OffsetSize: BinaryOffsetSizeTrait>(
+    left: &GenericBinaryArray<OffsetSize>,
+    right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+    compare_op!(left, right, |lhs, rhs| lhs > rhs)
+}
+
+/// Perform `left > right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
+///
+/// Every element of `left` is compared against the byte slice `right` with `>` and
+/// the outcomes are returned as a [`BooleanArray`]. Null handling and error
+/// conditions are whatever the `compare_op_scalar!` macro implements.
+pub fn gt_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
+    left: &GenericBinaryArray<OffsetSize>,
+    right: &[u8],
+) -> Result<BooleanArray> {
+    compare_op_scalar!(left, right, |a, b| a > b)
+}
+
+/// Perform `left >= right` operation on [`BinaryArray`] /
[`LargeBinaryArray`].
+pub fn gt_eq_binary<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &GenericBinaryArray<OffsetSize>,
+) -> Result<BooleanArray> {
+ compare_op!(left, right, |a, b| a >= b)
+}
+
+/// Perform `left >= right` operation on [`BinaryArray`] /
[`LargeBinaryArray`] and a scalar.
+pub fn gt_eq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
+ left: &GenericBinaryArray<OffsetSize>,
+ right: &[u8],
+) -> Result<BooleanArray> {
+ compare_op_scalar!(left, right, |a, b| a >= b)
+}
+
/// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`].
pub fn neq_utf8<OffsetSize: StringOffsetSizeTrait>(
left: &GenericStringArray<OffsetSize>,
@@ -2794,6 +2890,177 @@ mod tests {
);
}
+    /// Generates a `#[test]` named `$test_name` that builds both operands from the
+    /// `$left` / `$right` vectors, applies `$op`, and checks every result slot against
+    /// `$expected`. The check runs once with `BinaryArray` operands and once with
+    /// `LargeBinaryArray` operands.
+    macro_rules! test_binary {
+        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
+            #[test]
+            fn $test_name() {
+                // `BinaryArray` operands.
+                let left = BinaryArray::from_vec($left);
+                let right = BinaryArray::from_vec($right);
+                let res = $op(&left, &right).unwrap();
+                let expected = $expected;
+                assert_eq!(expected.len(), res.len());
+                for i in 0..res.len() {
+                    let v = res.value(i);
+                    assert_eq!(v, expected[i]);
+                }
+
+                // Same inputs again, this time as `LargeBinaryArray` operands.
+                let left = LargeBinaryArray::from_vec($left);
+                let right = LargeBinaryArray::from_vec($right);
+                let res = $op(&left, &right).unwrap();
+                let expected = $expected;
+                assert_eq!(expected.len(), res.len());
+                for i in 0..res.len() {
+                    let v = res.value(i);
+                    assert_eq!(v, expected[i]);
+                }
+            }
+        };
+    }
+
+    // Scalar equality must honour the offset of a sliced array: after dropping the
+    // first element the remaining window is [null, "hello", "world"].
+    #[test]
+    fn test_binary_eq_scalar_on_slice() {
+        let full = BinaryArray::from_opt_vec(vec![
+            Some(b"hi"),
+            None,
+            Some(b"hello"),
+            Some(b"world"),
+        ]);
+        let window = full.slice(1, 3);
+        let window = as_generic_binary_array::<i32>(&window);
+
+        let expected = BooleanArray::from(vec![None, Some(true), Some(false)]);
+        let actual = eq_binary_scalar(window, b"hello").unwrap();
+        assert_eq!(actual, expected);
+    }
+
+    /// Generates a `#[test]` named `$test_name` that builds the left operand from the
+    /// `$left` vector, applies `$op` with the scalar `$right`, and checks every result
+    /// slot against `$expected`. The check runs once with a `BinaryArray` operand and
+    /// once with a `LargeBinaryArray` operand; a failing slot reports the value, its
+    /// position and the scalar.
+    macro_rules! test_binary_scalar {
+        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
+            #[test]
+            fn $test_name() {
+                // `BinaryArray` operand.
+                let left = BinaryArray::from_vec($left);
+                let res = $op(&left, $right).unwrap();
+                let expected = $expected;
+                assert_eq!(expected.len(), res.len());
+                for i in 0..res.len() {
+                    let v = res.value(i);
+                    assert_eq!(
+                        v,
+                        expected[i],
+                        "unexpected result when comparing {:?} at position {} to {:?} ",
+                        left.value(i),
+                        i,
+                        $right
+                    );
+                }
+
+                // Same input again, this time as a `LargeBinaryArray` operand.
+                let left = LargeBinaryArray::from_vec($left);
+                let res = $op(&left, $right).unwrap();
+                let expected = $expected;
+                assert_eq!(expected.len(), res.len());
+                for i in 0..res.len() {
+                    let v = res.value(i);
+                    assert_eq!(
+                        v,
+                        expected[i],
+                        "unexpected result when comparing {:?} at position {} to {:?} ",
+                        left.value(i),
+                        i,
+                        $right
+                    );
+                }
+            }
+        };
+    }
+
+ // `==`: only the first element ("arrow") equals its counterpart / the scalar "arrow".
+ test_binary!(
+ test_binary_array_eq,
+ vec![b"arrow", b"arrow", b"arrow", b"arrow"],
+ vec![b"arrow", b"parquet", b"datafusion", b"flight"],
+ eq_binary,
+ vec![true, false, false, false]
+ );
+
+ test_binary_scalar!(
+ test_binary_array_eq_scalar,
+ vec![b"arrow", b"parquet", b"datafusion", b"flight"],
+ "arrow".as_bytes(),
+ eq_binary_scalar,
+ vec![true, false, false, false]
+ );
+
+ // `!=`: mirror image of the `==` cases; every element except the first differs.
+ test_binary!(
+ test_binary_array_neq,
+ vec![b"arrow", b"arrow", b"arrow", b"arrow"],
+ vec![b"arrow", b"parquet", b"datafusion", b"flight"],
+ neq_binary,
+ vec![false, true, true, true]
+ );
+ test_binary_scalar!(
+ test_binary_array_neq_scalar,
+ vec![b"arrow", b"parquet", b"datafusion", b"flight"],
+ "arrow".as_bytes(),
+ neq_binary_scalar,
+ vec![false, true, true, true]
+ );
+
+ // `<` against "flight": "arrow" and "datafusion" are smaller; "flight" and "parquet" are not.
+ test_binary!(
+ test_binary_array_lt,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ vec![b"flight", b"flight", b"flight", b"flight"],
+ lt_binary,
+ vec![true, true, false, false]
+ );
+ test_binary_scalar!(
+ test_binary_array_lt_scalar,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ "flight".as_bytes(),
+ lt_binary_scalar,
+ vec![true, true, false, false]
+ );
+
+ // `<=` against "flight": same as `<`, except the equal element "flight" now matches too.
+ test_binary!(
+ test_binary_array_lt_eq,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ vec![b"flight", b"flight", b"flight", b"flight"],
+ lt_eq_binary,
+ vec![true, true, true, false]
+ );
+ test_binary_scalar!(
+ test_binary_array_lt_eq_scalar,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ "flight".as_bytes(),
+ lt_eq_binary_scalar,
+ vec![true, true, true, false]
+ );
+
+ // `>` against "flight": only "parquet" is greater.
+ test_binary!(
+ test_binary_array_gt,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ vec![b"flight", b"flight", b"flight", b"flight"],
+ gt_binary,
+ vec![false, false, false, true]
+ );
+ test_binary_scalar!(
+ test_binary_array_gt_scalar,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ "flight".as_bytes(),
+ gt_binary_scalar,
+ vec![false, false, false, true]
+ );
+
+ // `>=` against "flight": the equal element "flight" and the greater "parquet" match.
+ test_binary!(
+ test_binary_array_gt_eq,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ vec![b"flight", b"flight", b"flight", b"flight"],
+ gt_eq_binary,
+ vec![false, false, true, true]
+ );
+ test_binary_scalar!(
+ test_binary_array_gt_eq_scalar,
+ vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ "flight".as_bytes(),
+ gt_eq_binary_scalar,
+ vec![false, false, true, true]
+ );
+
// Expected behaviour:
// contains("ab", ["ab", "cd", null]) = true
// contains("ef", ["ab", "cd", null]) = false