This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 731e132 dyn compare for binary array (#1238)
731e132 is described below
commit 731e132489b99cd688f884642cf20de52aed24d0
Author: Remzi Yang <[email protected]>
AuthorDate: Wed Jan 26 20:18:25 2022 +0800
dyn compare for binary array (#1238)
* dyn compare two binary array
Signed-off-by: remzi <[email protected]>
* add dyn comparison for binary array
Signed-off-by: remzi <[email protected]>
* add tests for dyn compare binary array and scalar
Signed-off-by: remzi <[email protected]>
* remove DictionaryArray from dyn compare, because not find an easy way to
build binary dictionary array
Signed-off-by: remzi <[email protected]>
* fix mistakes in test code
Signed-off-by: remzi <[email protected]>
* add Nones into the test cases
Signed-off-by: remzi <[email protected]>
* add non utf8 scalar
Signed-off-by: remzi <[email protected]>
* correct the code format
Signed-off-by: remzi <[email protected]>
---
arrow/src/compute/kernels/comparison.rs | 237 ++++++++++++++++++++++++++++++--
1 file changed, 228 insertions(+), 9 deletions(-)
diff --git a/arrow/src/compute/kernels/comparison.rs
b/arrow/src/compute/kernels/comparison.rs
index a846b02..48e8011 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -1262,6 +1262,117 @@ where
}
/// Perform `left == right` operation on an array and a numeric scalar
+/// value. Supports BinaryArray and LargeBinaryArray
+pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) ->
Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Binary => {
+ let left = as_generic_binary_array::<i32>(left);
+ eq_binary_scalar(left, right)
+ }
+ DataType::LargeBinary => {
+ let left = as_generic_binary_array::<i64>(left);
+ eq_binary_scalar(left, right)
+ }
+ _ => Err(ArrowError::ComputeError(
+ "eq_dyn_binary_scalar only supports Binary or LargeBinary
arrays".to_string(),
+ )),
+ }
+}
+
+/// Perform `left != right` operation on an array and a binary scalar
+/// value. Supports BinaryArray and LargeBinaryArray
+pub fn neq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) ->
Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Binary => {
+ let left = as_generic_binary_array::<i32>(left);
+ neq_binary_scalar(left, right)
+ }
+ DataType::LargeBinary => {
+ let left = as_generic_binary_array::<i64>(left);
+ neq_binary_scalar(left, right)
+ }
+ _ => Err(ArrowError::ComputeError(
+ "neq_dyn_binary_scalar only supports Binary or LargeBinary arrays"
+ .to_string(),
+ )),
+ }
+}
+
+/// Perform `left < right` operation on an array and a binary scalar
+/// value. Supports BinaryArray and LargeBinaryArray
+pub fn lt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) ->
Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Binary => {
+ let left = as_generic_binary_array::<i32>(left);
+ lt_binary_scalar(left, right)
+ }
+ DataType::LargeBinary => {
+ let left = as_generic_binary_array::<i64>(left);
+ lt_binary_scalar(left, right)
+ }
+ _ => Err(ArrowError::ComputeError(
+ "lt_dyn_binary_scalar only supports Binary or LargeBinary
arrays".to_string(),
+ )),
+ }
+}
+
+/// Perform `left <= right` operation on an array and a binary scalar
+/// value. Supports BinaryArray and LargeBinaryArray
+pub fn lt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) ->
Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Binary => {
+ let left = as_generic_binary_array::<i32>(left);
+ lt_eq_binary_scalar(left, right)
+ }
+ DataType::LargeBinary => {
+ let left = as_generic_binary_array::<i64>(left);
+ lt_eq_binary_scalar(left, right)
+ }
+ _ => Err(ArrowError::ComputeError(
+ "lt_eq_dyn_binary_scalar only supports Binary or LargeBinary
arrays"
+ .to_string(),
+ )),
+ }
+}
+
+/// Perform `left > right` operation on an array and a binary scalar
+/// value. Supports BinaryArray and LargeBinaryArray
+pub fn gt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) ->
Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Binary => {
+ let left = as_generic_binary_array::<i32>(left);
+ gt_binary_scalar(left, right)
+ }
+ DataType::LargeBinary => {
+ let left = as_generic_binary_array::<i64>(left);
+ gt_binary_scalar(left, right)
+ }
+ _ => Err(ArrowError::ComputeError(
+ "gt_dyn_binary_scalar only supports Binary or LargeBinary
arrays".to_string(),
+ )),
+ }
+}
+
+/// Perform `left >= right` operation on an array and a binary scalar
+/// value. Supports BinaryArray and LargeBinaryArray
+pub fn gt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) ->
Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Binary => {
+ let left = as_generic_binary_array::<i32>(left);
+ gt_eq_binary_scalar(left, right)
+ }
+ DataType::LargeBinary => {
+ let left = as_generic_binary_array::<i64>(left);
+ gt_eq_binary_scalar(left, right)
+ }
+ _ => Err(ArrowError::ComputeError(
+ "gt_eq_dyn_binary_scalar only supports Binary or LargeBinary
arrays"
+ .to_string(),
+ )),
+ }
+}
+
+/// Perform `left == right` operation on an array and a string scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn eq_dyn_utf8_scalar(left: &dyn Array, right: &str) ->
Result<BooleanArray> {
let result = match left.data_type() {
@@ -1770,7 +1881,7 @@ macro_rules! typed_cmp {
}
macro_rules! typed_compares {
- ($LEFT: expr, $RIGHT: expr, $OP_BOOL: ident, $OP_PRIM: ident, $OP_STR:
ident) => {{
+ ($LEFT: expr, $RIGHT: expr, $OP_BOOL: ident, $OP_PRIM: ident, $OP_STR:
ident, $OP_BINARY: ident) => {{
match ($LEFT.data_type(), $RIGHT.data_type()) {
(DataType::Boolean, DataType::Boolean) => {
typed_cmp!($LEFT, $RIGHT, BooleanArray, $OP_BOOL)
@@ -1811,6 +1922,12 @@ macro_rules! typed_compares {
(DataType::LargeUtf8, DataType::LargeUtf8) => {
typed_cmp!($LEFT, $RIGHT, LargeStringArray, $OP_STR, i64)
}
+ (DataType::Binary, DataType::Binary) => {
+ typed_cmp!($LEFT, $RIGHT, BinaryArray, $OP_BINARY, i32)
+ }
+ (DataType::LargeBinary, DataType::LargeBinary) => {
+ typed_cmp!($LEFT, $RIGHT, LargeBinaryArray, $OP_BINARY, i64)
+ }
(
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
@@ -1918,7 +2035,7 @@ macro_rules! typed_compares {
/// Only when two arrays are of the same type the comparison will happen
otherwise it will err
/// with a casting error.
pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
- typed_compares!(left, right, eq_bool, eq, eq_utf8)
+ typed_compares!(left, right, eq_bool, eq, eq_utf8, eq_binary)
}
/// Perform `left != right` operation on two (dynamic) [`Array`]s.
@@ -1926,7 +2043,7 @@ pub fn eq_dyn(left: &dyn Array, right: &dyn Array) ->
Result<BooleanArray> {
/// Only when two arrays are of the same type the comparison will happen
otherwise it will err
/// with a casting error.
pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
- typed_compares!(left, right, neq_bool, neq, neq_utf8)
+ typed_compares!(left, right, neq_bool, neq, neq_utf8, neq_binary)
}
/// Perform `left < right` operation on two (dynamic) [`Array`]s.
@@ -1934,7 +2051,7 @@ pub fn neq_dyn(left: &dyn Array, right: &dyn Array) ->
Result<BooleanArray> {
/// Only when two arrays are of the same type the comparison will happen
otherwise it will err
/// with a casting error.
pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
- typed_compares!(left, right, lt_bool, lt, lt_utf8)
+ typed_compares!(left, right, lt_bool, lt, lt_utf8, lt_binary)
}
/// Perform `left <= right` operation on two (dynamic) [`Array`]s.
@@ -1942,7 +2059,7 @@ pub fn lt_dyn(left: &dyn Array, right: &dyn Array) ->
Result<BooleanArray> {
/// Only when two arrays are of the same type the comparison will happen
otherwise it will err
/// with a casting error.
pub fn lt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
- typed_compares!(left, right, lt_eq_bool, lt_eq, lt_eq_utf8)
+ typed_compares!(left, right, lt_eq_bool, lt_eq, lt_eq_utf8, lt_eq_binary)
}
/// Perform `left > right` operation on two (dynamic) [`Array`]s.
@@ -1950,7 +2067,7 @@ pub fn lt_eq_dyn(left: &dyn Array, right: &dyn Array) ->
Result<BooleanArray> {
/// Only when two arrays are of the same type the comparison will happen
otherwise it will err
/// with a casting error.
pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
- typed_compares!(left, right, gt_bool, gt, gt_utf8)
+ typed_compares!(left, right, gt_bool, gt, gt_utf8, gt_binary)
}
/// Perform `left >= right` operation on two (dynamic) [`Array`]s.
@@ -1958,7 +2075,7 @@ pub fn gt_dyn(left: &dyn Array, right: &dyn Array) ->
Result<BooleanArray> {
/// Only when two arrays are of the same type the comparison will happen
otherwise it will err
/// with a casting error.
pub fn gt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
- typed_compares!(left, right, gt_eq_bool, gt_eq, gt_eq_utf8)
+ typed_compares!(left, right, gt_eq_bool, gt_eq, gt_eq_utf8, gt_eq_binary)
}
/// Perform `left == right` operation on two [`PrimitiveArray`]s.
@@ -3055,10 +3172,10 @@ mod tests {
);
test_binary_scalar!(
test_binary_array_gt_eq_scalar,
- vec![b"arrow", b"datafusion", b"flight", b"parquet"],
+ vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
"flight".as_bytes(),
gt_eq_binary_scalar,
- vec![false, false, true, true]
+ vec![false, false, true, true, true]
);
// Expected behaviour:
@@ -3844,6 +3961,108 @@ mod tests {
}
#[test]
+ fn test_eq_dyn_binary_scalar() {
+ let data: Vec<Option<&[u8]>> = vec![Some(b"arrow"),
Some(b"datafusion"), Some(b"flight"), Some(b"parquet"), Some(&[0xff, 0xf8]),
None];
+ let array = BinaryArray::from(data.clone());
+ let large_array = LargeBinaryArray::from(data);
+ let scalar = "flight".as_bytes();
+ let expected = BooleanArray::from(
+ vec![Some(false), Some(false), Some(true), Some(false),
Some(false), None],
+ );
+
+ assert_eq!(eq_dyn_binary_scalar(&array, scalar).unwrap(), expected);
+ assert_eq!(
+ eq_dyn_binary_scalar(&large_array, scalar).unwrap(),
+ expected
+ );
+ }
+
+ #[test]
+ fn test_neq_dyn_binary_scalar() {
+ let data: Vec<Option<&[u8]>> = vec![Some(b"arrow"),
Some(b"datafusion"), Some(b"flight"), Some(b"parquet"), Some(&[0xff, 0xf8]),
None];
+ let array = BinaryArray::from(data.clone());
+ let large_array = LargeBinaryArray::from(data);
+ let scalar = "flight".as_bytes();
+ let expected = BooleanArray::from(
+ vec![Some(true), Some(true), Some(false), Some(true), Some(true),
None],
+ );
+
+ assert_eq!(neq_dyn_binary_scalar(&array, scalar).unwrap(), expected);
+ assert_eq!(
+ neq_dyn_binary_scalar(&large_array, scalar).unwrap(),
+ expected
+ );
+ }
+
+ #[test]
+ fn test_lt_dyn_binary_scalar() {
+ let data: Vec<Option<&[u8]>> = vec![Some(b"arrow"),
Some(b"datafusion"), Some(b"flight"), Some(b"parquet"), Some(&[0xff, 0xf8]),
None];
+ let array = BinaryArray::from(data.clone());
+ let large_array = LargeBinaryArray::from(data);
+ let scalar = "flight".as_bytes();
+ let expected = BooleanArray::from(
+ vec![Some(true), Some(true), Some(false), Some(false),
Some(false), None],
+ );
+
+ assert_eq!(lt_dyn_binary_scalar(&array, scalar).unwrap(), expected);
+ assert_eq!(
+ lt_dyn_binary_scalar(&large_array, scalar).unwrap(),
+ expected
+ );
+ }
+
+ #[test]
+ fn test_lt_eq_dyn_binary_scalar() {
+ let data: Vec<Option<&[u8]>> = vec![Some(b"arrow"),
Some(b"datafusion"), Some(b"flight"), Some(b"parquet"), Some(&[0xff, 0xf8]),
None];
+ let array = BinaryArray::from(data.clone());
+ let large_array = LargeBinaryArray::from(data);
+ let scalar = "flight".as_bytes();
+ let expected = BooleanArray::from(
+ vec![Some(true), Some(true), Some(true), Some(false), Some(false),
None],
+ );
+
+ assert_eq!(lt_eq_dyn_binary_scalar(&array, scalar).unwrap(), expected);
+ assert_eq!(
+ lt_eq_dyn_binary_scalar(&large_array, scalar).unwrap(),
+ expected
+ );
+ }
+
+ #[test]
+ fn test_gt_dyn_binary_scalar() {
+ let data: Vec<Option<&[u8]>> = vec![Some(b"arrow"),
Some(b"datafusion"), Some(b"flight"), Some(b"parquet"), Some(&[0xff, 0xf8]),
None];
+ let array = BinaryArray::from(data.clone());
+ let large_array = LargeBinaryArray::from(data);
+ let scalar = "flight".as_bytes();
+ let expected = BooleanArray::from(
+ vec![Some(false), Some(false), Some(false), Some(true),
Some(true), None],
+ );
+
+ assert_eq!(gt_dyn_binary_scalar(&array, scalar).unwrap(), expected);
+ assert_eq!(
+ gt_dyn_binary_scalar(&large_array, scalar).unwrap(),
+ expected
+ );
+ }
+
+ #[test]
+ fn test_gt_eq_dyn_binary_scalar() {
+ let data: Vec<Option<&[u8]>> = vec![Some(b"arrow"),
Some(b"datafusion"), Some(b"flight"), Some(b"parquet"), Some(&[0xff, 0xf8]),
None];
+ let array = BinaryArray::from(data.clone());
+ let large_array = LargeBinaryArray::from(data);
+ let scalar = &[0xff, 0xf8];
+ let expected = BooleanArray::from(
+ vec![Some(false), Some(false), Some(false), Some(false),
Some(true), None],
+ );
+
+ assert_eq!(gt_eq_dyn_binary_scalar(&array, scalar).unwrap(), expected);
+ assert_eq!(
+ gt_eq_dyn_binary_scalar(&large_array, scalar).unwrap(),
+ expected
+ );
+ }
+
+ #[test]
fn test_eq_dyn_utf8_scalar() {
let array = StringArray::from(vec!["abc", "def", "xyz"]);
let a_eq = eq_dyn_utf8_scalar(&array, "xyz").unwrap();