tustvold commented on a change in pull request #1263:
URL: https://github.com/apache/arrow-rs/pull/1263#discussion_r797919798



##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                $OP_PRIM::<$KT, Int8Type>($LEFT, $RIGHT)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int8Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Perform `left == right` operation on two `DictionaryArray`s.
+/// Only when two arrays are of the same type the comparison will happen 
otherwise it will err
+/// with a casting error.
+pub fn eq_dict<K, T>(

Review comment:
       Definitely a bonus feature, but you might want to make this generic in 
the comparison operation, perhaps look at `simd_compare_op` for inspiration

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                $OP_PRIM::<$KT, Int8Type>($LEFT, $RIGHT)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int8Type)

Review comment:
       I do wonder if there might be some way to reuse `typed_compares` here 
somehow, with the notion of the source type somehow encoded in OP_PRIM, etc... 
but it may be more effort than it is worth 

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                $OP_PRIM::<$KT, Int8Type>($LEFT, $RIGHT)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int8Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Perform `left == right` operation on two `DictionaryArray`s.
+/// Only when two arrays are of the same type the comparison will happen 
otherwise it will err
+/// with a casting error.
+pub fn eq_dict<K, T>(
+    left: &DictionaryArray<K>,
+    right: &DictionaryArray<K>,
+) -> Result<BooleanArray>
+where
+    K: ArrowNumericType,
+    T: ArrowNumericType,
+{
+    assert_eq!(left.keys().len(), right.keys().len());

Review comment:
       Looking at the other implementations this typically returns an error
   
   ```
   if $left.len() != $right.len() {
               return Err(ArrowError::ComputeError(
                   "Cannot perform comparison operation on arrays of different 
length"
                       .to_string(),
               ));
           }
   ```

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {

Review comment:
       It's entirely personal preference, and this file does tend to use macros 
for this stuff, but _personally_ I find generics easier to interpret, debug and 
maintain. Just putting the idea out there

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                $OP_PRIM::<$KT, Int8Type>($LEFT, $RIGHT)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int8Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Perform `left == right` operation on two `DictionaryArray`s.
+/// Only when two arrays are of the same type the comparison will happen 
otherwise it will err
+/// with a casting error.
+pub fn eq_dict<K, T>(
+    left: &DictionaryArray<K>,
+    right: &DictionaryArray<K>,
+) -> Result<BooleanArray>
+where
+    K: ArrowNumericType,
+    T: ArrowNumericType,
+{
+    assert_eq!(left.keys().len(), right.keys().len());
+
+    let left_values = left
+        .values()
+        .as_any()
+        .downcast_ref::<PrimitiveArray<T>>()
+        .unwrap();
+    let right_values = right
+        .values()
+        .as_any()
+        .downcast_ref::<PrimitiveArray<T>>()
+        .unwrap();
+
+    let mut result = vec![];
+
+    for idx in 0..left.keys().len() {
+        unsafe {
+            let left_key = left
+                .keys()
+                .value_unchecked(idx)
+                .to_usize()
+                .expect("Dictionary index not usize");
+            let left_value = left_values.value_unchecked(left_key);
+            let right_key = right
+                .keys()
+                .value_unchecked(idx)
+                .to_usize()
+                .expect("Dictionary index not usize");
+            let right_value = right_values.value_unchecked(right_key);
+
+            result.push(left_value == right_value);
+        }
+    }
+
+    Ok(BooleanArray::from(result))

Review comment:
       It is likely to be significantly faster to use 
`MutableBuffer::from_trusted_len_iter_bool(iter)` here, where `iter` is the 
body of the for loop. This not only avoids incrementally growing a `Vec`, but 
also allocating space for `Vec<bool>` and then copying it into a `Buffer` - 
instead it will construct the packed bitmask directly




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to