alamb commented on a change in pull request #1263:
URL: https://github.com/apache/arrow-rs/pull/1263#discussion_r798048841



##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {

Review comment:
       macros are used pretty heavily in this file, so it may be better to 
follow along with that pattern

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,112 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                $OP_PRIM::<$KT, Int8Type>($LEFT, $RIGHT)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: ident) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int8Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Perform `left == right` operation on two `DictionaryArray`s.
+/// Only when two arrays are of the same type the comparison will happen 
otherwise it will err
+/// with a casting error.
+pub fn eq_dict<K, T>(
+    left: &DictionaryArray<K>,
+    right: &DictionaryArray<K>,
+) -> Result<BooleanArray>
+where
+    K: ArrowNumericType,
+    T: ArrowNumericType,
+{
+    assert_eq!(left.keys().len(), right.keys().len());
+
+    let left_values = left
+        .values()
+        .as_any()
+        .downcast_ref::<PrimitiveArray<T>>()
+        .unwrap();
+    let right_values = right
+        .values()
+        .as_any()
+        .downcast_ref::<PrimitiveArray<T>>()
+        .unwrap();
+
+    let mut result = vec![];
+
+    for idx in 0..left.keys().len() {
+        unsafe {

Review comment:
       I think you could avoid some amount of this unsafe-ness and Vec 
manipulation with something like this (untested)
   
   ```rust
   let result: BooleanArray = left.keys().iter()
     .zip(right.keys.iter())
     .map(|(left_key, right_key)| {
       let left_key = left_key.to_usize().expect("..");
       let right_key = right_key.to_usize().expect("..");
       ...
       left_value == right_value
   })
   .collect()
   ```
   
   
   Though given this pattern, perhaps we could (in a separate PR) implement an 
iterator for DictionaryArray, so this code could devolve into 
   
   ```rust
   let result: BooleanArray = left.iter()
     .zip(right.iter())
     .map(|(left_value, right_value)| {
       left_value == right_value
   })
   .collect()
   ```
   
   🤔  then I wonder how many of the existing macros would "just work" 🤔 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to