alamb commented on a change in pull request #1263:
URL: https://github.com/apache/arrow-rs/pull/1263#discussion_r803756171



##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,6 +2030,271 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {

Review comment:
       ```suggestion
   /// Applies $OP to $LEFT and $RIGHT, which are two dictionaries that have (the same) key type $KT
   macro_rules! typed_dict_cmp {
   ```

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,6 +2030,271 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Boolean, DataType::Boolean) => {
+                cmp_dict_bool::<$KT, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int8, DataType::Int8) => {
+                cmp_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int16, DataType::Int16) => {
+                cmp_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int32, DataType::Int32) => {
+                cmp_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int64, DataType::Int64) => {
+                cmp_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt8, DataType::UInt8) => {
+                cmp_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt16, DataType::UInt16) => {
+                cmp_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt32, DataType::UInt32) => {
+                cmp_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt64, DataType::UInt64) => {
+                cmp_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Utf8, DataType::Utf8) => {
+                cmp_dict_utf8::<$KT, i32, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::LargeUtf8, DataType::LargeUtf8) => {
+                cmp_dict_utf8::<$KT, i64, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Binary, DataType::Binary) => {
+               cmp_dict_binary::<$KT, i32, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::LargeBinary, DataType::LargeBinary) => {
+                cmp_dict_binary::<$KT, i64, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Nanosecond, _),
+                DataType::Timestamp(TimeUnit::Nanosecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampNanosecondType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Microsecond, _),
+                DataType::Timestamp(TimeUnit::Microsecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampMicrosecondType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Millisecond, _),
+                DataType::Timestamp(TimeUnit::Millisecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampMillisecondType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Second, _),
+                DataType::Timestamp(TimeUnit::Second, _),
+            ) => {
+                cmp_dict::<$KT, TimestampSecondType, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Date32, DataType::Date32) => {
+                cmp_dict::<$KT, Date32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Date64, DataType::Date64) => {
+                cmp_dict::<$KT, Date64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::YearMonth),
+                DataType::Interval(IntervalUnit::YearMonth),
+            ) => {
+                cmp_dict::<$KT, IntervalYearMonthType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::DayTime),
+                DataType::Interval(IntervalUnit::DayTime),
+            ) => {
+                cmp_dict::<$KT, IntervalDayTimeType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::MonthDayNano),
+                DataType::Interval(IntervalUnit::MonthDayNano),
+            ) => {
+                cmp_dict::<$KT, IntervalMonthDayNanoType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int8Type)
+                    }
+                    (DataType::Int16, DataType::Int16) => {
+                        let left = as_dictionary_array::<Int16Type>($LEFT);
+                        let right = as_dictionary_array::<Int16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int16Type)
+                    }
+                    (DataType::Int32, DataType::Int32) => {
+                        let left = as_dictionary_array::<Int32Type>($LEFT);
+                        let right = as_dictionary_array::<Int32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int32Type)
+                    }
+                    (DataType::Int64, DataType::Int64) => {
+                        let left = as_dictionary_array::<Int64Type>($LEFT);
+                        let right = as_dictionary_array::<Int64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int64Type)
+                    }
+                    (DataType::UInt8, DataType::UInt8) => {
+                        let left = as_dictionary_array::<UInt8Type>($LEFT);
+                        let right = as_dictionary_array::<UInt8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt8Type)
+                    }
+                    (DataType::UInt16, DataType::UInt16) => {
+                        let left = as_dictionary_array::<UInt16Type>($LEFT);
+                        let right = as_dictionary_array::<UInt16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt16Type)
+                    }
+                    (DataType::UInt32, DataType::UInt32) => {
+                        let left = as_dictionary_array::<UInt32Type>($LEFT);
+                        let right = as_dictionary_array::<UInt32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt32Type)
+                    }
+                    (DataType::UInt64, DataType::UInt64) => {
+                        let left = as_dictionary_array::<UInt64Type>($LEFT);
+                        let right = as_dictionary_array::<UInt64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt64Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Helper function to perform boolean lambda function on values from two 
dictionary arrays, this
+/// version does not attempt to use SIMD.

Review comment:
       ```suggestion
   /// version does not attempt to use SIMD explicitly (though the compiler may auto-vectorize)
   ```

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,6 +2030,271 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Boolean, DataType::Boolean) => {
+                cmp_dict_bool::<$KT, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int8, DataType::Int8) => {
+                cmp_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int16, DataType::Int16) => {
+                cmp_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int32, DataType::Int32) => {
+                cmp_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int64, DataType::Int64) => {
+                cmp_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt8, DataType::UInt8) => {
+                cmp_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt16, DataType::UInt16) => {
+                cmp_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt32, DataType::UInt32) => {
+                cmp_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt64, DataType::UInt64) => {
+                cmp_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Utf8, DataType::Utf8) => {
+                cmp_dict_utf8::<$KT, i32, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::LargeUtf8, DataType::LargeUtf8) => {
+                cmp_dict_utf8::<$KT, i64, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Binary, DataType::Binary) => {
+               cmp_dict_binary::<$KT, i32, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::LargeBinary, DataType::LargeBinary) => {
+                cmp_dict_binary::<$KT, i64, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Nanosecond, _),
+                DataType::Timestamp(TimeUnit::Nanosecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampNanosecondType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Microsecond, _),
+                DataType::Timestamp(TimeUnit::Microsecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampMicrosecondType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Millisecond, _),
+                DataType::Timestamp(TimeUnit::Millisecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampMillisecondType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Second, _),
+                DataType::Timestamp(TimeUnit::Second, _),
+            ) => {
+                cmp_dict::<$KT, TimestampSecondType, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Date32, DataType::Date32) => {
+                cmp_dict::<$KT, Date32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Date64, DataType::Date64) => {
+                cmp_dict::<$KT, Date64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::YearMonth),
+                DataType::Interval(IntervalUnit::YearMonth),
+            ) => {
+                cmp_dict::<$KT, IntervalYearMonthType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::DayTime),
+                DataType::Interval(IntervalUnit::DayTime),
+            ) => {
+                cmp_dict::<$KT, IntervalDayTimeType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::MonthDayNano),
+                DataType::Interval(IntervalUnit::MonthDayNano),
+            ) => {
+                cmp_dict::<$KT, IntervalMonthDayNanoType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int8Type)
+                    }
+                    (DataType::Int16, DataType::Int16) => {
+                        let left = as_dictionary_array::<Int16Type>($LEFT);
+                        let right = as_dictionary_array::<Int16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int16Type)
+                    }
+                    (DataType::Int32, DataType::Int32) => {
+                        let left = as_dictionary_array::<Int32Type>($LEFT);
+                        let right = as_dictionary_array::<Int32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int32Type)
+                    }
+                    (DataType::Int64, DataType::Int64) => {
+                        let left = as_dictionary_array::<Int64Type>($LEFT);
+                        let right = as_dictionary_array::<Int64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int64Type)
+                    }
+                    (DataType::UInt8, DataType::UInt8) => {
+                        let left = as_dictionary_array::<UInt8Type>($LEFT);
+                        let right = as_dictionary_array::<UInt8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt8Type)
+                    }
+                    (DataType::UInt16, DataType::UInt16) => {
+                        let left = as_dictionary_array::<UInt16Type>($LEFT);
+                        let right = as_dictionary_array::<UInt16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt16Type)
+                    }
+                    (DataType::UInt32, DataType::UInt32) => {
+                        let left = as_dictionary_array::<UInt32Type>($LEFT);
+                        let right = as_dictionary_array::<UInt32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt32Type)
+                    }
+                    (DataType::UInt64, DataType::UInt64) => {
+                        let left = as_dictionary_array::<UInt64Type>($LEFT);
+                        let right = as_dictionary_array::<UInt64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt64Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Helper function to perform boolean lambda function on values from two 
dictionary arrays, this
+/// version does not attempt to use SIMD.
+macro_rules! compare_dict_op {
+    ($left: expr, $right:expr, $op:expr, $value_ty:ty) => {{
+        if $left.len() != $right.len() {
+            return Err(ArrowError::ComputeError(
+                "Cannot perform comparison operation on arrays of different 
length"
+                    .to_string(),
+            ));
+        }
+        let left_values = 
$left.values().as_any().downcast_ref::<$value_ty>().unwrap();
+        let right_values = $right
+            .values()
+            .as_any()
+            .downcast_ref::<$value_ty>()
+            .unwrap();
+
+        let result = $left
+            .keys()
+            .iter()
+            .zip($right.keys().iter())
+            .map(|(left_key, right_key)| {
+                if let (Some(left_k), Some(right_k)) = (left_key, right_key) {
+                    let left_key = left_k.to_usize().expect("Dictionary index 
not usize");
+                    let right_key =
+                        right_k.to_usize().expect("Dictionary index not 
usize");
+                    unsafe {
+                        let left_value = left_values.value_unchecked(left_key);
+                        let right_value = 
right_values.value_unchecked(right_key);
+                        Some($op(left_value, right_value))
+                    }
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        Ok(result)
+    }};
+}
+
+/// Perform given operation on two `DictionaryArray`s.
+/// Only when two arrays are of the same type the comparison will happen 
otherwise it will err

Review comment:
       ```suggestion
   /// Returns an error if the two arrays have different value types
   ```

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -4374,4 +4645,192 @@ mod tests {
             BooleanArray::from(vec![Some(true), Some(false), Some(true)])
         );
     }
+
+    fn get_dict_arraydata(
+        keys: Buffer,
+        key_type: DataType,
+        value_data: ArrayData,
+    ) -> ArrayData {
+        let value_type = value_data.data_type().clone();
+        let dict_data_type =
+            DataType::Dictionary(Box::new(key_type), Box::new(value_type));
+        ArrayData::builder(dict_data_type)
+            .len(3)
+            .add_buffer(keys)
+            .add_child_data(value_data)
+            .build()
+            .unwrap()
+    }
+
+    #[test]
+    fn test_eq_dyn_dictionary_i8_array() {
+        let key_type = DataType::Int8;

Review comment:
       Thank you for these tests. It clearly is quite messy to create 
DictionaryArrays :(
   
   I have some ideas of how to make it less of a mess and will file a follow-on 
ticket / PRs. 

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2045,7 +2310,12 @@ macro_rules! typed_compares {
 /// assert_eq!(BooleanArray::from(vec![Some(true), None, Some(false)]), 
result);
 /// ```
 pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
-    typed_compares!(left, right, eq_bool, eq, eq_utf8, eq_binary)
+    match left.data_type() {

Review comment:
       this is wonderful

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,12 +2030,173 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: expr, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                cmp_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::Int16, DataType::Int16) => {
+                cmp_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::Int32, DataType::Int32) => {
+                cmp_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::Int64, DataType::Int64) => {
+                cmp_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::UInt8, DataType::UInt8) => {
+                cmp_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::UInt16, DataType::UInt16) => {
+                cmp_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::UInt32, DataType::UInt32) => {
+                cmp_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (DataType::UInt64, DataType::UInt64) => {
+                cmp_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP_PRIM)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP_PRIM: expr) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int8Type)
+                    }
+                    (DataType::Int16, DataType::Int16) => {
+                        let left = as_dictionary_array::<Int16Type>($LEFT);
+                        let right = as_dictionary_array::<Int16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int16Type)
+                    }
+                    (DataType::Int32, DataType::Int32) => {
+                        let left = as_dictionary_array::<Int32Type>($LEFT);
+                        let right = as_dictionary_array::<Int32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int32Type)
+                    }
+                    (DataType::Int64, DataType::Int64) => {
+                        let left = as_dictionary_array::<Int64Type>($LEFT);
+                        let right = as_dictionary_array::<Int64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, Int64Type)
+                    }
+                    (DataType::UInt8, DataType::UInt8) => {
+                        let left = as_dictionary_array::<UInt8Type>($LEFT);
+                        let right = as_dictionary_array::<UInt8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, UInt8Type)
+                    }
+                    (DataType::UInt16, DataType::UInt16) => {
+                        let left = as_dictionary_array::<UInt16Type>($LEFT);
+                        let right = as_dictionary_array::<UInt16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, UInt16Type)
+                    }
+                    (DataType::UInt32, DataType::UInt32) => {
+                        let left = as_dictionary_array::<UInt32Type>($LEFT);
+                        let right = as_dictionary_array::<UInt32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, UInt32Type)
+                    }
+                    (DataType::UInt64, DataType::UInt64) => {
+                        let left = as_dictionary_array::<UInt64Type>($LEFT);
+                        let right = as_dictionary_array::<UInt64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP_PRIM, UInt64Type)
+                    }

Review comment:
       Yes, I believe that is correct -- the only supported key types are 
`Int8Type...UInt64Type` (at least in arrow-rs, I am not sure about the arrow 
spec in general).

##########
File path: arrow/src/compute/kernels/comparison.rs
##########
@@ -2030,6 +2030,271 @@ macro_rules! typed_compares {
     }};
 }
 
+macro_rules! typed_dict_cmp {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Boolean, DataType::Boolean) => {
+                cmp_dict_bool::<$KT, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int8, DataType::Int8) => {
+                cmp_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int16, DataType::Int16) => {
+                cmp_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int32, DataType::Int32) => {
+                cmp_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Int64, DataType::Int64) => {
+                cmp_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt8, DataType::UInt8) => {
+                cmp_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt16, DataType::UInt16) => {
+                cmp_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt32, DataType::UInt32) => {
+                cmp_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::UInt64, DataType::UInt64) => {
+                cmp_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Utf8, DataType::Utf8) => {
+                cmp_dict_utf8::<$KT, i32, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::LargeUtf8, DataType::LargeUtf8) => {
+                cmp_dict_utf8::<$KT, i64, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Binary, DataType::Binary) => {
+               cmp_dict_binary::<$KT, i32, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::LargeBinary, DataType::LargeBinary) => {
+                cmp_dict_binary::<$KT, i64, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Nanosecond, _),
+                DataType::Timestamp(TimeUnit::Nanosecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampNanosecondType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Microsecond, _),
+                DataType::Timestamp(TimeUnit::Microsecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampMicrosecondType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Millisecond, _),
+                DataType::Timestamp(TimeUnit::Millisecond, _),
+            ) => {
+                cmp_dict::<$KT, TimestampMillisecondType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (
+                DataType::Timestamp(TimeUnit::Second, _),
+                DataType::Timestamp(TimeUnit::Second, _),
+            ) => {
+                cmp_dict::<$KT, TimestampSecondType, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Date32, DataType::Date32) => {
+                cmp_dict::<$KT, Date32Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (DataType::Date64, DataType::Date64) => {
+                cmp_dict::<$KT, Date64Type, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::YearMonth),
+                DataType::Interval(IntervalUnit::YearMonth),
+            ) => {
+                cmp_dict::<$KT, IntervalYearMonthType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::DayTime),
+                DataType::Interval(IntervalUnit::DayTime),
+            ) => {
+                cmp_dict::<$KT, IntervalDayTimeType, _>($LEFT, $RIGHT, $OP)
+            }
+            (
+                DataType::Interval(IntervalUnit::MonthDayNano),
+                DataType::Interval(IntervalUnit::MonthDayNano),
+            ) => {
+                cmp_dict::<$KT, IntervalMonthDayNanoType, _>($LEFT, $RIGHT, 
$OP)
+            }
+            (t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
+                "Comparing dictionary arrays of value type {} is not yet 
implemented",
+                t1
+            ))),
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare two dictionary arrays of different value types 
({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_compares {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are 
`DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), 
DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int8Type)
+                    }
+                    (DataType::Int16, DataType::Int16) => {
+                        let left = as_dictionary_array::<Int16Type>($LEFT);
+                        let right = as_dictionary_array::<Int16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int16Type)
+                    }
+                    (DataType::Int32, DataType::Int32) => {
+                        let left = as_dictionary_array::<Int32Type>($LEFT);
+                        let right = as_dictionary_array::<Int32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int32Type)
+                    }
+                    (DataType::Int64, DataType::Int64) => {
+                        let left = as_dictionary_array::<Int64Type>($LEFT);
+                        let right = as_dictionary_array::<Int64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, Int64Type)
+                    }
+                    (DataType::UInt8, DataType::UInt8) => {
+                        let left = as_dictionary_array::<UInt8Type>($LEFT);
+                        let right = as_dictionary_array::<UInt8Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt8Type)
+                    }
+                    (DataType::UInt16, DataType::UInt16) => {
+                        let left = as_dictionary_array::<UInt16Type>($LEFT);
+                        let right = as_dictionary_array::<UInt16Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt16Type)
+                    }
+                    (DataType::UInt32, DataType::UInt32) => {
+                        let left = as_dictionary_array::<UInt32Type>($LEFT);
+                        let right = as_dictionary_array::<UInt32Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt32Type)
+                    }
+                    (DataType::UInt64, DataType::UInt64) => {
+                        let left = as_dictionary_array::<UInt64Type>($LEFT);
+                        let right = as_dictionary_array::<UInt64Type>($RIGHT);
+                        typed_dict_cmp!(left, right, $OP, UInt64Type)
+                    }
+                    (t1, t2) if t1 == t2 => 
Err(ArrowError::NotYetImplemented(format!(
+                        "Comparing dictionary arrays of type {} is not yet 
implemented",
+                        t1
+                    ))),
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot compare two dictionary arrays of different key 
types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot compare dictionary array with non-dictionary array ({} 
and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Helper function to perform boolean lambda function on values from two 
dictionary arrays, this
+/// version does not attempt to use SIMD.
+macro_rules! compare_dict_op {
+    ($left: expr, $right:expr, $op:expr, $value_ty:ty) => {{
+        if $left.len() != $right.len() {
+            return Err(ArrowError::ComputeError(
+                "Cannot perform comparison operation on arrays of different 
length"
+                    .to_string(),
+            ));
+        }
+        let left_values = 
$left.values().as_any().downcast_ref::<$value_ty>().unwrap();
+        let right_values = $right
+            .values()
+            .as_any()
+            .downcast_ref::<$value_ty>()
+            .unwrap();
+
+        let result = $left
+            .keys()
+            .iter()
+            .zip($right.keys().iter())
+            .map(|(left_key, right_key)| {
+                if let (Some(left_k), Some(right_k)) = (left_key, right_key) {
+                    let left_key = left_k.to_usize().expect("Dictionary index 
not usize");
+                    let right_key =
+                        right_k.to_usize().expect("Dictionary index not 
usize");
+                    unsafe {

Review comment:
       👍 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to