friendlymatthew commented on code in PR #8838:
URL: https://github.com/apache/arrow-rs/pull/8838#discussion_r2524820204


##########
arrow-ord/src/ord.rs:
##########
@@ -296,6 +296,72 @@ fn compare_struct(
     Ok(f)
 }
 
+fn compare_union(
+    left: &dyn Array,
+    right: &dyn Array,
+    opts: SortOptions,
+) -> Result<DynComparator, ArrowError> {
+    let left = left.as_union();
+    let right = right.as_union();
+
+    let (left_fields, left_mode) = match left.data_type() {
+        DataType::Union(fields, mode) => (fields, mode),
+        _ => unreachable!(),
+    };
+    let (right_fields, right_mode) = match right.data_type() {
+        DataType::Union(fields, mode) => (fields, mode),
+        _ => unreachable!(),
+    };
+
+    if left_fields != right_fields || left_mode != right_mode {
+        return Err(ArrowError::InvalidArgumentError(
+            "Cannot compare UnionArrays with different fields or 
modes".to_string(),
+        ));
+    }
+
+    let c_opts = child_opts(opts);
+
+    let max_type_id = left_fields.iter().map(|(id, _)| id).max().unwrap_or(0);
+    let mut field_comparators: Vec<Option<DynComparator>> =
+        Vec::with_capacity((max_type_id + 1) as usize);
+    field_comparators.resize_with((max_type_id + 1) as usize, || None);

Review Comment:
   The comparison closure `f` uses a direct-indexed `Vec` in which the type id 
serves as the index, giving us O(1) lookups.
   
   Although this can produce a sparse vector when users assign 
non-contiguous type ids (worst case: 256 slots for 1 field), I chose it over 
a hash map or a linear scan.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to