alamb commented on code in PR #9621:
URL: https://github.com/apache/arrow-rs/pull/9621#discussion_r3093328715
##########
arrow-ord/src/cmp.rs:
##########
@@ -257,18 +263,29 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum)
-> Result<BooleanArray,
)));
}
+ let l_side = SideInfo {
Review Comment:
if we are going to introduce a struct like this, perhaps as a follow on PR
we could refactor all the argument to `apply` into `SideInfo` and then make
`apply` a function of SideInfo
##########
arrow-ord/src/cmp.rs:
##########
@@ -387,16 +430,87 @@ fn apply<T: ArrayOrd>(
Op::GreaterEqual => apply_op(l, l_s, r, r_s, true, T::is_lt),
};
- // If a side had a dictionary, and was not scalar, we need to
materialize this
- Some(match (l_v, r_v) {
- (Some(l_v), _) if l_s.is_none() => take_bits(l_v, buffer),
- (_, Some(r_v)) if r_s.is_none() => take_bits(r_v, buffer),
- _ => buffer,
+ // Expand the physical-length result back to logical length.
+ // Find the non-scalar side that needs expansion (at most one).
+ let side = if l_s.is_none() { l_info } else { r_info };
Review Comment:
I find the new `l_info.is_scalar` much easier to read then `l_s` 👍
##########
arrow-ord/src/cmp.rs:
##########
@@ -340,29 +357,51 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum)
-> Result<BooleanArray,
})
}
+/// Per-side metadata for a comparison operand.
+struct SideInfo<'a> {
+ is_scalar: bool,
+ dict: Option<&'a dyn AnyDictionaryArray>,
+ ree: Option<&'a ReeInfo<'a>>,
+}
+
+impl SideInfo<'_> {
+ fn has_indirection(&self) -> bool {
+ self.dict.is_some() || self.ree.is_some()
+ }
+}
+
/// Perform a potentially vectored `op` on the provided `ArrayOrd`
fn apply<T: ArrayOrd>(
op: Op,
l: T,
- l_s: bool,
- l_v: Option<&dyn AnyDictionaryArray>,
+ l_info: &SideInfo,
r: T,
- r_s: bool,
- r_v: Option<&dyn AnyDictionaryArray>,
+ r_info: &SideInfo,
) -> Option<BooleanBuffer> {
if l.len() == 0 || r.len() == 0 {
return None; // Handle empty dictionaries
}
- if !l_s && !r_s && (l_v.is_some() || r_v.is_some()) {
- // Not scalar and at least one side has a dictionary, need to perform
vectored comparison
- let l_v = l_v
- .map(|x| x.normalized_keys())
- .unwrap_or_else(|| (0..l.len()).collect());
+ if !l_info.is_scalar
+ && !r_info.is_scalar
+ && (l_info.has_indirection() || r_info.has_indirection())
+ {
+ // Both non-scalar with indirection. Pure REE-vs-REE uses segment-based
+ // bulk comparison; other combinations fall back to index vectors.
+ if let (Some(li), None, Some(ri), None) = (l_info.ree, l_info.dict,
r_info.ree, r_info.dict)
Review Comment:
Can we please refactor this into its own function as it is REE specific and
I think makes it harder t read cmp
##########
arrow-ord/src/cmp.rs:
##########
@@ -493,6 +607,49 @@ fn apply_op_vectored<T: ArrayOrd>(
})
}
+/// Compare two REE arrays by walking both run_ends simultaneously, comparing
+/// once per aligned segment and bulk-filling the result.
+fn apply_op_segments<T: ArrayOrd>(
Review Comment:
Can we please name this something with `ree` in the name so it is clearer it
is ree specific?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]