jayzhan211 commented on code in PR #5217:
URL: https://github.com/apache/arrow-rs/pull/5217#discussion_r1430204495
##########
arrow-ord/src/cmp.rs:
##########
@@ -268,23 +240,122 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum)
-> Result<BooleanArray,
},
false => match op {
Op::Distinct => {
- let values = values();
+ let values = compare_op_values(op, l_array, l_s,
r_array, r_s, len)?;
let l = nulls.inner().bit_chunks().iter_padded();
let ne = values.bit_chunks().iter_padded();
let c = |(l, n)| u64::not(l) | n;
let buffer = l.zip(ne).map(c).collect();
BooleanBuffer::new(buffer, 0, len).into()
}
- Op::NotDistinct => (nulls.inner() & &values()).into(),
- _ => BooleanArray::new(values(), Some(nulls)),
+ Op::NotDistinct => (nulls.inner()
+ & &compare_op_values(op, l_array, l_s, r_array, r_s,
len)?)
+ .into(),
+ _ => BooleanArray::new(
+ compare_op_values(op, l_array, l_s, r_array, r_s,
len)?,
+ Some(nulls),
+ ),
},
}
}
// Neither side is nullable
- (None, _, None, _) => BooleanArray::new(values(), None),
+ (None, _, None, _) => BooleanArray::new(
+ compare_op_values(op, l_array, l_s, r_array, r_s, len)?,
+ None,
+ ),
})
}
+/// Defer computation as may not be necessary
+/// get the BooleanBuffer result of the comparison
+fn compare_op_values(
+ op: Op,
+ l: &dyn Array,
+ l_s: bool,
+ r: &dyn Array,
+ r_s: bool,
+ len: usize,
+) -> Result<BooleanBuffer, ArrowError> {
+ use arrow_schema::DataType::*;
+ let l_v = l.as_any_dictionary_opt();
+ let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
+ let l_t = l.data_type();
+
+ let r_v = r.as_any_dictionary_opt();
+ let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
+ let r_t = r.data_type();
+
+ if l_t.is_nested() {
+ if !l_t.equals_datatype(r_t) {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid comparison operation: {l_t} {op} {r_t}"
+ )));
+ }
+ match (l_t, op) {
+ (Struct(_), Op::Equal | Op::NotEqual) => {}
+ _ => {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid comparison operation: {l_t} {op} {r_t}"
+ )));
+ }
+ }
+ } else if r_t != l_t {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid comparison operation: {l_t} {op} {r_t}"
+ )));
+ }
+ let d = downcast_primitive_array! {
+ (l, r) => apply(op, l.values().as_ref(), l_s, l_v,
r.values().as_ref(), r_s, r_v),
+ (Boolean, Boolean) => apply(op, l.as_boolean(), l_s, l_v,
r.as_boolean(), r_s, r_v),
+ (Utf8, Utf8) => apply(op, l.as_string::<i32>(), l_s, l_v,
r.as_string::<i32>(), r_s, r_v),
+ (LargeUtf8, LargeUtf8) => apply(op, l.as_string::<i64>(), l_s, l_v,
r.as_string::<i64>(), r_s, r_v),
+ (Binary, Binary) => apply(op, l.as_binary::<i32>(), l_s, l_v,
r.as_binary::<i32>(), r_s, r_v),
+ (LargeBinary, LargeBinary) => apply(op, l.as_binary::<i64>(), l_s,
l_v, r.as_binary::<i64>(), r_s, r_v),
+ (FixedSizeBinary(_), FixedSizeBinary(_)) => apply(op,
l.as_fixed_size_binary(), l_s, l_v, r.as_fixed_size_binary(), r_s, r_v),
+ (Null, Null) => None,
+ (Struct(_), Struct(_)) => Some(compare_op_struct_values(op, l, l_s, r,
r_s, len)?),
+ _ => unreachable!(),
+ };
+ Ok(d.unwrap_or_else(|| BooleanBuffer::new_unset(len)))
+}
+
+/// recursively compare fields of struct arrays
+fn compare_op_struct_values(
+ op: Op,
+ l: &dyn Array,
+ l_s: bool,
+ r: &dyn Array,
+ r_s: bool,
+ len: usize,
+) -> Result<BooleanBuffer, ArrowError> {
+ // when one of field is equal, the result is false for not equal
+ // so we use neg to reverse the result of equal when handle not equal
+ let neg = match op {
+ Op::Equal => false,
+ Op::NotEqual => true,
+ _ => unreachable!(),
+ };
+
+ let l = l.as_any().downcast_ref::<StructArray>().unwrap();
Review Comment:
nit: `l.as_struct()` can probably be used here instead of the manual `l.as_any().downcast_ref::<StructArray>().unwrap()`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]