manishkr commented on code in PR #9213:
URL: https://github.com/apache/arrow-rs/pull/9213#discussion_r2725344151
##########
arrow-data/src/equal/run.rs:
##########
@@ -16,71 +16,148 @@
// under the License.
use crate::data::ArrayData;
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::DataType;
+use num_traits::ToPrimitive;
use super::equal_range;
-/// The current implementation of comparison of run array support physical comparison.
-/// Comparing run encoded array based on logical indices (`lhs_start`, `rhs_start`) will
-/// be time consuming as converting from logical index to physical index cannot be done
-/// in constant time. The current comparison compares the underlying physical arrays.
+/// Returns true if the two `RunEndEncoded` arrays are equal.
+///
+/// This provides a specialized implementation of equality for REE arrays that
+/// handles differences in run-encoding by iterating through the logical range.
pub(super) fn run_equal(
lhs: &ArrayData,
rhs: &ArrayData,
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
- if lhs_start != 0
- || rhs_start != 0
- || (lhs.len() != len && rhs.len() != len)
- || lhs.offset() > 0
- || rhs.offset() > 0
- {
- unimplemented!("Logical comparison for run array not supported.")
+ let lhs_index_type = match lhs.data_type() {
+ DataType::RunEndEncoded(f, _) => f.data_type(),
+ _ => unreachable!(),
+ };
+
+ match lhs_index_type {
+ DataType::Int16 => run_equal_inner::<i16>(lhs, rhs, lhs_start, rhs_start, len),
+ DataType::Int32 => run_equal_inner::<i32>(lhs, rhs, lhs_start, rhs_start, len),
+ DataType::Int64 => run_equal_inner::<i64>(lhs, rhs, lhs_start, rhs_start, len),
+ _ => unreachable!(),
}
+}
+
+struct RunArrayData<'a, T: ArrowNativeType> {
+ run_ends: &'a [T],
+ values: &'a ArrayData,
+ abs_start: usize,
+}
- if lhs.len() != rhs.len() {
- return false;
+impl<'a, T: ArrowNativeType + ToPrimitive> RunArrayData<'a, T> {
+ fn new(data: &'a ArrayData, start: usize) -> Self {
+ debug_assert!(
+ data.child_data().len() >= 2,
+ "RunEndEncoded arrays are guaranteed to have at least 2 children [run_ends, values]"
Review Comment:
Changed. Thanks.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]