Jefffrey commented on code in PR #9213:
URL: https://github.com/apache/arrow-rs/pull/9213#discussion_r2711136422


##########
arrow-buffer/src/buffer/run.rs:
##########
@@ -199,9 +199,14 @@ where
     pub fn sliced_values(&self) -> impl Iterator<Item = E> + '_ {
         let offset = self.logical_offset;
         let len = self.logical_length;
-        let start = self.get_start_physical_index();
-        let end = self.get_end_physical_index();
-        self.run_ends[start..=end].iter().map(move |&val| {
+        let physical_slice = if len == 0 {

Review Comment:
   Thanks for this, I'll close my PR in favour of this (#9200)
   
   Could you add the tests I had in my PR though
   
   ```rust
   // arrow-array/src/array/run_array.rs
       #[test]
       fn test_run_array_empty() {
           let runs = new_empty_array(&DataType::Int16);
           let runs = runs.as_primitive::<Int16Type>();
           let values = new_empty_array(&DataType::Int64);
           let array = RunArray::try_new(runs, &values).unwrap();
   
           fn assertions(array: &RunArray<Int16Type>) {
               assert!(array.is_empty());
               assert_eq!(array.get_start_physical_index(), 0);
               assert_eq!(array.get_end_physical_index(), 0);
               
assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
               assert!(array.run_ends().is_empty());
               assert_eq!(array.run_ends().sliced_values().count(), 0);
           }
   
           assertions(&array);
           assertions(&array.slice(0, 0));
       }
   ```



##########
arrow-data/src/equal/run.rs:
##########
@@ -16,71 +16,122 @@
 // under the License.
 
 use crate::data::ArrayData;
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::DataType;
+use num_traits::ToPrimitive;
 
 use super::equal_range;
 
-/// The current implementation of comparison of run array support physical 
comparison.
-/// Comparing run encoded array based on logical indices (`lhs_start`, 
`rhs_start`) will
-/// be time consuming as converting from logical index to physical index 
cannot be done
-/// in constant time. The current comparison compares the underlying physical 
arrays.
+/// Returns true if the two `RunEndEncoded` arrays are equal.
+///
+/// This provides a specialized implementation of equality for REE arrays that
+/// handles differences in run-encoding by iterating through the logical range.
 pub(super) fn run_equal(
     lhs: &ArrayData,
     rhs: &ArrayData,
     lhs_start: usize,
     rhs_start: usize,
     len: usize,
 ) -> bool {
-    if lhs_start != 0
-        || rhs_start != 0
-        || (lhs.len() != len && rhs.len() != len)
-        || lhs.offset() > 0
-        || rhs.offset() > 0
-    {
-        unimplemented!("Logical comparison for run array not supported.")
+    let lhs_index_type = match lhs.data_type() {
+        DataType::RunEndEncoded(f, _) => f.data_type(),
+        _ => unreachable!(),
+    };
+
+    match lhs_index_type {
+        DataType::Int16 => run_equal_inner::<i16>(lhs, rhs, lhs_start, 
rhs_start, len),
+        DataType::Int32 => run_equal_inner::<i32>(lhs, rhs, lhs_start, 
rhs_start, len),
+        DataType::Int64 => run_equal_inner::<i64>(lhs, rhs, lhs_start, 
rhs_start, len),
+        _ => unreachable!(),
     }
+}
 
-    if lhs.len() != rhs.len() {
-        return false;
+fn run_equal_inner<T: ArrowNativeType + ToPrimitive>(
+    lhs: &ArrayData,
+    rhs: &ArrayData,
+    lhs_start: usize,
+    rhs_start: usize,
+    len: usize,
+) -> bool {
+    if len == 0 {
+        return true;
     }
+    // RunEndEncoded arrays are guaranteed to have at least 2 children 
[run_ends, values]
+    let lhs_run_ends_data = &lhs.child_data()[0];

Review Comment:
   There is a lot of local variables in play in this function. Can we refactor 
to:
   
   - Try eliminate local variables where possible (e.g. introduce small 
functions or blocks to clearly delineate where local variables are useful for)
   - Look into using a struct to collate the field for left vs right (e.g a 
struct holding `values`, `run_ends`, etc. by reference) to make it easier to 
read the left vs right



##########
arrow-array/src/array/run_array.rs:
##########
@@ -141,6 +141,9 @@ impl<R: RunEndIndexType> RunArray<R> {
     ///
     /// [`values`]: Self::values
     pub fn values_slice(&self) -> ArrayRef {
+        if self.len() == 0 {

Review Comment:
   Could we add a test case for this



##########
arrow-array/src/array/run_array.rs:
##########
@@ -1173,4 +1176,56 @@ mod tests {
         let values_slice2 = values_slice2.as_primitive::<Int32Type>();
         assert_eq!(values_slice2.values(), &[1]);
     }
+
+    #[test]
+    fn test_run_array_eq_diff_physical_same_logical() {

Review Comment:
   Could we also add some cases checking empty run arrays



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to