tustvold commented on code in PR #7855:
URL: https://github.com/apache/arrow-datafusion/pull/7855#discussion_r1365960653


##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -20,124 +20,118 @@ use std::cmp::Ordering;
 use arrow::buffer::ScalarBuffer;
 use arrow::compute::SortOptions;
 use arrow::datatypes::ArrowNativeTypeOp;
-use arrow::row::{Row, Rows};
+use arrow::row::Rows;
 use arrow_array::types::ByteArrayType;
-use arrow_array::{Array, ArrowPrimitiveType, GenericByteArray, PrimitiveArray};
+use arrow_array::{
+    Array, ArrowPrimitiveType, GenericByteArray, OffsetSizeTrait, 
PrimitiveArray,
+};
+use arrow_buffer::{Buffer, OffsetBuffer};
 use datafusion_execution::memory_pool::MemoryReservation;
 
-/// A [`Cursor`] for [`Rows`]
-pub struct RowCursor {
-    cur_row: usize,
-    num_rows: usize,
+/// A comparable collection of values for use with [`Cursor`]
+pub trait CursorValues {
+    fn len(&self) -> usize;
 
-    rows: Rows,
+    fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool;
 
-    /// Tracks for the memory used by in the `Rows` of this
-    /// cursor. Freed on drop
-    #[allow(dead_code)]
-    reservation: MemoryReservation,
+    fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering;
 }
 
-impl std::fmt::Debug for RowCursor {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        f.debug_struct("SortKeyCursor")
-            .field("cur_row", &self.cur_row)
-            .field("num_rows", &self.num_rows)
-            .finish()
-    }
+/// A comparable cursor, used by sort operations
+#[derive(Debug)]
+pub struct Cursor<T: CursorValues> {
+    offset: usize,
+    values: T,
 }
 
-impl RowCursor {
-    /// Create a new SortKeyCursor from `rows` and a `reservation`
-    /// that tracks its memory. There must be at least one row
-    ///
-    /// Panics if the reservation is not for exactly `rows.size()`
-    /// bytes or if `rows` is empty.
-    pub fn new(rows: Rows, reservation: MemoryReservation) -> Self {
-        assert_eq!(
-            rows.size(),
-            reservation.size(),
-            "memory reservation mismatch"
-        );
-        assert!(rows.num_rows() > 0);
-        Self {
-            cur_row: 0,
-            num_rows: rows.num_rows(),
-            rows,
-            reservation,
-        }
+impl<T: CursorValues> Cursor<T> {
+    /// Create a [`Cursor`] from the given [`CursorValues`]
+    pub fn new(values: T) -> Self {
+        Self { offset: 0, values }
     }
 
-    /// Returns the current row
-    fn current(&self) -> Row<'_> {
-        self.rows.row(self.cur_row)
+    /// Returns true if there are no more rows in this cursor
+    pub fn is_finished(&self) -> bool {
+        self.offset == self.values.len()
+    }
+
+    /// Advance the cursor, returning the previous row index
+    pub fn advance(&mut self) -> usize {
+        let t = self.offset;
+        self.offset += 1;
+        t
     }
 }
 
-impl PartialEq for RowCursor {
+impl<T: CursorValues> PartialEq for Cursor<T> {
     fn eq(&self, other: &Self) -> bool {
-        self.current() == other.current()
+        T::eq(&self.values, self.offset, &other.values, other.offset)
     }
 }
 
-impl Eq for RowCursor {}
+impl<T: CursorValues> Eq for Cursor<T> {}
 
-impl PartialOrd for RowCursor {
+impl<T: CursorValues> PartialOrd for Cursor<T> {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }
 
-impl Ord for RowCursor {
+impl<T: CursorValues> Ord for Cursor<T> {
     fn cmp(&self, other: &Self) -> Ordering {
-        self.current().cmp(&other.current())
+        T::compare(&self.values, self.offset, &other.values, other.offset)
     }
 }
 
-/// A cursor into a sorted batch of rows.
-///
-/// Each cursor must have at least one row so `advance` can be called at least
-/// once prior to calling `is_finished`.
-pub trait Cursor: Ord {
-    /// Returns true if there are no more rows in this cursor
-    fn is_finished(&self) -> bool;
+/// Implements [`CursorValues`] for [`Rows`]
+#[derive(Debug)]
+pub struct RowValues {
+    rows: Rows,
 
-    /// Advance the cursor, returning the previous row index
-    fn advance(&mut self) -> usize;
+    /// Tracks for the memory used by in the `Rows` of this
+    /// cursor. Freed on drop
+    #[allow(dead_code)]
+    reservation: MemoryReservation,
+}
+
+impl RowValues {
+    /// Create a new [`RowValues`] from `rows` and a `reservation`
+    /// that tracks its memory. There must be at least one row
+    ///
+    /// Panics if the reservation is not for exactly `rows.size()`
+    /// bytes or if `rows` is empty.
+    pub fn new(rows: Rows, reservation: MemoryReservation) -> Self {
+        assert_eq!(
+            rows.size(),
+            reservation.size(),
+            "memory reservation mismatch"
+        );
+        assert!(rows.num_rows() > 0);
+        Self { rows, reservation }
+    }
 }
 
-impl Cursor for RowCursor {
-    #[inline]
-    fn is_finished(&self) -> bool {
-        self.num_rows == self.cur_row
+impl CursorValues for RowValues {
+    fn len(&self) -> usize {
+        self.rows.num_rows()
     }
 
-    #[inline]
-    fn advance(&mut self) -> usize {
-        let t = self.cur_row;
-        self.cur_row += 1;
-        t
+    fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool {
+        l.rows.row(l_idx) == r.rows.row(r_idx)
+    }
+
+    fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering {
+        l.rows.row(l_idx).cmp(&r.rows.row(r_idx))
     }
 }
 
-/// An [`Array`] that can be converted into [`FieldValues`]
+/// An [`Array`] that can be converted into [`CursorValues`]
 pub trait FieldArray: Array + 'static {
-    type Values: FieldValues;
+    type Values: CursorValues;
 
     fn values(&self) -> Self::Values;

Review Comment:
   Member functions naturally take precedence over trait methods, so should 
ambiguity arise, it will necessitate the explicit form 
`CursorArray::values(...)` and so I don't think this is an issue



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to