tustvold commented on code in PR #7798:
URL: https://github.com/apache/arrow-datafusion/pull/7798#discussion_r1358375435
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -432,4 +501,119 @@ mod tests {
b.advance();
assert_eq!(a.cmp(&b), Ordering::Less);
}
+
+ #[test]
+ fn test_slice_primitive() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let mut cursor = new_primitive(options, buffer, 0);
+
+ // from start
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice from start"
+ );
+
+ // with offset
+ let sliced = cursor.slice(1, 2);
+ assert_eq!(sliced.num_rows(), 2);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![1]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice with offset"
+ );
+
+ // cursor current position != start
+ cursor.advance();
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should ignore current cursor position when sliced"
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "cursor slice out of bounds")]
+ fn test_slice_panic_can_panic() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let cursor = new_primitive(options, buffer, 0);
+
+ cursor.slice(42, 1);
+ }
+
+ #[test]
+ fn test_slice_nulls_first() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![2, i32::MIN, 2]);
+ let mut a = new_primitive(options, buffer, 1);
+ let buffer = ScalarBuffer::from(vec![3, 2, i32::MIN, 2]);
+ let mut b = new_primitive(options, buffer, 1);
+
+ // NULL == NULL
+ assert_eq!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Equal);
+
+ // 2 > NULL
Review Comment:
```suggestion
// i32::MIN > NULL
```
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -275,6 +324,26 @@ impl<T: FieldValues> Cursor for FieldCursor<T> {
self.offset += 1;
t
}
+
+ fn slice(&self, offset: usize, length: usize) -> Self {
+ let FieldCursor {
+ values,
+ offset: _,
+ null_threshold,
+ options,
+ } = self;
+
+ Self {
+ values: values.slice(offset, length),
+ offset: 0,
+ null_threshold: null_threshold.checked_sub(offset).unwrap_or(0),
Review Comment:
```suggestion
null_threshold: null_threshold.saturating_sub(offset),
```
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -432,4 +501,119 @@ mod tests {
b.advance();
assert_eq!(a.cmp(&b), Ordering::Less);
}
+
+ #[test]
+ fn test_slice_primitive() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let mut cursor = new_primitive(options, buffer, 0);
+
+ // from start
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice from start"
+ );
+
+ // with offset
+ let sliced = cursor.slice(1, 2);
+ assert_eq!(sliced.num_rows(), 2);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![1]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice with offset"
+ );
+
+ // cursor current position != start
+ cursor.advance();
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should ignore current cursor position when sliced"
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "cursor slice out of bounds")]
+ fn test_slice_panic_can_panic() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let cursor = new_primitive(options, buffer, 0);
+
+ cursor.slice(42, 1);
+ }
+
+ #[test]
+ fn test_slice_nulls_first() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![2, i32::MIN, 2]);
+ let mut a = new_primitive(options, buffer, 1);
+ let buffer = ScalarBuffer::from(vec![3, 2, i32::MIN, 2]);
+ let mut b = new_primitive(options, buffer, 1);
+
+ // NULL == NULL
+ assert_eq!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Equal);
+
+ // 2 > NULL
+ a = a.slice(1, 1);
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Greater);
+
+ // 2 < 3
+ b = b.slice(3, 1);
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Less);
+ }
+
+ #[test]
+ fn test_slice_no_nulls_first() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: false,
+ };
+
+ let buffer = ScalarBuffer::from(vec![2, i32::MIN, 2]);
+ let mut a = new_primitive(options, buffer, 1);
+ let buffer = ScalarBuffer::from(vec![3, 2, i32::MIN, 2]);
+ let mut b = new_primitive(options, buffer, 1);
+
+ // 2 < 3
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Less);
+
+ // 2 == 2
+ b = b.slice(1, 3);
+ assert_eq!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Equal);
+
+ // NULL < 2
Review Comment:
The input arrays are
```
a: [2, i32::MIN, NULL]
b: [3, 2, i32::MIN, NULL]
```
We are comparing `a[1]` with `b[1]`, i.e. `i32::MIN` with `2`
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -432,4 +501,119 @@ mod tests {
b.advance();
assert_eq!(a.cmp(&b), Ordering::Less);
}
+
+ #[test]
+ fn test_slice_primitive() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let mut cursor = new_primitive(options, buffer, 0);
+
+ // from start
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice from start"
+ );
+
+ // with offset
+ let sliced = cursor.slice(1, 2);
+ assert_eq!(sliced.num_rows(), 2);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![1]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice with offset"
+ );
+
+ // cursor current position != start
+ cursor.advance();
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should ignore current cursor position when sliced"
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "cursor slice out of bounds")]
+ fn test_slice_panic_can_panic() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let cursor = new_primitive(options, buffer, 0);
+
+ cursor.slice(42, 1);
+ }
+
+ #[test]
+ fn test_slice_nulls_first() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![2, i32::MIN, 2]);
+ let mut a = new_primitive(options, buffer, 1);
+ let buffer = ScalarBuffer::from(vec![3, 2, i32::MIN, 2]);
+ let mut b = new_primitive(options, buffer, 1);
+
+ // NULL == NULL
+ assert_eq!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Equal);
+
+ // 2 > NULL
+ a = a.slice(1, 1);
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Greater);
+
+ // 2 < 3
Review Comment:
```suggestion
// i32::MIN < 2
```
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -432,4 +501,119 @@ mod tests {
b.advance();
assert_eq!(a.cmp(&b), Ordering::Less);
}
+
+ #[test]
+ fn test_slice_primitive() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let mut cursor = new_primitive(options, buffer, 0);
+
+ // from start
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice from start"
+ );
+
+ // with offset
+ let sliced = cursor.slice(1, 2);
+ assert_eq!(sliced.num_rows(), 2);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![1]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice with offset"
+ );
+
+ // cursor current position != start
+ cursor.advance();
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should ignore current cursor position when sliced"
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "cursor slice out of bounds")]
+ fn test_slice_panic_can_panic() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let cursor = new_primitive(options, buffer, 0);
+
+ cursor.slice(42, 1);
+ }
+
+ #[test]
+ fn test_slice_nulls_first() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![2, i32::MIN, 2]);
+ let mut a = new_primitive(options, buffer, 1);
+ let buffer = ScalarBuffer::from(vec![3, 2, i32::MIN, 2]);
+ let mut b = new_primitive(options, buffer, 1);
+
+ // NULL == NULL
+ assert_eq!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Equal);
+
+ // 2 > NULL
+ a = a.slice(1, 1);
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Greater);
+
+ // 2 < 3
+ b = b.slice(3, 1);
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Less);
+ }
+
+ #[test]
+ fn test_slice_no_nulls_first() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: false,
+ };
+
+ let buffer = ScalarBuffer::from(vec![2, i32::MIN, 2]);
+ let mut a = new_primitive(options, buffer, 1);
+ let buffer = ScalarBuffer::from(vec![3, 2, i32::MIN, 2]);
+ let mut b = new_primitive(options, buffer, 1);
+
+ // 2 < 3
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Less);
+
+ // 2 == 2
+ b = b.slice(1, 3);
+ assert_eq!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Equal);
+
+ // NULL < 2
+ a = a.slice(1, 2);
+ assert_ne!(a, b);
+ assert_eq!(a.cmp(&b), Ordering::Less);
+
+ // NULL == NULL
Review Comment:
Similarly to above this is comparing `a[1]` with `b[2]`, i.e. `i32::MIN`
with `i32::MIN`
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -275,6 +324,26 @@ impl<T: FieldValues> Cursor for FieldCursor<T> {
self.offset += 1;
t
}
+
+ fn slice(&self, offset: usize, length: usize) -> Self {
+ let FieldCursor {
+ values,
+ offset: _,
+ null_threshold,
+ options,
+ } = self;
+
+ Self {
+ values: values.slice(offset, length),
+ offset: 0,
+ null_threshold: null_threshold.checked_sub(offset).unwrap_or(0),
Review Comment:
I would expect this logic to depend on the null ordering. In particular I
would expect if nulls are first, to decrement by offset, and otherwise by
`self.len - offset - length` or something...
##########
datafusion/physical-plan/src/sorts/cursor.rs:
##########
@@ -432,4 +501,119 @@ mod tests {
b.advance();
assert_eq!(a.cmp(&b), Ordering::Less);
}
+
+ #[test]
+ fn test_slice_primitive() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let mut cursor = new_primitive(options, buffer, 0);
+
+ // from start
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice from start"
+ );
+
+ // with offset
+ let sliced = cursor.slice(1, 2);
+ assert_eq!(sliced.num_rows(), 2);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![1]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should slice with offset"
+ );
+
+ // cursor current position != start
+ cursor.advance();
+ let sliced = cursor.slice(0, 1);
+ assert_eq!(sliced.num_rows(), 1);
+ let expected = new_primitive(options, ScalarBuffer::from(vec![0]), 0);
+ assert_eq!(
+ sliced.cmp(&expected),
+ Ordering::Equal,
+ "should ignore current cursor position when sliced"
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "cursor slice out of bounds")]
+ fn test_slice_panic_can_panic() {
+ let options = SortOptions {
+ descending: false,
+ nulls_first: true,
+ };
+
+ let buffer = ScalarBuffer::from(vec![0, 1, 2]);
+ let cursor = new_primitive(options, buffer, 0);
+
+ cursor.slice(42, 1);
+ }
+
Review Comment:
These tests are good, but I think it would be better if we got some tests
with multiple nulls as well, to check that the nulls are being decremented
appropriately, and not just floored to 0 :smile:
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]