alamb commented on code in PR #8728:
URL: https://github.com/apache/arrow-rs/pull/8728#discussion_r2469908243
##########
arrow-array/src/iterator.rs:
##########
@@ -98,8 +98,8 @@ impl<T: ArrayAccessor> Iterator for ArrayIter<T> {
fn size_hint(&self) -> (usize, Option<usize>) {
(
- self.array.len() - self.current,
- Some(self.array.len() - self.current),
+ self.current_end - self.current,
Review Comment:
this is the actual code fix, right?
##########
arrow-array/src/iterator.rs:
##########
@@ -264,4 +269,924 @@ mod tests {
// check if ExactSizeIterator is implemented
let _ = array.iter().rposition(|opt_b| opt_b == Some(true));
}
+
+ trait SharedBetweenArrayIterAndSliceIter:
+ ExactSizeIterator<Item = Option<i32>> + DoubleEndedIterator<Item =
Option<i32>> + Clone
+ {
+ }
+ impl<T: Clone + ExactSizeIterator<Item = Option<i32>> +
DoubleEndedIterator<Item = Option<i32>>>
+ SharedBetweenArrayIterAndSliceIter for T
+ {
+ }
+
+ fn get_int32_iterator_cases() -> impl Iterator<Item = (Int32Array,
Vec<Option<i32>>)> {
+ let mut rng = StdRng::seed_from_u64(42);
+
+ let no_nulls_and_no_duplicates =
(0..10).map(Some).collect::<Vec<Option<i32>>>();
+ let no_nulls_random_values = (0..10)
+ .map(|_| rng.random::<i32>())
+ .map(Some)
+ .collect::<Vec<Option<i32>>>();
+
+ let all_nulls = (0..10).map(|_| None).collect::<Vec<Option<i32>>>();
+ let only_start_nulls = (0..10)
+ .map(|item| if item < 4 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let only_end_nulls = (0..10)
+ .map(|item| if item > 8 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let only_middle_nulls = (0..10)
+ .map(|item| {
+ if (4..=8).contains(&item) && rng.random_bool(0.9) {
+ None
+ } else {
+ Some(item)
+ }
+ })
+ .collect::<Vec<Option<i32>>>();
+ let random_values_with_random_nulls = (0..10)
+ .map(|_| {
+ if rng.random_bool(0.3) {
+ None
+ } else {
+ Some(rng.random::<i32>())
+ }
+ })
+ .collect::<Vec<Option<i32>>>();
+
+ let no_nulls_and_some_duplicates = (0..10)
+ .map(|item| item % 3)
+ .map(Some)
+ .collect::<Vec<Option<i32>>>();
+ let no_nulls_and_all_same_value =
+ (0..10).map(|_| 9).map(Some).collect::<Vec<Option<i32>>>();
+ let no_nulls_and_continues_duplicates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 3]
+ .map(Some)
+ .into_iter()
+ .collect::<Vec<Option<i32>>>();
+
+ let single_null_and_no_duplicates = (0..10)
+ .map(|item| if item == 4 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let multiple_nulls_and_no_duplicates = (0..10)
+ .map(|item| if item % 3 == 2 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let continues_nulls_and_no_duplicates = [
+ Some(0),
+ Some(1),
+ None,
+ None,
+ Some(2),
+ Some(3),
+ None,
+ Some(4),
+ Some(5),
+ None,
+ ]
+ .into_iter()
+ .collect::<Vec<Option<i32>>>();
+
+ [
+ no_nulls_and_no_duplicates,
+ no_nulls_random_values,
+ no_nulls_and_some_duplicates,
+ no_nulls_and_all_same_value,
+ no_nulls_and_continues_duplicates,
+ all_nulls,
+ only_start_nulls,
+ only_end_nulls,
+ only_middle_nulls,
+ random_values_with_random_nulls,
+ single_null_and_no_duplicates,
+ multiple_nulls_and_no_duplicates,
+ continues_nulls_and_no_duplicates,
+ ]
+ .map(|case| (Int32Array::from(case.clone()), case))
+ .into_iter()
+ }
+
+ trait SetupIter {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I);
+ }
+
+ struct NoSetup;
+ impl SetupIter for NoSetup {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, _iter: &mut I) {
+ // none
+ }
+ }
+
+ fn setup_and_assert_cases(
+ setup_iterator: impl SetupIter,
+ assert_fn: impl Fn(ArrayIter<&Int32Array>, Copied<Iter<Option<i32>>>),
+ ) {
+ for (array, source) in get_int32_iterator_cases() {
+ let mut actual = ArrayIter::new(&array);
+ let mut expected = source.iter().copied();
+
+ setup_iterator.setup(&mut actual);
+ setup_iterator.setup(&mut expected);
+
+ assert_fn(actual, expected);
+ }
+ }
+
+ /// Trait representing an operation on a [`ArrayIter`]
+ /// that can be compared against a slice iterator
+ ///
+ /// this is for consuming operations (e.g. `count`, `last`, etc)
+ trait ConsumingArrayIteratorOp {
+ /// What the operation returns (e.g. Option<i32> for last, usize for
count, etc)
+ type Output: PartialEq + Debug;
+
+ /// The name of the operation, used for error messages
+ fn name(&self) -> String;
+
+ /// Get the value of the operation for the provided iterator
+ /// This will be either a [`ArrayIter`] or a slice iterator to make
sure they produce the same result
+ fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) ->
Self::Output;
+ }
+
+ /// Trait representing an operation on a [`ArrayIter`]
+ /// that can be compared against a slice iterator.
+ ///
+ /// This is for mutating operations (e.g. `position`, `any`, `find`, etc)
+ trait MutatingArrayIteratorOp {
+ /// What the operation returns (e.g. Option<i32> for last, usize for
count, etc)
+ type Output: PartialEq + Debug;
+
+ /// The name of the operation, used for error messages
+ fn name(&self) -> String;
+
+ /// Get the value of the operation for the provided iterator
+ /// This will be either a [`ArrayIter`] or a slice iterator to make
sure they produce the same result
+ fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
T) -> Self::Output;
+ }
+
+ /// Helper function that will assert that the provided operation
+ /// produces the same result for both [`ArrayIter`] and slice iterator
+ /// under various consumption patterns (e.g. some calls to
next/next_back/consume_all/etc)
+ fn assert_array_iterator_cases<O: ConsumingArrayIteratorOp>(o: O) {
+ setup_and_assert_cases(NoSetup, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
Review Comment:
This assert code that gets the iterator values and compares them is repeated
many times in these tests and makes it hard to follow what they are doing --
can you please find some way to reduce the duplication ?
##########
arrow-array/src/iterator.rs:
##########
@@ -264,4 +269,924 @@ mod tests {
// check if ExactSizeIterator is implemented
let _ = array.iter().rposition(|opt_b| opt_b == Some(true));
}
+
+ trait SharedBetweenArrayIterAndSliceIter:
+ ExactSizeIterator<Item = Option<i32>> + DoubleEndedIterator<Item =
Option<i32>> + Clone
+ {
+ }
+ impl<T: Clone + ExactSizeIterator<Item = Option<i32>> +
DoubleEndedIterator<Item = Option<i32>>>
+ SharedBetweenArrayIterAndSliceIter for T
+ {
+ }
+
+ fn get_int32_iterator_cases() -> impl Iterator<Item = (Int32Array,
Vec<Option<i32>>)> {
+ let mut rng = StdRng::seed_from_u64(42);
+
+ let no_nulls_and_no_duplicates =
(0..10).map(Some).collect::<Vec<Option<i32>>>();
+ let no_nulls_random_values = (0..10)
+ .map(|_| rng.random::<i32>())
+ .map(Some)
+ .collect::<Vec<Option<i32>>>();
+
+ let all_nulls = (0..10).map(|_| None).collect::<Vec<Option<i32>>>();
+ let only_start_nulls = (0..10)
+ .map(|item| if item < 4 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let only_end_nulls = (0..10)
+ .map(|item| if item > 8 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let only_middle_nulls = (0..10)
+ .map(|item| {
+ if (4..=8).contains(&item) && rng.random_bool(0.9) {
+ None
+ } else {
+ Some(item)
+ }
+ })
+ .collect::<Vec<Option<i32>>>();
+ let random_values_with_random_nulls = (0..10)
+ .map(|_| {
+ if rng.random_bool(0.3) {
+ None
+ } else {
+ Some(rng.random::<i32>())
+ }
+ })
+ .collect::<Vec<Option<i32>>>();
+
+ let no_nulls_and_some_duplicates = (0..10)
+ .map(|item| item % 3)
+ .map(Some)
+ .collect::<Vec<Option<i32>>>();
+ let no_nulls_and_all_same_value =
+ (0..10).map(|_| 9).map(Some).collect::<Vec<Option<i32>>>();
+ let no_nulls_and_continues_duplicates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 3]
+ .map(Some)
+ .into_iter()
+ .collect::<Vec<Option<i32>>>();
+
+ let single_null_and_no_duplicates = (0..10)
+ .map(|item| if item == 4 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let multiple_nulls_and_no_duplicates = (0..10)
+ .map(|item| if item % 3 == 2 { None } else { Some(item) })
+ .collect::<Vec<Option<i32>>>();
+ let continues_nulls_and_no_duplicates = [
+ Some(0),
+ Some(1),
+ None,
+ None,
+ Some(2),
+ Some(3),
+ None,
+ Some(4),
+ Some(5),
+ None,
+ ]
+ .into_iter()
+ .collect::<Vec<Option<i32>>>();
+
+ [
+ no_nulls_and_no_duplicates,
+ no_nulls_random_values,
+ no_nulls_and_some_duplicates,
+ no_nulls_and_all_same_value,
+ no_nulls_and_continues_duplicates,
+ all_nulls,
+ only_start_nulls,
+ only_end_nulls,
+ only_middle_nulls,
+ random_values_with_random_nulls,
+ single_null_and_no_duplicates,
+ multiple_nulls_and_no_duplicates,
+ continues_nulls_and_no_duplicates,
+ ]
+ .map(|case| (Int32Array::from(case.clone()), case))
+ .into_iter()
+ }
+
+ trait SetupIter {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I);
+ }
+
+ struct NoSetup;
+ impl SetupIter for NoSetup {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, _iter: &mut I) {
+ // none
+ }
+ }
+
+ fn setup_and_assert_cases(
+ setup_iterator: impl SetupIter,
+ assert_fn: impl Fn(ArrayIter<&Int32Array>, Copied<Iter<Option<i32>>>),
+ ) {
+ for (array, source) in get_int32_iterator_cases() {
+ let mut actual = ArrayIter::new(&array);
+ let mut expected = source.iter().copied();
+
+ setup_iterator.setup(&mut actual);
+ setup_iterator.setup(&mut expected);
+
+ assert_fn(actual, expected);
+ }
+ }
+
+ /// Trait representing an operation on a [`ArrayIter`]
+ /// that can be compared against a slice iterator
+ ///
+ /// this is for consuming operations (e.g. `count`, `last`, etc)
+ trait ConsumingArrayIteratorOp {
+ /// What the operation returns (e.g. Option<i32> for last, usize for
count, etc)
+ type Output: PartialEq + Debug;
+
+ /// The name of the operation, used for error messages
+ fn name(&self) -> String;
+
+ /// Get the value of the operation for the provided iterator
+ /// This will be either a [`ArrayIter`] or a slice iterator to make
sure they produce the same result
+ fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) ->
Self::Output;
+ }
+
+ /// Trait representing an operation on a [`ArrayIter`]
+ /// that can be compared against a slice iterator.
+ ///
+ /// This is for mutating operations (e.g. `position`, `any`, `find`, etc)
+ trait MutatingArrayIteratorOp {
+ /// What the operation returns (e.g. Option<i32> for last, usize for
count, etc)
+ type Output: PartialEq + Debug;
+
+ /// The name of the operation, used for error messages
+ fn name(&self) -> String;
+
+ /// Get the value of the operation for the provided iterator
+ /// This will be either a [`ArrayIter`] or a slice iterator to make
sure they produce the same result
+ fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
T) -> Self::Output;
+ }
+
+ /// Helper function that will assert that the provided operation
+ /// produces the same result for both [`ArrayIter`] and slice iterator
+ /// under various consumption patterns (e.g. some calls to
next/next_back/consume_all/etc)
+ fn assert_array_iterator_cases<O: ConsumingArrayIteratorOp>(o: O) {
+ setup_and_assert_cases(NoSetup, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter (left actual, right expected)
({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct Next;
+ impl SetupIter for Next {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ iter.next();
+ }
+ }
+ setup_and_assert_cases(Next, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming 1 element from
the start (left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextBack;
+ impl SetupIter for NextBack {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ iter.next_back();
+ }
+ }
+
+ setup_and_assert_cases(NextBack, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming 1 element from
the end (left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextAndBack;
+ impl SetupIter for NextAndBack {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ iter.next();
+ iter.next_back();
+ }
+ }
+
+ setup_and_assert_cases(NextAndBack, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming 1 element from
start and end (left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextUntilLast;
+ impl SetupIter for NextUntilLast {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ let len = iter.len();
+ if len > 1 {
+ iter.nth(len - 2);
+ }
+ }
+ }
+ setup_and_assert_cases(NextUntilLast, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming all from the
start but 1 (left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextBackUntilFirst;
+ impl SetupIter for NextBackUntilFirst {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ let len = iter.len();
+ if len > 1 {
+ iter.nth_back(len - 2);
+ }
+ }
+ }
+ setup_and_assert_cases(NextBackUntilFirst, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming all from the end
but 1 (left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextFinish;
+ impl SetupIter for NextFinish {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ iter.nth(iter.len());
+ }
+ }
+ setup_and_assert_cases(NextFinish, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming all from the
start (left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextBackFinish;
+ impl SetupIter for NextBackFinish {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ iter.nth_back(iter.len());
+ }
+ }
+ setup_and_assert_cases(NextBackFinish, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for new iter after consuming all from the end
(left actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextUntilLastNone;
+ impl SetupIter for NextUntilLastNone {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ let last_null_position = iter.clone().rposition(|item|
item.is_none());
+
+ // move the iterator to the location where there are no nulls
anymore
+ if let Some(last_null_position) = last_null_position {
+ iter.nth(last_null_position);
+ }
+ }
+ }
+ setup_and_assert_cases(NextUntilLastNone, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for iter that have no nulls left (left
actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+
+ struct NextUntilLastSome;
+ impl SetupIter for NextUntilLastSome {
+ fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut
I) {
+ let last_some_position = iter.clone().rposition(|item|
item.is_some());
+
+ // move the iterator to the location where there are only nulls
+ if let Some(last_some_position) = last_some_position {
+ iter.nth(last_some_position);
+ }
+ }
+ }
+ setup_and_assert_cases(NextUntilLastSome, |actual, expected| {
+ let current_iterator_values: Vec<Option<i32>> =
expected.clone().collect();
+
+ assert_eq!(
+ o.get_value(actual),
+ o.get_value(expected),
+ "Failed on op {} for iter that only have nulls left (left
actual, right expected) ({current_iterator_values:?})",
+ o.name()
+ );
+ });
+ }
+
+ /// Helper function that will assert that the provided operation
+ /// produces the same result for both [`ArrayIter`] and slice iterator
+ /// under various consumption patterns (e.g. some calls to
next/next_back/consume_all/etc)
+ ///
+ /// this is different from [`assert_array_iterator_cases`] as this also
check that the state after the call is correct
+ /// to make sure we don't leave the iterator in incorrect state
+ fn assert_array_iterator_cases_mutate<O: MutatingArrayIteratorOp>(o: O) {
+ struct Adapter<O: MutatingArrayIteratorOp> {
+ o: O,
+ }
+
+ #[derive(Debug, PartialEq)]
+ struct AdapterOutput<Value> {
+ value: Value,
+ /// collect on the iterator after running the operation
+ leftover: Vec<Option<i32>>,
+ }
+
+ impl<O: MutatingArrayIteratorOp> ConsumingArrayIteratorOp for
Adapter<O> {
+ type Output = AdapterOutput<O::Output>;
+
+ fn name(&self) -> String {
+ self.o.name()
+ }
+
+ fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+ &self,
+ mut iter: T,
+ ) -> Self::Output {
+ let value = self.o.get_value(&mut iter);
+
+ let leftover: Vec<_> = iter.collect();
+
+ AdapterOutput { value, leftover }
+ }
+ }
+
+ assert_array_iterator_cases(Adapter { o })
+ }
+
+ #[derive(Debug, PartialEq)]
+ struct CallTrackingAndResult<Result: Debug + PartialEq, CallArgs: Debug +
PartialEq> {
+ result: Result,
+ calls: Vec<CallArgs>,
+ }
+ type CallTrackingWithInputType<Result> = CallTrackingAndResult<Result,
Option<i32>>;
+ type CallTrackingOnly = CallTrackingWithInputType<()>;
+
+ #[test]
+ fn assert_position() {
Review Comment:
I verified that this test fails without the code change in this PR
```
---- iterator::tests::assert_position stdout ----
thread 'iterator::tests::assert_position' panicked at
arrow-array/src/iterator.rs:467:13:
assertion `left == right` failed: Failed on op rposition with 0 false
returned for new iter after consuming 1 element from the end (left actual,
right expected) ([Some(0), Some(1), Some(2), Some(3), Some(4), Some(5),
Some(6), Some(7), Some(8)])
left: AdapterOutput { value: CallTrackingAndResult { result: Some(9),
calls: [Some(8)] }, leftover: [Some(0), Some(1), Some(2), Some(3), Some(4),
Some(5), Some(6), Some(7)] }
right: AdapterOutput { value: CallTrackingAndResult { result: Some(8),
calls: [Some(8)] }, leftover: [Some(0), Some(1), Some(2), Some(3), Some(4),
Some(5), Some(6), Some(7)] }
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]