This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 48294b0790 GH-34893: [C++] Fix run-end encoded array iterator issues
that manifest on backwards iteration (#34896)
48294b0790 is described below
commit 48294b0790e684c704a563cb98cada9fa59d0e2f
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Wed Apr 12 14:34:12 2023 -0300
GH-34893: [C++] Fix run-end encoded array iterator issues that manifest on
backwards iteration (#34896)
### Rationale for this change
The value of `RunEndEncodedArraySpan::Iterator::physical_pos_` from the
instance returned by `RunEndEncodedArraySpan::end()` is smaller (by 1) than the
results of successive applications of `operator++()` until `logical_pos_`
reaches the logical length of the run-end encoded array.
To support backwards iteration, the `Iterator` returned by
`RunEndEncodedArraySpan::end()` should be well-formed.
There was no `operator--()` in `RunEndEncodedArraySpan::Iterator` before
this PR, so one can't go backwards from `end()` and this latent bug doesn't
manifest.
### What changes are included in this PR?
- Addition of `RunEndEncodedArraySpan::Iterator::operator--()`
- Addition of `MergedRunsIterator::operator--()`
- Rename `isEnd()` to `is_end()` (it was a code-style violation)
- More test cases
### Are these changes tested?
By existing and new unit tests.
### Are there any user-facing changes?
A public function -- `isEnd` -- was renamed, but this code hasn't been
released yet, so it's OK.
* Closes: #34893
Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
---
cpp/src/arrow/array/builder_run_end.cc | 3 +-
cpp/src/arrow/compare.cc | 2 +-
.../arrow/compute/kernels/vector_run_end_encode.cc | 2 +-
cpp/src/arrow/util/ree_util.h | 137 ++++++++++++++---
cpp/src/arrow/util/ree_util_test.cc | 165 +++++++++++++++++++--
5 files changed, 270 insertions(+), 39 deletions(-)
diff --git a/cpp/src/arrow/array/builder_run_end.cc
b/cpp/src/arrow/array/builder_run_end.cc
index 5908809292..ec285b8e3a 100644
--- a/cpp/src/arrow/array/builder_run_end.cc
+++ b/cpp/src/arrow/array/builder_run_end.cc
@@ -232,8 +232,7 @@ Status RunEndEncodedBuilder::DoAppendArray(const ArraySpan&
to_append) {
RETURN_NOT_OK(ReservePhysical(physical_length));
// Append all the run ends from to_append
- const auto end = ree_span.end();
- for (auto it = ree_span.iterator(0, physical_offset); it != end; ++it) {
+ for (auto it = ree_span.iterator(0, physical_offset); !it.is_end(ree_span);
++it) {
const int64_t run_end = committed_logical_length_ + it.run_length();
RETURN_NOT_OK(DoAppendRunEnd<RunEndCType>(run_end));
UpdateDimensions(run_end, 0);
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 3f96310603..df41cd22c9 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -486,7 +486,7 @@ class RangeDataEqualsImpl {
const auto& right_values = *right_.child_data[1];
auto it = ree_util::MergedRunsIterator(left, right);
- for (; !it.isEnd(); ++it) {
+ for (; !it.is_end(); ++it) {
RangeDataEqualsImpl impl(options_, floating_approximate_, left_values,
right_values,
it.index_into_left_array(),
it.index_into_right_array(),
/*range_length=*/1);
diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
index 718086b21e..2fa260321c 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
@@ -698,7 +698,7 @@ class RunEndDecodingLoop {
const ree_util::RunEndEncodedArraySpan<RunEndCType>
ree_array_span(input_array_);
int64_t write_offset = 0;
int64_t output_valid_count = 0;
- for (auto it = ree_array_span.begin(); it != ree_array_span.end(); ++it) {
+ for (auto it = ree_array_span.begin(); !it.is_end(ree_array_span); ++it) {
const int64_t read_offset = values_offset_ + it.index_into_array();
const int64_t run_length = it.run_length();
ValueRepr value;
diff --git a/cpp/src/arrow/util/ree_util.h b/cpp/src/arrow/util/ree_util.h
index 83d828bbe5..8c8f58a151 100644
--- a/cpp/src/arrow/util/ree_util.h
+++ b/cpp/src/arrow/util/ree_util.h
@@ -157,13 +157,13 @@ class RunEndEncodedArraySpan {
/// The values array can be addressed with this index to get the value
/// that makes up the run.
///
- /// NOTE: if this Iterator was produced by RunEndEncodedArraySpan::end(),
+ /// NOTE: if this Iterator is equal to RunEndEncodedArraySpan::end(),
/// the value returned is undefined.
int64_t index_into_array() const { return physical_pos_; }
/// \brief Return the initial logical position of the run
///
- /// If this Iterator was produced by RunEndEncodedArraySpan::end(), this is
+ /// If this Iterator is equal to RunEndEncodedArraySpan::end(), this is
/// the same as RunEndEncodedArraySpan::length().
int64_t logical_position() const { return logical_pos_; }
@@ -177,6 +177,16 @@ class RunEndEncodedArraySpan {
/// Pre-condition: *this != RunEndEncodedArraySpan::end()
int64_t run_length() const { return run_end() - logical_pos_; }
+ /// \brief Check if the iterator is at the end of the array.
+ ///
+ /// This can be used to avoid paying the cost of a call to
+ /// RunEndEncodedArraySpan::end().
+ ///
+ /// \return true if the iterator is at the end of the array
+ bool is_end(const RunEndEncodedArraySpan& span) const {
+ return logical_pos_ >= span.length();
+ }
+
Iterator& operator++() {
logical_pos_ = span.run_end(physical_pos_);
physical_pos_ += 1;
@@ -189,6 +199,18 @@ class RunEndEncodedArraySpan {
return prev;
}
+ Iterator& operator--() {
+ physical_pos_ -= 1;
+ logical_pos_ = (physical_pos_ > 0) ? span.run_end(physical_pos_ - 1) : 0;
+ return *this;
+ }
+
+ Iterator operator--(int) {
+ const Iterator prev = *this;
+ --(*this);
+ return prev;
+ }
+
bool operator==(const Iterator& other) const {
return logical_pos_ == other.logical_pos_;
}
@@ -234,31 +256,52 @@ class RunEndEncodedArraySpan {
///
/// \param logical_pos is an index in the [0, length()] range
Iterator iterator(int64_t logical_pos) const {
- return iterator(logical_pos, logical_pos < length()
- ? PhysicalIndex(logical_pos)
- : RunEndsArray(array_span).length);
+ if (logical_pos < length()) {
+ return iterator(logical_pos, PhysicalIndex(logical_pos));
+ }
+ // If logical_pos is above the valid range, use length() as the logical
+ // position and calculate the physical address right after the last valid
+ // physical position. Which is the physical index of the last logical
+ // position, plus 1.
+ return (length() == 0) ? iterator(0, PhysicalIndex(0))
+ : iterator(length(), PhysicalIndex(length() - 1) +
1);
}
/// \brief Create an iterator representing the logical begin of the run-end
/// encoded array
- Iterator begin() const { return iterator(0); }
+ Iterator begin() const { return iterator(0, PhysicalIndex(0)); }
/// \brief Create an iterator representing the first invalid logical position
/// of the run-end encoded array
///
- /// The Iterator returned by end() should not be
+ /// \warning Avoid calling end() in a loop, as it will recompute the physical
+ /// length of the array on each call (O(log N) cost per call).
+ ///
+ /// \par You can write your loops like this instead:
+ /// \code
+ /// for (auto it = array.begin(), end = array.end(); it != end; ++it) {
+ /// // ...
+ /// }
+ /// \endcode
+ ///
+ /// \par Or this version that does not look like idiomatic C++, but removes
+ /// the need for calling end() completely:
+ /// \code
+ /// for (auto it = array.begin(); !it.is_end(array); ++it) {
+ /// // ...
+ /// }
+ /// \endcode
Iterator end() const {
- // NOTE: the run ends array length is not necessarily what
- // PhysicalIndex(length()) would return but it is a cheap to obtain
- // physical offset that is invalid.
- return iterator(length(), RunEndsArray(array_span).length);
+ return iterator(length(),
+ (length() == 0) ? PhysicalIndex(0) :
PhysicalIndex(length() - 1) + 1);
}
// Pre-condition: physical_pos < RunEndsArray(array_span).length);
inline int64_t run_end(int64_t physical_pos) const {
assert(physical_pos < RunEndsArray(array_span).length);
- // Logical index of the end of the currently active run
- const int64_t logical_run_end = run_ends_[physical_pos] - offset();
+ // Logical index of the end of the run at physical_pos with offset applied
+ const int64_t logical_run_end =
+ std::max<int64_t>(static_cast<int64_t>(run_ends_[physical_pos]) -
offset(), 0);
// The current run may go further than the logical length, cap it
return std::min(logical_run_end, length());
}
@@ -281,12 +324,37 @@ class MergedRunsIterator {
using LeftIterator = typename Left::Iterator;
using RightIterator = typename Right::Iterator;
+ MergedRunsIterator(LeftIterator left_it, RightIterator right_it,
+ int64_t common_logical_length, int64_t common_logical_pos)
+ : ree_iterators_{std::move(left_it), std::move(right_it)},
+ logical_length_(common_logical_length),
+ logical_pos_(common_logical_pos) {}
+
public:
+ /// \brief Construct a MergedRunsIterator positioned at logical position 0.
+ ///
+ /// Pre-condition: left.length() == right.length()
MergedRunsIterator(const Left& left, const Right& right)
- : ree_iterators_{left.begin(), right.begin()},
logical_length_(left.length()) {
+ : MergedRunsIterator(left.begin(), right.begin(), left.length(), 0) {
assert(left.length() == right.length());
}
+ static Result<MergedRunsIterator> MakeBegin(const Left& left, const Right&
right) {
+ if (left.length() != right.length()) {
+ return Status::Invalid(
+ "MergedRunsIterator expects RunEndEncodedArraySpans of the same
length");
+ }
+ return MergedRunsIterator(left, right);
+ }
+
+ static Result<MergedRunsIterator> MakeEnd(const Left& left, const Right&
right) {
+ if (left.length() != right.length()) {
+ return Status::Invalid(
+ "MergedRunsIterator expects RunEndEncodedArraySpans of the same
length");
+ }
+ return MergedRunsIterator(left.end(), right.end(), left.length(),
left.length());
+ }
+
/// \brief Return the left RunEndEncodedArraySpan child
const Left& left() const { return std::get<0>(ree_iterators_).span; }
@@ -295,15 +363,18 @@ class MergedRunsIterator {
/// \brief Return the initial logical position of the run
///
- /// If isEnd(), this is the same as length().
+ /// If is_end(), this is the same as length().
int64_t logical_position() const { return logical_pos_; }
+ /// \brief Whether the iterator is at logical position 0.
+ bool is_begin() const { return logical_pos_ == 0; }
+
/// \brief Whether the iterator has reached the end of both arrays
- bool isEnd() const { return logical_pos_ == logical_length_; }
+ bool is_end() const { return logical_pos_ == logical_length_; }
/// \brief Return the logical position immediately after the run.
///
- /// Pre-condition: !isEnd()
+ /// Pre-condition: !is_end()
int64_t run_end() const {
const auto& left_it = std::get<0>(ree_iterators_);
const auto& right_it = std::get<1>(ree_iterators_);
@@ -312,7 +383,7 @@ class MergedRunsIterator {
/// \brief returns the logical length of the current run
///
- /// Pre-condition: !isEnd()
+ /// Pre-condition: !is_end()
int64_t run_length() const { return run_end() - logical_pos_; }
/// \brief Return a physical index into the values array of a given input,
@@ -352,6 +423,34 @@ class MergedRunsIterator {
return prev;
}
+ MergedRunsIterator& operator--() {
+ auto& left_it = std::get<0>(ree_iterators_);
+ auto& right_it = std::get<1>(ree_iterators_);
+
+ // The logical position of each iterator is the run_end() of the previous
run.
+ const int64_t left_logical_pos = left_it.logical_position();
+ const int64_t right_logical_pos = right_it.logical_position();
+
+ if (left_logical_pos < right_logical_pos) {
+ --right_it;
+ logical_pos_ = std::max(left_logical_pos, right_it.logical_position());
+ } else if (left_logical_pos > right_logical_pos) {
+ --left_it;
+ logical_pos_ = std::max(left_it.logical_position(), right_logical_pos);
+ } else {
+ --left_it;
+ --right_it;
+ logical_pos_ = std::max(left_it.logical_position(),
right_it.logical_position());
+ }
+ return *this;
+ }
+
+ MergedRunsIterator operator--(int) {
+ MergedRunsIterator prev = *this;
+ --(*this);
+ return prev;
+ }
+
bool operator==(const MergedRunsIterator& other) const {
return logical_pos_ == other.logical_position();
}
@@ -361,7 +460,7 @@ class MergedRunsIterator {
private:
std::tuple<LeftIterator, RightIterator> ree_iterators_;
const int64_t logical_length_;
- int64_t logical_pos_ = 0;
+ int64_t logical_pos_;
};
} // namespace ree_util
diff --git a/cpp/src/arrow/util/ree_util_test.cc
b/cpp/src/arrow/util/ree_util_test.cc
index 38efab1f1c..5f630c3028 100644
--- a/cpp/src/arrow/util/ree_util_test.cc
+++ b/cpp/src/arrow/util/ree_util_test.cc
@@ -103,7 +103,7 @@ TYPED_TEST_P(ReeUtilTest, PhysicalLength) {
TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
// Construct the following two test arrays with a lot of different offsets
to test the
- // RLE iterator: left:
+ // REE iterator: left:
//
// child offset: 0
// |
@@ -121,7 +121,7 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
// logical offset: 1000
// physical offset: 10
//
- // right:
+ // REE iterator: right:
// child offset: 0
// |
// +---+---+---+---+---+------+------+------+------+
@@ -145,7 +145,7 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
run_end_type, "[1, 2, 3, 4, 5, 6, 7, 8, 9, 1000, 1005, 1015, 1020, 1025,
30000]");
const auto right_run_ends =
ArrayFromJSON(run_end_type, "[1, 2, 3, 4, 5, 2005, 2009, 2025, 2050]");
- const std::vector<int32_t> expected_run_ends = {5, 4, 6, 5, 5, 25};
+ const std::vector<int32_t> expected_run_lengths = {5, 4, 6, 5, 5, 25};
const std::vector<int32_t> expected_left_visits = {10, 11, 11, 12, 13, 14};
const std::vector<int32_t> expected_right_visits = {5, 6, 7, 7, 7, 8};
const int32_t left_parent_offset = 1000;
@@ -170,36 +170,100 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
const RunEndEncodedArraySpan<TypeParam> left_ree_span(*left_array->data());
const RunEndEncodedArraySpan<TypeParam> right_ree_span(*right_array->data());
- // Left array on one side and right on the other side
{
+ ARROW_SCOPED_TRACE("iterate over merged(left, right)");
size_t i = 0;
size_t logical_pos = 0;
auto it = MergedRunsIterator(left_ree_span, right_ree_span);
ASSERT_EQ(it.logical_position(), 0);
- ASSERT_TRUE(!it.isEnd());
+ ASSERT_TRUE(it.is_begin());
+ ASSERT_FALSE(it.is_end());
ASSERT_EQ(&it.left(), &left_ree_span);
ASSERT_EQ(&it.right(), &right_ree_span);
- for (; !it.isEnd(); ++it, ++i) {
- ASSERT_EQ(it.run_length(), expected_run_ends[i]);
+ for (; !it.is_end(); ++it, ++i) {
+ ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
ASSERT_EQ(it.index_into_left_array(), expected_left_visits[i]);
ASSERT_EQ(it.index_into_right_array(), expected_right_visits[i]);
ASSERT_EQ(it.logical_position(), logical_pos);
logical_pos += it.run_length();
}
- ASSERT_EQ(i, expected_run_ends.size());
+ ASSERT_EQ(i, expected_run_lengths.size());
+ {
+ ARROW_SCOPED_TRACE("in reverse order");
+ auto it =
+ MergedRunsIterator<decltype(left_ree_span),
decltype(right_ree_span)>::MakeEnd(
+ left_ree_span, right_ree_span)
+ .ValueOrDie();
+ ASSERT_EQ(it.logical_position(), left_ree_span.length());
+ ASSERT_TRUE(it.is_end());
+ ASSERT_EQ(&it.left(), &left_ree_span);
+ ASSERT_EQ(&it.right(), &right_ree_span);
+ while (!it.is_begin()) {
+ --it;
+ --i;
+ ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
+ ASSERT_EQ(it.index_into_left_array(), expected_left_visits[i]);
+ ASSERT_EQ(it.index_into_right_array(), expected_right_visits[i]);
+ logical_pos -= it.run_length();
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ }
+ }
+ }
+
+ {
+ // This test ensures that iteration over merged(left, right) leads to the
same
+ // result as iteration over merged(right, left) both forward and in
reverse order.
+ ARROW_SCOPED_TRACE("iterate over merged(right, left)");
+ size_t i = 0;
+ size_t logical_pos = 0;
+ auto it = MergedRunsIterator(right_ree_span, left_ree_span);
+ ASSERT_EQ(it.logical_position(), 0);
+ ASSERT_TRUE(it.is_begin());
+ ASSERT_FALSE(it.is_end());
+ ASSERT_EQ(&it.left(), &right_ree_span);
+ ASSERT_EQ(&it.right(), &left_ree_span);
+ for (; !it.is_end(); ++it, ++i) {
+ ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
+ ASSERT_EQ(it.index_into_left_array(), expected_right_visits[i]);
+ ASSERT_EQ(it.index_into_right_array(), expected_left_visits[i]);
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ logical_pos += it.run_length();
+ }
+ ASSERT_EQ(i, expected_run_lengths.size());
+ {
+ ARROW_SCOPED_TRACE("in reverse order");
+ auto it =
+ MergedRunsIterator<decltype(right_ree_span),
decltype(left_ree_span)>::MakeEnd(
+ right_ree_span, left_ree_span)
+ .ValueOrDie();
+ ASSERT_EQ(it.logical_position(), left_ree_span.length());
+ ASSERT_TRUE(it.is_end());
+ ASSERT_EQ(&it.left(), &right_ree_span);
+ ASSERT_EQ(&it.right(), &left_ree_span);
+ while (!it.is_begin()) {
+ --it;
+ --i;
+ ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
+ ASSERT_EQ(it.index_into_left_array(), expected_right_visits[i]);
+ ASSERT_EQ(it.index_into_right_array(), expected_left_visits[i]);
+ logical_pos -= it.run_length();
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ }
+ }
}
- // Left child array on both sides
const int32_t left_only_run_lengths[] = {5, 10, 5, 5, 25};
{
+ ARROW_SCOPED_TRACE("iterate over merged(left, left)");
int64_t i = 0;
int64_t logical_pos = 0;
auto it = MergedRunsIterator(left_ree_span, left_ree_span);
ASSERT_EQ(it.logical_position(), 0);
- ASSERT_TRUE(!it.isEnd());
+ ASSERT_TRUE(it.is_begin());
+ ASSERT_FALSE(it.is_end());
ASSERT_EQ(&it.left(), &left_ree_span);
ASSERT_EQ(&it.right(), &left_ree_span);
- for (; !it.isEnd(); ++it, ++i) {
+ for (; !it.is_end(); ++it, ++i) {
ASSERT_EQ(it.run_length(), left_only_run_lengths[i]);
ASSERT_EQ(it.index_into_left_array(), 10 + i);
ASSERT_EQ(it.index_into_right_array(), 10 + i);
@@ -207,10 +271,28 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
logical_pos += it.run_length();
}
ASSERT_EQ(i, std::size(left_only_run_lengths));
+ {
+ ARROW_SCOPED_TRACE("in reverse order");
+ auto it =
+ MergedRunsIterator<decltype(left_ree_span),
decltype(left_ree_span)>::MakeEnd(
+ left_ree_span, left_ree_span)
+ .ValueOrDie();
+ ASSERT_EQ(it.logical_position(), left_ree_span.length());
+ ASSERT_TRUE(it.is_end());
+ ASSERT_EQ(&it.left(), &it.right());
+ while (!it.is_begin()) {
+ --it;
+ --i;
+ ASSERT_EQ(it.run_length(), left_only_run_lengths[i]);
+ ASSERT_EQ(it.index_into_left_array(), 10 + i);
+ ASSERT_EQ(it.index_into_right_array(), 10 + i);
+ logical_pos -= it.run_length();
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ }
+ }
}
-
- // Stand-alone left array
{
+ ARROW_SCOPED_TRACE("iterate over left array)");
int64_t i = 0;
int64_t logical_pos = 0;
for (auto it = left_ree_span.begin(); it != left_ree_span.end(); ++it,
++i) {
@@ -220,15 +302,31 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
logical_pos += it.run_length();
}
ASSERT_EQ(i, std::size(left_only_run_lengths));
+ {
+ ARROW_SCOPED_TRACE("in reverse order");
+ auto it = left_ree_span.end();
+ auto begin = left_ree_span.begin();
+ ASSERT_EQ(it.logical_position(), left_ree_span.length());
+ while (it != begin) {
+ --it;
+ --i;
+ ASSERT_EQ(it.run_length(), left_only_run_lengths[i]);
+ ASSERT_EQ(it.index_into_array(), 10 + i);
+ logical_pos -= it.run_length();
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ }
+ ASSERT_EQ(i, 0);
+ ASSERT_EQ(logical_pos, 0);
+ }
}
- // Right array on both sides
const int32_t right_only_run_lengths[] = {5, 4, 16, 25};
{
+ ARROW_SCOPED_TRACE("iterate over merged(right, right)");
int64_t i = 0;
int64_t logical_pos = 0;
auto it = MergedRunsIterator(right_ree_span, right_ree_span);
- for (; !it.isEnd(); ++it, ++i) {
+ for (; !it.is_end(); ++it, ++i) {
ASSERT_EQ(it.run_length(), right_only_run_lengths[i]);
ASSERT_EQ(it.index_into_left_array(), 5 + i);
ASSERT_EQ(it.index_into_right_array(), 5 + i);
@@ -236,9 +334,28 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
logical_pos += it.run_length();
}
ASSERT_EQ(i, std::size(right_only_run_lengths));
+ {
+ ARROW_SCOPED_TRACE("in reverse order");
+ auto it =
+ MergedRunsIterator<decltype(right_ree_span),
decltype(right_ree_span)>::MakeEnd(
+ right_ree_span, right_ree_span)
+ .ValueOrDie();
+ ASSERT_EQ(it.logical_position(), right_ree_span.length());
+ ASSERT_TRUE(it.is_end());
+ ASSERT_EQ(&it.left(), &it.right());
+ while (!it.is_begin()) {
+ --it;
+ --i;
+ ASSERT_EQ(it.run_length(), right_only_run_lengths[i]);
+ ASSERT_EQ(it.index_into_left_array(), 5 + i);
+ ASSERT_EQ(it.index_into_right_array(), 5 + i);
+ logical_pos -= it.run_length();
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ }
+ }
}
-
{
+ ARROW_SCOPED_TRACE("iterate over right array");
int64_t i = 0;
int64_t logical_pos = 0;
for (auto it = right_ree_span.begin(); it != right_ree_span.end(); ++it,
++i) {
@@ -248,6 +365,22 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
logical_pos += it.run_length();
}
ASSERT_EQ(i, std::size(right_only_run_lengths));
+ {
+ ARROW_SCOPED_TRACE("in reverse order");
+ auto it = right_ree_span.end();
+ auto begin = right_ree_span.begin();
+ ASSERT_EQ(it.logical_position(), right_ree_span.length());
+ while (it != begin) {
+ --it;
+ --i;
+ ASSERT_EQ(it.run_length(), right_only_run_lengths[i]);
+ ASSERT_EQ(it.index_into_array(), 5 + i);
+ logical_pos -= it.run_length();
+ ASSERT_EQ(it.logical_position(), logical_pos);
+ }
+ ASSERT_EQ(i, 0);
+ ASSERT_EQ(logical_pos, 0);
+ }
}
}