This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 48294b0790 GH-34893: [C++] Fix run-end encoded array iterator issues 
that manifest on backwards iteration (#34896)
48294b0790 is described below

commit 48294b0790e684c704a563cb98cada9fa59d0e2f
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Wed Apr 12 14:34:12 2023 -0300

    GH-34893: [C++] Fix run-end encoded array iterator issues that manifest on 
backwards iteration (#34896)
    
    ### Rationale for this change
    
    The value of `RunEndEncodedArraySpan::Iterator::physical_pos_` from the 
instance returned by `RunEndEncodedArraySpan::end()` is smaller (by 1) than the 
results of successive applications of `operator++()` until `logical_pos_` 
reaches the logical length of the run-end encoded array.
    
    To support backwards iteration, the `Iterator` returned by 
`RunEndEncodedArraySpan::end()` should be well-formed.
    
    There was no `operator--()` in `RunEndEncodedArraySpan::Iterator` before 
this PR, so one can't go backwards from `end()` and this latent bug doesn't 
manifest.
    
    ### What changes are included in this PR?
    
     - Addition of `RunEndEncodedArraySpan::Iterator::operator--()`
     - Addition of `MergedRunsIterator::operator--()`
     - Rename `isEnd()` to `is_end()` (it was a code-style violation)
     - More test cases
    
    ### Are these changes tested?
    
    By existing and new unit tests.
    
    ### Are there any user-facing changes?
    
    A public function -- `isEnd` -- was renamed, but this code hasn't been 
released yet, so it's OK.
    * Closes: #34893
    
    Authored-by: Felipe Oliveira Carvalho <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 cpp/src/arrow/array/builder_run_end.cc             |   3 +-
 cpp/src/arrow/compare.cc                           |   2 +-
 .../arrow/compute/kernels/vector_run_end_encode.cc |   2 +-
 cpp/src/arrow/util/ree_util.h                      | 137 ++++++++++++++---
 cpp/src/arrow/util/ree_util_test.cc                | 165 +++++++++++++++++++--
 5 files changed, 270 insertions(+), 39 deletions(-)

diff --git a/cpp/src/arrow/array/builder_run_end.cc 
b/cpp/src/arrow/array/builder_run_end.cc
index 5908809292..ec285b8e3a 100644
--- a/cpp/src/arrow/array/builder_run_end.cc
+++ b/cpp/src/arrow/array/builder_run_end.cc
@@ -232,8 +232,7 @@ Status RunEndEncodedBuilder::DoAppendArray(const ArraySpan& 
to_append) {
   RETURN_NOT_OK(ReservePhysical(physical_length));
 
   // Append all the run ends from to_append
-  const auto end = ree_span.end();
-  for (auto it = ree_span.iterator(0, physical_offset); it != end; ++it) {
+  for (auto it = ree_span.iterator(0, physical_offset); !it.is_end(ree_span); 
++it) {
     const int64_t run_end = committed_logical_length_ + it.run_length();
     RETURN_NOT_OK(DoAppendRunEnd<RunEndCType>(run_end));
     UpdateDimensions(run_end, 0);
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 3f96310603..df41cd22c9 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -486,7 +486,7 @@ class RangeDataEqualsImpl {
     const auto& right_values = *right_.child_data[1];
 
     auto it = ree_util::MergedRunsIterator(left, right);
-    for (; !it.isEnd(); ++it) {
+    for (; !it.is_end(); ++it) {
       RangeDataEqualsImpl impl(options_, floating_approximate_, left_values, 
right_values,
                                it.index_into_left_array(), 
it.index_into_right_array(),
                                /*range_length=*/1);
diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc 
b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
index 718086b21e..2fa260321c 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
@@ -698,7 +698,7 @@ class RunEndDecodingLoop {
     const ree_util::RunEndEncodedArraySpan<RunEndCType> 
ree_array_span(input_array_);
     int64_t write_offset = 0;
     int64_t output_valid_count = 0;
-    for (auto it = ree_array_span.begin(); it != ree_array_span.end(); ++it) {
+    for (auto it = ree_array_span.begin(); !it.is_end(ree_array_span); ++it) {
       const int64_t read_offset = values_offset_ + it.index_into_array();
       const int64_t run_length = it.run_length();
       ValueRepr value;
diff --git a/cpp/src/arrow/util/ree_util.h b/cpp/src/arrow/util/ree_util.h
index 83d828bbe5..8c8f58a151 100644
--- a/cpp/src/arrow/util/ree_util.h
+++ b/cpp/src/arrow/util/ree_util.h
@@ -157,13 +157,13 @@ class RunEndEncodedArraySpan {
     /// The values array can be addressed with this index to get the value
     /// that makes up the run.
     ///
-    /// NOTE: if this Iterator was produced by RunEndEncodedArraySpan::end(),
+    /// NOTE: if this Iterator is equal to RunEndEncodedArraySpan::end(),
     /// the value returned is undefined.
     int64_t index_into_array() const { return physical_pos_; }
 
     /// \brief Return the initial logical position of the run
     ///
-    /// If this Iterator was produced by RunEndEncodedArraySpan::end(), this is
+    /// If this Iterator is equal to RunEndEncodedArraySpan::end(), this is
     /// the same as RunEndEncodedArraySpan::length().
     int64_t logical_position() const { return logical_pos_; }
 
@@ -177,6 +177,16 @@ class RunEndEncodedArraySpan {
     /// Pre-condition: *this != RunEndEncodedArraySpan::end()
     int64_t run_length() const { return run_end() - logical_pos_; }
 
+    /// \brief Check if the iterator is at the end of the array.
+    ///
+    /// This can be used to avoid paying the cost of a call to
+    /// RunEndEncodedArraySpan::end().
+    ///
+    /// \return true if the iterator is at the end of the array
+    bool is_end(const RunEndEncodedArraySpan& span) const {
+      return logical_pos_ >= span.length();
+    }
+
     Iterator& operator++() {
       logical_pos_ = span.run_end(physical_pos_);
       physical_pos_ += 1;
@@ -189,6 +199,18 @@ class RunEndEncodedArraySpan {
       return prev;
     }
 
+    Iterator& operator--() {
+      physical_pos_ -= 1;
+      logical_pos_ = (physical_pos_ > 0) ? span.run_end(physical_pos_ - 1) : 0;
+      return *this;
+    }
+
+    Iterator operator--(int) {
+      const Iterator prev = *this;
+      --(*this);
+      return prev;
+    }
+
     bool operator==(const Iterator& other) const {
       return logical_pos_ == other.logical_pos_;
     }
@@ -234,31 +256,52 @@ class RunEndEncodedArraySpan {
   ///
   /// \param logical_pos is an index in the [0, length()] range
   Iterator iterator(int64_t logical_pos) const {
-    return iterator(logical_pos, logical_pos < length()
-                                     ? PhysicalIndex(logical_pos)
-                                     : RunEndsArray(array_span).length);
+    if (logical_pos < length()) {
+      return iterator(logical_pos, PhysicalIndex(logical_pos));
+    }
+    // If logical_pos is above the valid range, use length() as the logical
+    // position and calculate the physical address right after the last valid
+    // physical position. Which is the physical index of the last logical
+    // position, plus 1.
+    return (length() == 0) ? iterator(0, PhysicalIndex(0))
+                           : iterator(length(), PhysicalIndex(length() - 1) + 
1);
   }
 
   /// \brief Create an iterator representing the logical begin of the run-end
   /// encoded array
-  Iterator begin() const { return iterator(0); }
+  Iterator begin() const { return iterator(0, PhysicalIndex(0)); }
 
   /// \brief Create an iterator representing the first invalid logical position
   /// of the run-end encoded array
   ///
-  /// The Iterator returned by end() should not be
+  /// \warning Avoid calling end() in a loop, as it will recompute the physical
+  /// length of the array on each call (O(log N) cost per call).
+  ///
+  /// \par You can write your loops like this instead:
+  /// \code
+  /// for (auto it = array.begin(), end = array.end(); it != end; ++it) {
+  ///   // ...
+  /// }
+  /// \endcode
+  ///
+  /// \par Or this version that does not look like idiomatic C++, but removes
+  /// the need for calling end() completely:
+  /// \code
+  /// for (auto it = array.begin(); !it.is_end(array); ++it) {
+  ///   // ...
+  /// }
+  /// \endcode
   Iterator end() const {
-    // NOTE: the run ends array length is not necessarily what
-    // PhysicalIndex(length()) would return but it is a cheap to obtain
-    // physical offset that is invalid.
-    return iterator(length(), RunEndsArray(array_span).length);
+    return iterator(length(),
+                    (length() == 0) ? PhysicalIndex(0) : 
PhysicalIndex(length() - 1) + 1);
   }
 
   // Pre-condition: physical_pos < RunEndsArray(array_span).length);
   inline int64_t run_end(int64_t physical_pos) const {
     assert(physical_pos < RunEndsArray(array_span).length);
-    // Logical index of the end of the currently active run
-    const int64_t logical_run_end = run_ends_[physical_pos] - offset();
+    // Logical index of the end of the run at physical_pos with offset applied
+    const int64_t logical_run_end =
+        std::max<int64_t>(static_cast<int64_t>(run_ends_[physical_pos]) - 
offset(), 0);
     // The current run may go further than the logical length, cap it
     return std::min(logical_run_end, length());
   }
@@ -281,12 +324,37 @@ class MergedRunsIterator {
   using LeftIterator = typename Left::Iterator;
   using RightIterator = typename Right::Iterator;
 
+  MergedRunsIterator(LeftIterator left_it, RightIterator right_it,
+                     int64_t common_logical_length, int64_t common_logical_pos)
+      : ree_iterators_{std::move(left_it), std::move(right_it)},
+        logical_length_(common_logical_length),
+        logical_pos_(common_logical_pos) {}
+
  public:
+  /// \brief Construct a MergedRunsIterator positioned at logical position 0.
+  ///
+  /// Pre-condition: left.length() == right.length()
   MergedRunsIterator(const Left& left, const Right& right)
-      : ree_iterators_{left.begin(), right.begin()}, 
logical_length_(left.length()) {
+      : MergedRunsIterator(left.begin(), right.begin(), left.length(), 0) {
     assert(left.length() == right.length());
   }
 
+  static Result<MergedRunsIterator> MakeBegin(const Left& left, const Right& 
right) {
+    if (left.length() != right.length()) {
+      return Status::Invalid(
+          "MergedRunsIterator expects RunEndEncodedArraySpans of the same 
length");
+    }
+    return MergedRunsIterator(left, right);
+  }
+
+  static Result<MergedRunsIterator> MakeEnd(const Left& left, const Right& 
right) {
+    if (left.length() != right.length()) {
+      return Status::Invalid(
+          "MergedRunsIterator expects RunEndEncodedArraySpans of the same 
length");
+    }
+    return MergedRunsIterator(left.end(), right.end(), left.length(), 
left.length());
+  }
+
   /// \brief Return the left RunEndEncodedArraySpan child
   const Left& left() const { return std::get<0>(ree_iterators_).span; }
 
@@ -295,15 +363,18 @@ class MergedRunsIterator {
 
   /// \brief Return the initial logical position of the run
   ///
-  /// If isEnd(), this is the same as length().
+  /// If is_end(), this is the same as length().
   int64_t logical_position() const { return logical_pos_; }
 
+  /// \brief Whether the iterator is at logical position 0.
+  bool is_begin() const { return logical_pos_ == 0; }
+
   /// \brief Whether the iterator has reached the end of both arrays
-  bool isEnd() const { return logical_pos_ == logical_length_; }
+  bool is_end() const { return logical_pos_ == logical_length_; }
 
   /// \brief Return the logical position immediately after the run.
   ///
-  /// Pre-condition: !isEnd()
+  /// Pre-condition: !is_end()
   int64_t run_end() const {
     const auto& left_it = std::get<0>(ree_iterators_);
     const auto& right_it = std::get<1>(ree_iterators_);
@@ -312,7 +383,7 @@ class MergedRunsIterator {
 
   /// \brief returns the logical length of the current run
   ///
-  /// Pre-condition: !isEnd()
+  /// Pre-condition: !is_end()
   int64_t run_length() const { return run_end() - logical_pos_; }
 
   /// \brief Return a physical index into the values array of a given input,
@@ -352,6 +423,34 @@ class MergedRunsIterator {
     return prev;
   }
 
+  MergedRunsIterator& operator--() {
+    auto& left_it = std::get<0>(ree_iterators_);
+    auto& right_it = std::get<1>(ree_iterators_);
+
+    // The logical position of each iterator is the run_end() of the previous 
run.
+    const int64_t left_logical_pos = left_it.logical_position();
+    const int64_t right_logical_pos = right_it.logical_position();
+
+    if (left_logical_pos < right_logical_pos) {
+      --right_it;
+      logical_pos_ = std::max(left_logical_pos, right_it.logical_position());
+    } else if (left_logical_pos > right_logical_pos) {
+      --left_it;
+      logical_pos_ = std::max(left_it.logical_position(), right_logical_pos);
+    } else {
+      --left_it;
+      --right_it;
+      logical_pos_ = std::max(left_it.logical_position(), 
right_it.logical_position());
+    }
+    return *this;
+  }
+
+  MergedRunsIterator operator--(int) {
+    MergedRunsIterator prev = *this;
+    --(*this);
+    return prev;
+  }
+
   bool operator==(const MergedRunsIterator& other) const {
     return logical_pos_ == other.logical_position();
   }
@@ -361,7 +460,7 @@ class MergedRunsIterator {
  private:
   std::tuple<LeftIterator, RightIterator> ree_iterators_;
   const int64_t logical_length_;
-  int64_t logical_pos_ = 0;
+  int64_t logical_pos_;
 };
 
 }  // namespace ree_util
diff --git a/cpp/src/arrow/util/ree_util_test.cc 
b/cpp/src/arrow/util/ree_util_test.cc
index 38efab1f1c..5f630c3028 100644
--- a/cpp/src/arrow/util/ree_util_test.cc
+++ b/cpp/src/arrow/util/ree_util_test.cc
@@ -103,7 +103,7 @@ TYPED_TEST_P(ReeUtilTest, PhysicalLength) {
 
 TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
   // Construct the following two test arrays with a lot of different offsets 
to test the
-  // RLE iterator: left:
+  // REE iterator: left:
   //
   //          child offset: 0
   //                 |
@@ -121,7 +121,7 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
   //                                                logical offset: 1000
   //                                                physical offset: 10
   //
-  // right:
+  // REE iterator: right:
   //           child offset: 0
   //                  |
   //                  +---+---+---+---+---+------+------+------+------+
@@ -145,7 +145,7 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
       run_end_type, "[1, 2, 3, 4, 5, 6, 7, 8, 9, 1000, 1005, 1015, 1020, 1025, 
30000]");
   const auto right_run_ends =
       ArrayFromJSON(run_end_type, "[1, 2, 3, 4, 5, 2005, 2009, 2025, 2050]");
-  const std::vector<int32_t> expected_run_ends = {5, 4, 6, 5, 5, 25};
+  const std::vector<int32_t> expected_run_lengths = {5, 4, 6, 5, 5, 25};
   const std::vector<int32_t> expected_left_visits = {10, 11, 11, 12, 13, 14};
   const std::vector<int32_t> expected_right_visits = {5, 6, 7, 7, 7, 8};
   const int32_t left_parent_offset = 1000;
@@ -170,36 +170,100 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
   const RunEndEncodedArraySpan<TypeParam> left_ree_span(*left_array->data());
   const RunEndEncodedArraySpan<TypeParam> right_ree_span(*right_array->data());
 
-  // Left array on one side and right on the other side
   {
+    ARROW_SCOPED_TRACE("iterate over merged(left, right)");
     size_t i = 0;
     size_t logical_pos = 0;
     auto it = MergedRunsIterator(left_ree_span, right_ree_span);
     ASSERT_EQ(it.logical_position(), 0);
-    ASSERT_TRUE(!it.isEnd());
+    ASSERT_TRUE(it.is_begin());
+    ASSERT_FALSE(it.is_end());
     ASSERT_EQ(&it.left(), &left_ree_span);
     ASSERT_EQ(&it.right(), &right_ree_span);
-    for (; !it.isEnd(); ++it, ++i) {
-      ASSERT_EQ(it.run_length(), expected_run_ends[i]);
+    for (; !it.is_end(); ++it, ++i) {
+      ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
       ASSERT_EQ(it.index_into_left_array(), expected_left_visits[i]);
       ASSERT_EQ(it.index_into_right_array(), expected_right_visits[i]);
       ASSERT_EQ(it.logical_position(), logical_pos);
       logical_pos += it.run_length();
     }
-    ASSERT_EQ(i, expected_run_ends.size());
+    ASSERT_EQ(i, expected_run_lengths.size());
+    {
+      ARROW_SCOPED_TRACE("in reverse order");
+      auto it =
+          MergedRunsIterator<decltype(left_ree_span), 
decltype(right_ree_span)>::MakeEnd(
+              left_ree_span, right_ree_span)
+              .ValueOrDie();
+      ASSERT_EQ(it.logical_position(), left_ree_span.length());
+      ASSERT_TRUE(it.is_end());
+      ASSERT_EQ(&it.left(), &left_ree_span);
+      ASSERT_EQ(&it.right(), &right_ree_span);
+      while (!it.is_begin()) {
+        --it;
+        --i;
+        ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
+        ASSERT_EQ(it.index_into_left_array(), expected_left_visits[i]);
+        ASSERT_EQ(it.index_into_right_array(), expected_right_visits[i]);
+        logical_pos -= it.run_length();
+        ASSERT_EQ(it.logical_position(), logical_pos);
+      }
+    }
+  }
+
+  {
+    // This test ensures that iteration over merged(left, right) leads to the 
same
+    // result as iteration over merged(right, left) both forward and in 
reverse order.
+    ARROW_SCOPED_TRACE("iterate over merged(right, left)");
+    size_t i = 0;
+    size_t logical_pos = 0;
+    auto it = MergedRunsIterator(right_ree_span, left_ree_span);
+    ASSERT_EQ(it.logical_position(), 0);
+    ASSERT_TRUE(it.is_begin());
+    ASSERT_FALSE(it.is_end());
+    ASSERT_EQ(&it.left(), &right_ree_span);
+    ASSERT_EQ(&it.right(), &left_ree_span);
+    for (; !it.is_end(); ++it, ++i) {
+      ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
+      ASSERT_EQ(it.index_into_left_array(), expected_right_visits[i]);
+      ASSERT_EQ(it.index_into_right_array(), expected_left_visits[i]);
+      ASSERT_EQ(it.logical_position(), logical_pos);
+      logical_pos += it.run_length();
+    }
+    ASSERT_EQ(i, expected_run_lengths.size());
+    {
+      ARROW_SCOPED_TRACE("in reverse order");
+      auto it =
+          MergedRunsIterator<decltype(right_ree_span), 
decltype(left_ree_span)>::MakeEnd(
+              right_ree_span, left_ree_span)
+              .ValueOrDie();
+      ASSERT_EQ(it.logical_position(), left_ree_span.length());
+      ASSERT_TRUE(it.is_end());
+      ASSERT_EQ(&it.left(), &right_ree_span);
+      ASSERT_EQ(&it.right(), &left_ree_span);
+      while (!it.is_begin()) {
+        --it;
+        --i;
+        ASSERT_EQ(it.run_length(), expected_run_lengths[i]);
+        ASSERT_EQ(it.index_into_left_array(), expected_right_visits[i]);
+        ASSERT_EQ(it.index_into_right_array(), expected_left_visits[i]);
+        logical_pos -= it.run_length();
+        ASSERT_EQ(it.logical_position(), logical_pos);
+      }
+    }
   }
 
-  // Left child array on both sides
   const int32_t left_only_run_lengths[] = {5, 10, 5, 5, 25};
   {
+    ARROW_SCOPED_TRACE("iterate over merged(left, left)");
     int64_t i = 0;
     int64_t logical_pos = 0;
     auto it = MergedRunsIterator(left_ree_span, left_ree_span);
     ASSERT_EQ(it.logical_position(), 0);
-    ASSERT_TRUE(!it.isEnd());
+    ASSERT_TRUE(it.is_begin());
+    ASSERT_FALSE(it.is_end());
     ASSERT_EQ(&it.left(), &left_ree_span);
     ASSERT_EQ(&it.right(), &left_ree_span);
-    for (; !it.isEnd(); ++it, ++i) {
+    for (; !it.is_end(); ++it, ++i) {
       ASSERT_EQ(it.run_length(), left_only_run_lengths[i]);
       ASSERT_EQ(it.index_into_left_array(), 10 + i);
       ASSERT_EQ(it.index_into_right_array(), 10 + i);
@@ -207,10 +271,28 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
       logical_pos += it.run_length();
     }
     ASSERT_EQ(i, std::size(left_only_run_lengths));
+    {
+      ARROW_SCOPED_TRACE("in reverse order");
+      auto it =
+          MergedRunsIterator<decltype(left_ree_span), 
decltype(left_ree_span)>::MakeEnd(
+              left_ree_span, left_ree_span)
+              .ValueOrDie();
+      ASSERT_EQ(it.logical_position(), left_ree_span.length());
+      ASSERT_TRUE(it.is_end());
+      ASSERT_EQ(&it.left(), &it.right());
+      while (!it.is_begin()) {
+        --it;
+        --i;
+        ASSERT_EQ(it.run_length(), left_only_run_lengths[i]);
+        ASSERT_EQ(it.index_into_left_array(), 10 + i);
+        ASSERT_EQ(it.index_into_right_array(), 10 + i);
+        logical_pos -= it.run_length();
+        ASSERT_EQ(it.logical_position(), logical_pos);
+      }
+    }
   }
-
-  // Stand-alone left array
   {
+    ARROW_SCOPED_TRACE("iterate over left array)");
     int64_t i = 0;
     int64_t logical_pos = 0;
     for (auto it = left_ree_span.begin(); it != left_ree_span.end(); ++it, 
++i) {
@@ -220,15 +302,31 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
       logical_pos += it.run_length();
     }
     ASSERT_EQ(i, std::size(left_only_run_lengths));
+    {
+      ARROW_SCOPED_TRACE("in reverse order");
+      auto it = left_ree_span.end();
+      auto begin = left_ree_span.begin();
+      ASSERT_EQ(it.logical_position(), left_ree_span.length());
+      while (it != begin) {
+        --it;
+        --i;
+        ASSERT_EQ(it.run_length(), left_only_run_lengths[i]);
+        ASSERT_EQ(it.index_into_array(), 10 + i);
+        logical_pos -= it.run_length();
+        ASSERT_EQ(it.logical_position(), logical_pos);
+      }
+      ASSERT_EQ(i, 0);
+      ASSERT_EQ(logical_pos, 0);
+    }
   }
 
-  // Right array on both sides
   const int32_t right_only_run_lengths[] = {5, 4, 16, 25};
   {
+    ARROW_SCOPED_TRACE("iterate over merged(right, right)");
     int64_t i = 0;
     int64_t logical_pos = 0;
     auto it = MergedRunsIterator(right_ree_span, right_ree_span);
-    for (; !it.isEnd(); ++it, ++i) {
+    for (; !it.is_end(); ++it, ++i) {
       ASSERT_EQ(it.run_length(), right_only_run_lengths[i]);
       ASSERT_EQ(it.index_into_left_array(), 5 + i);
       ASSERT_EQ(it.index_into_right_array(), 5 + i);
@@ -236,9 +334,28 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
       logical_pos += it.run_length();
     }
     ASSERT_EQ(i, std::size(right_only_run_lengths));
+    {
+      ARROW_SCOPED_TRACE("in reverse order");
+      auto it =
+          MergedRunsIterator<decltype(right_ree_span), 
decltype(right_ree_span)>::MakeEnd(
+              right_ree_span, right_ree_span)
+              .ValueOrDie();
+      ASSERT_EQ(it.logical_position(), right_ree_span.length());
+      ASSERT_TRUE(it.is_end());
+      ASSERT_EQ(&it.left(), &it.right());
+      while (!it.is_begin()) {
+        --it;
+        --i;
+        ASSERT_EQ(it.run_length(), right_only_run_lengths[i]);
+        ASSERT_EQ(it.index_into_left_array(), 5 + i);
+        ASSERT_EQ(it.index_into_right_array(), 5 + i);
+        logical_pos -= it.run_length();
+        ASSERT_EQ(it.logical_position(), logical_pos);
+      }
+    }
   }
-
   {
+    ARROW_SCOPED_TRACE("iterate over right array");
     int64_t i = 0;
     int64_t logical_pos = 0;
     for (auto it = right_ree_span.begin(); it != right_ree_span.end(); ++it, 
++i) {
@@ -248,6 +365,22 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) {
       logical_pos += it.run_length();
     }
     ASSERT_EQ(i, std::size(right_only_run_lengths));
+    {
+      ARROW_SCOPED_TRACE("in reverse order");
+      auto it = right_ree_span.end();
+      auto begin = right_ree_span.begin();
+      ASSERT_EQ(it.logical_position(), right_ree_span.length());
+      while (it != begin) {
+        --it;
+        --i;
+        ASSERT_EQ(it.run_length(), right_only_run_lengths[i]);
+        ASSERT_EQ(it.index_into_array(), 5 + i);
+        logical_pos -= it.run_length();
+        ASSERT_EQ(it.logical_position(), logical_pos);
+      }
+      ASSERT_EQ(i, 0);
+      ASSERT_EQ(logical_pos, 0);
+    }
   }
 }
 

Reply via email to