pitrou commented on code in PR #46229:
URL: https://github.com/apache/arrow/pull/46229#discussion_r2207507332


##########
cpp/src/arrow/array/array_binary.cc:
##########
@@ -105,6 +111,392 @@ 
BinaryViewArray::BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
       ArrayData::Make(std::move(type), length, std::move(buffers), null_count, 
offset));
 }
 
+namespace {
+
+// TODO Should We move this to bitmap_ops.h and Remove from 
compute/kernels/util.s
+Result<std::shared_ptr<Buffer>> GetOrCopyNullBitmapBuffer(const ArrayData& 
in_array,
+                                                          MemoryPool* pool) {
+  if (in_array.buffers[0]->data() == nullptr) {
+    return nullptr;
+  } else if (in_array.offset == 0) {
+    return in_array.buffers[0];
+  } else if (in_array.offset % 8 == 0) {
+    return SliceBuffer(in_array.buffers[0], /*offset=*/in_array.offset / 8);
+  } else {
+    // If a non-zero offset, we need to shift the bitmap
+    return internal::CopyBitmap(pool, in_array.buffers[0]->data(), 
in_array.offset,
+                                in_array.length);
+  }
+}
+
+struct Interval {
+  int64_t start;
+  int64_t end;
+  int32_t offset = -1;
+};
+
+struct IntervalComparator {
+  bool operator()(const Interval& left, const Interval& right) const {
+    return left.start < right.start;
+  }
+};
+
+// inspired from boost::icl::interval_set
+class IntervalMerger {
+ public:
+  using IntervalSet = std::set<Interval, IntervalComparator>;
+  using Iterator = std::set<Interval, IntervalComparator>::iterator;
+
+  void AddInterval(const Interval& interval) {
+    auto [it, is_inserted] = interval_set.insert(interval);
+    if (is_inserted) {
+      JointLeft(it);
+      JoinRight(it);
+    } else {
+      if (it->end < interval.end) {
+        const_cast<int64_t&>(it->end) = interval.end;
+        JoinRight(it);
+      }
+    }
+  }
+
+  int64_t CalculateOffsetAndTotalSize() {
+    int64_t total_size = 0;
+    for (auto& it : interval_set) {
+      const_cast<int32_t&>(it.offset) = static_cast<int32_t>(total_size);
+      total_size += it.end - it.start;
+    }
+    return total_size;
+  }
+
+  // This method should be called After CalculateOffsetAndTotalSize
+  int32_t GetRelativeOffset(int32_t view_offset) const {
+    // end and offset is included in comparison
+    auto it = interval_set.lower_bound({view_offset, -1, -1});
+    if (it == interval_set.end()) {
+      --it;
+      // offset from the start of interval
+      auto offset_from_span = view_offset - it->start;
+      return static_cast<int32_t>(offset_from_span) + it->offset;
+    } else if (it->start == view_offset) {
+      // this is the case where view_offset refers to the beginning of interval
+      return it->offset;
+    } else {
+      --it;
+      // offset from the start of interval
+      auto offset_from_span = view_offset - it->start;
+      return static_cast<int32_t>(offset_from_span) + it->offset;
+    }
+  }
+
+  IntervalSet::const_iterator begin() const { return interval_set.cbegin(); }
+
+  IntervalSet::const_iterator end() const { return interval_set.cend(); }
+
+ private:
+  void JointLeft(Iterator& it) {
+    if (it == interval_set.begin()) {
+      return;
+    } else {
+      auto prev_it = std::prev(it);
+      if (Joinable(prev_it, it)) {
+        MergeIntoLeftAndAdvanceRight(prev_it, it);
+        it = prev_it;
+        return;
+      }
+    }
+  }
+
+  void JoinRight(Iterator& it) {
+    auto begin_iterator = std::next(it);
+    auto end_iterator = begin_iterator;
+    while (end_iterator != interval_set.end() && Joinable(it, end_iterator)) {
+      const_cast<int64_t&>(it->end) = std::max(it->end, end_iterator->end);
+      ++end_iterator;
+    }
+    interval_set.erase(begin_iterator, end_iterator);
+  }
+
+  // Update left with a new end value
+  // Advance right to the next iterator
+  void MergeIntoLeftAndAdvanceRight(Iterator& left, Iterator& right) {
+    Interval interval{right->start, right->end};
+    right = interval_set.erase(right);
+    const_cast<int64_t&>(left->end) = std::max(left->end, interval.end);
+  }
+
+  bool Joinable(Iterator left, Iterator right) {
+    return std::max(left->start, right->start) <= std::min(left->end, 
right->end);
+  }
+
+  IntervalSet interval_set;
+};
+
+class CompactArrayImpl {

Review Comment:
   I will defer on reviewing the rest of the implementation until the comments 
on the interval code are addressed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to