emkornfield commented on code in PR #38867:
URL: https://github.com/apache/arrow/pull/38867#discussion_r1405601897
##########
cpp/src/parquet/column_reader.h:
##########
@@ -302,8 +303,274 @@ class TypedColumnReader : public ColumnReader {
int32_t* dict_len) = 0;
};
+/// \brief A closed interval [from, to] of int64_t positions; both ends are
+/// inclusive.
+///
+/// unionRange() and intersection() report "no result" with the sentinel range
+/// {-1, -1} instead of throwing.
+struct Range {
+  /// \brief Merges two ranges that overlap or touch.
+  ///
+  /// \return the range spanning both inputs, or {-1, -1} when at least one
+  /// position lies between them.
+  static Range unionRange(const Range& left, const Range& right) {
+    if (left.from <= right.from) {
+      // The `+ 1` lets directly adjacent ranges, e.g. [0, 4] and [5, 9], merge.
+      if (left.to + 1 >= right.from) {
+        return {left.from, std::max(left.to, right.to)};
+      }
+    } else if (right.to + 1 >= left.from) {
+      return {right.from, std::max(left.to, right.to)};
+    }
+    return {-1, -1};
+  }
+
+  /// \brief Computes the positions shared by both ranges.
+  ///
+  /// \return the overlapping part, or {-1, -1} when the ranges are disjoint.
+  static Range intersection(const Range& left, const Range& right) {
+    if (left.from <= right.from) {
+      if (left.to >= right.from) {
+        return {right.from, std::min(left.to, right.to)};
+      }
+    } else if (right.to >= left.from) {
+      return {left.from, std::min(left.to, right.to)};
+    }
+    return {-1, -1};  // sentinel: no intersection range found
+  }
+
+  int64_t from;  ///< first position covered (inclusive)
+  int64_t to;    ///< last position covered (inclusive)
+
+  /// Builds the range [from_, to_]. The from_ <= to_ precondition is checked
+  /// with assert, i.e. only when NDEBUG is not defined.
+  Range(const int64_t from_, const int64_t to_) : from(from_), to(to_) {
+    assert(from <= to);
+  }
+
+  /// \return how many positions the range covers (both ends included).
+  size_t count() const { return static_cast<size_t>(to - from + 1); }
+
+  /// \return true when this range ends strictly before `other` starts.
+  bool isBefore(const Range& other) const { return to < other.from; }
+
+  /// \return true when this range starts strictly after `other` ends.
+  bool isAfter(const Range& other) const { return from > other.to; }
+
+  /// \return true when the two ranges share at least one position.
+  bool isOverlap(const Range& other) const {
+    return !isBefore(other) && !isAfter(other);
+  }
+
+  /// \return the range formatted as "[from, to]".
+  std::string toString() const {
+    return "[" + std::to_string(from) + ", " + std::to_string(to) + "]";
+  }
+};
+
+class RowRanges {
+ std::vector<Range> ranges;
+
+ public:
+ /// Creates an empty set of ranges.
+ RowRanges() = default;
+
+ /// Creates a set containing exactly one range.
+ explicit RowRanges(const Range& range) : ranges(1, range) {}
+
+ /// Creates a set from a copy of `ranges`; the vector is stored as given,
+ /// without sorting or validation. Left non-explicit so existing implicit
+ /// conversions from std::vector<Range> keep compiling.
+ RowRanges(const std::vector<Range>& ranges) : ranges(ranges) {}
+
+ // Copy and move constructors: the compiler-generated member-wise versions do
+ // exactly what the hand-written bodies did.
+ // NOTE(review): a declared move constructor suppresses the implicit copy/move
+ // assignment operators -- confirm whether assignment is declared elsewhere in
+ // this class or is intentionally unavailable.
+ RowRanges(const RowRanges& other) = default;
+
+ RowRanges(RowRanges&& other) noexcept = default;
+
+ /// \brief Combines the ranges of `left` and `right` into one RowRanges.
+ ///
+ /// Both inputs are walked front to back in a single merge pass and every
+ /// range is handed to add() exactly once.
+ /// NOTE(review): this presumes each input is already ordered by `from` and
+ /// that add() coalesces overlapping/adjacent ranges -- add() is not visible
+ /// in this hunk, please confirm.
+ static RowRanges unionRanges(const RowRanges& left, const RowRanges& right) {
+   RowRanges result;
+   // Every cursor travels together with the end iterator of its own vector.
+   // The two sides are swapped below, and comparing an iterator against the
+   // end() of a different vector is undefined behaviour.
+   auto it1 = left.ranges.begin();
+   auto end1 = left.ranges.end();
+   auto it2 = right.ranges.begin();
+   auto end2 = right.ranges.end();
+   if (it2 != end2) {
+     // `pending` is the range currently held back. Consume it from its
+     // sequence immediately so the final drain loop does not add it again.
+     Range pending = *it2;
+     ++it2;
+     while (it1 != end1) {
+       const Range current = *it1;
+       ++it1;
+       if (current.isAfter(pending)) {
+         // `pending` lies entirely before `current`: emit it, hold `current`
+         // back instead, and continue reading from the other sequence.
+         result.add(pending);
+         pending = current;
+         const auto tmpIt = it1;
+         it1 = it2;
+         it2 = tmpIt;
+         const auto tmpEnd = end1;
+         end1 = end2;
+         end2 = tmpEnd;
+       } else {
+         result.add(current);
+       }
+     }
+     result.add(pending);
+   } else {
+     // `right` is empty: whatever `left` holds is drained below.
+     it2 = it1;
+     end2 = end1;
+   }
+   // Drain what is left of the sequence that was not exhausted above.
+   for (; it2 != end2; ++it2) {
+     result.add(*it2);
+   }
+
+   return result;
+ }
+
+ /// \brief Collects the overlap of every pair of ranges taken from `left` and
+ /// `right`.
+ ///
+ /// For each range of `left`, the ranges of `right` are scanned starting at the
+ /// first one not already known to lie entirely before it; each overlapping
+ /// pair contributes Range::intersection() of the two via add().
+ /// NOTE(review): skipping ahead like this presumes both inputs are ordered and
+ /// non-overlapping -- not verifiable from this hunk, please confirm.
+ static RowRanges intersection(const RowRanges& left, const RowRanges& right) {
+   RowRanges overlap;
+
+   const size_t rightCount = right.ranges.size();
+   size_t firstCandidate = 0;
+   for (const Range& lhs : left.ranges) {
+     size_t idx = firstCandidate;
+     while (idx < rightCount) {
+       const Range& rhs = right.ranges[idx];
+       if (lhs.isBefore(rhs)) {
+         // Nothing further right can overlap `lhs` either.
+         break;
+       }
+       if (lhs.isAfter(rhs)) {
+         // `rhs` ends before `lhs` starts; later left ranges start the scan
+         // after it.
+         firstCandidate = idx + 1;
+       } else {
+         overlap.add(Range::intersection(lhs, rhs));
+       }
+       ++idx;
+     }
+   }
+
+   return overlap;
+ }
+
+ RowRanges slice(const int64_t from, const int64_t to) const {
Review Comment:
Please document the intended semantics here. Is this method really necessary?
Why wouldn't it consider partial ranges?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]