gaodayue commented on a change in pull request #2547: [Segment V2] Support lazy
materialization read
URL: https://github.com/apache/incubator-doris/pull/2547#discussion_r361248501
##########
File path: be/src/olap/rowset/segment_v2/segment_iterator.cpp
##########
@@ -34,13 +34,71 @@ using strings::Substitute;
namespace doris {
namespace segment_v2 {
+// A fast range iterator for roaring bitmap. Output ranges use closed-open form, like [from, to).
+// Example:
+// input bitmap: [0 1 4 5 6 7 10 15 16 17 18 19]
+// output ranges: [0,2), [4,8), [10,11), [15,20) (when max_range_size=10)
+// output ranges: [0,2), [4,7), [7,8), [10,11), [15,18), [18,20) (when max_range_size=3)
+class SegmentIterator::BitmapRangeIterator {
+public:
+ explicit BitmapRangeIterator(const Roaring& bitmap) {
+ roaring_init_iterator(&bitmap.roaring, &_iter);
+ _last_val = 0;
+ _buf = new uint32_t[256];
+ _read_next_batch();
+ }
+
+ ~BitmapRangeIterator() {
+ delete[] _buf;
+ }
+
+ bool has_more_range() const { return !_eof; }
+
+ // read next range into [*from, *to) whose size <= max_range_size.
+ // return false when there is no more range.
+ bool next_range(uint32_t max_range_size, uint32_t* from, uint32_t* to) {
+ if (_eof) {
+ return false;
+ }
+ *from = _buf[_buf_pos];
+ uint32_t range_size = 0;
+ do {
+ _last_val = _buf[_buf_pos];
+ _buf_pos++;
+ range_size++;
+ if (_buf_pos == _buf_size) { // read next batch
+ _read_next_batch();
+ }
+ } while (range_size < max_range_size && !_eof && _buf[_buf_pos] ==
_last_val + 1);
+ *to = *from + range_size;
+ return true;
+ }
+
+private:
+ void _read_next_batch() {
+ uint32_t n = roaring_read_uint32_iterator(&_iter, _buf, kBatchSize);
+ _buf_pos = 0;
+ _buf_size = n;
+ _eof = n == 0;
+ }
+
+ static const uint32_t kBatchSize = 256;
+ roaring_uint32_iterator_t _iter;
+ uint32_t _last_val;
+ uint32_t* _buf = nullptr;
+ uint32_t _buf_pos;
+ uint32_t _buf_size;
+ bool _eof;
Review comment:
ok
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]