airborne12 commented on code in PR #57075:
URL: https://github.com/apache/doris/pull/57075#discussion_r2443765720
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -962,12 +962,112 @@ Status MapFileColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr
Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const
size_t count,
vectorized::MutableColumnPtr&
dst) {
+ if (count == 0) {
+ return Status::OK();
+ }
+ // resolve ColumnMap and nullable wrapper
+ const auto* column_map =
vectorized::check_and_get_column<vectorized::ColumnMap>(
+ dst->is_nullable() ?
static_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()
+ : *dst);
+ auto offsets_ptr = column_map->get_offsets_column().assume_mutable();
+ auto& offsets =
static_cast<vectorized::ColumnArray::ColumnOffsets&>(*offsets_ptr);
+ size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back();
+
+ // 1. bulk read null-map if nullable
+ std::vector<uint8_t> null_mask; // 0: not null, 1: null
+ if (_map_reader->is_nullable()) {
+ auto null_map_ptr =
+
static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr();
+ size_t null_before = null_map_ptr->size();
+ RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count,
null_map_ptr));
+ // extract a light-weight view to decide element reads
+ auto& null_map_col =
assert_cast<vectorized::ColumnUInt8&>(*null_map_ptr);
+ null_mask.reserve(count);
+ for (size_t i = 0; i < count; ++i) {
+ null_mask.push_back(null_map_col.get_element(null_before + i));
+ }
+ } else if (dst->is_nullable()) {
+ // in not-null to null linked-schemachange mode,
+ // actually we do not change dat data include meta in footer,
+ // so may dst from changed meta which is nullable but old data is not
nullable,
+ // if so, we should set null_map to all not-null (0) by default
+ auto null_map_ptr =
+
static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr();
+ auto& null_map = assert_cast<vectorized::ColumnUInt8&>(*null_map_ptr);
+ null_map.insert_many_vals(0, count);
+ }
+
+ // 2. bulk read start ordinals for requested rows
+ vectorized::MutableColumnPtr starts_col =
vectorized::ColumnUInt64::create();
+ starts_col->reserve(count);
+ RETURN_IF_ERROR(_offsets_iterator->read_by_rowids(rowids, count,
starts_col));
+
+ // 3. bulk read next-start ordinals for rowid+1 (within bounds)
+ std::vector<rowid_t> next_rowids(count);
for (size_t i = 0; i < count; ++i) {
- RETURN_IF_ERROR(seek_to_ordinal(rowids[i]));
- size_t num_read = 1;
- RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr));
- DCHECK(num_read == 1);
+ uint64_t nr = rowids[i] + 1;
+ next_rowids[i] = nr < _map_reader->num_rows() ?
static_cast<rowid_t>(nr)
+ :
static_cast<rowid_t>(0); // placeholder
+ }
+ vectorized::MutableColumnPtr next_starts_col =
vectorized::ColumnUInt64::create();
+ next_starts_col->reserve(count);
+ // read for all; we'll fix out-of-bound cases below
+ RETURN_IF_ERROR(_offsets_iterator->read_by_rowids(next_rowids.data(),
count, next_starts_col));
+
+ // 4. fix next_start for rows whose next_rowid is out-of-bound (rowid ==
num_rows-1)
+ for (size_t i = 0; i < count; ++i) {
+ if (rowids[i] + 1 >= _map_reader->num_rows()) {
+ // seek to the last row and consume one to move decoder to
end-of-page,
+ // then peek page-tail sentinel next_array_item_ordinal as
next_start
+ RETURN_IF_ERROR(_offsets_iterator->seek_to_ordinal(rowids[i]));
+ size_t one = 1;
+ bool has_null_unused = false;
+ vectorized::MutableColumnPtr tmp =
vectorized::ColumnUInt64::create();
+ RETURN_IF_ERROR(_offsets_iterator->next_batch(&one, tmp,
&has_null_unused));
+ ordinal_t ns = 0;
+ RETURN_IF_ERROR(_offsets_iterator->_peek_one_offset(&ns));
+ // overwrite with sentinel
+
assert_cast<vectorized::ColumnUInt64&>(*next_starts_col).get_data()[i] = ns;
+ }
+ }
+
+ // 5. compute sizes and append offsets prefix-sum
+ auto& starts_data =
assert_cast<vectorized::ColumnUInt64&>(*starts_col).get_data();
+ auto& next_starts_data =
assert_cast<vectorized::ColumnUInt64&>(*next_starts_col).get_data();
+ std::vector<size_t> sizes(count, 0);
+ size_t acc = base;
+ offsets.get_data().reserve(offsets.get_data().size() + count);
+ for (size_t i = 0; i < count; ++i) {
+ size_t sz = static_cast<size_t>(next_starts_data[i] - starts_data[i]);
+ if (_map_reader->is_nullable() && !null_mask.empty() && null_mask[i]) {
+ sz = 0; // null rows do not consume elements
+ }
+ sizes[i] = sz;
+ acc += sz;
+ offsets.get_data().push_back(acc);
}
+
+ // 6. read key/value elements for non-empty sizes
+ auto keys_ptr = column_map->get_keys().assume_mutable();
+ auto vals_ptr = column_map->get_values().assume_mutable();
+
+ for (size_t i = 0; i < count; ++i) {
+ size_t sz = sizes[i];
+ if (sz == 0) {
+ continue;
+ }
+ ordinal_t start = static_cast<ordinal_t>(starts_data[i]);
+ RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start));
+ RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start));
+ size_t n = sz;
+ bool dummy_has_null = false;
+ RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr,
&dummy_has_null));
+ DCHECK(n == sz);
+ n = sz;
+ RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr,
&dummy_has_null));
+ DCHECK(n == sz);
+ }
+
Review Comment:
Maybe we could check whether the elements of consecutive map rows are contiguous and, if so, merge them into a single seek + read? Pseudocode as follows:
```
struct Range { ordinal_t start; size_t len; };
std::vector<Range> ranges;
for (size_t i = 0; i < count; ++i) {
if (sizes[i] == 0) continue;
ordinal_t cur_start = starts_data[i];
size_t total = sizes[i];
while (i + 1 < count && sizes[i + 1] > 0 &&
starts_data[i + 1] == cur_start + total) {
total += sizes[++i];
}
ranges.push_back({cur_start, total});
}
for (const auto& r : ranges) {
RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(r.start));
RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(r.start));
size_t n = r.len;
RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy));
n = r.len;
RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy));
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]