Copilot commented on code in PR #61888:
URL: https://github.com/apache/doris/pull/61888#discussion_r3008537605
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -1838,6 +2094,19 @@ Status
FileColumnIterator::next_batch_of_zone_map(size_t* n, MutableColumnPtr& d
}
Status FileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool*
has_null) {
+ if (read_null_map_only()) {
+ DLOG(INFO) << "File column iterator column " << _column_name
+ << " in NULL_MAP_ONLY mode, reading only null map.";
+ DORIS_CHECK(dst->is_nullable());
+ auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+ nullable_col.get_nested_column().insert_many_defaults(*n);
+ size_t read_rows = *n;
+ auto& null_map_data = nullable_col.get_null_map_data();
+ RETURN_IF_ERROR(read_null_map(&read_rows, null_map_data));
+ *has_null = true;
Review Comment:
`FileColumnIterator::next_batch()` calls `read_null_map(&read_rows,
null_map_data)`, but there is no definition of `read_null_map` in this TU or in
the class declaration, so this will fail to compile. Even if the definition is
added later, this branch should also update `*n` to `read_rows` and keep the
nested-column/default sizes consistent with the actual number of rows read.
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -1903,6 +2172,68 @@ Status FileColumnIterator::next_batch(size_t* n,
MutableColumnPtr& dst, bool* ha
Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t
count,
MutableColumnPtr& dst) {
+ if (read_null_map_only()) {
+ DLOG(INFO) << "File column iterator column " << _column_name
+ << " in NULL_MAP_ONLY mode, reading only null map by
rowids.";
+
+ DORIS_CHECK(dst->is_nullable());
+ auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+ auto& null_map_data = nullable_col.get_null_map_data();
+ const size_t base_size = null_map_data.size();
+ null_map_data.resize(base_size + count);
+
+ nullable_col.get_nested_column().insert_many_defaults(count);
+
+ size_t remaining = count;
+ size_t total_read_count = 0;
+ size_t nrows_to_read = 0;
+ while (remaining > 0) {
+ RETURN_IF_ERROR(seek_to_ordinal(rowids[total_read_count]));
+
+ nrows_to_read = std::min(remaining, _page.remaining());
+
+ if (_page.has_null) {
+ size_t already_read = 0;
+ while ((nrows_to_read - already_read) > 0) {
+ bool is_null = false;
+ size_t this_run = std::min(nrows_to_read - already_read,
_page.remaining());
+ if (UNLIKELY(this_run == 0)) {
+ break;
+ }
+ this_run = _page.null_decoder.GetNextRun(&is_null,
this_run);
+
+ size_t offset = total_read_count + already_read;
+ size_t this_read_count = 0;
+ rowid_t current_ordinal_in_page =
+ cast_set<uint32_t>(_page.offset_in_page +
_page.first_ordinal);
+ for (size_t i = 0; i < this_run; ++i) {
+ if (rowids[offset + i] - current_ordinal_in_page >=
this_run) {
+ break;
+ }
+ this_read_count++;
+ }
+
+ if (this_read_count > 0) {
+ memset(null_map_data.data() + base_size + offset,
is_null ? 1 : 0,
+ this_read_count);
+ }
+
+ already_read += this_read_count;
+ _page.offset_in_page += this_run;
+ }
+
+ nrows_to_read = already_read;
+ total_read_count += nrows_to_read;
+ remaining -= nrows_to_read;
+ } else {
+ memset(null_map_data.data() + base_size + total_read_count, 0,
nrows_to_read);
+ total_read_count += nrows_to_read;
+ remaining -= nrows_to_read;
Review Comment:
In the NULL_MAP_ONLY branch of `FileColumnIterator::read_by_rowids()`, the
`_page.has_null == false` path does `memset(..., 0, nrows_to_read)` where
`nrows_to_read = min(remaining, _page.remaining())`. However, `_page.remaining()`
is the number of rows left in the current page, not the number of requested
`rowids` that actually fall within this page. If the next requested rowid lies
in a later page, this memset will incorrectly mark it as non-null based on the
current page. Before filling, compute how many rowids fall within
`[page.first_ordinal, page.first_ordinal + page.num_rows)`, mirroring the logic
of the `_page.has_null` path.
```suggestion
// _page.has_null == false, so all entries in this page are
non-null.
// However, only mark as non-null the subset of requested
rowids that
// actually fall within this page's ordinal range.
rowid_t page_start = cast_set<uint32_t>(_page.first_ordinal);
rowid_t page_end = cast_set<uint32_t>(_page.first_ordinal +
_page.num_rows);
size_t this_read_count = 0;
while (this_read_count < remaining) {
rowid_t rowid = rowids[total_read_count +
this_read_count];
if (rowid < page_start || rowid >= page_end) {
break;
}
++this_read_count;
}
if (this_read_count == 0) {
// No requested rowids fall within this page; move to
next iteration,
// where seek_to_ordinal() will reposition to the
correct page.
continue;
}
memset(null_map_data.data() + base_size + total_read_count,
0, this_read_count);
total_read_count += this_read_count;
remaining -= this_read_count;
```
##########
be/src/core/column/column_string.h:
##########
@@ -276,6 +276,29 @@ class ColumnStr final : public COWHelper<IColumn,
ColumnStr<T>> {
sanity_check_simple();
}
+ // Insert `num` string entries with real length information but no actual
+ // character data. The `lengths` array provides the byte length of each
+ // string. Offsets are built with correct cumulative sizes so that
+ // size_at(i) returns the true string length. The chars buffer is extended
+ // with zero-filled padding to maintain the invariant chars.size() ==
offsets.back().
+ // Used by OFFSET_ONLY reading mode where actual string content is not
needed
+ // but length information must be preserved (e.g., for length() function).
+ void insert_offsets_from_lengths(const uint32_t* lengths, size_t num)
override {
+ if (UNLIKELY(num == 0)) {
+ return;
+ }
+ const auto old_rows = offsets.size();
+ // Build cumulative offsets from lengths
+ offsets.resize(old_rows + num);
+ auto* offsets_ptr = &offsets[old_rows];
+ size_t running_offset = offsets[old_rows - 1];
+ for (size_t i = 0; i < num; ++i) {
+ running_offset += lengths[i];
+ offsets_ptr[i] = static_cast<T>(running_offset);
+ }
+ chars.resize(offsets[old_rows + num - 1]);
Review Comment:
The comment says the chars buffer is extended with “zero-filled padding”,
but `PaddedPODArray::resize()` does not guarantee zero-initialization. In
OFFSET_ONLY mode this can leave uninitialized bytes in `chars`, which is risky
if the column is later materialized/serialized. Consider explicitly
zero-filling the newly resized region (or update the comment if zero-fill is
not required/guaranteed).
```suggestion
const size_t old_chars_size = chars.size();
const size_t new_chars_size = offsets[old_rows + num - 1];
check_chars_length(new_chars_size, old_rows + num);
if (new_chars_size > old_chars_size) {
chars.resize(new_chars_size);
std::memset(chars.data() + old_chars_size, 0, new_chars_size -
old_chars_size);
} else {
chars.resize(new_chars_size);
}
```
##########
be/src/core/column/column_string.h:
##########
@@ -276,6 +276,29 @@ class ColumnStr final : public COWHelper<IColumn,
ColumnStr<T>> {
sanity_check_simple();
}
+ // Insert `num` string entries with real length information but no actual
+ // character data. The `lengths` array provides the byte length of each
+ // string. Offsets are built with correct cumulative sizes so that
+ // size_at(i) returns the true string length. The chars buffer is extended
+ // with zero-filled padding to maintain the invariant chars.size() ==
offsets.back().
+ // Used by OFFSET_ONLY reading mode where actual string content is not
needed
+ // but length information must be preserved (e.g., for length() function).
+ void insert_offsets_from_lengths(const uint32_t* lengths, size_t num)
override {
+ if (UNLIKELY(num == 0)) {
+ return;
+ }
+ const auto old_rows = offsets.size();
+ // Build cumulative offsets from lengths
+ offsets.resize(old_rows + num);
+ auto* offsets_ptr = &offsets[old_rows];
+ size_t running_offset = offsets[old_rows - 1];
+ for (size_t i = 0; i < num; ++i) {
+ running_offset += lengths[i];
+ offsets_ptr[i] = static_cast<T>(running_offset);
+ }
+ chars.resize(offsets[old_rows + num - 1]);
Review Comment:
`ColumnStr::insert_offsets_from_lengths()` grows `offsets/chars` without any
overflow check. Other insertion paths call `check_chars_length(...)` to enforce
`MAX_STRING_SIZE`; this method should do the same based on the new cumulative
offset, otherwise very large length sums can overflow offsets or exceed the 4GB
limit silently.
```suggestion
size_t running_offset = old_rows > 0 ? offsets[old_rows - 1] : 0;
for (size_t i = 0; i < num; ++i) {
running_offset += lengths[i];
            check_chars_length(running_offset, old_rows + i + 1);
offsets_ptr[i] = static_cast<T>(running_offset);
}
chars.resize(running_offset);
```
##########
be/src/storage/segment/column_reader.h:
##########
@@ -508,6 +529,21 @@ class EmptyFileColumnIterator final : public
ColumnIterator {
ordinal_t get_current_ordinal() const override { return 0; }
};
+// StringFileColumnIterator extends FileColumnIterator with meta-only reading
+// support for string/binary column types. When the OFFSET path is detected in
+// set_access_paths, it sets only_read_offsets on the ColumnIteratorOptions so
+// that the BinaryPlainPageDecoder skips chars memcpy and only fills offsets.
Review Comment:
This comment is slightly misleading:
`StringFileColumnIterator::set_access_paths()` sets `_read_mode`, but
`only_read_offsets` is actually propagated into `ColumnIteratorOptions` inside
`StringFileColumnIterator::init()`. Consider rewording to reflect the actual
mechanism (set mode in `set_access_paths`, propagate option during `init`) to
avoid confusion for future readers.
```suggestion
// set_access_paths, it records an offset-only read mode, which is then
// propagated as only_read_offsets in ColumnIteratorOptions during init so
that
// the BinaryPlainPageDecoder skips chars memcpy and only fills offsets.
```
##########
be/src/storage/segment/binary_dict_page.cpp:
##########
@@ -318,11 +319,28 @@ Status BinaryDictPageDecoder::next_batch(size_t* n,
MutableColumnPtr& dst) {
_bit_shuffle_ptr->_cur_index));
*n = max_fetch;
- const auto* data_array = reinterpret_cast<const
int32_t*>(_bit_shuffle_ptr->get_data(0));
- size_t start_index = _bit_shuffle_ptr->_cur_index;
+ if (_options.only_read_offsets) {
+ // OFFSET_ONLY mode: resolve dict codes to get real string lengths
+ // without copying actual char data. This allows length() to work.
+ const auto* data_array = reinterpret_cast<const
int32_t*>(_bit_shuffle_ptr->get_data(0));
+ size_t start_index = _bit_shuffle_ptr->_cur_index;
+ // Reuse _buffer (int32_t vector) to store uint32_t lengths.
+ // int32_t and uint32_t have the same size/alignment, and string
+ // lengths are always non-negative, so the bit patterns are identical.
+ _buffer.resize(max_fetch);
+ for (size_t i = 0; i < max_fetch; ++i) {
+ int32_t codeword = data_array[start_index + i];
+ _buffer[i] = static_cast<int32_t>(_dict_word_info[codeword].size);
+ }
+ dst->insert_offsets_from_lengths(reinterpret_cast<const
uint32_t*>(_buffer.data()),
+ max_fetch);
Review Comment:
In OFFSET_ONLY mode, `_buffer` is a `std::vector<int32_t>` but is used to
store string byte lengths and then passed as `reinterpret_cast<const
uint32_t*>(_buffer.data())`. This relies on strict-aliasing-unsafe type punning
and also on implementation-defined behavior when casting sizes > INT32_MAX to
`int32_t`. Prefer using a `std::vector<uint32_t>` (or another properly typed
buffer) for lengths to avoid UB and portability issues.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]