This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e00e00c60748a794026fa19260fbcc10a5885373
Author: Yongqiang YANG <[email protected]>
AuthorDate: Thu Jun 23 08:44:43 2022 +0800

    [fix] do not read seq column when reading a compacted rowset (#10344)
    
    SEQ_COL is used on tables with a unique key to order data within one
    transaction (rowset). When there is only one rowset and that rowset has
    been compacted, the rows in the rowset are sorted and rows with the same
    keys have already been resolved by compaction, so the scanner sets
    direct_mode to let the read iterator skip sorting and aggregating, and
    the iterators do not need SEQ_COL.
    However, init_return_columns adds SEQ_COL to return_columns, which is
    passed to SegmentIterator. The segment iterator is then called via
    get_next with a block that has no SEQ_COL, and it creates the columns
    that are included in return_columns but missing from the block. SEQ_COL
    is nullable and the segment iterator does not handle that, so a core
    dump happens.
    
    Actually, in the above case, the segment iterator does not need to read
    SEQ_COL. When SEQ_COL is really needed, the iterators create the SEQ_COL
    column in the block, so the segment iterator does not need to create
    SEQ_COL at all.
---
 be/src/exec/olap_scanner.cpp | 37 ++++++++++++++++++++++---------------
 be/src/exec/olap_scanner.h   | 10 +++++-----
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 4e2003ae0b..11a14a55da 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -138,7 +138,25 @@ Status OlapScanner::_init_tablet_reader_params(
         const std::vector<OlapScanRange*>& key_ranges, const 
std::vector<TCondition>& filters,
         const std::vector<std::pair<string, 
std::shared_ptr<IBloomFilterFuncBase>>>&
                 bloom_filters) {
-    RETURN_IF_ERROR(_init_return_columns());
+    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
+    bool single_version =
+            (_tablet_reader_params.rs_readers.size() == 1 &&
+             _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 
0 &&
+             !_tablet_reader_params.rs_readers[0]
+                      ->rowset()
+                      ->rowset_meta()
+                      ->is_segments_overlapping()) ||
+            (_tablet_reader_params.rs_readers.size() == 2 &&
+             
_tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 &&
+             _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 
2 &&
+             !_tablet_reader_params.rs_readers[1]
+                      ->rowset()
+                      ->rowset_meta()
+                      ->is_segments_overlapping());
+
+    _tablet_reader_params.direct_mode = single_version || _aggregation;
+
+    RETURN_IF_ERROR(_init_return_columns(!_tablet_reader_params.direct_mode));
 
     _tablet_reader_params.tablet = _tablet;
     _tablet_reader_params.reader_type = READER_QUERY;
@@ -169,22 +187,11 @@ Status OlapScanner::_init_tablet_reader_params(
     // TODO(zc)
     _tablet_reader_params.profile = _parent->runtime_profile();
     _tablet_reader_params.runtime_state = _runtime_state;
-    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
-    bool single_version =
-            (_tablet_reader_params.rs_readers.size() == 1 &&
-             _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 
0 &&
-             
!_tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->is_segments_overlapping())
 ||
-            (_tablet_reader_params.rs_readers.size() == 2 &&
-             
_tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 &&
-             _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 
2 &&
-             
!_tablet_reader_params.rs_readers[1]->rowset()->rowset_meta()->is_segments_overlapping());
-
     _tablet_reader_params.origin_return_columns = &_return_columns;
     _tablet_reader_params.tablet_columns_convert_to_null_set = 
&_tablet_columns_convert_to_null_set;
 
-    if (_aggregation || single_version) {
+    if (_tablet_reader_params.direct_mode) {
         _tablet_reader_params.return_columns = _return_columns;
-        _tablet_reader_params.direct_mode = true;
     } else {
         // we need to fetch all key columns to do the right aggregation on 
storage engine side.
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
@@ -219,7 +226,7 @@ Status OlapScanner::_init_tablet_reader_params(
     return Status::OK();
 }
 
-Status OlapScanner::_init_return_columns() {
+Status OlapScanner::_init_return_columns(bool need_seq_col) {
     for (auto slot : _tuple_desc->slots()) {
         if (!slot->is_materialized()) {
             continue;
@@ -238,7 +245,7 @@ Status OlapScanner::_init_return_columns() {
     }
 
     // expand the sequence column
-    if (_tablet->tablet_schema().has_sequence_col()) {
+    if (_tablet->tablet_schema().has_sequence_col() && need_seq_col) {
         bool has_replace_col = false;
         for (auto col : _return_columns) {
             if (_tablet->tablet_schema().column(col).aggregation() ==
diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h
index 4a6ecbf06f..fa0d7e4578 100644
--- a/be/src/exec/olap_scanner.h
+++ b/be/src/exec/olap_scanner.h
@@ -93,11 +93,11 @@ public:
     const std::vector<SlotDescriptor*>& get_query_slots() const { return 
_query_slots; }
 
 protected:
-    Status _init_tablet_reader_params(const std::vector<OlapScanRange*>& 
key_ranges,
-                        const std::vector<TCondition>& filters,
-                        const std::vector<std::pair<string, 
std::shared_ptr<IBloomFilterFuncBase>>>&
-                                bloom_filters);
-    Status _init_return_columns();
+    Status _init_tablet_reader_params(
+            const std::vector<OlapScanRange*>& key_ranges, const 
std::vector<TCondition>& filters,
+            const std::vector<std::pair<string, 
std::shared_ptr<IBloomFilterFuncBase>>>&
+                    bloom_filters);
+    Status _init_return_columns(bool need_seq_col);
     void _convert_row_to_tuple(Tuple* tuple);
 
     // Update profile that need to be reported in realtime.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to