This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new eb936025a24 [Revert](exec) revert error change in pr55534 to keep the 
origin logic for index scan (#58289)
eb936025a24 is described below

commit eb936025a24d35e9b2ffe4719efe40af158b7eed
Author: HappenLee <[email protected]>
AuthorDate: Tue Nov 25 20:14:54 2025 +0800

    [Revert](exec) revert error change in pr55534 to keep the origin logic for 
index scan (#58289)
    
    ### What problem does this PR solve?
    
    PR #55534 changed the scanner-building logic used by index scans, which
    caused a performance problem. This commit reverts that erroneous change.
---
 be/src/olap/parallel_scanner_builder.cpp    | 81 +++++++----------------------
 be/src/olap/parallel_scanner_builder.h      |  6 +--
 be/src/pipeline/exec/olap_scan_operator.cpp |  2 +-
 3 files changed, 22 insertions(+), 67 deletions(-)

diff --git a/be/src/olap/parallel_scanner_builder.cpp 
b/be/src/olap/parallel_scanner_builder.cpp
index 48fe50469cc..ec11c42aa7e 100644
--- a/be/src/olap/parallel_scanner_builder.cpp
+++ b/be/src/olap/parallel_scanner_builder.cpp
@@ -35,8 +35,8 @@ using namespace vectorized;
 
 Status ParallelScannerBuilder::build_scanners(std::list<ScannerSPtr>& 
scanners) {
     RETURN_IF_ERROR(_load());
-    if (_scan_parallelism_by_segment) {
-        return _build_scanners_by_segment(scanners);
+    if (_scan_parallelism_by_per_segment) {
+        return _build_scanners_by_per_segment(scanners);
     } else if (_is_dup_mow_key) {
         // Default strategy for DUP/MOW tables: split by rowids within segments
         return _build_scanners_by_rowid(scanners);
@@ -170,7 +170,7 @@ Status 
ParallelScannerBuilder::_build_scanners_by_rowid(std::list<ScannerSPtr>&
 // This guarantees the number of scanners equals the number of segments across 
all rowsets
 // for the involved tablets. It preserves delete predicates and key ranges, 
and clones
 // RowsetReader per scanner to avoid sharing between scanners.
-Status 
ParallelScannerBuilder::_build_scanners_by_segment(std::list<ScannerSPtr>& 
scanners) {
+Status 
ParallelScannerBuilder::_build_scanners_by_per_segment(std::list<ScannerSPtr>& 
scanners) {
     DCHECK_GE(_rows_per_scanner, _min_rows_per_scanner);
 
     for (auto&& [tablet, version] : _tablets) {
@@ -182,79 +182,34 @@ Status 
ParallelScannerBuilder::_build_scanners_by_segment(std::list<ScannerSPtr>
             
ExecEnv::GetInstance()->storage_engine().to_cloud().tablet_hotspot().count(*tablet);
         }
 
-        // Collect segments into scanners based on rows count instead of one 
scanner per segment
-        TabletReadSource partitial_read_source;
-        int64_t rows_collected = 0;
-
+        // For each RowSet split in the read source, split by segment id and 
build
+        // one scanner per segment. Keep delete predicates shared.
         for (auto& rs_split : entire_read_source.rs_splits) {
             auto reader = rs_split.rs_reader;
             auto rowset = reader->rowset();
             const auto rowset_id = rowset->rowset_id();
-
             const auto& segments_rows = _all_segments_rows[rowset_id];
             if (segments_rows.empty() || rowset->num_rows() == 0) {
                 continue;
             }
 
-            int64_t segment_start = 0;
-            auto split = RowSetSplits(reader->clone());
-
-            for (size_t i = 0; i < segments_rows.size(); ++i) {
-                const size_t rows_of_segment = segments_rows[i];
+            // Build scanners for [i, i+1) segment range, without row-range 
slicing.
+            for (int64_t i = 0; i < rowset->num_segments(); ++i) {
+                RowSetSplits split(reader->clone());
+                split.segment_offsets.first = i;
+                split.segment_offsets.second = i + 1;
+                // No row-ranges slicing; scan whole segment i.
+                DCHECK_GE(split.segment_offsets.second, 
split.segment_offsets.first + 1);
 
-                // Check if adding this segment would exceed rows_per_scanner
-                // 0.9: try to avoid splitting the segments into excessively 
small parts.
-                if (rows_collected > 0 && (rows_collected + rows_of_segment > 
_rows_per_scanner &&
-                                           rows_collected < _rows_per_scanner 
* 9 / 10)) {
-                    // Create a new scanner with collected segments
-                    split.segment_offsets.first = segment_start;
-                    split.segment_offsets.second =
-                            i; // Range is [segment_start, i), including all 
segments from segment_start to i-1
-
-                    DCHECK_GT(split.segment_offsets.second, 
split.segment_offsets.first);
-
-                    
partitial_read_source.rs_splits.emplace_back(std::move(split));
-
-                    scanners.emplace_back(_build_scanner(
-                            tablet, version, _key_ranges,
-                            {.rs_splits = 
std::move(partitial_read_source.rs_splits),
-                             .delete_predicates = 
entire_read_source.delete_predicates,
-                             .delete_bitmap = 
entire_read_source.delete_bitmap}));
-
-                    // Reset for next scanner
-                    partitial_read_source = {};
-                    split = RowSetSplits(reader->clone());
-                    segment_start = i;
-                    rows_collected = 0;
-                }
-
-                // Add current segment to the current scanner
-                rows_collected += rows_of_segment;
-            }
-
-            // Add remaining segments in this rowset to a scanner
-            if (rows_collected > 0) {
-                split.segment_offsets.first = segment_start;
-                split.segment_offsets.second = segments_rows.size();
-                DCHECK_GT(split.segment_offsets.second, 
split.segment_offsets.first);
+                TabletReadSource partitial_read_source;
                 partitial_read_source.rs_splits.emplace_back(std::move(split));
-            }
-        }
 
-        // Add remaining segments across all rowsets to a scanner
-        if (rows_collected > 0) {
-            DCHECK_GT(partitial_read_source.rs_splits.size(), 0);
-#ifndef NDEBUG
-            for (auto& split : partitial_read_source.rs_splits) {
-                DCHECK(split.rs_reader != nullptr);
-                DCHECK_LT(split.segment_offsets.first, 
split.segment_offsets.second);
+                scanners.emplace_back(
+                        _build_scanner(tablet, version, _key_ranges,
+                                       {.rs_splits = 
std::move(partitial_read_source.rs_splits),
+                                        .delete_predicates = 
entire_read_source.delete_predicates,
+                                        .delete_bitmap = 
entire_read_source.delete_bitmap}));
             }
-#endif
-            scanners.emplace_back(
-                    _build_scanner(tablet, version, _key_ranges,
-                                   {.rs_splits = 
std::move(partitial_read_source.rs_splits),
-                                    .delete_predicates = 
entire_read_source.delete_predicates,
-                                    .delete_bitmap = 
entire_read_source.delete_bitmap}));
         }
     }
 
diff --git a/be/src/olap/parallel_scanner_builder.h 
b/be/src/olap/parallel_scanner_builder.h
index 7c57711bc70..74fe5ad0f16 100644
--- a/be/src/olap/parallel_scanner_builder.h
+++ b/be/src/olap/parallel_scanner_builder.h
@@ -65,7 +65,7 @@ public:
 
     void set_min_rows_per_scanner(int64_t size) { _min_rows_per_scanner = 
size; }
 
-    void set_scan_parallelism_by_segment(bool v) { 
_scan_parallelism_by_segment = v; }
+    void set_scan_parallelism_by_per_segment(bool v) { 
_scan_parallelism_by_per_segment = v; }
 
     const OlapReaderStatistics* builder_stats() const { return 
&_builder_stats; }
 
@@ -75,7 +75,7 @@ private:
     Status _build_scanners_by_rowid(std::list<ScannerSPtr>& scanners);
 
     // Build scanners so that each segment is handled by its own scanner.
-    Status _build_scanners_by_segment(std::list<ScannerSPtr>& scanners);
+    Status _build_scanners_by_per_segment(std::list<ScannerSPtr>& scanners);
 
     std::shared_ptr<vectorized::OlapScanner> _build_scanner(
             BaseTabletSPtr tablet, int64_t version, const 
std::vector<OlapScanRange*>& key_ranges,
@@ -96,7 +96,7 @@ private:
     std::map<RowsetId, std::vector<size_t>> _all_segments_rows;
 
     // Force building one scanner per segment when true.
-    bool _scan_parallelism_by_segment {false};
+    bool _scan_parallelism_by_per_segment {false};
 
     std::shared_ptr<RuntimeProfile> _scanner_profile;
     OlapReaderStatistics _builder_stats;
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp 
b/be/src/pipeline/exec/olap_scan_operator.cpp
index 9080fbd581b..76af87695b1 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -503,7 +503,7 @@ Status 
OlapScanLocalState::_init_scanners(std::list<vectorized::ScannerSPtr>* sc
             // TODO: Use optimize_index_scan_parallelism for ann range search 
in the future.
             // Currently, ann topn is enough
             if (_ann_topn_runtime != nullptr) {
-                scanner_builder.set_scan_parallelism_by_segment(true);
+                scanner_builder.set_scan_parallelism_by_per_segment(true);
             }
         }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to