github-actions[bot] commented on code in PR #38964:
URL: https://github.com/apache/doris/pull/38964#discussion_r1705267340
##########
be/src/vec/exec/scan/split_source_connector.h:
##########
@@ -43,6 +43,49 @@ class SplitSourceConnector {
virtual int num_scan_ranges() = 0;
virtual TFileScanRangeParams* get_params() = 0;
+
+protected:
+ template <typename T>
+ void _merge_ranges(std::vector<T>& merged_ranges, const std::vector<T>& scan_ranges) {
+ if (scan_ranges.size() <= _max_scanners) {
+ merged_ranges = scan_ranges;
+ return;
+ }
+
+ // There is no need for the number of scanners to exceed the number of threads in thread pool.
+ // scan_ranges is sorted by path(as well as partition path) in FE, so merge scan ranges in order.
+ // In the insert statement, reading data in partition order can reduce the memory usage of BE
+ // and prevent the generation of smaller tables.
+ merged_ranges.resize(_max_scanners);
+ int num_ranges = scan_ranges.size() / _max_scanners;
+ int num_add_one = scan_ranges.size() - num_ranges * _max_scanners;
+ int scan_index = 0;
+ int range_index = 0;
+ for (int i = 0; i < num_add_one; ++i) {
+ merged_ranges[scan_index] = scan_ranges[range_index++];
+ auto& ranges =
+ merged_ranges[scan_index++].scan_range.ext_scan_range.file_scan_range.ranges;
+ for (int j = 0; j < num_ranges; j++) {
+ auto& merged_ranges =
+ scan_ranges[range_index++].scan_range.ext_scan_range.file_scan_range.ranges;
+ ranges.insert(ranges.end(), merged_ranges.begin(), merged_ranges.end());
+ }
+ }
+ for (int i = num_add_one; i < _max_scanners; ++i) {
+ merged_ranges[scan_index] = scan_ranges[range_index++];
+ auto& ranges =
+ merged_ranges[scan_index++].scan_range.ext_scan_range.file_scan_range.ranges;
+ for (int j = 0; j < num_ranges - 1; j++) {
+ auto& merged_ranges =
+ scan_ranges[range_index++].scan_range.ext_scan_range.file_scan_range.ranges;
+ ranges.insert(ranges.end(), merged_ranges.begin(), merged_ranges.end());
+ }
+ }
+ LOG(INFO) << "Merge " << scan_ranges.size() << " scan ranges to " << merged_ranges.size();
+ }
+
+protected:
Review Comment:
warning: redundant access specifier has the same accessibility as the
previous access specifier [readability-redundant-access-specifiers]
```suggestion
```
<details>
<summary>Additional context</summary>
**be/src/vec/exec/scan/split_source_connector.h:46:** previously declared
here
```cpp
protected:
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]