Copilot commented on code in PR #55534:
URL: https://github.com/apache/doris/pull/55534#discussion_r2437960438
##########
be/src/pipeline/exec/scan_operator.h:
##########
@@ -122,6 +124,8 @@ class ScanLocalStateBase : public PipelineXLocalState<> {
RuntimeFilterConsumerHelper _helper;
std::mutex _conjunct_lock;
+ // magic number as seed to generate hash value for condiction cache
Review Comment:
Corrected spelling of 'condiction' to 'condition'.
```suggestion
// magic number as seed to generate hash value for condition cache
```
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -630,6 +678,8 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
++it;
}
}
+ _opts.condition_cache_digest =
+ _common_expr_ctxs_push_down.empty() ? 0 :
_opts.condition_cache_digest;
Review Comment:
This conditional assignment could be simplified to just check if the digest
should be reset when common expressions are empty, making the logic clearer.
```suggestion
if (_common_expr_ctxs_push_down.empty()) {
_opts.condition_cache_digest = 0;
}
```
##########
be/src/pipeline/exec/scan_operator.cpp:
##########
@@ -115,6 +115,20 @@ Status ScanLocalState<Derived>::open(RuntimeState* state) {
p._common_expr_ctxs_push_down[i]->clone(state,
_common_expr_ctxs_push_down[i]));
}
RETURN_IF_ERROR(_helper.acquire_runtime_filter(state, _conjuncts,
p.row_descriptor()));
+
+ // Disable condition cache in topn filter valid. TODO:: Try to support the
topn filter in condition cache
+ if (state->query_options().condition_cache_digest &&
p._topn_filter_source_node_ids.empty()) {
+ _condition_cache_digest =
state->query_options().condition_cache_digest;
+ for (auto& conjunt : _conjuncts) {
+ _condition_cache_digest =
conjunt->get_digest(_condition_cache_digest);
Review Comment:
Corrected spelling of 'conjunt' to 'conjunct'.
```suggestion
for (auto& conjunct : _conjuncts) {
_condition_cache_digest =
conjunct->get_digest(_condition_cache_digest);
```
##########
be/src/olap/parallel_scanner_builder.cpp:
##########
@@ -87,7 +87,7 @@ Status
ParallelScannerBuilder::_build_scanners_by_rowid(std::list<ScannerSPtr>&
auto rows_need = _rows_per_scanner - rows_collected;
// 0.9: try to avoid splitting the segments into
excessively small parts.
- if (rows_need >= remaining_rows * 0.9) {
+ if (rows_need >= remaining_rows * 9 / 10) {
Review Comment:
Replace magic number calculation with a named constant or variable to
improve code clarity and maintainability.
##########
be/src/olap/parallel_scanner_builder.cpp:
##########
@@ -188,21 +192,62 @@ Status
ParallelScannerBuilder::_build_scanners_by_segment(std::list<ScannerSPtr>
continue;
}
- // Build scanners for [i, i+1) segment range, without row-range
slicing.
- for (int64_t i = 0; i < rowset->num_segments(); ++i) {
- RowSetSplits split(reader->clone());
- split.segment_offsets.first = i;
- split.segment_offsets.second = i + 1;
- // No row-ranges slicing; scan whole segment i.
- DCHECK_GE(split.segment_offsets.second,
split.segment_offsets.first + 1);
+ int64_t segment_start = 0;
+ auto split = RowSetSplits(reader->clone());
- TabletReader::ReadSource partitial_read_source;
+ for (size_t i = 0; i < segments_rows.size(); ++i) {
+ const size_t rows_of_segment = segments_rows[i];
+
+ // Check if adding this segment would exceed rows_per_scanner
+ // 0.9: try to avoid splitting the segments into excessively
small parts.
+ if (rows_collected > 0 && (rows_collected + rows_of_segment >
_rows_per_scanner &&
+ rows_collected < _rows_per_scanner
* 9 / 10)) {
Review Comment:
Replace magic number calculation with a named constant or variable to
improve code clarity and maintainability.
##########
be/src/vec/exprs/vliteral.cpp:
##########
@@ -92,5 +92,10 @@ bool VLiteral::equals(const VExpr& other) {
return true;
}
+uint64_t VLiteral::get_digest(uint64_t seed) const {
+ _column_ptr->update_xxHash_with_value(0, 1, seed, nullptr);
+ return seed;
+}
Review Comment:
The method modifies the seed parameter but returns the original seed value
instead of the updated hash. The updated hash should be returned.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]