xiaokang commented on code in PR #24735:
URL: https://github.com/apache/doris/pull/24735#discussion_r1357639904
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1622,8 +1618,49 @@ Status SegmentIterator::_read_columns_by_index(uint32_t
nrows_read_limit, uint32
}
_split_row_ranges.emplace_back(std::pair {range_from, range_to});
- // if _opts.read_orderby_key_reverse is true, only read one range for
fast reverse purpose
+
+ _read_range_info.add(range_from, range_to);
} while (nrows_read < nrows_read_limit && !_opts.read_orderby_key_reverse);
+
+ if (_read_range_info.ranges.empty()) {
+ return Status::OK();
+ }
+
+ if (_opts.runtime_state) {
+ int32_t threshold =
_opts.runtime_state->query_options().inverted_index_read_mode_threshold;
+ if (threshold == 0 || _read_range_info.ranges.size() <= threshold) {
+
RETURN_IF_ERROR(_read_continuous_columns_data(_read_range_info.ranges));
+ } else {
+ RETURN_IF_ERROR(_read_many_columns_data(_read_range_info.rowids));
+ }
+ } else {
+
RETURN_IF_ERROR(_read_continuous_columns_data(_read_range_info.ranges));
Review Comment:
Initialize threshold to 0 by default so that this else branch can be removed.
##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -1224,6 +1226,12 @@ public void setMaxJoinNumberOfReorder(int
maxJoinNumberOfReorder) {
flag = VariableMgr.GLOBAL)
public boolean enableFullAutoAnalyze = true;
+ @VariableMgr.VarAttr(name = INVERTED_INDEX_READ_MODE_THRESHOLD,
+ description = {"在通过索引查询的时候,根据查询范围的松散程度使用不同的查询方式。",
+ "When querying through indexes,"
+ + " use different query methods according to the looseness
of the query scope."})
Review Comment:
Use "sparseness" instead of "looseness" here.
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1622,8 +1618,49 @@ Status SegmentIterator::_read_columns_by_index(uint32_t
nrows_read_limit, uint32
}
_split_row_ranges.emplace_back(std::pair {range_from, range_to});
- // if _opts.read_orderby_key_reverse is true, only read one range for
fast reverse purpose
+
+ _read_range_info.add(range_from, range_to);
} while (nrows_read < nrows_read_limit && !_opts.read_orderby_key_reverse);
+
+ if (_read_range_info.ranges.empty()) {
+ return Status::OK();
+ }
+
+ if (_opts.runtime_state) {
+ int32_t threshold =
_opts.runtime_state->query_options().inverted_index_read_mode_threshold;
Review Comment:
The name inverted_index_read_mode_threshold mentions the inverted index, but
the logic is not specific to inverted indexes.
##########
be/src/vec/columns/column_fixed_length_object.h:
##########
@@ -286,6 +286,23 @@ class ColumnFixedLengthObject final : public
COWHelper<IColumn, ColumnFixedLengt
memcpy(_data.data() + old_size, data + begin_offset, total_mem_size);
}
+ void insert_many_binary_data(char* data_array, uint32_t* len_array,
Review Comment:
Is this function related to this PR?
##########
be/src/olap/rowset/segment_v2/segment_iterator.h:
##########
@@ -99,6 +99,28 @@ struct ColumnPredicateInfo {
std::string query_op;
};
+struct ReadRangeInfo {
+ std::vector<uint32_t> rowids;
Review Comment:
It may be very large if many rows are selected.
##########
be/src/olap/rowset/segment_v2/segment_iterator.h:
##########
@@ -99,6 +99,28 @@ struct ColumnPredicateInfo {
std::string query_op;
};
+struct ReadRangeInfo {
+ std::vector<uint32_t> rowids;
+ std::vector<std::pair<uint32_t, uint32_t>> ranges;
+
+ void init(int32_t capacity) {
Review Comment:
reserve
##########
be/src/olap/rowset/segment_v2/segment_iterator.h:
##########
@@ -212,6 +234,9 @@ class SegmentIterator : public RowwiseIterator {
vectorized::MutableColumns&
column_block, size_t nrows);
[[nodiscard]] Status _read_columns_by_index(uint32_t nrows_read_limit,
uint32_t& nrows_read,
bool set_block_rowid);
+ [[nodiscard]] Status _read_continuous_columns_data(
Review Comment:
the name is not intuitive
##########
be/src/olap/rowset/segment_v2/binary_dict_page.cpp:
##########
@@ -283,6 +283,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n,
vectorized::MutableColumnPtr
Status BinaryDictPageDecoder::read_by_rowids(const rowid_t* rowids, ordinal_t
page_first_ordinal,
size_t* n,
vectorized::MutableColumnPtr& dst) {
if (_encoding_type == PLAIN_ENCODING) {
+ dst = dst->convert_to_predicate_column_if_dictionary();
Review Comment:
why
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]