This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9c52b4a508 [enhance] improve dict in-predicate evaluate (#10009)
9c52b4a508 is described below
commit 9c52b4a508f7c4865ae077e286ea0b7794efa03b
Author: minghong <[email protected]>
AuthorDate: Thu Jun 9 00:25:30 2022 +0800
[enhance] improve dict in-predicate evaluate (#10009)
---
be/src/olap/in_list_predicate.cpp | 13 ++++++++-----
be/src/vec/columns/column_dictionary.h | 17 +++++++++--------
2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp
index b33ef09c49..82c8241368 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -132,13 +132,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
auto* nested_col_ptr = vectorized::check_and_get_column<
\
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data();
\
- auto dict_codes = nested_col_ptr->find_codes(_values);
\
+ std::vector<bool> selected;
\
+ nested_col_ptr->find_codes(_values, selected);
\
for (uint16_t i = 0; i < *size; i++) {
\
uint16_t idx = sel[i];
\
sel[new_size] = idx;
\
const auto& cell_value = data_array[idx];
\
- bool ret = !null_bitmap[idx] &&
\
- (dict_codes.find(cell_value) OP dict_codes.end()); \
+ DCHECK(cell_value < selected.size());
\
+ bool ret = !null_bitmap[idx] && (selected[cell_value] OP false); \
new_size += _opposite ? !ret : ret;
\
}
\
}
\
@@ -161,12 +162,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
column);
\
auto& data_array = dict_col.get_data();
\
- auto dict_codes = dict_col.find_codes(_values);
\
+ std::vector<bool> selected;
\
+ dict_col.find_codes(_values, selected);
\
for (uint16_t i = 0; i < *size; i++) {
\
uint16_t idx = sel[i];
\
sel[new_size] = idx;
\
const auto& cell_value = data_array[idx];
\
- auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \
+ DCHECK(cell_value < selected.size());
\
+ auto result = (selected[cell_value] OP false);
\
new_size += _opposite ? !result : result;
\
}
\
}
\
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 532d7eb6a7..29db3a334c 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -258,9 +258,9 @@ public:
uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); }
- phmap::flat_hash_set<int32_t> find_codes(
- const phmap::flat_hash_set<StringValue>& values) const {
- return _dict.find_codes(values);
+ void find_codes(const phmap::flat_hash_set<StringValue>& values,
+ std::vector<bool>& selected) const {
+ return _dict.find_codes(values, selected);
}
bool is_dict_sorted() const { return _dict_sorted; }
@@ -362,16 +362,17 @@ public:
return greater ? bound - greater + eq : bound - eq;
}
- phmap::flat_hash_set<int32_t> find_codes(
- const phmap::flat_hash_set<StringValue>& values) const {
- phmap::flat_hash_set<int32_t> code_set;
+ void find_codes(const phmap::flat_hash_set<StringValue>& values,
+ std::vector<bool>& selected) const {
+ size_t dict_word_num = _dict_data.size();
+ selected.resize(dict_word_num);
+ selected.assign(dict_word_num, false);
for (const auto& value : values) {
auto it = _inverted_index.find(value);
if (it != _inverted_index.end()) {
- code_set.insert(it->second);
+ selected[it->second] = true;
}
}
- return code_set;
}
void clear() {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]