This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch opt_dict_perf
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/opt_dict_perf by this push:
new 80445655f3 [improvement](dict) cache the value of find code (#17674)
80445655f3 is described below
commit 80445655f3e57036124b7fbe7c398fa469fd7b49
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Mar 10 18:35:07 2023 +0800
[improvement](dict) cache the value of find code (#17674)
* [improvement](join) Avoid reading from left child while hash table is empty (right join)
* [improvement](dict) cache the value of find code
---
be/src/olap/comparison_predicate.h | 27 ++++++++++++++++++++++++---
be/src/vec/columns/column_dictionary.h | 10 +++++++++-
be/src/vec/exec/join/vhash_join_node.cpp | 16 ++++++++++++++++
3 files changed, 49 insertions(+), 4 deletions(-)
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index 26d97a27ed..2b591e8d93 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -263,8 +263,16 @@ public:
:
dict_column_ptr->find_code(_value);
auto* data_array = dict_column_ptr->get_data().data();
- _base_loop_vec<true, is_and>(size, flags, null_map.data(),
data_array,
- dict_code);
+ do {
+ if constexpr (PT == PredicateType::EQ) {
+ if (dict_code == -2) {
+ memset(flags, 0, size);
+ break;
+ }
+ }
+ _base_loop_vec<true, is_and>(size, flags,
null_map.data(), data_array,
+ dict_code);
+ } while (false);
} else {
LOG(FATAL) << "column_dictionary must use StringRef
predicate.";
}
@@ -286,7 +294,15 @@ public:
:
dict_column_ptr->find_code(_value);
auto* data_array = dict_column_ptr->get_data().data();
- _base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, dict_code);
+ do {
+ if constexpr (PT == PredicateType::EQ) {
+ if (dict_code == -2) {
+ memset(flags, 0, size);
+ break;
+ }
+ }
+ _base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, dict_code);
+ } while (false);
} else {
LOG(FATAL) << "column_dictionary must use StringRef
predicate.";
}
@@ -511,6 +527,11 @@ private:
_value, _is_greater(),
_is_eq())
:
dict_column_ptr->find_code(_value);
+ if constexpr (PT == PredicateType::EQ) {
+ if (dict_code == -2 && !_opposite) {
+ return 0;
+ }
+ }
return _base_loop<is_nullable>(sel, size, null_map,
data_array, dict_code);
} else {
LOG(FATAL) << "column_dictionary must use StringRef
predicate.";
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 5c0ee0f059..67ce4971f5 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -277,7 +277,13 @@ public:
}
}
- int32_t find_code(const StringRef& value) const { return
_dict.find_code(value); }
+ int32_t find_code(const StringRef& value) const { // Returns the dictionary code for `value`; repeats the previous result when asked for the same value again.
+ if (value != _last_target_value) { // Cache miss: `value` differs from the one remembered by the previous call.
+ _cached_code = _dict.find_code(value);
+ _last_target_value = value; // NOTE(review): StringRef is presumably a non-owning view, so the remembered value may outlive the caller's buffer -- confirm.
+ }
+ return _cached_code; // NOTE(review): nothing in this method refreshes the cache if _dict changes, and a first call whose value equals the default-constructed _last_target_value would return the initial _cached_code without a lookup -- verify.
+ }
int32_t find_code_by_bound(const StringRef& value, bool greater, bool eq)
const {
return _dict.find_code_by_bound(value, greater, eq);
@@ -513,6 +519,8 @@ private:
size_t _reserve_size;
bool _dict_sorted = false;
bool _dict_code_converted = false;
+ mutable int32_t _cached_code = -2;
+ mutable StringRef _last_target_value;
Dictionary _dict;
Container _codes;
FieldType _type;
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index face23f898..45847960f7 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -598,6 +598,22 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
*eos = true;
return Status::OK();
}
+
+ if (_join_op == TJoinOp::RIGHT_OUTER_JOIN) {
+ const auto hash_table_empty = std::visit(
+ Overload {[&](std::monostate&) -> bool {
+ LOG(FATAL) << "FATAL: uninited hash table";
+ __builtin_unreachable();
+ },
+ [&](auto&& arg) -> bool { return
arg.hash_table.size() == 0; }},
+ *_hash_table_variants);
+
+ if (hash_table_empty) {
+ *eos = true;
+ return Status::OK();
+ }
+ }
+
while (need_more_input_data()) {
prepare_for_next();
SCOPED_TIMER(_probe_next_timer);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]