This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch opt_dict_perf
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/opt_dict_perf by this push:
     new 80445655f3 [improvement](dict) cache the value of find code (#17674)
80445655f3 is described below

commit 80445655f3e57036124b7fbe7c398fa469fd7b49
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Mar 10 18:35:07 2023 +0800

    [improvement](dict) cache the value of find code (#17674)
    
    * [improvement](join) Avoid reading from left child while hash table is empty (right join)
    
    * [improvement](dict) cache the value of find code
---
 be/src/olap/comparison_predicate.h       | 27 ++++++++++++++++++++++++---
 be/src/vec/columns/column_dictionary.h   | 10 +++++++++-
 be/src/vec/exec/join/vhash_join_node.cpp | 16 ++++++++++++++++
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index 26d97a27ed..2b591e8d93 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -263,8 +263,16 @@ public:
                                                  : 
dict_column_ptr->find_code(_value);
                     auto* data_array = dict_column_ptr->get_data().data();
 
-                    _base_loop_vec<true, is_and>(size, flags, null_map.data(), 
data_array,
-                                                 dict_code);
+                    do {
+                        if constexpr (PT == PredicateType::EQ) {
+                            if (dict_code == -2) {
+                                memset(flags, 0, size);
+                                break;
+                            }
+                        }
+                        _base_loop_vec<true, is_and>(size, flags, 
null_map.data(), data_array,
+                                                     dict_code);
+                    } while (false);
                 } else {
                     LOG(FATAL) << "column_dictionary must use StringRef 
predicate.";
                 }
@@ -286,7 +294,15 @@ public:
                                                  : 
dict_column_ptr->find_code(_value);
                     auto* data_array = dict_column_ptr->get_data().data();
 
-                    _base_loop_vec<false, is_and>(size, flags, nullptr, 
data_array, dict_code);
+                    do {
+                        if constexpr (PT == PredicateType::EQ) {
+                            if (dict_code == -2) {
+                                memset(flags, 0, size);
+                                break;
+                            }
+                        }
+                        _base_loop_vec<false, is_and>(size, flags, nullptr, 
data_array, dict_code);
+                    } while (false);
                 } else {
                     LOG(FATAL) << "column_dictionary must use StringRef 
predicate.";
                 }
@@ -511,6 +527,11 @@ private:
                                                        _value, _is_greater(), 
_is_eq())
                                              : 
dict_column_ptr->find_code(_value);
 
+                if constexpr (PT == PredicateType::EQ) {
+                    if (dict_code == -2 && !_opposite) {
+                        return 0;
+                    }
+                }
                 return _base_loop<is_nullable>(sel, size, null_map, 
data_array, dict_code);
             } else {
                 LOG(FATAL) << "column_dictionary must use StringRef 
predicate.";
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 5c0ee0f059..67ce4971f5 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -277,7 +277,13 @@ public:
         }
     }
 
-    int32_t find_code(const StringRef& value) const { return 
_dict.find_code(value); }
+    int32_t find_code(const StringRef& value) const {
+        if (value != _last_target_value) {
+            _cached_code = _dict.find_code(value);
+            _last_target_value = value;
+        }
+        return _cached_code;
+    }
 
     int32_t find_code_by_bound(const StringRef& value, bool greater, bool eq) 
const {
         return _dict.find_code_by_bound(value, greater, eq);
@@ -513,6 +519,8 @@ private:
     size_t _reserve_size;
     bool _dict_sorted = false;
     bool _dict_code_converted = false;
+    mutable int32_t _cached_code = -2;
+    mutable StringRef _last_target_value;
     Dictionary _dict;
     Container _codes;
     FieldType _type;
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index face23f898..45847960f7 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -598,6 +598,22 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
         *eos = true;
         return Status::OK();
     }
+
+    if (_join_op == TJoinOp::RIGHT_OUTER_JOIN) {
+        const auto hash_table_empty = std::visit(
+                Overload {[&](std::monostate&) -> bool {
+                              LOG(FATAL) << "FATAL: uninited hash table";
+                              __builtin_unreachable();
+                          },
+                          [&](auto&& arg) -> bool { return 
arg.hash_table.size() == 0; }},
+                *_hash_table_variants);
+
+        if (hash_table_empty) {
+            *eos = true;
+            return Status::OK();
+        }
+    }
+
     while (need_more_input_data()) {
         prepare_for_next();
         SCOPED_TIMER(_probe_next_timer);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to