This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 23c12fd68f8c980e53df8daf5a388560138e8622
Author: Jerry Hu <[email protected]>
AuthorDate: Thu Mar 21 22:42:38 2024 +0800

    [fix](join) core caused by null-safe-equal join (#32623)
---
 be/src/pipeline/exec/hashjoin_build_sink.cpp             | 16 +++++++++++++++-
 be/src/vec/common/hash_table/hash_map_context_creator.h  | 15 ++++++++++++---
 be/src/vec/exec/join/vhash_join_node.cpp                 | 12 +++++++++++-
 .../query_p0/join/test_half_join_nullable_build_side.out |  8 ++++++++
 .../join/test_half_join_nullable_build_side.groovy       |  9 +++++++++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp 
b/be/src/pipeline/exec/hashjoin_build_sink.cpp
index b3ee878a941..dab127c2c50 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.cpp
+++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp
@@ -361,8 +361,22 @@ void 
HashJoinBuildSinkLocalState::_hash_table_init(RuntimeState* state) {
                     }
                     return;
                 }
+
+                std::vector<vectorized::DataTypePtr> data_types;
+                for (size_t i = 0; i != _build_expr_ctxs.size(); ++i) {
+                    auto& ctx = _build_expr_ctxs[i];
+                    auto data_type = ctx->root()->data_type();
+
+                    /// For 'null safe equal' join,
+                    /// the build key column may be converted to nullable 
from non-nullable.
+                    if (p._should_convert_to_nullable[i]) {
+                        data_type = vectorized::make_nullable(data_type);
+                    }
+                    data_types.emplace_back(std::move(data_type));
+                }
+
                 if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32>(
-                            *_shared_state->hash_table_variants, 
_build_expr_ctxs)) {
+                            *_shared_state->hash_table_variants, data_types)) {
                     _shared_state->hash_table_variants
                             
->emplace<vectorized::SerializedHashTableContext>();
                 }
diff --git a/be/src/vec/common/hash_table/hash_map_context_creator.h 
b/be/src/vec/common/hash_table/hash_map_context_creator.h
index 89d6ab865ad..fa27d1df181 100644
--- a/be/src/vec/common/hash_table/hash_map_context_creator.h
+++ b/be/src/vec/common/hash_table/hash_map_context_creator.h
@@ -55,14 +55,23 @@ void get_hash_map_context_fixed(Variant& variant, size_t 
size, bool has_nullable
 template <template <typename... Args> typename HashMap, template <typename> 
typename Hash,
           typename... Mapped, typename Variant>
 bool try_get_hash_map_context_fixed(Variant& variant, const VExprContextSPtrs& 
expr_ctxs) {
+    std::vector<DataTypePtr> data_types;
+    for (const auto& ctx : expr_ctxs) {
+        data_types.emplace_back(ctx->root()->data_type());
+    }
+    return try_get_hash_map_context_fixed<HashMap, Hash, Mapped...>(variant, 
data_types);
+}
+
+template <template <typename... Args> typename HashMap, template <typename> 
typename Hash,
+          typename... Mapped, typename Variant>
+bool try_get_hash_map_context_fixed(Variant& variant, const 
std::vector<DataTypePtr>& data_types) {
     Sizes key_sizes;
 
     bool use_fixed_key = true;
     bool has_null = false;
     size_t key_byte_size = 0;
 
-    for (auto ctx : expr_ctxs) {
-        const auto& data_type = ctx->root()->data_type();
+    for (const auto& data_type : data_types) {
         if (!data_type->have_maximum_size_of_value()) {
             use_fixed_key = false;
             break;
@@ -73,7 +82,7 @@ bool try_get_hash_map_context_fixed(Variant& variant, const 
VExprContextSPtrs& e
         key_byte_size += key_sizes.back();
     }
 
-    size_t bitmap_size = has_null ? get_bitmap_size(expr_ctxs.size()) : 0;
+    size_t bitmap_size = has_null ? get_bitmap_size(data_types.size()) : 0;
     if (bitmap_size + key_byte_size > sizeof(UInt256)) {
         use_fixed_key = false;
     }
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp 
b/be/src/vec/exec/join/vhash_join_node.cpp
index 44074f3bcf9..ea4fd924180 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -1053,8 +1053,18 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) 
{
                     return;
                 }
 
+                std::vector<DataTypePtr> data_types;
+                for (size_t i = 0; i != _build_expr_ctxs.size(); ++i) {
+                    auto& ctx = _build_expr_ctxs[i];
+                    auto data_type = ctx->root()->data_type();
+                    if (_should_convert_build_side_to_nullable[i]) {
+                        data_type = make_nullable(data_type);
+                    }
+                    data_types.emplace_back(std::move(data_type));
+                }
+
                 if (!try_get_hash_map_context_fixed<JoinHashMap, 
HashCRC32>(*_hash_table_variants,
-                                                                            
_build_expr_ctxs)) {
+                                                                            
data_types)) {
                     
_hash_table_variants->emplace<SerializedHashTableContext>();
                 }
             },
diff --git 
a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out 
b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out
index 6f7d74acd37..8404bee641f 100644
--- a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out
+++ b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out
@@ -126,3 +126,11 @@
 2      111     111
 3      1111    1111
 
+-- !sql30 --
+1      11      11      1       11      11
+2      111     111     4       111     111
+2      111     111     2       111     111
+3      1111    1111    3       1111    1111
+4      \N      \N      \N      \N      \N
+5      1111    1111    3       1111    1111
+
diff --git 
a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy
 
b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy
index bddccb26ab1..2bb24309960 100644
--- 
a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy
+++ 
b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy
@@ -277,4 +277,13 @@ suite("test_half_join_nullable_build_side", "query,p0") {
             test_half_join_nullable_build_side_l2 l right semi join 
test_half_join_nullable_build_side_r2 r on  l.v2 = r.v2
         order by 1, 2, 3;
     """
+
+    qt_sql30 """
+        select
+            *
+        from
+            test_half_join_nullable_build_side_l2 l
+            left join test_half_join_nullable_build_side_l r on  l.v2 <=> r.v2
+        order by 1, 2, 3;
+    """
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to