This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 23c12fd68f8c980e53df8daf5a388560138e8622 Author: Jerry Hu <[email protected]> AuthorDate: Thu Mar 21 22:42:38 2024 +0800 [fix](join) core caused by null-safe-equal join (#32623) --- be/src/pipeline/exec/hashjoin_build_sink.cpp | 16 +++++++++++++++- be/src/vec/common/hash_table/hash_map_context_creator.h | 15 ++++++++++++--- be/src/vec/exec/join/vhash_join_node.cpp | 12 +++++++++++- .../query_p0/join/test_half_join_nullable_build_side.out | 8 ++++++++ .../join/test_half_join_nullable_build_side.groovy | 9 +++++++++ 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index b3ee878a941..dab127c2c50 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -361,8 +361,22 @@ void HashJoinBuildSinkLocalState::_hash_table_init(RuntimeState* state) { } return; } + + std::vector<vectorized::DataTypePtr> data_types; + for (size_t i = 0; i != _build_expr_ctxs.size(); ++i) { + auto& ctx = _build_expr_ctxs[i]; + auto data_type = ctx->root()->data_type(); + + /// For 'null safe equal' join, + /// the build key column may be converted to nullable from non-nullable. 
+ if (p._should_convert_to_nullable[i]) { + data_type = vectorized::make_nullable(data_type); + } + data_types.emplace_back(std::move(data_type)); + } + if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32>( - *_shared_state->hash_table_variants, _build_expr_ctxs)) { + *_shared_state->hash_table_variants, data_types)) { _shared_state->hash_table_variants ->emplace<vectorized::SerializedHashTableContext>(); } diff --git a/be/src/vec/common/hash_table/hash_map_context_creator.h b/be/src/vec/common/hash_table/hash_map_context_creator.h index 89d6ab865ad..fa27d1df181 100644 --- a/be/src/vec/common/hash_table/hash_map_context_creator.h +++ b/be/src/vec/common/hash_table/hash_map_context_creator.h @@ -55,14 +55,23 @@ void get_hash_map_context_fixed(Variant& variant, size_t size, bool has_nullable template <template <typename... Args> typename HashMap, template <typename> typename Hash, typename... Mapped, typename Variant> bool try_get_hash_map_context_fixed(Variant& variant, const VExprContextSPtrs& expr_ctxs) { + std::vector<DataTypePtr> data_types; + for (const auto& ctx : expr_ctxs) { + data_types.emplace_back(ctx->root()->data_type()); + } + return try_get_hash_map_context_fixed<HashMap, Hash, Mapped...>(variant, data_types); +} + +template <template <typename... Args> typename HashMap, template <typename> typename Hash, + typename... Mapped, typename Variant> +bool try_get_hash_map_context_fixed(Variant& variant, const std::vector<DataTypePtr>& data_types) { Sizes key_sizes; bool use_fixed_key = true; bool has_null = false; size_t key_byte_size = 0; - for (auto ctx : expr_ctxs) { - const auto& data_type = ctx->root()->data_type(); + for (const auto& data_type : data_types) { if (!data_type->have_maximum_size_of_value()) { use_fixed_key = false; break; @@ -73,7 +82,7 @@ bool try_get_hash_map_context_fixed(Variant& variant, const VExprContextSPtrs& e key_byte_size += key_sizes.back(); } - size_t bitmap_size = has_null ? 
get_bitmap_size(expr_ctxs.size()) : 0; + size_t bitmap_size = has_null ? get_bitmap_size(data_types.size()) : 0; if (bitmap_size + key_byte_size > sizeof(UInt256)) { use_fixed_key = false; } diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 44074f3bcf9..ea4fd924180 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -1053,8 +1053,18 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) { return; } + std::vector<DataTypePtr> data_types; + for (size_t i = 0; i != _build_expr_ctxs.size(); ++i) { + auto& ctx = _build_expr_ctxs[i]; + auto data_type = ctx->root()->data_type(); + if (_should_convert_build_side_to_nullable[i]) { + data_type = make_nullable(data_type); + } + data_types.emplace_back(std::move(data_type)); + } + if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32>(*_hash_table_variants, - _build_expr_ctxs)) { + data_types)) { _hash_table_variants->emplace<SerializedHashTableContext>(); } }, diff --git a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out index 6f7d74acd37..8404bee641f 100644 --- a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out +++ b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out @@ -126,3 +126,11 @@ 2 111 111 3 1111 1111 +-- !sql30 -- +1 11 11 1 11 11 +2 111 111 4 111 111 +2 111 111 2 111 111 +3 1111 1111 3 1111 1111 +4 \N \N \N \N \N +5 1111 1111 3 1111 1111 + diff --git a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy index bddccb26ab1..2bb24309960 100644 --- a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy +++ b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy @@ -277,4 +277,13 @@ 
suite("test_half_join_nullable_build_side", "query,p0") { test_half_join_nullable_build_side_l2 l right semi join test_half_join_nullable_build_side_r2 r on l.v2 = r.v2 order by 1, 2, 3; """ + + qt_sql30 """ + select + * + from + test_half_join_nullable_build_side_l2 l + left join test_half_join_nullable_build_side_l r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
