This is an automated email from the ASF dual-hosted git repository.

BiteTheDDDDt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dc5d1398b33 [chore](be) Reject broadcast joins that finalize build side (#63445)
dc5d1398b33 is described below

commit dc5d1398b335d1400830b73e87698c5bd92586ab
Author: Pxl <[email protected]>
AuthorDate: Thu May 21 10:48:28 2026 +0800

    [chore](be) Reject broadcast joins that finalize build side (#63445)
    
    Broadcast hash joins that need to emit build-side rows
    after probing, such as right outer, full outer, right semi, and right
    anti joins, cannot safely share one broadcast build side across multiple
    local probe instances without an additional global finalization
    protocol. The frontend already avoids generating these broadcast hash
    joins, but the backend had no guard if such a plan reached execution.
    This change rejects unsupported broadcast hash joins during build sink
    preparation and adds a BE unit test covering the rejected join types.
---
 be/src/exec/operator/hashjoin_build_sink.cpp       |  6 +++++
 be/test/exec/operator/hashjoin_build_sink_test.cpp | 28 ++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp b/be/src/exec/operator/hashjoin_build_sink.cpp
index 338c43cac7a..1d5d4e84261 100644
--- a/be/src/exec/operator/hashjoin_build_sink.cpp
+++ b/be/src/exec/operator/hashjoin_build_sink.cpp
@@ -774,6 +774,12 @@ Status HashJoinBuildSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* st
 
 Status HashJoinBuildSinkOperatorX::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(JoinBuildSinkOperatorX<HashJoinBuildSinkLocalState>::prepare(state));
+    if (_is_broadcast_join && (_match_all_build || _is_right_semi_anti)) {
+        return Status::NotSupported(
+                "Broadcast hash join does not support {} because build-side rows must be "
+                "finalized exactly once",
+                to_string(_join_op));
+    }
     _use_shared_hash_table =
             _is_broadcast_join && state->enable_share_hash_table_for_broadcast_join();
     auto init_keep_column_flags = [&](auto& tuple_descs, auto& output_slot_flags) {
diff --git a/be/test/exec/operator/hashjoin_build_sink_test.cpp b/be/test/exec/operator/hashjoin_build_sink_test.cpp
index b5cd8ee3589..20b66132949 100644
--- a/be/test/exec/operator/hashjoin_build_sink_test.cpp
+++ b/be/test/exec/operator/hashjoin_build_sink_test.cpp
@@ -180,6 +180,34 @@ TEST_F(HashJoinBuildSinkTest, Init) {
     run_test_block(test_block);
 }
 
+TEST_F(HashJoinBuildSinkTest, RejectBroadcastJoinThatRequiresBuildSideFinalize) {
+    for (const auto join_op : {TJoinOp::RIGHT_OUTER_JOIN, TJoinOp::FULL_OUTER_JOIN,
+                               TJoinOp::RIGHT_SEMI_JOIN, TJoinOp::RIGHT_ANTI_JOIN}) {
+        auto tnode =
+                _helper.create_test_plan_node(join_op, {TPrimitiveType::INT}, {false}, {false});
+        tnode.hash_join_node.__set_is_broadcast_join(true);
+
+        auto [probe_operator, sink_operator] = _helper.create_operators(tnode);
+        ASSERT_TRUE(probe_operator);
+        ASSERT_TRUE(sink_operator);
+
+        auto runtime_state = std::make_unique<MockRuntimeState>();
+        runtime_state->_query_ctx = _helper.query_ctx.get();
+        runtime_state->_query_id = _helper.query_ctx->query_id();
+        runtime_state->resize_op_id_to_local_state(-100);
+        runtime_state->set_max_operator_id(-100);
+        runtime_state->set_desc_tbl(_helper.desc_tbl);
+
+        auto st = sink_operator->init(tnode, runtime_state.get());
+        ASSERT_TRUE(st.ok()) << "init failed: " << st.to_string();
+
+        st = sink_operator->prepare(runtime_state.get());
+        ASSERT_TRUE(st.is<ErrorCode::NOT_IMPLEMENTED_ERROR>())
+                << "broadcast " << to_string(join_op)
+                << " should be rejected, got: " << st.to_string();
+    }
+}
+
 TEST_F(HashJoinBuildSinkTest, Sink) {
     auto test_block = [&](TJoinOp::type op_type, const std::vector<TPrimitiveType::type>& key_types,
                           const std::vector<bool>& left_nullables,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to