This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch dev_join in repository https://gitbox.apache.org/repos/asf/doris.git
commit 90b226c08e125670d65dc2e8318f79952367a0ce
Author: HappenLee <[email protected]>
AuthorDate: Wed Oct 25 16:44:14 2023 +0800

    fix tpch some bug of right semi/anti join (#25896)
---
 be/src/vec/common/hash_table/hash_map.h            |  9 ++--
 .../vec/exec/join/process_hash_table_probe_impl.h  | 51 ++++++++++++----------
 be/src/vec/exec/join/vhash_join_node.h             |  2 +-
 3 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index ab094d69a67..35abaf48fff 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -270,12 +270,12 @@ public:
         const auto batch_size = max_batch_size;
         const auto elem_num = visited.size();
         int count = 0;
-        build_idxs.reserve(batch_size);
+        build_idxs.resize(batch_size);

         while (count < batch_size && iter_idx < elem_num) {
             const auto matched = visited[iter_idx];
             build_idxs[count] = iter_idx;
-            if constexpr (JoinOpType != doris::TJoinOp::RIGHT_ANTI_JOIN) {
+            if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN) {
                 count += !matched;
             } else {
                 count += matched;
@@ -284,7 +284,7 @@ public:
         }

         build_idxs.resize(count);
-        return iter_idx == elem_num;
+        return iter_idx >= elem_num;
     }

 private:
@@ -300,8 +300,9 @@ private:
                 }
                 build_idx = next[build_idx];
             }
+            probe_idx++;
         }
-        return std::pair {probe_rows, 0};
+        return std::pair {probe_idx, 0};
     }

     template <int JoinOpType>
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index fc408711768..53749b78d1f 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -661,38 +661,43 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable(
     // }
     // }
     // _build_blocks_locs.resize(block_size);
-
     auto& mcol = mutable_block.mutable_columns();
-    *eos = hash_table_ctx.hash_table->template iterate_map<JoinOpType>(_build_block_rows);
+    auto is_eof = hash_table_ctx.hash_table->template iterate_map<JoinOpType>(_build_block_rows);
+    *eos = is_eof;
     auto block_size = _build_block_rows.size();
-    int right_col_idx = _parent->left_table_data_types().size();
+    int right_col_idx =
+            JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN
+                    ? _parent->left_table_data_types().size()
+                    : 0;
     int right_col_len = _parent->right_table_data_types().size();

-    for (size_t j = 0; j < right_col_len; ++j) {
-        const auto& column = *_build_block->get_by_position(j).column;
-        mcol[j + right_col_idx]->insert_indices_from(
-                column, _build_block_rows.data(),
-                _build_block_rows.data() + _build_block_rows.size());
-    }
+    if (block_size) {
+        for (size_t j = 0; j < right_col_len; ++j) {
+            const auto& column = *_build_block->get_by_position(j).column;
+            LOG(INFO) << "happne lee build block size:" << column.size();
+            mcol[j + right_col_idx]->insert_indices_from(column, _build_block_rows.data(),
+                                                         _build_block_rows.data() + block_size);
+        }

-    // just resize the left table column in case with other conjunct to make block size is not zero
-    if (_is_right_semi_anti && _have_other_join_conjunct) {
-        auto target_size = mcol[right_col_idx]->size();
-        for (int i = 0; i < right_col_idx; ++i) {
-            mcol[i]->resize(target_size);
+        // just resize the left table column in case with other conjunct to make block size is not zero
+        if (_is_right_semi_anti && _have_other_join_conjunct) {
+            auto target_size = mcol[right_col_idx]->size();
+            for (int i = 0; i < right_col_idx; ++i) {
+                mcol[i]->resize(target_size);
+            }
         }
-    }

-    // right outer join / full join need insert data of left table
-    if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
-                  JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
-        for (int i = 0; i < right_col_idx; ++i) {
-            assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
+        // right outer join / full join need insert data of left table
+        if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
+                      JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
+            for (int i = 0; i < right_col_idx; ++i) {
+                assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
+            }
+            _tuple_is_null_left_flags->resize_fill(block_size, 1);
         }
-        _tuple_is_null_left_flags->resize_fill(block_size, 1);
+        output_block->swap(mutable_block.to_block(0));
+        DCHECK(block_size <= _batch_size);
     }
-    output_block->swap(mutable_block.to_block(0));
-    DCHECK(block_size <= _batch_size);
     return Status::OK();
     // else {
     // LOG(FATAL) << "Invalid RowRefList";
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 5fc1c3c6cae..86484811885 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -118,7 +118,7 @@ struct ProcessHashTableBuild {
     template <int JoinOpType, bool ignore_null, bool short_circuit_for_null>
     Status run(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map, bool* has_null_key) {
         if (short_circuit_for_null || ignore_null) {
-            for (int i = 0; i < _rows; i++) {
+            for (size_t i = 0; i < _rows; i++) {
                 if ((*null_map)[i]) {
                     *has_null_key = true;
                 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
