This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch dev_join in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0b2e1748ec2838b19d430e80adbf9beebf9f2ea6 Author: BiteTheDDDDt <[email protected]> AuthorDate: Wed Oct 25 12:07:59 2023 +0800 update --- be/src/pipeline/exec/hashjoin_probe_operator.h | 2 - be/src/vec/exec/join/process_hash_table_probe.h | 13 +- .../vec/exec/join/process_hash_table_probe_impl.h | 325 +++------------------ be/src/vec/exec/join/vhash_join_node.h | 2 - 4 files changed, 40 insertions(+), 302 deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h b/be/src/pipeline/exec/hashjoin_probe_operator.h index 181934e7b50..cc0f37caa3d 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.h +++ b/be/src/pipeline/exec/hashjoin_probe_operator.h @@ -115,8 +115,6 @@ private: bool _need_null_map_for_probe = false; bool _has_set_need_null_map_for_probe = false; vectorized::ColumnUInt8::MutablePtr _null_map_column; - // for cases when a probe row matches more than batch size build rows. - bool _is_any_probe_match_row_output = false; std::unique_ptr<HashTableCtxVariants> _process_hashtable_ctx_variants = std::make_unique<HashTableCtxVariants>(); diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h index ebd63f9f55d..cf7c7e230d4 100644 --- a/be/src/vec/exec/join/process_hash_table_probe.h +++ b/be/src/vec/exec/join/process_hash_table_probe.h @@ -68,12 +68,7 @@ struct ProcessHashTableProbe { // and output block may be different // The output result is determined by the other join conjunct result and same_to_prev struct Status do_other_join_conjuncts(Block* output_block, bool is_mark_join, - int multi_matched_output_row_count, bool is_the_last_sub_block); - - void _process_splited_equal_matched_tuples(int start_row_idx, int row_count, - const UInt8* __restrict other_hit_column, - UInt8* __restrict null_map_data, - UInt8* __restrict filter_map, Block* output_block); + bool is_the_last_sub_block); template <typename HashTableType> typename HashTableType::State _init_probe_side(HashTableType& hash_table_ctx, size_t probe_rows, @@ -93,7 +88,7 @@ struct ProcessHashTableProbe { std::vector<StringRef> _probe_keys; std::vector<uint32_t> _probe_indexs; - std::vector<uint32_t> _build_block_rows; + std::vector<uint32_t> _build_indexs; std::vector<int> _build_blocks_locs; // only need set the tuple is null in RIGHT_OUTER_JOIN and FULL_OUTER_JOIN ColumnUInt8::Container* _tuple_is_null_left_flags; @@ -105,12 +100,8 @@ struct ProcessHashTableProbe { std::unique_ptr<Arena> _serialize_key_arena; std::vector<char> _probe_side_find_result; - std::vector<bool*> _visited_map; - std::vector<bool> _same_to_prev; - int _right_col_idx; int _right_col_len; - int _row_count_from_last_probe; bool _have_other_join_conjunct; bool _is_right_semi_anti; diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index 53749b78d1f..68e3883a05b 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -70,10 +70,10 @@ void ProcessHashTableProbe<JoinOpType, Parent>::build_side_output_column( if (!is_semi_anti_join || have_other_join_conjunct) { for (int i = 0; i < _right_col_len; i++) { - const auto& column = *_build_block->get_by_position(i).column; + const auto& column = *_build_block->safe_get_by_position(i).column; if (output_slot_flags[i]) { - mcol[i + _right_col_idx]->insert_indices_from(column, _build_block_rows.data(), - _build_block_rows.data() + size); + mcol[i + _right_col_idx]->insert_indices_from(column, _build_indexs.data(), + _build_indexs.data() + size); } else { mcol[i + _right_col_idx]->insert_many_defaults(size); } @@ -85,7 +85,7 @@ void ProcessHashTableProbe<JoinOpType, Parent>::build_side_output_column( _tuple_is_null_right_flags->resize(size); auto* __restrict null_data = _tuple_is_null_right_flags->data(); for (int i = 0; i < size; ++i) { - null_data[i] = _build_block_rows[i] == 0; + null_data[i] = _build_indexs[i] == 0; } } } @@ -126,19 +126,9 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType, Parent>::_init_p ? 0 : _parent->left_table_data_types().size(); _right_col_len = _parent->right_table_data_types().size(); - _row_count_from_last_probe = 0; - - _build_block_rows.clear(); - _probe_indexs.clear(); - if (with_other_join_conjuncts) { - // use in right join to change visited state after exec the vother join conjunct - _visited_map.clear(); - _same_to_prev.clear(); - _visited_map.reserve(_batch_size * PROBE_SIDE_EXPLODE_RATE); - _same_to_prev.reserve(_batch_size * PROBE_SIDE_EXPLODE_RATE); - } + _probe_indexs.resize(_batch_size); - _build_block_rows.resize(_batch_size); + _build_indexs.resize(_batch_size); if (!_parent->_ready_probe) { _parent->_ready_probe = true; @@ -170,11 +160,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash bool all_match_one = false; size_t probe_size = 0; - // If not(which means it excceed batch size), probe_index is not increased and - // remaining matched rows for the current probe row will be - // handled in the next call of this function - int multi_matched_output_row_count = 0; - // Is the last sub block of splitted block bool is_the_last_sub_block = false; @@ -188,7 +173,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash auto [new_probe_idx, new_current_offset] = hash_table_ctx.hash_table->template find_batch<JoinOpType>( hash_table_ctx.keys, hash_table_ctx.hash_values.data(), probe_index, - probe_rows, _probe_indexs, _build_block_rows); + probe_rows, _probe_indexs, _build_indexs); probe_index = new_probe_idx; current_offset = new_current_offset; probe_size = probe_index - last_probe_index; @@ -206,8 +191,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash output_block->swap(mutable_block.to_block()); if constexpr (with_other_conjuncts) { - return do_other_join_conjuncts(output_block, is_mark_join, multi_matched_output_row_count, - is_the_last_sub_block); + return do_other_join_conjuncts(output_block, is_mark_join, is_the_last_sub_block); } return Status::OK(); @@ -215,8 +199,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash template <int JoinOpType, typename Parent> Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( - Block* output_block, bool is_mark_join, int multi_matched_output_row_count, - bool is_the_last_sub_block) { + Block* output_block, bool is_mark_join, bool is_the_last_sub_block) { // dispose the other join conjunct exec auto row_count = output_block->rows(); if (!row_count) { @@ -243,6 +226,10 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( ->get_data() .data(); + auto same_with_prev = [this](size_t index) { + return index && _probe_indexs[index] == _probe_indexs[index - 1]; + }; + if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN) { auto new_filter_column = ColumnVector<UInt8>::create(row_count); @@ -251,22 +238,11 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( auto null_map_column = ColumnVector<UInt8>::create(row_count, 0); auto* __restrict null_map_data = null_map_column->get_data().data(); - // It contains non-first sub block of splited equal-conjuncts-matched tuples from last probe row - if (_row_count_from_last_probe > 0) { - _process_splited_equal_matched_tuples(0, _row_count_from_last_probe, filter_column_ptr, - null_map_data, filter_map, output_block); - // This is the last sub block of splitted block, and no equal-conjuncts-matched tuple - // is output in all sub blocks, need to output a tuple for this probe row - if (is_the_last_sub_block && !_parent->_is_any_probe_match_row_output) { - filter_map[0] = true; - null_map_data[0] = true; - } - } - int end_idx = row_count - multi_matched_output_row_count; + int end_idx = row_count; // process equal-conjuncts-matched tuples that are newly generated // in this run if there are any. - for (int i = _row_count_from_last_probe; i < end_idx; ++i) { - auto join_hit = _visited_map[i] != nullptr; + for (int i = 0; i < end_idx; ++i) { + auto join_hit = _build_indexs[i]; auto other_hit = filter_column_ptr[i]; if (!other_hit) { @@ -291,12 +267,11 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( // 2. All the remaining other-conjuncts-NOT-matched tuples. // All these tuples are marked not to output. if (join_hit) { - *_visited_map[i] |= other_hit; - filter_map[i] = other_hit || !_same_to_prev[i] || + filter_map[i] = other_hit || !same_with_prev(i) || (!filter_column_ptr[i] && filter_map[i - 1]); // Here to keep only hit join conjunct and other join conjunt is true need to be output. // if not, only some key must keep one row will output will null right table column - if (_same_to_prev[i] && filter_map[i] && !filter_column_ptr[i - 1]) { + if (same_with_prev(i) && filter_map[i] && !filter_column_ptr[i - 1]) { filter_map[i - 1] = false; } } else { @@ -304,14 +279,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( } } - // It contains the first sub block of splited equal-conjuncts-matched tuples of the current probe row - if (multi_matched_output_row_count > 0) { - _parent->_is_any_probe_match_row_output = false; - _process_splited_equal_matched_tuples(row_count - multi_matched_output_row_count, - multi_matched_output_row_count, filter_column_ptr, - null_map_data, filter_map, output_block); - } - for (size_t i = 0; i < row_count; ++i) { if (filter_map[i]) { _tuple_is_null_right_flags->emplace_back(null_map_data[i]); @@ -324,46 +291,16 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( auto& filter_map = new_filter_column->get_data(); size_t start_row_idx = 1; - // We are handling euqual-conjuncts matched tuples that are splitted into multiple blocks - if (_row_count_from_last_probe > 0) { - if (_parent->_is_any_probe_match_row_output) { - // if any matched tuple for this probe row is output, - // ignore all the following tuples for this probe row. - for (int row_idx = 0; row_idx < _row_count_from_last_probe; ++row_idx) { - filter_map.emplace_back(false); - } - start_row_idx += _row_count_from_last_probe; - if (_row_count_from_last_probe < row_count) { - filter_map.emplace_back(filter_column_ptr[_row_count_from_last_probe]); - } - } else { - filter_map.emplace_back(filter_column_ptr[0]); - } - } else { - filter_map.emplace_back(filter_column_ptr[0]); - } + filter_map.emplace_back(filter_column_ptr[0]); for (size_t i = start_row_idx; i < row_count; ++i) { - if (filter_column_ptr[i] || (_same_to_prev[i] && filter_map[i - 1])) { + if (filter_column_ptr[i] || (same_with_prev(i) && filter_map[i - 1])) { // Only last same element is true, output last one filter_map.push_back(true); - filter_map[i - 1] = !_same_to_prev[i] && filter_map[i - 1]; + filter_map[i - 1] = !same_with_prev(i) && filter_map[i - 1]; } else { filter_map.push_back(false); } } - // It contains the first sub block of splited equal-conjuncts-matched tuples of the current probe row - if (multi_matched_output_row_count > 0) { - // If a matched row is output, all the equal-matched tuples in - // the following sub blocks should be ignored - _parent->_is_any_probe_match_row_output = filter_map[row_count - 1]; - } else if (_row_count_from_last_probe > 0 && !_parent->_is_any_probe_match_row_output) { - // We are handling euqual-conjuncts matched tuples that are splitted into multiple blocks, - // and no matched tuple has been output in all previous run. - // If a tuple is output in this run, all the following mathced tuples should be ignored - if (filter_map[_row_count_from_last_probe - 1]) { - _parent->_is_any_probe_match_row_output = true; - } - } /// FIXME: incorrect result of semi mark join with other conjuncts(null value missed). if (is_mark_join) { @@ -374,7 +311,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( // For mark join, we only filter rows which have duplicate join keys. // And then, we set matched_map to the join result to do the mark join's filtering. for (size_t i = 1; i < row_count; ++i) { - if (!_same_to_prev[i]) { + if (!same_with_prev(i)) { helper.insert_value(filter_map[i - 1]); filter_map[i - 1] = true; } @@ -399,34 +336,19 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( // if there are none, just pick a tuple and output. size_t start_row_idx = 1; - // We are handling euqual-conjuncts matched tuples that are splitted into multiple blocks - if (_row_count_from_last_probe > 0 && _parent->_is_any_probe_match_row_output) { - // if any matched tuple for this probe row is output, - // ignore all the following tuples for this probe row. - for (int row_idx = 0; row_idx < _row_count_from_last_probe; ++row_idx) { - filter_map[row_idx] = false; - } - start_row_idx += _row_count_from_last_probe; - if (_row_count_from_last_probe < row_count) { - filter_map[_row_count_from_last_probe] = - filter_column_ptr[_row_count_from_last_probe] && - _visited_map[_row_count_from_last_probe]; - } - } else { - // Both equal conjuncts and other conjuncts are true - filter_map[0] = filter_column_ptr[0] && _visited_map[0]; - } + // Both equal conjuncts and other conjuncts are true + filter_map[0] = filter_column_ptr[0] && _build_indexs[0]; for (size_t i = start_row_idx; i < row_count; ++i) { - if ((_visited_map[i] && filter_column_ptr[i]) || - (_same_to_prev[i] && filter_map[i - 1])) { + if ((_build_indexs[i] && filter_column_ptr[i]) || + (same_with_prev(i) && filter_map[i - 1])) { // When either of two conditions is meet: // 1. Both equal conjuncts and other conjuncts are true or same_to_prev // 2. This row is joined from the same build side row as the previous row // Set filter_map[i] to true and filter_map[i - 1] to false if same_to_prev[i] // is true. filter_map[i] = true; - filter_map[i - 1] = !_same_to_prev[i] && filter_map[i - 1]; + filter_map[i - 1] = !same_with_prev(i) && filter_map[i - 1]; } else { filter_map[i] = false; } @@ -438,7 +360,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( .column->assume_mutable())) .get_data(); for (int i = 1; i < row_count; ++i) { - if (!_same_to_prev[i]) { + if (!same_with_prev(i)) { matched_map.push_back(!filter_map[i - 1]); filter_map[i - 1] = true; } @@ -447,67 +369,26 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( filter_map[row_count - 1] = true; } else { int end_row_idx = 0; - if (_row_count_from_last_probe > 0) { - end_row_idx = row_count - multi_matched_output_row_count; - if (!_parent->_is_any_probe_match_row_output) { - // We are handling euqual-conjuncts matched tuples that are splitted into multiple blocks, - // and no matched tuple has been output in all previous run. - // If a tuple is output in this run, all the following mathced tuples should be ignored - if (filter_map[_row_count_from_last_probe - 1]) { - _parent->_is_any_probe_match_row_output = true; - filter_map[_row_count_from_last_probe - 1] = false; - } - if (is_the_last_sub_block && !_parent->_is_any_probe_match_row_output) { - // This is the last sub block of splitted block, and no equal-conjuncts-matched tuple - // is output in all sub blocks, output a tuple for this probe row - filter_map[0] = true; - } - } - if (multi_matched_output_row_count > 0) { - // It contains the first sub block of splited equal-conjuncts-matched tuples of the current probe row - // If a matched row is output, all the equal-matched tuples in - // the following sub blocks should be ignored - _parent->_is_any_probe_match_row_output = filter_map[row_count - 1]; - filter_map[row_count - 1] = false; - } - } else if (multi_matched_output_row_count > 0) { - end_row_idx = row_count - multi_matched_output_row_count; - // It contains the first sub block of splited equal-conjuncts-matched tuples of the current probe row - // If a matched row is output, all the equal-matched tuples in - // the following sub blocks should be ignored - _parent->_is_any_probe_match_row_output = filter_map[row_count - 1]; - filter_map[row_count - 1] = false; - } else { - end_row_idx = row_count; - } + + end_row_idx = row_count; // Same to the semi join, but change the last value to opposite value - for (int i = 1 + _row_count_from_last_probe; i < end_row_idx; ++i) { - if (!_same_to_prev[i]) { + for (int i = 1; i < end_row_idx; ++i) { + if (!same_with_prev(i)) { filter_map[i - 1] = !filter_map[i - 1]; } } - auto non_sub_blocks_matched_row_count = - row_count - _row_count_from_last_probe - multi_matched_output_row_count; + auto non_sub_blocks_matched_row_count = row_count; if (non_sub_blocks_matched_row_count > 0) { filter_map[end_row_idx - 1] = !filter_map[end_row_idx - 1]; } } output_block->get_by_position(result_column_id).column = std::move(new_filter_column); - } else if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || - JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) { - for (int i = 0; i < row_count; ++i) { - DCHECK(_visited_map[i]); - *_visited_map[i] |= filter_column_ptr[i]; - } } else if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) { auto filter_size = 0; for (int i = 0; i < row_count; ++i) { - DCHECK(_visited_map[i]); - auto result = filter_column_ptr[i]; - *_visited_map[i] |= result; - filter_size += result; + filter_size += filter_column_ptr[i]; } _tuple_is_null_left_flags->resize_fill(filter_size, 0); } @@ -529,142 +410,16 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( return Status::OK(); } -// For left or full outer join with other conjuncts. -// If multiple equal-conjuncts-matched tuples is splitted into several -// sub blocks, just filter out all the other-conjuncts-NOT-matched tuples at first, -// and when processing the last sub block, check whether there are any -// equal-conjuncts-matched tuple is output in all sub blocks, -// if not, just pick a tuple and output. -template <int JoinOpType, typename Parent> -void ProcessHashTableProbe<JoinOpType, Parent>::_process_splited_equal_matched_tuples( - int start_row_idx, int row_count, const UInt8* __restrict other_hit_column, - UInt8* __restrict null_map_data, UInt8* __restrict filter_map, Block* output_block) { - int end_row_idx = start_row_idx + row_count; - for (int i = start_row_idx; i < end_row_idx; ++i) { - auto join_hit = _visited_map[i] != nullptr; - auto other_hit = other_hit_column[i]; - - if (!other_hit) { - for (size_t j = 0; j < _right_col_len; ++j) { - typeid_cast<ColumnNullable*>( - std::move(*output_block->get_by_position(j + _right_col_idx).column) - .assume_mutable() - .get()) - ->get_null_map_data()[i] = true; - } - } - - null_map_data[i] = !join_hit || !other_hit; - filter_map[i] = other_hit; - - if (join_hit) { - *_visited_map[i] |= other_hit; - } - } - _parent->_is_any_probe_match_row_output |= - simd::contain_byte(filter_map + start_row_idx, row_count, 1); -} - template <int JoinOpType, typename Parent> template <typename HashTableType> Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable( HashTableType& hash_table_ctx, MutableBlock& mutable_block, Block* output_block, bool* eos) { - // using Mapped = typename HashTableType::Mapped; SCOPED_TIMER(_probe_process_hashtable_timer); - // if constexpr (std::is_same_v<Mapped, RowRefListWithFlag> || - // std::is_same_v<Mapped, RowRefListWithFlags>) { - // hash_table_ctx.init_iterator(); - // auto& mcol = mutable_block.mutable_columns(); - // - // bool right_semi_anti_without_other = _is_right_semi_anti && !_have_other_join_conjunct; - // int right_col_idx = - // right_semi_anti_without_other ? 0 : _parent->left_table_data_types().size(); - // int right_col_len = _parent->right_table_data_types().size(); - // - // auto& iter = hash_table_ctx.iterator; - // auto block_size = 0; - // auto& visited_iter = - // std::get<ForwardIterator<Mapped>>(_parent->_outer_join_pull_visited_iter); - // _build_blocks_locs.resize(_batch_size); - // if (visited_iter.ok()) { - // if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) { - // for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // } else { - // for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - // if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { - // if (visited_iter->visited) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // } else { - // if (!visited_iter->visited) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // } - // } - // } - // if (!visited_iter.ok()) { - // ++iter; - // } - // } - // - // for (; iter != hash_table_ctx.hash_table->end() && block_size < _batch_size; ++iter) { - // auto& mapped = iter->get_second(); - // if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) { - // if (mapped.visited) { - // if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { - // visited_iter = mapped.begin(); - // for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // if (visited_iter.ok()) { - // // block_size >= _batch_size, quit for loop - // break; - // } - // } - // } else { - // if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) { - // visited_iter = mapped.begin(); - // for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // if (visited_iter.ok()) { - // // block_size >= _batch_size, quit for loop - // break; - // } - // } - // } - // } else { - // visited_iter = mapped.begin(); - // for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - // if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { - // if (visited_iter->visited) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // } else { - // if (!visited_iter->visited) { - // _build_blocks_locs[block_size++] = visited_iter->row_num; - // } - // } - // } - // if (visited_iter.ok()) { - // // block_size >= _batch_size, quit for loop - // - // const auto size = _build_blocks_locs.size(); - // _build_block_rows.resize(_build_blocks_locs.size()); - // for (int i = 0; i < size; i++) { - // _build_block_rows[i] = _build_blocks_locs[i]; - // } break; - // } - // } - // } - // _build_blocks_locs.resize(block_size); auto& mcol = mutable_block.mutable_columns(); - auto is_eof = hash_table_ctx.hash_table->template iterate_map<JoinOpType>(_build_block_rows); + auto is_eof = hash_table_ctx.hash_table->template iterate_map<JoinOpType>(_build_indexs); *eos = is_eof; - auto block_size = _build_block_rows.size(); + auto block_size = _build_indexs.size(); int right_col_idx = JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN ? _parent->left_table_data_types().size() @@ -675,8 +430,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable( for (size_t j = 0; j < right_col_len; ++j) { const auto& column = *_build_block->get_by_position(j).column; LOG(INFO) << "happne lee build block size:" << column.size(); - mcol[j + right_col_idx]->insert_indices_from(column, _build_block_rows.data(), - _build_block_rows.data() + block_size); + mcol[j + right_col_idx]->insert_indices_from(column, _build_indexs.data(), + _build_indexs.data() + block_size); } // just resize the left table column in case with other conjunct to make block size is not zero @@ -699,10 +454,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable( DCHECK(block_size <= _batch_size); } return Status::OK(); - // else { - // LOG(FATAL) << "Invalid RowRefList"; - // return Status::InvalidArgument("Invalid RowRefList"); - // } } template <int JoinOpType, typename Parent> diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 86484811885..ba6a8b3111e 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -397,8 +397,6 @@ private: std::vector<bool> _left_output_slot_flags; std::vector<bool> _right_output_slot_flags; - // for cases when a probe row matches more than batch size build rows. - bool _is_any_probe_match_row_output = false; uint8_t _build_block_idx = 0; int64_t _build_side_mem_used = 0; int64_t _build_side_last_mem_used = 0; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
