This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch dev_join
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0b2e1748ec2838b19d430e80adbf9beebf9f2ea6
Author: BiteTheDDDDt <[email protected]>
AuthorDate: Wed Oct 25 12:07:59 2023 +0800

    update
---
 be/src/pipeline/exec/hashjoin_probe_operator.h     |   2 -
 be/src/vec/exec/join/process_hash_table_probe.h    |  13 +-
 .../vec/exec/join/process_hash_table_probe_impl.h  | 325 +++------------------
 be/src/vec/exec/join/vhash_join_node.h             |   2 -
 4 files changed, 40 insertions(+), 302 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h b/be/src/pipeline/exec/hashjoin_probe_operator.h
index 181934e7b50..cc0f37caa3d 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.h
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.h
@@ -115,8 +115,6 @@ private:
     bool _need_null_map_for_probe = false;
     bool _has_set_need_null_map_for_probe = false;
     vectorized::ColumnUInt8::MutablePtr _null_map_column;
-    // for cases when a probe row matches more than batch size build rows.
-    bool _is_any_probe_match_row_output = false;
     std::unique_ptr<HashTableCtxVariants> _process_hashtable_ctx_variants =
             std::make_unique<HashTableCtxVariants>();
 
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h
index ebd63f9f55d..cf7c7e230d4 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -68,12 +68,7 @@ struct ProcessHashTableProbe {
     // and output block may be different
     // The output result is determined by the other join conjunct result and 
same_to_prev struct
     Status do_other_join_conjuncts(Block* output_block, bool is_mark_join,
-                                   int multi_matched_output_row_count, bool 
is_the_last_sub_block);
-
-    void _process_splited_equal_matched_tuples(int start_row_idx, int 
row_count,
-                                               const UInt8* __restrict 
other_hit_column,
-                                               UInt8* __restrict null_map_data,
-                                               UInt8* __restrict filter_map, 
Block* output_block);
+                                   bool is_the_last_sub_block);
 
     template <typename HashTableType>
     typename HashTableType::State _init_probe_side(HashTableType& 
hash_table_ctx, size_t probe_rows,
@@ -93,7 +88,7 @@ struct ProcessHashTableProbe {
     std::vector<StringRef> _probe_keys;
 
     std::vector<uint32_t> _probe_indexs;
-    std::vector<uint32_t> _build_block_rows;
+    std::vector<uint32_t> _build_indexs;
     std::vector<int> _build_blocks_locs;
     // only need set the tuple is null in RIGHT_OUTER_JOIN and FULL_OUTER_JOIN
     ColumnUInt8::Container* _tuple_is_null_left_flags;
@@ -105,12 +100,8 @@ struct ProcessHashTableProbe {
     std::unique_ptr<Arena> _serialize_key_arena;
     std::vector<char> _probe_side_find_result;
 
-    std::vector<bool*> _visited_map;
-    std::vector<bool> _same_to_prev;
-
     int _right_col_idx;
     int _right_col_len;
-    int _row_count_from_last_probe;
 
     bool _have_other_join_conjunct;
     bool _is_right_semi_anti;
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 53749b78d1f..68e3883a05b 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -70,10 +70,10 @@ void ProcessHashTableProbe<JoinOpType, Parent>::build_side_output_column(
 
     if (!is_semi_anti_join || have_other_join_conjunct) {
         for (int i = 0; i < _right_col_len; i++) {
-            const auto& column = *_build_block->get_by_position(i).column;
+            const auto& column = *_build_block->safe_get_by_position(i).column;
             if (output_slot_flags[i]) {
-                mcol[i + _right_col_idx]->insert_indices_from(column, 
_build_block_rows.data(),
-                                                              
_build_block_rows.data() + size);
+                mcol[i + _right_col_idx]->insert_indices_from(column, 
_build_indexs.data(),
+                                                              
_build_indexs.data() + size);
             } else {
                 mcol[i + _right_col_idx]->insert_many_defaults(size);
             }
@@ -85,7 +85,7 @@ void ProcessHashTableProbe<JoinOpType, Parent>::build_side_output_column(
         _tuple_is_null_right_flags->resize(size);
         auto* __restrict null_data = _tuple_is_null_right_flags->data();
         for (int i = 0; i < size; ++i) {
-            null_data[i] = _build_block_rows[i] == 0;
+            null_data[i] = _build_indexs[i] == 0;
         }
     }
 }
@@ -126,19 +126,9 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType, Parent>::_init_p
                              ? 0
                              : _parent->left_table_data_types().size();
     _right_col_len = _parent->right_table_data_types().size();
-    _row_count_from_last_probe = 0;
-
-    _build_block_rows.clear();
-    _probe_indexs.clear();
-    if (with_other_join_conjuncts) {
-        // use in right join to change visited state after exec the vother 
join conjunct
-        _visited_map.clear();
-        _same_to_prev.clear();
-        _visited_map.reserve(_batch_size * PROBE_SIDE_EXPLODE_RATE);
-        _same_to_prev.reserve(_batch_size * PROBE_SIDE_EXPLODE_RATE);
-    }
+
     _probe_indexs.resize(_batch_size);
-    _build_block_rows.resize(_batch_size);
+    _build_indexs.resize(_batch_size);
 
     if (!_parent->_ready_probe) {
         _parent->_ready_probe = true;
@@ -170,11 +160,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
     bool all_match_one = false;
     size_t probe_size = 0;
 
-    // If not(which means it excceed batch size), probe_index is not increased 
and
-    // remaining matched rows for the current probe row will be
-    // handled in the next call of this function
-    int multi_matched_output_row_count = 0;
-
     // Is the last sub block of splitted block
     bool is_the_last_sub_block = false;
 
@@ -188,7 +173,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
         auto [new_probe_idx, new_current_offset] =
                 hash_table_ctx.hash_table->template find_batch<JoinOpType>(
                         hash_table_ctx.keys, 
hash_table_ctx.hash_values.data(), probe_index,
-                        probe_rows, _probe_indexs, _build_block_rows);
+                        probe_rows, _probe_indexs, _build_indexs);
         probe_index = new_probe_idx;
         current_offset = new_current_offset;
         probe_size = probe_index - last_probe_index;
@@ -206,8 +191,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
     output_block->swap(mutable_block.to_block());
 
     if constexpr (with_other_conjuncts) {
-        return do_other_join_conjuncts(output_block, is_mark_join, 
multi_matched_output_row_count,
-                                       is_the_last_sub_block);
+        return do_other_join_conjuncts(output_block, is_mark_join, 
is_the_last_sub_block);
     }
 
     return Status::OK();
@@ -215,8 +199,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
 
 template <int JoinOpType, typename Parent>
 Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
-        Block* output_block, bool is_mark_join, int 
multi_matched_output_row_count,
-        bool is_the_last_sub_block) {
+        Block* output_block, bool is_mark_join, bool is_the_last_sub_block) {
     // dispose the other join conjunct exec
     auto row_count = output_block->rows();
     if (!row_count) {
@@ -243,6 +226,10 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
                     ->get_data()
                     .data();
 
+    auto same_with_prev = [this](size_t index) {
+        return index && _probe_indexs[index] == _probe_indexs[index - 1];
+    };
+
     if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
                   JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
         auto new_filter_column = ColumnVector<UInt8>::create(row_count);
@@ -251,22 +238,11 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
         auto null_map_column = ColumnVector<UInt8>::create(row_count, 0);
         auto* __restrict null_map_data = null_map_column->get_data().data();
 
-        // It contains non-first sub block of splited equal-conjuncts-matched 
tuples from last probe row
-        if (_row_count_from_last_probe > 0) {
-            _process_splited_equal_matched_tuples(0, 
_row_count_from_last_probe, filter_column_ptr,
-                                                  null_map_data, filter_map, 
output_block);
-            // This is the last sub block of splitted block, and no 
equal-conjuncts-matched tuple
-            // is output in all sub blocks, need to output a tuple for this 
probe row
-            if (is_the_last_sub_block && 
!_parent->_is_any_probe_match_row_output) {
-                filter_map[0] = true;
-                null_map_data[0] = true;
-            }
-        }
-        int end_idx = row_count - multi_matched_output_row_count;
+        int end_idx = row_count;
         // process equal-conjuncts-matched tuples that are newly generated
         // in this run if there are any.
-        for (int i = _row_count_from_last_probe; i < end_idx; ++i) {
-            auto join_hit = _visited_map[i] != nullptr;
+        for (int i = 0; i < end_idx; ++i) {
+            auto join_hit = _build_indexs[i];
             auto other_hit = filter_column_ptr[i];
 
             if (!other_hit) {
@@ -291,12 +267,11 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
             //    2. All the remaining other-conjuncts-NOT-matched tuples.
             //       All these tuples are marked not to output.
             if (join_hit) {
-                *_visited_map[i] |= other_hit;
-                filter_map[i] = other_hit || !_same_to_prev[i] ||
+                filter_map[i] = other_hit || !same_with_prev(i) ||
                                 (!filter_column_ptr[i] && filter_map[i - 1]);
                 // Here to keep only hit join conjunct and other join conjunt 
is true need to be output.
                 // if not, only some key must keep one row will output will 
null right table column
-                if (_same_to_prev[i] && filter_map[i] && !filter_column_ptr[i 
- 1]) {
+                if (same_with_prev(i) && filter_map[i] && !filter_column_ptr[i 
- 1]) {
                     filter_map[i - 1] = false;
                 }
             } else {
@@ -304,14 +279,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
             }
         }
 
-        // It contains the first sub block of splited equal-conjuncts-matched 
tuples of the current probe row
-        if (multi_matched_output_row_count > 0) {
-            _parent->_is_any_probe_match_row_output = false;
-            _process_splited_equal_matched_tuples(row_count - 
multi_matched_output_row_count,
-                                                  
multi_matched_output_row_count, filter_column_ptr,
-                                                  null_map_data, filter_map, 
output_block);
-        }
-
         for (size_t i = 0; i < row_count; ++i) {
             if (filter_map[i]) {
                 _tuple_is_null_right_flags->emplace_back(null_map_data[i]);
@@ -324,46 +291,16 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
         auto& filter_map = new_filter_column->get_data();
 
         size_t start_row_idx = 1;
-        // We are handling euqual-conjuncts matched tuples that are splitted 
into multiple blocks
-        if (_row_count_from_last_probe > 0) {
-            if (_parent->_is_any_probe_match_row_output) {
-                // if any matched tuple for this probe row is output,
-                // ignore all the following tuples for this probe row.
-                for (int row_idx = 0; row_idx < _row_count_from_last_probe; 
++row_idx) {
-                    filter_map.emplace_back(false);
-                }
-                start_row_idx += _row_count_from_last_probe;
-                if (_row_count_from_last_probe < row_count) {
-                    
filter_map.emplace_back(filter_column_ptr[_row_count_from_last_probe]);
-                }
-            } else {
-                filter_map.emplace_back(filter_column_ptr[0]);
-            }
-        } else {
-            filter_map.emplace_back(filter_column_ptr[0]);
-        }
+        filter_map.emplace_back(filter_column_ptr[0]);
         for (size_t i = start_row_idx; i < row_count; ++i) {
-            if (filter_column_ptr[i] || (_same_to_prev[i] && filter_map[i - 
1])) {
+            if (filter_column_ptr[i] || (same_with_prev(i) && filter_map[i - 
1])) {
                 // Only last same element is true, output last one
                 filter_map.push_back(true);
-                filter_map[i - 1] = !_same_to_prev[i] && filter_map[i - 1];
+                filter_map[i - 1] = !same_with_prev(i) && filter_map[i - 1];
             } else {
                 filter_map.push_back(false);
             }
         }
-        // It contains the first sub block of splited equal-conjuncts-matched 
tuples of the current probe row
-        if (multi_matched_output_row_count > 0) {
-            // If a matched row is output, all the equal-matched tuples in
-            // the following sub blocks should be ignored
-            _parent->_is_any_probe_match_row_output = filter_map[row_count - 
1];
-        } else if (_row_count_from_last_probe > 0 && 
!_parent->_is_any_probe_match_row_output) {
-            // We are handling euqual-conjuncts matched tuples that are 
splitted into multiple blocks,
-            // and no matched tuple has been output in all previous run.
-            // If a tuple is output in this run, all the following mathced 
tuples should be ignored
-            if (filter_map[_row_count_from_last_probe - 1]) {
-                _parent->_is_any_probe_match_row_output = true;
-            }
-        }
 
         /// FIXME: incorrect result of semi mark join with other 
conjuncts(null value missed).
         if (is_mark_join) {
@@ -374,7 +311,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
             // For mark join, we only filter rows which have duplicate join 
keys.
             // And then, we set matched_map to the join result to do the mark 
join's filtering.
             for (size_t i = 1; i < row_count; ++i) {
-                if (!_same_to_prev[i]) {
+                if (!same_with_prev(i)) {
                     helper.insert_value(filter_map[i - 1]);
                     filter_map[i - 1] = true;
                 }
@@ -399,34 +336,19 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
         // if there are none, just pick a tuple and output.
 
         size_t start_row_idx = 1;
-        // We are handling euqual-conjuncts matched tuples that are splitted 
into multiple blocks
-        if (_row_count_from_last_probe > 0 && 
_parent->_is_any_probe_match_row_output) {
-            // if any matched tuple for this probe row is output,
-            // ignore all the following tuples for this probe row.
-            for (int row_idx = 0; row_idx < _row_count_from_last_probe; 
++row_idx) {
-                filter_map[row_idx] = false;
-            }
-            start_row_idx += _row_count_from_last_probe;
-            if (_row_count_from_last_probe < row_count) {
-                filter_map[_row_count_from_last_probe] =
-                        filter_column_ptr[_row_count_from_last_probe] &&
-                        _visited_map[_row_count_from_last_probe];
-            }
-        } else {
-            // Both equal conjuncts and other conjuncts are true
-            filter_map[0] = filter_column_ptr[0] && _visited_map[0];
-        }
+        // Both equal conjuncts and other conjuncts are true
+        filter_map[0] = filter_column_ptr[0] && _build_indexs[0];
 
         for (size_t i = start_row_idx; i < row_count; ++i) {
-            if ((_visited_map[i] && filter_column_ptr[i]) ||
-                (_same_to_prev[i] && filter_map[i - 1])) {
+            if ((_build_indexs[i] && filter_column_ptr[i]) ||
+                (same_with_prev(i) && filter_map[i - 1])) {
                 // When either of two conditions is meet:
                 // 1. Both equal conjuncts and other conjuncts are true or 
same_to_prev
                 // 2. This row is joined from the same build side row as the 
previous row
                 // Set filter_map[i] to true and filter_map[i - 1] to false if 
same_to_prev[i]
                 // is true.
                 filter_map[i] = true;
-                filter_map[i - 1] = !_same_to_prev[i] && filter_map[i - 1];
+                filter_map[i - 1] = !same_with_prev(i) && filter_map[i - 1];
             } else {
                 filter_map[i] = false;
             }
@@ -438,7 +360,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
                                                   .column->assume_mutable()))
                                         .get_data();
             for (int i = 1; i < row_count; ++i) {
-                if (!_same_to_prev[i]) {
+                if (!same_with_prev(i)) {
                     matched_map.push_back(!filter_map[i - 1]);
                     filter_map[i - 1] = true;
                 }
@@ -447,67 +369,26 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
             filter_map[row_count - 1] = true;
         } else {
             int end_row_idx = 0;
-            if (_row_count_from_last_probe > 0) {
-                end_row_idx = row_count - multi_matched_output_row_count;
-                if (!_parent->_is_any_probe_match_row_output) {
-                    // We are handling euqual-conjuncts matched tuples that 
are splitted into multiple blocks,
-                    // and no matched tuple has been output in all previous 
run.
-                    // If a tuple is output in this run, all the following 
mathced tuples should be ignored
-                    if (filter_map[_row_count_from_last_probe - 1]) {
-                        _parent->_is_any_probe_match_row_output = true;
-                        filter_map[_row_count_from_last_probe - 1] = false;
-                    }
-                    if (is_the_last_sub_block && 
!_parent->_is_any_probe_match_row_output) {
-                        // This is the last sub block of splitted block, and 
no equal-conjuncts-matched tuple
-                        // is output in all sub blocks, output a tuple for 
this probe row
-                        filter_map[0] = true;
-                    }
-                }
-                if (multi_matched_output_row_count > 0) {
-                    // It contains the first sub block of splited 
equal-conjuncts-matched tuples of the current probe row
-                    // If a matched row is output, all the equal-matched 
tuples in
-                    // the following sub blocks should be ignored
-                    _parent->_is_any_probe_match_row_output = 
filter_map[row_count - 1];
-                    filter_map[row_count - 1] = false;
-                }
-            } else if (multi_matched_output_row_count > 0) {
-                end_row_idx = row_count - multi_matched_output_row_count;
-                // It contains the first sub block of splited 
equal-conjuncts-matched tuples of the current probe row
-                // If a matched row is output, all the equal-matched tuples in
-                // the following sub blocks should be ignored
-                _parent->_is_any_probe_match_row_output = filter_map[row_count 
- 1];
-                filter_map[row_count - 1] = false;
-            } else {
-                end_row_idx = row_count;
-            }
+
+            end_row_idx = row_count;
 
             // Same to the semi join, but change the last value to opposite 
value
-            for (int i = 1 + _row_count_from_last_probe; i < end_row_idx; ++i) 
{
-                if (!_same_to_prev[i]) {
+            for (int i = 1; i < end_row_idx; ++i) {
+                if (!same_with_prev(i)) {
                     filter_map[i - 1] = !filter_map[i - 1];
                 }
             }
-            auto non_sub_blocks_matched_row_count =
-                    row_count - _row_count_from_last_probe - 
multi_matched_output_row_count;
+            auto non_sub_blocks_matched_row_count = row_count;
             if (non_sub_blocks_matched_row_count > 0) {
                 filter_map[end_row_idx - 1] = !filter_map[end_row_idx - 1];
             }
         }
 
         output_block->get_by_position(result_column_id).column = 
std::move(new_filter_column);
-    } else if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN ||
-                         JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) {
-        for (int i = 0; i < row_count; ++i) {
-            DCHECK(_visited_map[i]);
-            *_visited_map[i] |= filter_column_ptr[i];
-        }
     } else if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) {
         auto filter_size = 0;
         for (int i = 0; i < row_count; ++i) {
-            DCHECK(_visited_map[i]);
-            auto result = filter_column_ptr[i];
-            *_visited_map[i] |= result;
-            filter_size += result;
+            filter_size += filter_column_ptr[i];
         }
         _tuple_is_null_left_flags->resize_fill(filter_size, 0);
     }
@@ -529,142 +410,16 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
     return Status::OK();
 }
 
-// For left or full outer join with other conjuncts.
-// If multiple equal-conjuncts-matched tuples is splitted into several
-// sub blocks, just filter out all the other-conjuncts-NOT-matched tuples at 
first,
-// and when processing the last sub block, check whether there are any
-// equal-conjuncts-matched tuple is output in all sub blocks,
-// if not, just pick a tuple and output.
-template <int JoinOpType, typename Parent>
-void ProcessHashTableProbe<JoinOpType, 
Parent>::_process_splited_equal_matched_tuples(
-        int start_row_idx, int row_count, const UInt8* __restrict 
other_hit_column,
-        UInt8* __restrict null_map_data, UInt8* __restrict filter_map, Block* 
output_block) {
-    int end_row_idx = start_row_idx + row_count;
-    for (int i = start_row_idx; i < end_row_idx; ++i) {
-        auto join_hit = _visited_map[i] != nullptr;
-        auto other_hit = other_hit_column[i];
-
-        if (!other_hit) {
-            for (size_t j = 0; j < _right_col_len; ++j) {
-                typeid_cast<ColumnNullable*>(
-                        std::move(*output_block->get_by_position(j + 
_right_col_idx).column)
-                                .assume_mutable()
-                                .get())
-                        ->get_null_map_data()[i] = true;
-            }
-        }
-
-        null_map_data[i] = !join_hit || !other_hit;
-        filter_map[i] = other_hit;
-
-        if (join_hit) {
-            *_visited_map[i] |= other_hit;
-        }
-    }
-    _parent->_is_any_probe_match_row_output |=
-            simd::contain_byte(filter_map + start_row_idx, row_count, 1);
-}
-
 template <int JoinOpType, typename Parent>
 template <typename HashTableType>
 Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable(
         HashTableType& hash_table_ctx, MutableBlock& mutable_block, Block* 
output_block,
         bool* eos) {
-    //    using Mapped = typename HashTableType::Mapped;
     SCOPED_TIMER(_probe_process_hashtable_timer);
-    //    if constexpr (std::is_same_v<Mapped, RowRefListWithFlag> ||
-    //                  std::is_same_v<Mapped, RowRefListWithFlags>) {
-    //        hash_table_ctx.init_iterator();
-    //        auto& mcol = mutable_block.mutable_columns();
-    //
-    //        bool right_semi_anti_without_other = _is_right_semi_anti && 
!_have_other_join_conjunct;
-    //        int right_col_idx =
-    //                right_semi_anti_without_other ? 0 : 
_parent->left_table_data_types().size();
-    //        int right_col_len = _parent->right_table_data_types().size();
-    //
-    //        auto& iter = hash_table_ctx.iterator;
-    //        auto block_size = 0;
-    //        auto& visited_iter =
-    //                
std::get<ForwardIterator<Mapped>>(_parent->_outer_join_pull_visited_iter);
-    //        _build_blocks_locs.resize(_batch_size);
-    //        if (visited_iter.ok()) {
-    //            if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) {
-    //                for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
-    //                    _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                }
-    //            } else {
-    //                for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
-    //                    if constexpr (JoinOpType == 
TJoinOp::RIGHT_SEMI_JOIN) {
-    //                        if (visited_iter->visited) {
-    //                            _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                        }
-    //                    } else {
-    //                        if (!visited_iter->visited) {
-    //                            _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                        }
-    //                    }
-    //                }
-    //            }
-    //            if (!visited_iter.ok()) {
-    //                ++iter;
-    //            }
-    //        }
-    //
-    //        for (; iter != hash_table_ctx.hash_table->end() && block_size < 
_batch_size; ++iter) {
-    //            auto& mapped = iter->get_second();
-    //            if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) {
-    //                if (mapped.visited) {
-    //                    if constexpr (JoinOpType == 
TJoinOp::RIGHT_SEMI_JOIN) {
-    //                        visited_iter = mapped.begin();
-    //                        for (; visited_iter.ok() && block_size < 
_batch_size; ++visited_iter) {
-    //                            _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                        }
-    //                        if (visited_iter.ok()) {
-    //                            // block_size >= _batch_size, quit for loop
-    //                            break;
-    //                        }
-    //                    }
-    //                } else {
-    //                    if constexpr (JoinOpType != 
TJoinOp::RIGHT_SEMI_JOIN) {
-    //                        visited_iter = mapped.begin();
-    //                        for (; visited_iter.ok() && block_size < 
_batch_size; ++visited_iter) {
-    //                            _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                        }
-    //                        if (visited_iter.ok()) {
-    //                            // block_size >= _batch_size, quit for loop
-    //                            break;
-    //                        }
-    //                    }
-    //                }
-    //            } else {
-    //                visited_iter = mapped.begin();
-    //                for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
-    //                    if constexpr (JoinOpType == 
TJoinOp::RIGHT_SEMI_JOIN) {
-    //                        if (visited_iter->visited) {
-    //                            _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                        }
-    //                    } else {
-    //                        if (!visited_iter->visited) {
-    //                            _build_blocks_locs[block_size++] = 
visited_iter->row_num;
-    //                        }
-    //                    }
-    //                }
-    //                if (visited_iter.ok()) {
-    //                    // block_size >= _batch_size, quit for loop
-    //
-    //                    const auto size = _build_blocks_locs.size();
-    //                    _build_block_rows.resize(_build_blocks_locs.size());
-    //                    for (int i = 0; i < size; i++) {
-    //                        _build_block_rows[i] = _build_blocks_locs[i];
-    //                    }                    break;
-    //                }
-    //            }
-    //        }
-    //        _build_blocks_locs.resize(block_size);
     auto& mcol = mutable_block.mutable_columns();
-    auto is_eof = hash_table_ctx.hash_table->template 
iterate_map<JoinOpType>(_build_block_rows);
+    auto is_eof = hash_table_ctx.hash_table->template 
iterate_map<JoinOpType>(_build_indexs);
     *eos = is_eof;
-    auto block_size = _build_block_rows.size();
+    auto block_size = _build_indexs.size();
     int right_col_idx =
             JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || JoinOpType == 
TJoinOp::FULL_OUTER_JOIN
                     ? _parent->left_table_data_types().size()
@@ -675,8 +430,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable(
         for (size_t j = 0; j < right_col_len; ++j) {
             const auto& column = *_build_block->get_by_position(j).column;
             LOG(INFO) << "happne lee build block size:" << column.size();
-            mcol[j + right_col_idx]->insert_indices_from(column, 
_build_block_rows.data(),
-                                                         
_build_block_rows.data() + block_size);
+            mcol[j + right_col_idx]->insert_indices_from(column, 
_build_indexs.data(),
+                                                         _build_indexs.data() 
+ block_size);
         }
 
         // just resize the left table column in case with other conjunct to 
make block size is not zero
@@ -699,10 +454,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable(
         DCHECK(block_size <= _batch_size);
     }
     return Status::OK();
-    //    else {
-    //        LOG(FATAL) << "Invalid RowRefList";
-    //        return Status::InvalidArgument("Invalid RowRefList");
-    //    }
 }
 
 template <int JoinOpType, typename Parent>
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 86484811885..ba6a8b3111e 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -397,8 +397,6 @@ private:
     std::vector<bool> _left_output_slot_flags;
     std::vector<bool> _right_output_slot_flags;
 
-    // for cases when a probe row matches more than batch size build rows.
-    bool _is_any_probe_match_row_output = false;
     uint8_t _build_block_idx = 0;
     int64_t _build_side_mem_used = 0;
     int64_t _build_side_last_mem_used = 0;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to