This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch dev_join
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 90b226c08e125670d65dc2e8318f79952367a0ce
Author: HappenLee <[email protected]>
AuthorDate: Wed Oct 25 16:44:14 2023 +0800

    Fix some TPC-H bugs in right semi/anti join (#25896)
---
 be/src/vec/common/hash_table/hash_map.h            |  9 ++--
 .../vec/exec/join/process_hash_table_probe_impl.h  | 51 ++++++++++++----------
 be/src/vec/exec/join/vhash_join_node.h             |  2 +-
 3 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_map.h 
b/be/src/vec/common/hash_table/hash_map.h
index ab094d69a67..35abaf48fff 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -270,12 +270,12 @@ public:
         const auto batch_size = max_batch_size;
         const auto elem_num = visited.size();
         int count = 0;
-        build_idxs.reserve(batch_size);
+        build_idxs.resize(batch_size);
 
         while (count < batch_size && iter_idx < elem_num) {
             const auto matched = visited[iter_idx];
             build_idxs[count] = iter_idx;
-            if constexpr (JoinOpType != doris::TJoinOp::RIGHT_ANTI_JOIN) {
+            if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN) {
                 count += !matched;
             } else {
                 count += matched;
@@ -284,7 +284,7 @@ public:
         }
 
         build_idxs.resize(count);
-        return iter_idx == elem_num;
+        return iter_idx >= elem_num;
     }
 
 private:
@@ -300,8 +300,9 @@ private:
                 }
                 build_idx = next[build_idx];
             }
+            probe_idx++;
         }
-        return std::pair {probe_rows, 0};
+        return std::pair {probe_idx, 0};
     }
 
     template <int JoinOpType>
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h 
b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index fc408711768..53749b78d1f 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -661,38 +661,43 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::process_data_in_hashtable(
     //            }
     //        }
     //        _build_blocks_locs.resize(block_size);
-
     auto& mcol = mutable_block.mutable_columns();
-    *eos = hash_table_ctx.hash_table->template 
iterate_map<JoinOpType>(_build_block_rows);
+    auto is_eof = hash_table_ctx.hash_table->template 
iterate_map<JoinOpType>(_build_block_rows);
+    *eos = is_eof;
     auto block_size = _build_block_rows.size();
-    int right_col_idx = _parent->left_table_data_types().size();
+    int right_col_idx =
+            JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || JoinOpType == 
TJoinOp::FULL_OUTER_JOIN
+                    ? _parent->left_table_data_types().size()
+                    : 0;
     int right_col_len = _parent->right_table_data_types().size();
 
-    for (size_t j = 0; j < right_col_len; ++j) {
-        const auto& column = *_build_block->get_by_position(j).column;
-        mcol[j + right_col_idx]->insert_indices_from(
-                column, _build_block_rows.data(),
-                _build_block_rows.data() + _build_block_rows.size());
-    }
+    if (block_size) {
+        for (size_t j = 0; j < right_col_len; ++j) {
+            const auto& column = *_build_block->get_by_position(j).column;
+            LOG(INFO) << "happne lee build block size:" << column.size();
+            mcol[j + right_col_idx]->insert_indices_from(column, 
_build_block_rows.data(),
+                                                         
_build_block_rows.data() + block_size);
+        }
 
-    // just resize the left table column in case with other conjunct to make 
block size is not zero
-    if (_is_right_semi_anti && _have_other_join_conjunct) {
-        auto target_size = mcol[right_col_idx]->size();
-        for (int i = 0; i < right_col_idx; ++i) {
-            mcol[i]->resize(target_size);
+        // just resize the left table column in case with other conjunct to 
make block size is not zero
+        if (_is_right_semi_anti && _have_other_join_conjunct) {
+            auto target_size = mcol[right_col_idx]->size();
+            for (int i = 0; i < right_col_idx; ++i) {
+                mcol[i]->resize(target_size);
+            }
         }
-    }
 
-    // right outer join / full join need insert data of left table
-    if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
-                  JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
-        for (int i = 0; i < right_col_idx; ++i) {
-            
assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
+        // right outer join / full join need insert data of left table
+        if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
+                      JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
+            for (int i = 0; i < right_col_idx; ++i) {
+                
assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
+            }
+            _tuple_is_null_left_flags->resize_fill(block_size, 1);
         }
-        _tuple_is_null_left_flags->resize_fill(block_size, 1);
+        output_block->swap(mutable_block.to_block(0));
+        DCHECK(block_size <= _batch_size);
     }
-    output_block->swap(mutable_block.to_block(0));
-    DCHECK(block_size <= _batch_size);
     return Status::OK();
     //    else {
     //        LOG(FATAL) << "Invalid RowRefList";
diff --git a/be/src/vec/exec/join/vhash_join_node.h 
b/be/src/vec/exec/join/vhash_join_node.h
index 5fc1c3c6cae..86484811885 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -118,7 +118,7 @@ struct ProcessHashTableBuild {
     template <int JoinOpType, bool ignore_null, bool short_circuit_for_null>
     Status run(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map, 
bool* has_null_key) {
         if (short_circuit_for_null || ignore_null) {
-            for (int i = 0; i < _rows; i++) {
+            for (size_t i = 0; i < _rows; i++) {
                 if ((*null_map)[i]) {
                     *has_null_key = true;
                 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to