This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fc5b87e6203 [improve](join) reuse the join block to reduce malloc
memory (#43738)
fc5b87e6203 is described below
commit fc5b87e6203635c3f23d45a0316a97bbcd9fb2c6
Author: zhangstar333 <[email protected]>
AuthorDate: Fri Nov 15 10:32:54 2024 +0800
[improve](join) reuse the join block to reduce malloc memory (#43738)
### What problem does this PR solve?
Problem Summary:
Before this change, the pull function used tmp_block to reference the data
and swapped it with output_block.
It then created empty columns in join_block, so the next time data was
inserted into it, memory had to be allocated again.
After the fix:
```
mysql [ssb]>set parallel_pipeline_task_num = 0;
mysql [ssb]>select count(c_custkey) from (select c_custkey from customer
cross join dates)t;
+------------------+
| count(c_custkey) |
+------------------+
| 7668000000 |
+------------------+
1 row in set (0.32 sec)
mysql [ssb]>set parallel_pipeline_task_num = 1;
Query OK, 0 rows affected (0.00 sec)
mysql [ssb]>select count(c_custkey) from (select c_custkey from customer
cross join dates)t;
+------------------+
| count(c_custkey) |
+------------------+
| 7668000000 |
+------------------+
1 row in set (5.61 sec)
```
Before the fix:
```
mysql [ssb]>set parallel_pipeline_task_num = 0;
Query OK, 0 rows affected (0.00 sec)
mysql [ssb]>select count(c_custkey) from (select c_custkey from customer
cross join dates)t;
+------------------+
| count(c_custkey) |
+------------------+
| 7668000000 |
+------------------+
1 row in set (2.79 sec)
mysql [ssb]>set parallel_pipeline_task_num = 1;
Query OK, 0 rows affected (0.00 sec)
mysql [ssb]>select count(c_custkey) from (select c_custkey from customer
cross join dates)t;
+------------------+
| count(c_custkey) |
+------------------+
| 7668000000 |
+------------------+
1 row in set (10.21 sec)
```
---
.../pipeline/exec/nested_loop_join_probe_operator.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp
b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp
index afa1a2e59b7..f4f4ef21ece 100644
--- a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp
+++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp
@@ -516,23 +516,20 @@ Status NestedLoopJoinProbeOperatorX::pull(RuntimeState*
state, vectorized::Block
local_state._matched_rows_done
: local_state._matched_rows_done);
+ size_t join_block_column_size = local_state._join_block.columns();
{
- vectorized::Block tmp_block = local_state._join_block;
-
- // Here make _join_block release the columns' ptr
-
local_state._join_block.set_columns(local_state._join_block.clone_empty_columns());
-
- local_state.add_tuple_is_null_column(&tmp_block);
+ local_state.add_tuple_is_null_column(&local_state._join_block);
{
SCOPED_TIMER(local_state._join_filter_timer);
RETURN_IF_ERROR(vectorized::VExprContext::filter_block(
- local_state._conjuncts, &tmp_block,
tmp_block.columns()));
+ local_state._conjuncts, &local_state._join_block,
+ local_state._join_block.columns()));
}
- RETURN_IF_ERROR(local_state._build_output_block(&tmp_block, block,
false));
+ RETURN_IF_ERROR(
+ local_state._build_output_block(&local_state._join_block,
block, false));
local_state._reset_tuple_is_null_column();
}
- local_state._join_block.clear_column_data();
-
+ local_state._join_block.clear_column_data(join_block_column_size);
if (!(*eos) and !local_state._need_more_input_data) {
auto func = [&](auto&& join_op_variants, auto set_build_side_flag,
auto set_probe_side_flag) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]