This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7c3d7087e5e [fix](join) Should not use resize to create null column
data in join (#59991)
7c3d7087e5e is described below
commit 7c3d7087e5e174f98fb0fcac279a0287dc6160b9
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Jan 19 11:25:36 2026 +0800
[fix](join) Should not use resize to create null column data in join
(#59991)
### What problem does this PR solve?
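The column data produced by `resize` is uninitialized. When the join pads the right-side columns with NULL rows this way, the nested columns' null maps can contain arbitrary bytes, which makes later expression evaluation crash, as in the stack trace below.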
```text
*** Aborted at 1768640603 (unix time) try "date -d @1768640603" if you are
using GNU date ***
*** Current BE git commitID: 3e03c4fa5be ***
*** SIGSEGV unknown detail explain (@0x0) received by PID 1252336 (TID
1252521 OR 0x7f6aa2df5640) from PID 0; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at /root/doris/be/src/common/signal_handler.h:421
1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
2# JVM_handle_linux_signal in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
3# 0x00007F6B17241520 in /lib/x86_64-linux-gnu/libc.so.6
4#
doris::vectorized::FunctionArrayElement::_execute_string(doris::vectorized::PODArray<unsigned
long, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul,
15ul> const&, doris::vectorized::IColumn const&, unsigned char const*,
doris::vectorized::IColumn const&, unsigned char const*, unsigned char*) const
at /root/doris/be/src/vec/functions/array/function_array_element.h:241
5#
doris::vectorized::FunctionArrayElement::_execute_nullable(std::vector<doris::vectorized::ColumnWithTypeAndName,
std::allocator<doris::vectorized::ColumnWithTypeAndName> > const&, unsigned
long, unsigned char const*, unsigned char*) const at
/root/doris/be/src/vec/functions/array/function_array_element.h:411
6#
doris::vectorized::FunctionArrayElement::_execute_map(std::vector<doris::vectorized::ColumnWithTypeAndName,
std::allocator<doris::vectorized::ColumnWithTypeAndName> > const&, unsigned
long, unsigned char const*, unsigned char*) const in
/root/doris/release_output/be/lib/doris_be
7#
doris::vectorized::FunctionArrayElement::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned
long> > const&, unsigned long, unsigned long) const in
/root/doris/release_output/be/lib/doris_be
8#
doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned
long> > const&, unsigned long, unsigned long) const at
/root/doris/be/src/vec/functions/function.h:472
9#
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned
long> > const&, unsigned long, unsigned long, bool) const at
/root/doris/be/src/vec/functions/function.cpp:246
10#
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned
long> > const&, unsigned long, unsigned long, bool) const at
/root/doris/be/src/vec/functions/function.cpp:252
11# doris::vectorized::IFunctionBase::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned
long> > const&, unsigned long, unsigned long, bool) const at
/root/doris/be/src/vec/functions/function.h:195
12#
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*, std::vector<unsigned long,
std::allocator<unsigned long> >&) at
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:190
13#
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*) at
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:206
14#
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*, std::vector<unsigned long,
std::allocator<unsigned long> >&) at
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:181
15#
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*) at
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:206
16# doris::vectorized::VExprContext::execute(doris::vectorized::Block*,
int*) at /root/doris/be/src/vec/exprs/vexpr_context.cpp:55
17# doris::pipeline::OperatorXBase::do_projections(doris::RuntimeState*,
doris::vectorized::Block*, doris::vectorized::Block*) const at
/root/doris/be/src/pipeline/exec/operator.cpp:300
18#
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*,
doris::vectorized::Block*, bool*) in /root/doris/release_output/be/lib/doris_be
19#
doris::pipeline::StatefulOperatorX<doris::pipeline::StreamingAggLocalState>::get_block(doris::RuntimeState*,
doris::vectorized::Block*, bool*) in /root/doris/release_output/be/lib/doris_be
20#
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*,
doris::vectorized::Block*, bool*) in /root/doris/release_output/be/lib/doris_be
21# doris::pipeline::PipelineTask::execute(bool*) at
/root/doris/be/src/pipeline/pipeline_task.cpp:426
22# doris::pipeline::TaskScheduler::_do_work(unsigned long) at
/root/doris/be/src/pipeline/task_scheduler.cpp:139
23# doris::ThreadPool::dispatch_thread() at
/root/doris/be/src/util/threadpool.cpp:609
24# doris::Thread::supervise_thread(void*) at
/root/doris/be/src/util/thread.cpp:499
25# start_thread at ./nptl/pthread_create.c:442
26# 0x00007F6B173258C0 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
```
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/pipeline/exec/hashjoin_probe_operator.cpp | 8 +++-
.../data/query_p0/join/test_column_resize.out | 8 ++++
.../suites/query_p0/join/test_column_resize.groovy | 55 ++++++++++++++++++++++
3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index 3e9653e4070..3bde47cf143 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -196,7 +196,13 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState*
state, vectorized::Bloc
(_join_op != TJoinOp::LEFT_ANTI_JOIN) && i <
_right_output_slot_flags.size(); ++i) {
auto type = remove_nullable(_right_table_data_types[i]);
auto column = type->create_column();
- column->resize(block_rows);
+ // Use `insert_many_defaults` here, NOT `resize`, to build the null column:
+ // `insert_many_defaults` fills the column with default values, while `resize`
+ // leaves the data uninitialized. For example, for `Struct<a: int, b: string>`,
+ // a column created by `resize` has nested null-maps holding random values
+ // (not only 1 and 0); if `struct_element` is called on it next, the result
+ // is invalid ColumnNullable data.
+ column->insert_many_defaults(block_rows);
auto null_map_column = vectorized::ColumnUInt8::create(block_rows,
1);
auto nullable_column =
vectorized::ColumnNullable::create(std::move(column),
std::move(null_map_column));
diff --git a/regression-test/data/query_p0/join/test_column_resize.out
b/regression-test/data/query_p0/join/test_column_resize.out
new file mode 100644
index 00000000000..fd001b34f92
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_column_resize.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql_insert_default_instead_of_resize --
+1 \N
+3 \N
+5 \N
+6 \N
+7 \N
+
diff --git a/regression-test/suites/query_p0/join/test_column_resize.groovy
b/regression-test/suites/query_p0/join/test_column_resize.groovy
new file mode 100644
index 00000000000..2a3896a0f2e
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_column_resize.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_resize") {
+ sql "drop table if exists t1;"
+ sql "drop table if exists t2;"
+
+ sql """
+ CREATE TABLE `t1` (
+ `id` int NULL,
+ `s` struct<a:int,b:text> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ CREATE TABLE `t2` (
+ `id` int NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into t1 values(1, '{"a": 1, "b": "abc"}');
+ """
+ sql """
+ insert into t2 values(1),(3),(5),(6),(7);
+ """
+
+ qt_sql_insert_default_instead_of_resize """
+ select t2.id, t1.s from t2 left join t1 on t2.id = t1.id and t1.id >1
order by t2.id;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]