This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c3d7087e5e [fix](join) Should not use resize to create null column 
data in join (#59991)
7c3d7087e5e is described below

commit 7c3d7087e5e174f98fb0fcac279a0287dc6160b9
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Jan 19 11:25:36 2026 +0800

    [fix](join) Should not use resize to create null column data in join 
(#59991)
    
    ### What problem does this PR solve?
    
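    The data obtained from `resize` is uninitialized, so a null column created
    with it in the hash join probe can hold invalid nested data and crash later
    expression evaluation, as in the stack trace below.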
    
    ```text
    *** Aborted at 1768640603 (unix time) try "date -d @1768640603" if you are 
using GNU date ***
    *** Current BE git commitID: 3e03c4fa5be ***
    *** SIGSEGV unknown detail explain (@0x0) received by PID 1252336 (TID 
1252521 OR 0x7f6aa2df5640) from PID 0; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at /root/doris/be/src/common/signal_handler.h:421
     1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     2# JVM_handle_linux_signal in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     3# 0x00007F6B17241520 in /lib/x86_64-linux-gnu/libc.so.6
     4# 
doris::vectorized::FunctionArrayElement::_execute_string(doris::vectorized::PODArray<unsigned
 long, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 
15ul> const&, doris::vectorized::IColumn const&, unsigned char const*, 
doris::vectorized::IColumn const&, unsigned char const*, unsigned char*) const 
at /root/doris/be/src/vec/functions/array/function_array_element.h:241
     5# 
doris::vectorized::FunctionArrayElement::_execute_nullable(std::vector<doris::vectorized::ColumnWithTypeAndName,
 std::allocator<doris::vectorized::ColumnWithTypeAndName> > const&, unsigned 
long, unsigned char const*, unsigned char*) const at 
/root/doris/be/src/vec/functions/array/function_array_element.h:411
     6# 
doris::vectorized::FunctionArrayElement::_execute_map(std::vector<doris::vectorized::ColumnWithTypeAndName,
 std::allocator<doris::vectorized::ColumnWithTypeAndName> > const&, unsigned 
long, unsigned char const*, unsigned char*) const in 
/root/doris/release_output/be/lib/doris_be
     7# 
doris::vectorized::FunctionArrayElement::execute_impl(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned 
long> > const&, unsigned long, unsigned long) const in 
/root/doris/release_output/be/lib/doris_be
     8# 
doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned 
long> > const&, unsigned long, unsigned long) const at 
/root/doris/be/src/vec/functions/function.h:472
     9# 
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned 
long> > const&, unsigned long, unsigned long, bool) const at 
/root/doris/be/src/vec/functions/function.cpp:246
    10# 
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned 
long> > const&, unsigned long, unsigned long, bool) const at 
/root/doris/be/src/vec/functions/function.cpp:252
    11# doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned 
long> > const&, unsigned long, unsigned long, bool) const at 
/root/doris/be/src/vec/functions/function.h:195
    12# 
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
 doris::vectorized::Block*, int*, std::vector<unsigned long, 
std::allocator<unsigned long> >&) at 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:190
    13# 
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, 
doris::vectorized::Block*, int*) at 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:206
    14# 
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
 doris::vectorized::Block*, int*, std::vector<unsigned long, 
std::allocator<unsigned long> >&) at 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:181
    15# 
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, 
doris::vectorized::Block*, int*) at 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:206
    16# doris::vectorized::VExprContext::execute(doris::vectorized::Block*, 
int*) at /root/doris/be/src/vec/exprs/vexpr_context.cpp:55
    17# doris::pipeline::OperatorXBase::do_projections(doris::RuntimeState*, 
doris::vectorized::Block*, doris::vectorized::Block*) const at 
/root/doris/be/src/pipeline/exec/operator.cpp:300
    18# 
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) in /root/doris/release_output/be/lib/doris_be
    19# 
doris::pipeline::StatefulOperatorX<doris::pipeline::StreamingAggLocalState>::get_block(doris::RuntimeState*,
 doris::vectorized::Block*, bool*) in /root/doris/release_output/be/lib/doris_be
    20# 
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) in /root/doris/release_output/be/lib/doris_be
    21# doris::pipeline::PipelineTask::execute(bool*) at 
/root/doris/be/src/pipeline/pipeline_task.cpp:426
    22# doris::pipeline::TaskScheduler::_do_work(unsigned long) at 
/root/doris/be/src/pipeline/task_scheduler.cpp:139
    23# doris::ThreadPool::dispatch_thread() at 
/root/doris/be/src/util/threadpool.cpp:609
    24# doris::Thread::supervise_thread(void*) at 
/root/doris/be/src/util/thread.cpp:499
    25# start_thread at ./nptl/pthread_create.c:442
    26# 0x00007F6B173258C0 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
    ```
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
          https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/pipeline/exec/hashjoin_probe_operator.cpp   |  8 +++-
 .../data/query_p0/join/test_column_resize.out      |  8 ++++
 .../suites/query_p0/join/test_column_resize.groovy | 55 ++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp 
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index 3e9653e4070..3bde47cf143 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -196,7 +196,13 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* 
state, vectorized::Bloc
              (_join_op != TJoinOp::LEFT_ANTI_JOIN) && i < 
_right_output_slot_flags.size(); ++i) {
             auto type = remove_nullable(_right_table_data_types[i]);
             auto column = type->create_column();
-            column->resize(block_rows);
+            // Use `insert_many_defaults` here, NOT `resize`, to create the null
+            // column: data obtained from `resize` is uninitialized, while
+            // `insert_many_defaults` creates a column with default values.
+            // For example, for `Struct<a: int, b: string>` created by `resize`, the
+            //  nested columns' null-maps will be random values (not only 1 and 0), so
+            //  a later `struct_element` call results in invalid ColumnNullable data.
+            column->insert_many_defaults(block_rows);
             auto null_map_column = vectorized::ColumnUInt8::create(block_rows, 
1);
             auto nullable_column = 
vectorized::ColumnNullable::create(std::move(column),
                                                                       
std::move(null_map_column));
diff --git a/regression-test/data/query_p0/join/test_column_resize.out 
b/regression-test/data/query_p0/join/test_column_resize.out
new file mode 100644
index 00000000000..fd001b34f92
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_column_resize.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql_insert_default_instead_of_resize --
+1      \N
+3      \N
+5      \N
+6      \N
+7      \N
+
diff --git a/regression-test/suites/query_p0/join/test_column_resize.groovy 
b/regression-test/suites/query_p0/join/test_column_resize.groovy
new file mode 100644
index 00000000000..2a3896a0f2e
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_column_resize.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_resize") {
+    sql "drop table if exists t1;"
+    sql "drop table if exists t2;"
+
+    sql """
+        CREATE TABLE `t1` (
+                `id` int NULL,
+                `s` struct<a:int,b:text> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY RANDOM BUCKETS AUTO
+        PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """
+        CREATE TABLE `t2` (
+                `id` int NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY RANDOM BUCKETS AUTO
+        PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+        ); 
+    """
+
+    sql """
+        insert into t1 values(1, '{"a": 1, "b": "abc"}');
+    """
+    sql """
+        insert into t2 values(1),(3),(5),(6),(7);
+    """
+
+    qt_sql_insert_default_instead_of_resize """
+        select t2.id, t1.s from t2 left join t1 on t2.id = t1.id and t1.id >1 
order by t2.id;
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to