This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 5112cbff69f branch-4.0: [fix](join) Should not use resize to create
null column data in join #59991 (#60011)
5112cbff69f is described below
commit 5112cbff69f8435e788ad07411e3d89a5401428e
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jan 19 15:05:43 2026 +0800
branch-4.0: [fix](join) Should not use resize to create null column data in
join #59991 (#60011)
Cherry-picked from #59991
Co-authored-by: Jerry Hu <[email protected]>
---
be/src/pipeline/exec/hashjoin_probe_operator.cpp | 8 +++-
.../data/query_p0/join/test_column_resize.out | 8 ++++
.../suites/query_p0/join/test_column_resize.groovy | 55 ++++++++++++++++++++++
3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index 3e9653e4070..3bde47cf143 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -196,7 +196,13 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState*
state, vectorized::Bloc
(_join_op != TJoinOp::LEFT_ANTI_JOIN) && i <
_right_output_slot_flags.size(); ++i) {
auto type = remove_nullable(_right_table_data_types[i]);
auto column = type->create_column();
- column->resize(block_rows);
+ // Use `insert_many_defaults` here, NOT `resize`, to create the null column,
+ // because `insert_many_defaults` fills the column with default values.
+ // For example, for `Struct<a: int, b: string>`:
+ // if the column is created by `resize`,
+ // the nested columns' null-maps will hold random values (not only 1 and 0),
+ // and if `struct_element` is called next, it will produce invalid ColumnNullable data.
+ column->insert_many_defaults(block_rows);
auto null_map_column = vectorized::ColumnUInt8::create(block_rows,
1);
auto nullable_column =
vectorized::ColumnNullable::create(std::move(column),
std::move(null_map_column));
diff --git a/regression-test/data/query_p0/join/test_column_resize.out
b/regression-test/data/query_p0/join/test_column_resize.out
new file mode 100644
index 00000000000..fd001b34f92
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_column_resize.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql_insert_default_instead_of_resize --
+1 \N
+3 \N
+5 \N
+6 \N
+7 \N
+
diff --git a/regression-test/suites/query_p0/join/test_column_resize.groovy
b/regression-test/suites/query_p0/join/test_column_resize.groovy
new file mode 100644
index 00000000000..2a3896a0f2e
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_column_resize.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_resize") {
+ sql "drop table if exists t1;"
+ sql "drop table if exists t2;"
+
+ sql """
+ CREATE TABLE `t1` (
+ `id` int NULL,
+ `s` struct<a:int,b:text> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ CREATE TABLE `t2` (
+ `id` int NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into t1 values(1, '{"a": 1, "b": "abc"}');
+ """
+ sql """
+ insert into t2 values(1),(3),(5),(6),(7);
+ """
+
+ qt_sql_insert_default_instead_of_resize """
+ select t2.id, t1.s from t2 left join t1 on t2.id = t1.id and t1.id >1
order by t2.id;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]