This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 805520bfe4d [Bug](join) fix single null eq get wrong result (#44613)
805520bfe4d is described below
commit 805520bfe4d90965bbd844945bd302eff38dc8f3
Author: Pxl <[email protected]>
AuthorDate: Wed Nov 27 11:22:57 2024 +0800
[Bug](join) fix single null eq get wrong result (#44613)
introduced by https://github.com/apache/doris/pull/42398
In order to make the null keys equal when using single null eq, all null
keys need to be set to default value.
---
be/src/pipeline/exec/hashjoin_build_sink.h | 5 ++++-
.../exec/join/process_hash_table_probe_impl.h | 4 ++++
.../data/nereids_p0/join/test_mark_join.out | 7 +++++++
regression-test/data/query_p0/join/test_join.out | 6 ++++++
.../suites/nereids_p0/join/test_mark_join.groovy | 21 +++++++++++++++++++++
.../suites/query_p0/join/test_join.groovy | 13 ++++++++++++-
6 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h
b/be/src/pipeline/exec/hashjoin_build_sink.h
index d905afa2758..cc78e6a769f 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.h
+++ b/be/src/pipeline/exec/hashjoin_build_sink.h
@@ -197,7 +197,10 @@ struct ProcessHashTableBuild {
SCOPED_TIMER(_parent->_build_table_insert_timer);
hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows,
_batch_size,
*has_null_key);
-
+ // In order to make the null keys equal when using single null eq, all
null keys need to be set to default value.
+ if (_build_raw_ptrs.size() == 1 && null_map) {
+
_build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data());
+ }
hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows,
null_map ? null_map->data() :
nullptr, true, true,
hash_table_ctx.hash_table->get_bucket_size());
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 05cd3d7d9e0..fc1153b3419 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -173,6 +173,10 @@ typename HashTableType::State
ProcessHashTableProbe<JoinOpType>::_init_probe_sid
if (!_parent->_ready_probe) {
_parent->_ready_probe = true;
hash_table_ctx.reset();
+ // In order to make the null keys equal when using single null eq, all
null keys need to be set to default value.
+ if (_parent->_probe_columns.size() == 1 && null_map) {
+
_parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map);
+ }
hash_table_ctx.init_serialized_keys(_parent->_probe_columns,
probe_rows, null_map, true,
false,
hash_table_ctx.hash_table->get_bucket_size());
hash_table_ctx.hash_table->pre_build_idxs(hash_table_ctx.bucket_nums,
diff --git a/regression-test/data/nereids_p0/join/test_mark_join.out
b/regression-test/data/nereids_p0/join/test_mark_join.out
index 59fc7d651ad..a6035556674 100644
--- a/regression-test/data/nereids_p0/join/test_mark_join.out
+++ b/regression-test/data/nereids_p0/join/test_mark_join.out
@@ -53,3 +53,10 @@
-- !mark_join_null_conjunct --
\N
+-- !mark_join8 --
+1 1 1 false
+2 2 2 false
+3 \N \N \N
+3 \N 3 \N
+4 \N 4 true
+
diff --git a/regression-test/data/query_p0/join/test_join.out
b/regression-test/data/query_p0/join/test_join.out
index 3814530e0c6..7cbaf4f7750 100644
--- a/regression-test/data/query_p0/join/test_join.out
+++ b/regression-test/data/query_p0/join/test_join.out
@@ -3243,3 +3243,9 @@ false true true false false
-- !sql --
4
+-- !sql --
+2 \N
+
+-- !sql --
+2 \N
+
diff --git a/regression-test/suites/nereids_p0/join/test_mark_join.groovy
b/regression-test/suites/nereids_p0/join/test_mark_join.groovy
index 3b07c357b5d..034900f81ac 100644
--- a/regression-test/suites/nereids_p0/join/test_mark_join.groovy
+++ b/regression-test/suites/nereids_p0/join/test_mark_join.groovy
@@ -142,4 +142,25 @@ suite("test_mark_join", "nereids_p0") {
"""
qt_mark_join_null_conjunct """select null in ( select k1 from
test_mark_join_t1);"""
+
+ qt_mark_join8 """
+ select
+ k1,
+ k2,
+ k3,
+ k1 not in (
+ select
+ test_mark_join_t2.k2
+ from
+ test_mark_join_t2
+ where
+ test_mark_join_t2.k3 <=> test_mark_join_t1.k3
+ ) vv
+ from
+ test_mark_join_t1
+ order by
+ 1,
+ 2,
+ 3;
+ """
}
diff --git a/regression-test/suites/query_p0/join/test_join.groovy
b/regression-test/suites/query_p0/join/test_join.groovy
index e75878cb032..fa83fddbd41 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -720,7 +720,7 @@ suite("test_join", "query,p0") {
}
qt_left_anti_join_with_other_pred "select b.k1 from baseall b left anti
join test t on b.k1 = t.k1 and 1 = 2 order by b.k1"
-
+ // null not in (1,2,3,null) = true
qt_left_anti_join_null_1 "select b.k1 from baseall b left anti join test t
on b.k1 = t.k1 order by b.k1"
qt_left_anti_join_null_2 "select b.k1 from baseall b left anti join
test_join_empty_view t on b.k1 = t.k1 order by b.k1"
@@ -931,6 +931,7 @@ suite("test_join", "query,p0") {
// https://github.com/apache/doris/issues/4210
qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from
test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2)
order by k1"""
+ // null not in (1,2,3) = false
qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1
from baseall where k1 is not null )"""
@@ -1330,4 +1331,14 @@ suite("test_join", "query,p0") {
qt_sql """ select /*+SET_VAR(batch_size=1, disable_join_reorder=true)*/
count(DISTINCT dcqewrt.engineer) as active_person_count from tbl1 dcqewrt left
join [broadcast] tbl2 dd on dd.data_dt = dcqewrt.data_dt; """
sql """ DROP TABLE IF EXISTS tbl2; """
sql """ DROP TABLE IF EXISTS tbl1; """
+
+
+ sql "drop table if exists t01;"
+ sql "drop table if exists t02;"
+ sql"""create table t01 (id int, a varchar(10)) properties
("replication_num" = "1");"""
+ sql"""create table t02 (id int, b varchar(10)) properties
("replication_num" = "1");"""
+ sql"insert into t01 values (1, 'a'), (2, null), (3, 'c');"
+ sql"insert into t02 values (1, 'b');"
+ qt_sql"select * from t01 where (not like (a, 'a%')) <=> 'b';"
+ qt_sql"select * from t01 where (not like (a, 'a%')) <=> (select max(b)
from t02); "
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]