This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a7ad76ae570 [fix](be) Preserve null probe rows in mark anti join 
(#63767)
a7ad76ae570 is described below

commit a7ad76ae5704536aa70f400d6d9bbad2be68bc50
Author: Jerry Hu <[email protected]>
AuthorDate: Fri May 29 18:42:36 2026 +0800

    [fix](be) Preserve null probe rows in mark anti join (#63767)
    
    ### What problem does this PR solve?
    
    Issue Number: None
    
    Related PR: None
    
    Problem Summary: Correlated `NOT IN` subqueries under disjunction can be
    rewritten to a mark null-aware left anti join with additional join
    conjuncts. When the probe join key is `NULL`, the hash table lookup
    advanced the probe index before the caller could run the null-probe
    handling path. As a result, the probe row was skipped before the mark
    column was evaluated by the outer disjunction, producing incomplete
    query results. This change keeps the probe index on the `NULL` row so
    the null-aware join path can emit the correct mark value.
    
    ### Release note
    
    Fix incorrect results for correlated `NOT IN` subqueries combined with
    disjunctions.
    
    ### Check List (For Author)
    
    - Test:
    - Regression test: `doris-local-regression.sh --network 10.26.20.3/24
    run -d correctness -s test_subquery_in_disjunction -forceGenOut`
    - Regression test: `doris-local-regression.sh --network 10.26.20.3/24
    run -d correctness -s test_subquery_in_disjunction`
    - Manual test: verified the `NOT IN` + `OR` reproducer before and after
    the fix on a local FE/BE cluster
        - Build: `./build.sh --be`
    - Behavior changed: Yes. Corrects query result semantics for affected
    null-aware mark anti joins.
    - Does this need documentation: No
---
 be/src/exec/common/hash_table/join_hash_table.h    |  5 +-
 .../correctness/test_subquery_in_disjunction.out   | 12 ++++
 .../test_subquery_in_disjunction.groovy            | 81 ++++++++++++++++++----
 3 files changed, 83 insertions(+), 15 deletions(-)

diff --git a/be/src/exec/common/hash_table/join_hash_table.h 
b/be/src/exec/common/hash_table/join_hash_table.h
index 9b63040330b..bc737352f41 100644
--- a/be/src/exec/common/hash_table/join_hash_table.h
+++ b/be/src/exec/common/hash_table/join_hash_table.h
@@ -480,7 +480,8 @@ private:
             /// If the probe key is null
             if constexpr (has_null_map) {
                 if (null_map[probe_idx]) {
-                    probe_idx++;
+                    build_idx = 0;
+                    picking_null_keys = false;
                     break;
                 }
             }
@@ -512,4 +513,4 @@ private:
 
 template <typename Key, typename Hash, bool DirectMapping>
 using JoinHashMap = JoinHashTable<Key, Hash, DirectMapping>;
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/regression-test/data/correctness/test_subquery_in_disjunction.out 
b/regression-test/data/correctness/test_subquery_in_disjunction.out
index 65dbd980472..7b253ee410d 100644
--- a/regression-test/data/correctness/test_subquery_in_disjunction.out
+++ b/regression-test/data/correctness/test_subquery_in_disjunction.out
@@ -66,6 +66,18 @@
 1      2       3
 10     20      30
 
+-- !not_in_nullable_mark_join --
+1      0
+11     \N
+
+-- !not_in_nullable_mark_join_in_disjunction --
+1      0
+11     \N
+
+-- !not_in_nullable_mark_join_in_disjunction_build_null --
+1      0
+11     \N
+
 -- !mark_join_with_other_conjuncts1 --
 1      2
 1      3
diff --git 
a/regression-test/suites/correctness/test_subquery_in_disjunction.groovy 
b/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
index 2decf0583f2..384103d2c14 100644
--- a/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
+++ b/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
@@ -97,22 +97,21 @@ suite("test_subquery_in_disjunction") {
         SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2 
WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
     """
 
-    // TODO: enable this after DORIS-7051 and DORIS-7052 is fixed
-    // qt_hash_join_with_other_conjuncts5 """
-    //     SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM 
test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
-    // """
+    qt_hash_join_with_other_conjuncts5 """
+        SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 
WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
+    """
 
-    // qt_hash_join_with_other_conjuncts6 """
-    //     SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM 
test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
-    // """
+    qt_hash_join_with_other_conjuncts6 """
+        SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 
WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
+    """
 
-    // qt_hash_join_with_other_conjuncts7 """
-    //     SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM 
test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
-    // """
+    qt_hash_join_with_other_conjuncts7 """
+        SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 
WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
+    """
 
-    // qt_hash_join_with_other_conjuncts8 """
-    //     SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM 
test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
-    // """
+    qt_hash_join_with_other_conjuncts8 """
+        SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 
WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
+    """
 
     qt_same_subquery_in_conjuncts """
         SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR 
c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1;
@@ -122,6 +121,62 @@ suite("test_subquery_in_disjunction") {
         SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR 
c1 IN (SELECT c2 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1;
     """
 
+    sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_outer """
+    sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_inner """
+    sql """
+    CREATE TABLE `test_sq_dj_nullable_outer` (
+        `id` int(11) NULL,
+        `a` int(11) NULL
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+        "replication_num" = "1"
+    );
+    """
+    sql """
+    CREATE TABLE `test_sq_dj_nullable_inner` (
+        `id` int(11) NULL,
+        `a` int(11) NULL
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+        "replication_num" = "1"
+    );
+    """
+    sql """ INSERT INTO test_sq_dj_nullable_outer VALUES (1, 0), (11, NULL) """
+    sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (1, 10) """
+
+    order_qt_not_in_nullable_mark_join """
+        SELECT id, a FROM test_sq_dj_nullable_outer o
+        WHERE o.a NOT IN (
+            SELECT i.a FROM test_sq_dj_nullable_inner i
+            WHERE i.id > o.id AND i.a IS NOT NULL
+        )
+        ORDER BY id;
+    """
+
+    order_qt_not_in_nullable_mark_join_in_disjunction """
+        SELECT id, a FROM test_sq_dj_nullable_outer o
+        WHERE o.a NOT IN (
+            SELECT i.a FROM test_sq_dj_nullable_inner i
+            WHERE i.id > o.id AND i.a IS NOT NULL
+        ) OR o.id IN (1, 11)
+        ORDER BY id;
+    """
+
+    sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (20, NULL) """
+
+    order_qt_not_in_nullable_mark_join_in_disjunction_build_null """
+        SELECT id, a FROM test_sq_dj_nullable_outer o
+        WHERE o.a NOT IN (
+            SELECT i.a FROM test_sq_dj_nullable_inner i
+            WHERE i.id > o.id
+        ) OR o.id IN (1, 11)
+        ORDER BY id;
+    """
+
     // test mark join that one probe row matches multiple build rows
     sql """drop table if exists sub_query_correlated_subquery1;"""
     sql """create table if not exists sub_query_correlated_subquery1


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to