This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9468711f9f [Bug](join) fix bug null aware left anti join not correct result (#15841)
9468711f9f is described below

commit 9468711f9fa67228e48202cbceebeeb886ecc1d3
Author: HappenLee <[email protected]>
AuthorDate: Fri Jan 13 10:18:05 2023 +0800

    [Bug](join) fix bug null aware left anti join not correct result (#15841)
---
 be/src/common/config.h                                | 2 +-
 be/src/olap/push_handler.cpp                          | 2 +-
 be/src/vec/exec/join/vhash_join_node.cpp              | 5 ++---
 regression-test/data/query_p0/join/test_join.out      | 4 ++++
 regression-test/suites/query_p0/join/test_join.groovy | 6 +++---
 5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index 021410f26b..32a6b4f0f6 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -487,7 +487,7 @@ CONF_mInt64(write_buffer_size, "209715200");
 // max buffer size used in memtable for the aggregated table, default 400MB
 CONF_mInt64(write_buffer_size_for_agg, "419430400");
 // write buffer size in push task for sparkload, default 1GB
-CONF_mInt64(write_buffer_size_for_sparkload, "1073741824");
+CONF_mInt64(flush_size_for_sparkload, "1073741824");
 
 // following 2 configs limit the memory consumption of load process on a Backend.
 // eg: memory limit to 80% of mem limit config but up to 100GB(default)
diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp
index 155b322c85..55472c6d82 100644
--- a/be/src/olap/push_handler.cpp
+++ b/be/src/olap/push_handler.cpp
@@ -250,7 +250,7 @@ Status PushHandler::_convert_v2(TabletSharedPtr cur_tablet, RowsetSharedPtr* cur
             VLOG_NOTICE << "start to convert etl file to delta.";
             while (!reader->eof()) {
                 if (reader->mem_pool()->mem_tracker()->consumption() >
-                    config::write_buffer_size_for_sparkload) {
+                    config::flush_size_for_sparkload) {
                     RETURN_NOT_OK(rowset_writer->flush());
                     reader->mem_pool()->free_all();
                 }
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 31f90807fd..c6e6ecea45 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -861,10 +861,9 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc
         _process_hashtable_ctx_variants_init(state);
     }
 
-    // Since the comparison of null values is meaningless, left anti join should not output null
+    // Since the comparison of null values is meaningless, null aware left anti join should not output null
     // when the build side is not empty.
-    if (eos && !_build_blocks->empty() &&
-        (_join_op == TJoinOp::LEFT_ANTI_JOIN || _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)) {
+    if (eos && !_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
         _probe_ignore_null = true;
     }
     return Status::OK();
diff --git a/regression-test/data/query_p0/join/test_join.out b/regression-test/data/query_p0/join/test_join.out
index f9d5b36279..e4d2ebcbb1 100644
--- a/regression-test/data/query_p0/join/test_join.out
+++ b/regression-test/data/query_p0/join/test_join.out
@@ -1217,6 +1217,7 @@ false     3       1989    1002    11011905        
24453.325       false   2012-03-14      2000-01-01T00:00        yunlj8@nk
 3
 
 -- !left_anti_join_with_other_pred --
+\N
 1
 2
 3
@@ -1234,6 +1235,7 @@ false     3       1989    1002    11011905        
24453.325       false   2012-03-14      2000-01-01T00:00        yunlj8@nk
 15
 
 -- !left_anti_join_null_1 --
+\N
 4
 5
 6
@@ -1780,6 +1782,8 @@ false     1       1989    1001    11011902        123.123 
true    1989-03-21      1989-03-21T13:00        wangjuoo4       0.
 false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0
 
+-- !join_bug4 --
+
 -- !join_basic1 --
 false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727 false   1       1989    1001    
11011902        123.123 true    1989-03-21      1989-03-21T13:00        
wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
 false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727        false   2       
1986    1001    11011903        1243.500        false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy
index 7d5901e5f6..06d923bc7f 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -933,7 +933,7 @@ suite("test_join", "query,p0") {
     // https://github.com/apache/doris/issues/4210
     qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2)
            order by k1"""
-
+    qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1 from baseall where k1 is not null )"""
 
 
     // basic join
@@ -975,7 +975,7 @@ suite("test_join", "query,p0") {
         def res71 = sql"""select * from ${tbName2} a left anti join ${tbName1} b on (a.${c} = b.${c}) 
                 order by a.k1, a.k2, a.k3"""
         def res72 = sql"""select distinct a.* from ${tbName2} a left outer join ${tbName1} b on (a.${c} = b.${c}) 
-                where b.k1 is null and a.k1 is not null order by a.k1, a.k2, a.k3"""
+                where b.k1 is null order by a.k1, a.k2, a.k3"""
         check2_doris(res71, res72)
 
         def res73 = sql"""select * from ${tbName2} a right anti join ${tbName1} b on (a.${c} = b.${c}) 
@@ -1083,7 +1083,7 @@ suite("test_join", "query,p0") {
 
     def res85 = sql"""select a.k1, a.k2 from ${tbName2} a left anti join ${null_name} b on a.k1 = b.n2 
            order by 1, 2"""
-    def res86 = sql"""select k1, k2 from ${tbName2} where k1 is not null order by k1, k2"""
+    def res86 = sql"""select k1, k2 from ${tbName2} order by k1, k2"""
     check2_doris(res85, res86)
 
     def res87 = sql"""select b.n1, b.n2 from ${tbName2} a right anti join ${null_name} b on a.k1 = b.n2 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to