This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit d42a8c6298c433436b9d0206c4c3acf5c8ccc096
Author: Zhenghua Lyu <[email protected]>
AuthorDate: Thu Mar 24 10:56:37 2022 +0800

    Do not match non vars in inner plan's target for LASJ_NOTIN.
    
    This commit is cherry-picked from 6X to fix issue
    https://github.com/greenplum-db/gpdb/issues/13265.
    Master branch's code also has the same issue, but
    due to other parts of code refactor (like removing
    disuse_physical_tlist), it is hard to find a bad
    case for master branch. But I still keep the test
    in this commit since more tests do no harm. Brief info
    on the code change:
    
        left-anti-semi-join (LASJ_NOTIN), the executor logic of it is similar
        to anti-join or semi-join. So during set_join_references's
        fix_join_expr, for LASJ_NOTIN,  we should use the same logic as
        anti-join.
    
    (cherry picked from commit f9597375fef8f37d07b283e99b6a0b8f3a6b2796)
---
 src/backend/optimizer/plan/setrefs.c              |  3 +-
 src/test/regress/expected/update_gp.out           | 42 +++++++++++++++++++++++
 src/test/regress/expected/update_gp_optimizer.out | 42 +++++++++++++++++++++++
 src/test/regress/sql/update_gp.sql                | 23 +++++++++++++
 4 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/plan/setrefs.c 
b/src/backend/optimizer/plan/setrefs.c
index b1851c6473..66e52469a0 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2554,6 +2554,7 @@ set_join_references(PlannerInfo *root, Join *join, int 
rtoffset)
                case JOIN_LEFT:
                case JOIN_SEMI:
                case JOIN_ANTI:
+               case JOIN_LASJ_NOTIN:
                        inner_itlist->has_non_vars = false;
                        break;
                case JOIN_RIGHT:
@@ -3474,8 +3475,6 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context 
*context)
        }
        if (context->inner_itlist && context->inner_itlist->has_non_vars &&
                context->use_inner_tlist_for_matching_nonvars)
-
-       if (context->inner_itlist && context->inner_itlist->has_non_vars)
        {
                newvar = search_indexed_tlist_for_non_var((Expr *) node,
                                                                                
                  context->inner_itlist,
diff --git a/src/test/regress/expected/update_gp.out 
b/src/test/regress/expected/update_gp.out
index 03b99dd035..7535120a8e 100644
--- a/src/test/regress/expected/update_gp.out
+++ b/src/test/regress/expected/update_gp.out
@@ -791,6 +791,46 @@ SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b 
= 1;
  update_gp_rangep_10_to_20 | 11 | 1 |      4
 (3 rows)
 
+-- Test for update with LASJ_NOTIN
+-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265
+-- Actually master branch does not have the above issue even master
+-- does have the same problematic code (other parts of code are
+-- refactored). Also cherry-pick the case to master and keep it
+-- since more test cases do no harm.
+create table t1_13265(a int, b int, c int, d int) distributed by (a);
+create table t2_13265(a int, b int, c int, d int) distributed by (a);
+insert into t1_13265 values (1, null, 1, 1);
+insert into t2_13265 values (2, null, 2, 2);
+explain (verbose, costs off)
+update t1_13265 set b = 2 where
+(c, d) not in (select c, d from t2_13265 where a = 2);
+                                     QUERY PLAN                                
      
+-------------------------------------------------------------------------------------
+ Update on public.t1_13265
+   ->  Nested Loop Left Anti Semi (Not-In) Join
+         Output: 2, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid
+         Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d))
+         ->  Seq Scan on public.t1_13265
+               Output: t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.c, 
t1_13265.d
+         ->  Materialize
+               Output: t2_13265.ctid, t2_13265.c, t2_13265.d
+               ->  Broadcast Motion 1:3  (slice1; segments: 1)
+                     Output: t2_13265.ctid, t2_13265.c, t2_13265.d
+                     ->  Seq Scan on public.t2_13265
+                           Output: t2_13265.ctid, t2_13265.c, t2_13265.d
+                           Filter: (t2_13265.a = 2)
+ Settings: optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+update t1_13265 set b = 2 where
+(c, d) not in (select c, d from t2_13265 where a = 2);
+select * from t1_13265;
+ a | b | c | d 
+---+---+---+---
+ 1 | 2 | 1 | 1
+(1 row)
+
 -- start_ignore
 drop table r;
 drop table s;
@@ -799,4 +839,6 @@ drop table update_ao_table;
 drop table update_aoco_table;
 drop table nosplitupdate;
 drop table tsplit_entry;
+drop table t1_13265;
+drop table t2_13265;
 -- end_ignore
diff --git a/src/test/regress/expected/update_gp_optimizer.out 
b/src/test/regress/expected/update_gp_optimizer.out
index e7b4da8775..1e59baaa29 100644
--- a/src/test/regress/expected/update_gp_optimizer.out
+++ b/src/test/regress/expected/update_gp_optimizer.out
@@ -822,6 +822,46 @@ SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b 
= 1;
  update_gp_rangep_10_to_20 | 11 | 1 |      4
 (3 rows)
 
+-- Test for update with LASJ_NOTIN
+-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265
+-- Actually master branch does not have the above issue even master
+-- does have the same problematic code (other parts of code are
+-- refactored). Also cherry-pick the case to master and keep it
+-- since more test cases do no harm.
+create table t1_13265(a int, b int, c int, d int) distributed by (a);
+create table t2_13265(a int, b int, c int, d int) distributed by (a);
+insert into t1_13265 values (1, null, 1, 1);
+insert into t2_13265 values (2, null, 2, 2);
+explain (verbose, costs off)
+update t1_13265 set b = 2 where
+(c, d) not in (select c, d from t2_13265 where a = 2);
+                                     QUERY PLAN                                
      
+-------------------------------------------------------------------------------------
+ Update on public.t1_13265
+   ->  Nested Loop Left Anti Semi (Not-In) Join
+         Output: 2, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid
+         Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d))
+         ->  Seq Scan on public.t1_13265
+               Output: t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.c, 
t1_13265.d
+         ->  Materialize
+               Output: t2_13265.ctid, t2_13265.c, t2_13265.d
+               ->  Broadcast Motion 1:3  (slice1; segments: 1)
+                     Output: t2_13265.ctid, t2_13265.c, t2_13265.d
+                     ->  Seq Scan on public.t2_13265
+                           Output: t2_13265.ctid, t2_13265.c, t2_13265.d
+                           Filter: (t2_13265.a = 2)
+ Settings: optimizer = 'on'
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+update t1_13265 set b = 2 where
+(c, d) not in (select c, d from t2_13265 where a = 2);
+select * from t1_13265;
+ a | b | c | d 
+---+---+---+---
+ 1 | 2 | 1 | 1
+(1 row)
+
 -- start_ignore
 drop table r;
 drop table s;
@@ -830,4 +870,6 @@ drop table update_ao_table;
 drop table update_aoco_table;
 drop table nosplitupdate;
 drop table tsplit_entry;
+drop table t1_13265;
+drop table t2_13265;
 -- end_ignore
diff --git a/src/test/regress/sql/update_gp.sql 
b/src/test/regress/sql/update_gp.sql
index bbc2e44829..8019de85a2 100644
--- a/src/test/regress/sql/update_gp.sql
+++ b/src/test/regress/sql/update_gp.sql
@@ -373,6 +373,27 @@ SELECT tableoid::regclass, * FROM update_gp_rangep ORDER 
BY orig_a;
 -- that direct dispatch is effective.
 SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1;
 
+-- Test for update with LASJ_NOTIN
+-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265
+-- Actually master branch does not have the above issue even master
+-- does have the same problematic code (other parts of code are
+-- refactored). Also cherry-pick the case to master and keep it
+-- since more test cases do no harm.
+create table t1_13265(a int, b int, c int, d int) distributed by (a);
+create table t2_13265(a int, b int, c int, d int) distributed by (a);
+
+insert into t1_13265 values (1, null, 1, 1);
+insert into t2_13265 values (2, null, 2, 2);
+
+explain (verbose, costs off)
+update t1_13265 set b = 2 where
+(c, d) not in (select c, d from t2_13265 where a = 2);
+
+update t1_13265 set b = 2 where
+(c, d) not in (select c, d from t2_13265 where a = 2);
+
+select * from t1_13265;
+
 -- start_ignore
 drop table r;
 drop table s;
@@ -381,4 +402,6 @@ drop table update_ao_table;
 drop table update_aoco_table;
 drop table nosplitupdate;
 drop table tsplit_entry;
+drop table t1_13265;
+drop table t2_13265;
 -- end_ignore


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to