This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit d42a8c6298c433436b9d0206c4c3acf5c8ccc096 Author: Zhenghua Lyu <[email protected]> AuthorDate: Thu Mar 24 10:56:37 2022 +0800 Do not match non vars in inner plan's target for LASJ_NOTIN. This commit is cherry-picked from 6X to fix issue https://github.com/greenplum-db/gpdb/issues/13265. Master branch's code also has the same issue, but due to other parts of code refactor (like removing disuse_physical_tlist), it is hard to find a bad case for master branch. But I still keep the test in this commit since more tests do no harm. Brief info on the code change: left-anti-semi-join (LASJ_NOTIN), the executor logic of it is similar to anti-join or semi-join. So during set_join_references's fix_join_expr, for LASJ_NOTIN, we should use the same logic as anti-join. (cherry picked from commit f9597375fef8f37d07b283e99b6a0b8f3a6b2796) --- src/backend/optimizer/plan/setrefs.c | 3 +- src/test/regress/expected/update_gp.out | 42 +++++++++++++++++++++++ src/test/regress/expected/update_gp_optimizer.out | 42 +++++++++++++++++++++++ src/test/regress/sql/update_gp.sql | 23 +++++++++++++ 4 files changed, 108 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index b1851c6473..66e52469a0 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -2554,6 +2554,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) case JOIN_LEFT: case JOIN_SEMI: case JOIN_ANTI: + case JOIN_LASJ_NOTIN: inner_itlist->has_non_vars = false; break; case JOIN_RIGHT: @@ -3474,8 +3475,6 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) } if (context->inner_itlist && context->inner_itlist->has_non_vars && context->use_inner_tlist_for_matching_nonvars) - - if (context->inner_itlist && context->inner_itlist->has_non_vars) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->inner_itlist, diff --git a/src/test/regress/expected/update_gp.out b/src/test/regress/expected/update_gp.out index 03b99dd035..7535120a8e 100644 --- a/src/test/regress/expected/update_gp.out +++ b/src/test/regress/expected/update_gp.out @@ -791,6 +791,46 @@ SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1; update_gp_rangep_10_to_20 | 11 | 1 | 4 (3 rows) +-- Test for update with LASJ_NOTIN +-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265 +-- Actually master branch does not have the above issue even master +-- does have the same problematic code (other parts of code are +-- refactored). Also cherry-pick the case to master and keep it +-- since more test cases do no harm. +create table t1_13265(a int, b int, c int, d int) distributed by (a); +create table t2_13265(a int, b int, c int, d int) distributed by (a); +insert into t1_13265 values (1, null, 1, 1); +insert into t2_13265 values (2, null, 2, 2); +explain (verbose, costs off) +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); + QUERY PLAN +------------------------------------------------------------------------------------- + Update on public.t1_13265 + -> Nested Loop Left Anti Semi (Not-In) Join + Output: 2, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid + Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) + -> Seq Scan on public.t1_13265 + Output: t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.c, t1_13265.d + -> Materialize + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + -> Seq Scan on public.t2_13265 + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + Filter: (t2_13265.a = 2) + Settings: optimizer = 'off' + Optimizer: Postgres query optimizer +(15 rows) + +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); +select * from t1_13265; + a | b | c | d +---+---+---+--- + 1 | 2 | 1 | 1 +(1 row) + -- start_ignore drop table r; drop table s; @@ -799,4 +839,6 @@ drop table update_ao_table; drop table update_aoco_table; drop table nosplitupdate; drop table tsplit_entry; +drop table t1_13265; +drop table t2_13265; -- end_ignore diff --git a/src/test/regress/expected/update_gp_optimizer.out b/src/test/regress/expected/update_gp_optimizer.out index e7b4da8775..1e59baaa29 100644 --- a/src/test/regress/expected/update_gp_optimizer.out +++ b/src/test/regress/expected/update_gp_optimizer.out @@ -822,6 +822,46 @@ SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1; update_gp_rangep_10_to_20 | 11 | 1 | 4 (3 rows) +-- Test for update with LASJ_NOTIN +-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265 +-- Actually master branch does not have the above issue even master +-- does have the same problematic code (other parts of code are +-- refactored). Also cherry-pick the case to master and keep it +-- since more test cases do no harm. +create table t1_13265(a int, b int, c int, d int) distributed by (a); +create table t2_13265(a int, b int, c int, d int) distributed by (a); +insert into t1_13265 values (1, null, 1, 1); +insert into t2_13265 values (2, null, 2, 2); +explain (verbose, costs off) +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); + QUERY PLAN +------------------------------------------------------------------------------------- + Update on public.t1_13265 + -> Nested Loop Left Anti Semi (Not-In) Join + Output: 2, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid + Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) + -> Seq Scan on public.t1_13265 + Output: t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.c, t1_13265.d + -> Materialize + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + -> Seq Scan on public.t2_13265 + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + Filter: (t2_13265.a = 2) + Settings: optimizer = 'on' + Optimizer: Postgres query optimizer +(15 rows) + +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); +select * from t1_13265; + a | b | c | d +---+---+---+--- + 1 | 2 | 1 | 1 +(1 row) + -- start_ignore drop table r; drop table s; @@ -830,4 +870,6 @@ drop table update_ao_table; drop table update_aoco_table; drop table nosplitupdate; drop table tsplit_entry; +drop table t1_13265; +drop table t2_13265; -- end_ignore diff --git a/src/test/regress/sql/update_gp.sql b/src/test/regress/sql/update_gp.sql index bbc2e44829..8019de85a2 100644 --- a/src/test/regress/sql/update_gp.sql +++ b/src/test/regress/sql/update_gp.sql @@ -373,6 +373,27 @@ SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a; -- that direct dispatch is effective. SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1; +-- Test for update with LASJ_NOTIN +-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265 +-- Actually master branch does not have the above issue even master +-- does have the same problematic code (other parts of code are +-- refactored). Also cherry-pick the case to master and keep it +-- since more test cases do no harm. +create table t1_13265(a int, b int, c int, d int) distributed by (a); +create table t2_13265(a int, b int, c int, d int) distributed by (a); + +insert into t1_13265 values (1, null, 1, 1); +insert into t2_13265 values (2, null, 2, 2); + +explain (verbose, costs off) +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); + +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); + +select * from t1_13265; + -- start_ignore drop table r; drop table s; @@ -381,4 +402,6 @@ drop table update_ao_table; drop table update_aoco_table; drop table nosplitupdate; drop table tsplit_entry; +drop table t1_13265; +drop table t2_13265; -- end_ignore --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
