This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit c66c184aca24d375b9297d835c72853c3aaaee84 Author: 1mmortal <[email protected]> AuthorDate: Tue Oct 11 15:01:21 2022 +0800 Fix incorrect sortOp and eqOp generated by IsCorrelatedEqualityOpExpr. (#14108) In GPDB, expression subquery is promoted to join, e.g. select * from T where a > (select 10*avg(x) from R where T.b=R.y); in postgres, the plan is: QUERY PLAN ----------------------------------------------------------------- Seq Scan on t (cost=0.00..80364.30 rows=753 width=8) Filter: ((a)::numeric > (SubPlan 1)) SubPlan 1 -> Aggregate (cost=35.53..35.54 rows=1 width=32) -> Seq Scan on r (cost=0.00..35.50 rows=10 width=4) Filter: (t.b = y) while in GPDB, the subquery is promoted to join: QUERY PLAN --------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Hash Join (cost=477.00..969.11 rows=9567 width=8) Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) -> Seq Scan on t (cost=0.00..321.00 rows=28700 width=8) -> Hash (cost=472.83..472.83 rows=333 width=36) -> Subquery Scan on "Expr_SUBQUERY" -> HashAggregate Group Key: r.y -> Seq Scan on r Optimizer: Postgres query optimizer But if the T.b and R.y are different type, it will report error like: ERROR: operator 37 is not a valid ordering operator (pathkeys.c:579) The root cause is that IsCorrelatedEqualityOpExpr may generate incorrect sort operator and equal operator for SortGroupClause when the types of left and right operands of OpExpr are different. Since get_ordering_op_properties() requires the type of the left and right operands being consistent and SortGroupClause->tleSortGroupRef is referencing the target entry created from innerExpr, we teach IsCorrelatedEqualityOpExpr() construct appropriate eqOp and sortOp according to the type of innerExpr. --- src/backend/cdb/cdbsubselect.c | 29 ++-- src/test/regress/expected/subselect_gp.out | 186 +++++++++++++++++++++ .../regress/expected/subselect_gp_optimizer.out | 186 +++++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 83 +++++++++ 4 files changed, 470 insertions(+), 14 deletions(-) diff --git a/src/backend/cdb/cdbsubselect.c b/src/backend/cdb/cdbsubselect.c index 07fd8d0f6a..6180441c3e 100644 --- a/src/backend/cdb/cdbsubselect.c +++ b/src/backend/cdb/cdbsubselect.c @@ -148,13 +148,13 @@ IsCorrelatedOpExpr(OpExpr *opexp, Expr **innerExpr) * returns true if correlated equality condition * *innerExpr - points to the inner expr i.e. bar(innervar) in the condition * *eqOp and *sortOp - equality and < operators, to implement the condition as a mergejoin. + * The *eqOp and *sortOp should be determined according to innervar's type. */ static bool IsCorrelatedEqualityOpExpr(OpExpr *opexp, Expr **innerExpr, Oid *eqOp, Oid *sortOp, bool *hashable) { Oid opfamily; - Oid ltype; - Oid rtype; + Oid innerExprType; List *l; Assert(opexp); @@ -163,11 +163,17 @@ IsCorrelatedEqualityOpExpr(OpExpr *opexp, Expr **innerExpr, Oid *eqOp, Oid *sort Assert(eqOp); Assert(sortOp); + if (!IsCorrelatedOpExpr(opexp, innerExpr)) + return false; + + Assert(*innerExpr); + innerExprType = exprType((Node *)*innerExpr); + /* * If this is an expression of the form a = b, then we want to know about * the vars involved. */ - if (!op_mergejoinable(opexp->opno, exprType(linitial(opexp->args)))) + if (!op_mergejoinable(opexp->opno, innerExprType)) return false; /* @@ -182,24 +188,19 @@ IsCorrelatedEqualityOpExpr(OpExpr *opexp, Expr **innerExpr, Oid *eqOp, Oid *sort list_free(l); /* - * Look up the correct sort operator from the chosen opfamily. + * Look up the correct equility/sort operators from the chosen opfamily. */ - ltype = exprType(linitial(opexp->args)); - rtype = exprType(lsecond(opexp->args)); - *eqOp = get_opfamily_member(opfamily, ltype, rtype, BTEqualStrategyNumber); + *eqOp = get_opfamily_member(opfamily, innerExprType, innerExprType, BTEqualStrategyNumber); if (!OidIsValid(*eqOp)) /* should not happen */ elog(ERROR, "could not find member %d(%u,%u) of opfamily %u", - BTEqualStrategyNumber, ltype, rtype, opfamily); + BTEqualStrategyNumber, innerExprType, innerExprType, opfamily); - *sortOp = get_opfamily_member(opfamily, ltype, rtype, BTLessStrategyNumber); + *sortOp = get_opfamily_member(opfamily, innerExprType, innerExprType, BTLessStrategyNumber); if (!OidIsValid(*sortOp)) /* should not happen */ elog(ERROR, "could not find member %d(%u,%u) of opfamily %u", - BTLessStrategyNumber, ltype, rtype, opfamily); + BTLessStrategyNumber, innerExprType, innerExprType, opfamily); - *hashable = op_hashjoinable(*eqOp, ltype); - - if (!IsCorrelatedOpExpr(opexp, innerExpr)) - return false; + *hashable = op_hashjoinable(*eqOp, innerExprType); return true; } diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 966335ab41..2995f52a6d 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3036,3 +3036,189 @@ select * from param_t a where a.i in (1 row) drop table if exists param_t; +-- A guard test case for gpexpand's populate SQL +-- Some simple notes and background is: we want to compute +-- table size efficiently, it is better to avoid invoke +-- pg_relation_size() in serial on QD, since this function +-- will dispatch for each tuple. The bad pattern SQL is like +-- select pg_relation_size(oid) from pg_class where xxx +-- The idea is force pg_relations_size is evaluated on each +-- segment and the sum the result together to get the final +-- result. To make sure correctness, we have to evaluate +-- pg_relation_size before any motion. The skill here is +-- to wrap this in a subquery, due to volatile of pg_relation_size, +-- this subquery won't be pulled up. Plus the skill of +-- gp_dist_random('pg_class') we can achieve this goal. +-- the below test is to verify the plan, we should see pg_relation_size +-- is evaludated on each segment and then motion then sum together. The +-- SQL pattern is a catalog join a table size "dict". +set gp_enable_multiphase_agg = on; +-- force nestloop join to make test stable since we +-- are testing plan and do not care about where we +-- put hash table. +set enable_hashjoin = off; +set enable_nestloop = on; +set enable_indexscan = off; +set enable_bitmapscan = off; +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> Nested Loop + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Join Filter: (pc.oid = pg_class.oid) + -> Redistribute Motion 1:3 (slice2) + Output: pc.relname, pc.oid + Hash Key: pc.oid + -> Seq Scan on pg_catalog.pg_class pc + Output: pc.relname, pc.oid + -> Materialize + Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Optimizer: Postgres query optimizer +(21 rows) + +set gp_enable_multiphase_agg = off; +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> Nested Loop + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Join Filter: (pc.oid = pg_class.oid) + -> Redistribute Motion 1:3 (slice2) + Output: pc.relname, pc.oid + Hash Key: pc.oid + -> Seq Scan on pg_catalog.pg_class pc + Output: pc.relname, pc.oid + -> Materialize + Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Optimizer: Postgres query optimizer +(21 rows) + +reset gp_enable_multiphase_agg; +reset enable_hashjoin; +reset enable_nestloop; +reset enable_indexscan; +reset enable_bitmapscan; +create table sublink_outer_table(a int, b int) distributed by(b); +create table sublink_inner_table(x int, y bigint) distributed by(y); +set optimizer to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=436.00..1310.77 rows=28700 width=8) + -> Hash Join (cost=436.00..928.11 rows=9567 width=8) + Hash Cond: (t.b = sublink_inner_table.y) + Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x)))) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=431.83..431.83 rows=333 width=40) + -> HashAggregate (cost=423.50..428.50 rows=333 width=40) + Group Key: sublink_inner_table.y + -> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(10 rows) + +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=436.00..1310.77 rows=28700 width=8) + -> Hash Join (cost=436.00..928.11 rows=9567 width=8) + Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) + Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=431.83..431.83 rows=333 width=40) + -> Subquery Scan on "Expr_SUBQUERY" (cost=423.50..431.83 rows=333 width=40) + -> HashAggregate (cost=423.50..428.50 rows=333 width=40) + Group Key: r.y + -> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(11 rows) + +set enable_hashagg to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2404.84..3279.62 rows=28700 width=8) + -> Hash Join (cost=2404.84..2896.95 rows=9567 width=8) + Hash Cond: (t.b = sublink_inner_table.y) + Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x)))) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=2400.67..2400.67 rows=333 width=40) + -> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40) + Group Key: sublink_inner_table.y + -> Sort (cost=2197.59..2262.51 rows=25967 width=12) + Sort Key: sublink_inner_table.y + -> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(12 rows) + +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2404.84..3279.62 rows=28700 width=8) + -> Hash Join (cost=2404.84..2896.95 rows=9567 width=8) + Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) + Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=2400.67..2400.67 rows=333 width=40) + -> Subquery Scan on "Expr_SUBQUERY" (cost=2197.59..2400.67 rows=333 width=40) + -> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40) + Group Key: r.y + -> Sort (cost=2197.59..2262.51 rows=25967 width=12) + Sort Key: r.y + -> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(13 rows) + +drop table sublink_outer_table; +drop table sublink_inner_table; +reset optimizer; +reset enable_hashagg; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 533abf961d..8aba40fb65 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3125,3 +3125,189 @@ select * from param_t a where a.i in (1 row) drop table if exists param_t; +-- A guard test case for gpexpand's populate SQL +-- Some simple notes and background is: we want to compute +-- table size efficiently, it is better to avoid invoke +-- pg_relation_size() in serial on QD, since this function +-- will dispatch for each tuple. The bad pattern SQL is like +-- select pg_relation_size(oid) from pg_class where xxx +-- The idea is force pg_relations_size is evaluated on each +-- segment and the sum the result together to get the final +-- result. To make sure correctness, we have to evaluate +-- pg_relation_size before any motion. The skill here is +-- to wrap this in a subquery, due to volatile of pg_relation_size, +-- this subquery won't be pulled up. Plus the skill of +-- gp_dist_random('pg_class') we can achieve this goal. +-- the below test is to verify the plan, we should see pg_relation_size +-- is evaludated on each segment and then motion then sum together. The +-- SQL pattern is a catalog join a table size "dict". +set gp_enable_multiphase_agg = on; +-- force nestloop join to make test stable since we +-- are testing plan and do not care about where we +-- put hash table. +set enable_hashjoin = off; +set enable_nestloop = on; +set enable_indexscan = off; +set enable_bitmapscan = off; +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> Nested Loop + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Join Filter: (pc.oid = pg_class.oid) + -> Redistribute Motion 1:3 (slice2) + Output: pc.relname, pc.oid + Hash Key: pc.oid + -> Seq Scan on pg_catalog.pg_class pc + Output: pc.relname, pc.oid + -> Materialize + Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Optimizer: Postgres query optimizer +(21 rows) + +set gp_enable_multiphase_agg = off; +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> Nested Loop + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Join Filter: (pc.oid = pg_class.oid) + -> Redistribute Motion 1:3 (slice2) + Output: pc.relname, pc.oid + Hash Key: pc.oid + -> Seq Scan on pg_catalog.pg_class pc + Output: pc.relname, pc.oid + -> Materialize + Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Optimizer: Postgres query optimizer +(21 rows) + +reset gp_enable_multiphase_agg; +reset enable_hashjoin; +reset enable_nestloop; +reset enable_indexscan; +reset enable_bitmapscan; +create table sublink_outer_table(a int, b int) distributed by(b); +create table sublink_inner_table(x int, y bigint) distributed by(y); +set optimizer to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=436.00..1310.77 rows=28700 width=8) + -> Hash Join (cost=436.00..928.11 rows=9567 width=8) + Hash Cond: (t.b = sublink_inner_table.y) + Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x)))) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=431.83..431.83 rows=333 width=40) + -> HashAggregate (cost=423.50..428.50 rows=333 width=40) + Group Key: sublink_inner_table.y + -> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(10 rows) + +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=436.00..1310.77 rows=28700 width=8) + -> Hash Join (cost=436.00..928.11 rows=9567 width=8) + Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) + Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=431.83..431.83 rows=333 width=40) + -> Subquery Scan on "Expr_SUBQUERY" (cost=423.50..431.83 rows=333 width=40) + -> HashAggregate (cost=423.50..428.50 rows=333 width=40) + Group Key: r.y + -> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(11 rows) + +set enable_hashagg to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2404.84..3279.62 rows=28700 width=8) + -> Hash Join (cost=2404.84..2896.95 rows=9567 width=8) + Hash Cond: (t.b = sublink_inner_table.y) + Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x)))) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=2400.67..2400.67 rows=333 width=40) + -> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40) + Group Key: sublink_inner_table.y + -> Sort (cost=2197.59..2262.51 rows=25967 width=12) + Sort Key: sublink_inner_table.y + -> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(12 rows) + +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2404.84..3279.62 rows=28700 width=8) + -> Hash Join (cost=2404.84..2896.95 rows=9567 width=8) + Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) + Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=2400.67..2400.67 rows=333 width=40) + -> Subquery Scan on "Expr_SUBQUERY" (cost=2197.59..2400.67 rows=333 width=40) + -> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40) + Group Key: r.y + -> Sort (cost=2197.59..2262.51 rows=25967 width=12) + Sort Key: r.y + -> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(13 rows) + +drop table sublink_outer_table; +drop table sublink_inner_table; +reset optimizer; +reset enable_hashagg; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 5391e4747c..de95b3d355 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1214,3 +1214,86 @@ select * from param_t a where a.i in drop table if exists param_t; + +-- A guard test case for gpexpand's populate SQL +-- Some simple notes and background is: we want to compute +-- table size efficiently, it is better to avoid invoke +-- pg_relation_size() in serial on QD, since this function +-- will dispatch for each tuple. The bad pattern SQL is like +-- select pg_relation_size(oid) from pg_class where xxx +-- The idea is force pg_relations_size is evaluated on each +-- segment and the sum the result together to get the final +-- result. To make sure correctness, we have to evaluate +-- pg_relation_size before any motion. The skill here is +-- to wrap this in a subquery, due to volatile of pg_relation_size, +-- this subquery won't be pulled up. Plus the skill of +-- gp_dist_random('pg_class') we can achieve this goal. + +-- the below test is to verify the plan, we should see pg_relation_size +-- is evaludated on each segment and then motion then sum together. The +-- SQL pattern is a catalog join a table size "dict". + +set gp_enable_multiphase_agg = on; +-- force nestloop join to make test stable since we +-- are testing plan and do not care about where we +-- put hash table. +set enable_hashjoin = off; +set enable_nestloop = on; +set enable_indexscan = off; +set enable_bitmapscan = off; + +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + +set gp_enable_multiphase_agg = off; + +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + +reset gp_enable_multiphase_agg; +reset enable_hashjoin; +reset enable_nestloop; +reset enable_indexscan; +reset enable_bitmapscan; +create table sublink_outer_table(a int, b int) distributed by(b); +create table sublink_inner_table(x int, y bigint) distributed by(y); + +set optimizer to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + +set enable_hashagg to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); +drop table sublink_outer_table; +drop table sublink_inner_table; +reset optimizer; +reset enable_hashagg; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
