This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit da1f254b8289b4fbb3c1d63ce00361c7fd0a449b Author: Evgeniy Ratkov <[email protected]> AuthorDate: Mon Nov 21 12:03:37 2022 +0300 Fix failure when DynamicSeqScan has a subPlan #14505 If a query plan contains a SubPlan under a DynamicSeqScan node, we got a situation where the macro "Insist" fails at the condition "rsi->pstype == planstate->type". We have duplicate messages for Materialize and Broadcast Motion nodes in messages from QEs to QD. This is because the QE executing the slice initializes two identical subplans for the DynamicSeqScan node. The first subplan item is initialized at executor start. The second one - at execution of the DynamicSeqScan node in initNextTableToScan. This duplication was caused by commit c4a3ce097f554a15ef489630928f03d19e129b39 that added initialization of quals and targetlist at executor start alongside initialization at running stage. Another issue with the second subplan initialization is that this subplan structure is allocated in the temporary memory context partitionMemoryContext. In particular, after targetlist remapping we end up with a dangling pointer in the list of subplans. 
--- src/backend/executor/nodeDynamicBitmapHeapscan.c | 26 +------ src/backend/executor/nodeDynamicSeqscan.c | 24 +------ src/include/nodes/execnodes.h | 10 --- src/test/regress/expected/gporca.out | 2 +- src/test/regress/expected/gporca_optimizer.out | 2 +- src/test/regress/expected/qp_dropped_cols.out | 84 ++++++++++++++++++++++ .../regress/expected/qp_dropped_cols_optimizer.out | 71 ++++++++++++++++++ src/test/regress/sql/gporca.sql | 2 +- src/test/regress/sql/qp_dropped_cols.sql | 30 ++++++++ 9 files changed, 192 insertions(+), 59 deletions(-) diff --git a/src/backend/executor/nodeDynamicBitmapHeapscan.c b/src/backend/executor/nodeDynamicBitmapHeapscan.c index cf8c980d5d..aa03c4f31d 100644 --- a/src/backend/executor/nodeDynamicBitmapHeapscan.c +++ b/src/backend/executor/nodeDynamicBitmapHeapscan.c @@ -72,6 +72,8 @@ ExecInitDynamicBitmapHeapScan(DynamicBitmapHeapScan *node, EState *estate, int e state->ss.ps.scanopsfixed = false; state->ss.ps.scanopsset = true; + state->ss.ps.qual = ExecInitQual(node->bitmapheapscan.scan.plan.qual, (PlanState *) state); + /* * Initialize result tuple type and projection info. */ @@ -80,8 +82,6 @@ ExecInitDynamicBitmapHeapScan(DynamicBitmapHeapScan *node, EState *estate, int e reloid = exec_rt_fetch(node->bitmapheapscan.scan.scanrelid, estate)->relid; Assert(OidIsValid(reloid)); - state->firstPartition = true; - /* lastRelOid is used to remap varattno for heterogeneous partitions */ state->lastRelOid = reloid; @@ -176,29 +176,9 @@ initNextTableToScan(DynamicBitmapHeapScanState *node) * the new varnos correspond to */ node->lastRelOid = pid; + pfree(attMap); } - /* - * For the very first partition, the qual of planstate is set to null. So, we must - * initialize quals, regardless of remapping requirements. For later - * partitions, we only initialize quals if a column re-mapping is necessary. 
- */ - if (attMap || node->firstPartition) - { - node->firstPartition = false; - MemoryContextReset(node->partitionMemoryContext); - MemoryContext oldCxt = MemoryContextSwitchTo(node->partitionMemoryContext); - - /* Initialize child expressions */ - scanState->ps.qual = - ExecInitQual(scanState->ps.plan->qual, (PlanState *) scanState); - - MemoryContextSwitchTo(oldCxt); - } - - if (attMap) - free_attrmap(attMap); - node->bhsState = ExecInitBitmapHeapScanForPartition(&plan->bitmapheapscan, estate, node->eflags, currentRelation); diff --git a/src/backend/executor/nodeDynamicSeqscan.c b/src/backend/executor/nodeDynamicSeqscan.c index 657aa5d056..a4eb8a0e9f 100644 --- a/src/backend/executor/nodeDynamicSeqscan.c +++ b/src/backend/executor/nodeDynamicSeqscan.c @@ -83,8 +83,6 @@ ExecInitDynamicSeqScan(DynamicSeqScan *node, EState *estate, int eflags) reloid = exec_rt_fetch(node->seqscan.scanrelid, estate)->relid; Assert(OidIsValid(reloid)); - state->firstPartition = true; - /* lastRelOid is used to remap varattno for heterogeneous partitions */ state->lastRelOid = reloid; @@ -167,29 +165,9 @@ initNextTableToScan(DynamicSeqScanState *node) * the new varnos correspond to */ node->lastRelOid = *pid; + pfree(attMap); } - /* - * For the very first partition, the qual of planstate is set to null. So, we must - * initialize quals, regardless of remapping requirements. For later - * partitions, we only initialize quals if a column re-mapping is necessary. 
- */ - if (attMap || node->firstPartition) - { - node->firstPartition = false; - MemoryContextReset(node->partitionMemoryContext); - MemoryContext oldCxt = MemoryContextSwitchTo(node->partitionMemoryContext); - - /* Initialize child expressions */ - scanState->ps.qual = - ExecInitQual(scanState->ps.plan->qual, (PlanState *) scanState); - - MemoryContextSwitchTo(oldCxt); - } - - if (attMap) - free_attrmap(attMap); - node->seqScanState = ExecInitSeqScanForPartition(&plan->seqscan, estate, currentRelation); return true; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index a10c120e17..44b54dc68f 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1876,11 +1876,6 @@ typedef struct DynamicBitmapHeapScanState int eflags; BitmapHeapScanState *bhsState; - /* - * The first partition requires initialization of expression states, - * such as qual, regardless of whether we need to re-map varattno - */ - bool firstPartition; /* * lastRelOid is the last relation that corresponds to the * varattno mapping of qual and target list. Each time we open a new partition, we will @@ -2183,11 +2178,6 @@ typedef struct DynamicSeqScanState int eflags; SeqScanState *seqScanState; - /* - * The first partition requires initialization of expression states, - * such as qual and targetlist, regardless of whether we need to re-map varattno - */ - bool firstPartition; /* * lastRelOid is the last relation that corresponds to the * varattno mapping of qual and target list. 
Each time we open a new partition, we will diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index c8f00b52d5..9d33dc28b0 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -11442,7 +11442,7 @@ SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = ---+---+---+---+--- (0 rows) -explain SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); +explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); QUERY PLAN ------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=10000000004.33..10000000039.45 rows=7 width=20) diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out index 974e7f831d..900dd368e4 100644 --- a/src/test/regress/expected/gporca_optimizer.out +++ b/src/test/regress/expected/gporca_optimizer.out @@ -11527,7 +11527,7 @@ SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = ---+---+---+---+--- (0 rows) -explain SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); +explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); QUERY PLAN -------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324481.18 rows=1 width=20) diff --git a/src/test/regress/expected/qp_dropped_cols.out b/src/test/regress/expected/qp_dropped_cols.out index 923c7a79e6..df61f4b7e6 100644 --- a/src/test/regress/expected/qp_dropped_cols.out +++ b/src/test/regress/expected/qp_dropped_cols.out @@ 
-14940,6 +14940,90 @@ SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42; ---+---+--- (0 rows) +-- Exchange partitions with dropped columns and check subplan with attribute remapping into dynamicSeqscan +-- start_ignore +DROP TABLE if exists ds_main; +NOTICE: table "ds_main" does not exist, skipping +DROP TABLE if exists ds_part1; +NOTICE: table "ds_part1" does not exist, skipping +DROP TABLE if exists non_part1; +NOTICE: table "non_part1" does not exist, skipping +DROP TABLE if exists non_part2; +NOTICE: table "non_part2" does not exist, skipping +-- end_ignore +CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) END (10) EVERY (2), DEFAULT PARTITION deflt); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE ds_part1 (a int, a1 int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE non_part1 (c INT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE non_part2 (e INT, f INT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. +SET optimizer_enforce_subplans TO ON; +-- drop columns to get attribute remapping +ALTER TABLE ds_part1 drop column a1; +ALTER TABLE ds_main exchange partition for (1) with table ds_part1; +INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i; +INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i; +INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i; +analyze ds_main; +analyze non_part1; +analyze non_part2; +EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ds_main_1_prt_2.a + -> Sort + Sort Key: ds_main_1_prt_2.a + -> HashAggregate + Group Key: (RowIdExpr) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (RowIdExpr) + -> Nested Loop + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Hash Join + Hash Cond: (ds_main_1_prt_2.c = non_part2.e) + -> Append + Partition Selectors: $0 + -> Seq Scan on ds_main_1_prt_2 + Filter: (a IS NOT NULL) + -> Seq Scan on ds_main_1_prt_3 + Filter: (a IS NOT NULL) + -> Seq Scan on ds_main_1_prt_4 + Filter: (a IS NOT NULL) + -> Seq Scan on ds_main_1_prt_5 + Filter: (a IS NOT NULL) + -> Seq Scan on ds_main_1_prt_6 + Filter: (a IS NOT NULL) + -> Seq Scan on ds_main_1_prt_deflt + Filter: (a IS NOT NULL) + -> Hash + -> Partition Selector (selector id: $0) + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on non_part2 + -> Materialize + -> Seq Scan on non_part1 + Optimizer: Postgres query optimizer +(33 rows) + +SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + a | b | c | e | f +---+---+----+----+---- + 1 | 2 | 3 | 3 | 4 + 2 | 3 | 4 | 4 | 5 + 3 | 4 | 5 | 5 | 6 + 4 | 5 | 6 | 6 | 7 + 
5 | 6 | 7 | 7 | 8 + 6 | 7 | 8 | 8 | 9 + 7 | 8 | 9 | 9 | 10 + 8 | 9 | 10 | 10 | 11 +(8 rows) + -- As of this writing, pg_dump creates an invalid dump for some of the tables -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop -- the tables, or the pg_upgrade test fails. diff --git a/src/test/regress/expected/qp_dropped_cols_optimizer.out b/src/test/regress/expected/qp_dropped_cols_optimizer.out index 37155c9dba..be9cc4ceb1 100644 --- a/src/test/regress/expected/qp_dropped_cols_optimizer.out +++ b/src/test/regress/expected/qp_dropped_cols_optimizer.out @@ -14944,6 +14944,77 @@ SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42; ---+---+--- (0 rows) +-- Exchange partitions with dropped columns and check subplan with attribute remapping into dynamicSeqscan +-- start_ignore +DROP TABLE if exists ds_main; +NOTICE: table "ds_main" does not exist, skipping +DROP TABLE if exists ds_part1; +NOTICE: table "ds_part1" does not exist, skipping +DROP TABLE if exists non_part1; +NOTICE: table "non_part1" does not exist, skipping +DROP TABLE if exists non_part2; +NOTICE: table "non_part2" does not exist, skipping +-- end_ignore +CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) END (10) EVERY (2), DEFAULT PARTITION deflt); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE ds_part1 (a int, a1 int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE TABLE non_part1 (c INT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE non_part2 (e INT, f INT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +SET optimizer_enforce_subplans TO ON; +-- drop columns to get attribute remapping +ALTER TABLE ds_part1 drop column a1; +ALTER TABLE ds_main exchange partition for (1) with table ds_part1; +INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i; +INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i; +INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i; +analyze ds_main; +analyze non_part1; +analyze non_part2; +EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ds_main.a + -> Sort + Sort Key: ds_main.a + -> Hash Join + Hash Cond: (ds_main.c = non_part2.e) + -> Dynamic Seq Scan on ds_main + Number of partitions to scan: 6 (out of 6) + Filter: ((a = a) AND (SubPlan 1)) + SubPlan 1 + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> Limit + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on non_part1 + -> Hash + -> Partition Selector (selector id: $0) + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on non_part2 + Optimizer: Pivotal Optimizer (GPORCA) +(20 rows) + +SELECT * FROM ds_main, non_part2 WHERE ds_main.c = 
non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + a | b | c | e | f +---+---+----+----+---- + 1 | 2 | 3 | 3 | 4 + 2 | 3 | 4 | 4 | 5 + 3 | 4 | 5 | 5 | 6 + 4 | 5 | 6 | 6 | 7 + 5 | 6 | 7 | 7 | 8 + 6 | 7 | 8 | 8 | 9 + 7 | 8 | 9 | 9 | 10 + 8 | 9 | 10 | 10 | 11 +(8 rows) + -- As of this writing, pg_dump creates an invalid dump for some of the tables -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop -- the tables, or the pg_upgrade test fails. diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql index ae5ada7b5d..b28146c8ab 100644 --- a/src/test/regress/sql/gporca.sql +++ b/src/test/regress/sql/gporca.sql @@ -2347,7 +2347,7 @@ analyze ds_part; analyze non_part1; analyze non_part2; SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); -explain SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); +explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); SELECT *, a IN ( SELECT b + 1 FROM non_part1) FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b FROM non_part1); CREATE INDEX ds_idx ON ds_part(a); diff --git a/src/test/regress/sql/qp_dropped_cols.sql b/src/test/regress/sql/qp_dropped_cols.sql index 904ec5d90b..a8dbd53964 100644 --- a/src/test/regress/sql/qp_dropped_cols.sql +++ b/src/test/regress/sql/qp_dropped_cols.sql @@ -8297,6 +8297,36 @@ SELECT * FROM leaf_with_dropped_cols_index_part1 WHERE a>42 and z<42; EXPLAIN SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42; SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42; +-- Exchange partitions with dropped columns and check subplan with attribute remapping into dynamicSeqscan +-- start_ignore +DROP TABLE if exists ds_main; +DROP TABLE if exists ds_part1; 
+DROP TABLE if exists non_part1; +DROP TABLE if exists non_part2; +-- end_ignore + +CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) END (10) EVERY (2), DEFAULT PARTITION deflt); +CREATE TABLE ds_part1 (a int, a1 int, b int, c int); +CREATE TABLE non_part1 (c INT); +CREATE TABLE non_part2 (e INT, f INT); + +SET optimizer_enforce_subplans TO ON; + +-- drop columns to get attribute remapping +ALTER TABLE ds_part1 drop column a1; +ALTER TABLE ds_main exchange partition for (1) with table ds_part1; + +INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i; +INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i; +INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i; + +analyze ds_main; +analyze non_part1; +analyze non_part2; + +EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; +SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + -- As of this writing, pg_dump creates an invalid dump for some of the tables -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop -- the tables, or the pg_upgrade test fails. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
