This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit da1f254b8289b4fbb3c1d63ce00361c7fd0a449b
Author: Evgeniy Ratkov <[email protected]>
AuthorDate: Mon Nov 21 12:03:37 2022 +0300

    Fix failure when DynamicSeqScan has a subPlan #14505
    
    If a query plan contains a SubPlan under a DynamicSeqScan node, we hit a failure:
    the macro "Insist" fails at the condition "rsi->pstype == planstate->type".
    We see duplicate entries for Materialize and Broadcast Motion nodes in the 
messages from QEs to QD.
    This happens because the QE executing the slice initializes two identical subplans 
for the DynamicSeqScan node.
    The first subplan item is initialized at executor start. The second one is 
initialized during execution of the DynamicSeqScan node
    in initNextTableToScan. This duplication was caused by commit 
c4a3ce097f554a15ef489630928f03d19e129b39
    that added initialization of quals and targetlist at executor start 
alongside initialization at running stage.
    
    Another issue with the second subplan initialization is that this subplan 
structure is allocated in
    the temporary memory context partitionMemoryContext. In particular, after 
targetlist remapping we end up with a
    dangling pointer in the list of subplans.
---
 src/backend/executor/nodeDynamicBitmapHeapscan.c   | 26 +------
 src/backend/executor/nodeDynamicSeqscan.c          | 24 +------
 src/include/nodes/execnodes.h                      | 10 ---
 src/test/regress/expected/gporca.out               |  2 +-
 src/test/regress/expected/gporca_optimizer.out     |  2 +-
 src/test/regress/expected/qp_dropped_cols.out      | 84 ++++++++++++++++++++++
 .../regress/expected/qp_dropped_cols_optimizer.out | 71 ++++++++++++++++++
 src/test/regress/sql/gporca.sql                    |  2 +-
 src/test/regress/sql/qp_dropped_cols.sql           | 30 ++++++++
 9 files changed, 192 insertions(+), 59 deletions(-)

diff --git a/src/backend/executor/nodeDynamicBitmapHeapscan.c 
b/src/backend/executor/nodeDynamicBitmapHeapscan.c
index cf8c980d5d..aa03c4f31d 100644
--- a/src/backend/executor/nodeDynamicBitmapHeapscan.c
+++ b/src/backend/executor/nodeDynamicBitmapHeapscan.c
@@ -72,6 +72,8 @@ ExecInitDynamicBitmapHeapScan(DynamicBitmapHeapScan *node, 
EState *estate, int e
        state->ss.ps.scanopsfixed = false;
        state->ss.ps.scanopsset = true;
 
+       state->ss.ps.qual = ExecInitQual(node->bitmapheapscan.scan.plan.qual, 
(PlanState *) state);
+
        /*
         * Initialize result tuple type and projection info.
         */
@@ -80,8 +82,6 @@ ExecInitDynamicBitmapHeapScan(DynamicBitmapHeapScan *node, 
EState *estate, int e
        reloid = exec_rt_fetch(node->bitmapheapscan.scan.scanrelid, 
estate)->relid;
        Assert(OidIsValid(reloid));
 
-       state->firstPartition = true;
-
        /* lastRelOid is used to remap varattno for heterogeneous partitions */
        state->lastRelOid = reloid;
 
@@ -176,29 +176,9 @@ initNextTableToScan(DynamicBitmapHeapScanState *node)
                 * the new varnos correspond to
                 */
                node->lastRelOid = pid;
+               pfree(attMap);
        }
 
-       /*
-        * For the very first partition, the qual of planstate is set to null. 
So, we must
-        * initialize quals, regardless of remapping requirements. For later
-        * partitions, we only initialize quals if a column re-mapping is 
necessary.
-        */
-       if (attMap || node->firstPartition)
-       {
-               node->firstPartition = false;
-               MemoryContextReset(node->partitionMemoryContext);
-               MemoryContext oldCxt = 
MemoryContextSwitchTo(node->partitionMemoryContext);
-
-               /* Initialize child expressions */
-               scanState->ps.qual =
-                               ExecInitQual(scanState->ps.plan->qual, 
(PlanState *) scanState);
-
-               MemoryContextSwitchTo(oldCxt);
-       }
-
-       if (attMap)
-               free_attrmap(attMap);
-
        node->bhsState = 
ExecInitBitmapHeapScanForPartition(&plan->bitmapheapscan, estate, node->eflags,
                                                                                
                         currentRelation);
 
diff --git a/src/backend/executor/nodeDynamicSeqscan.c 
b/src/backend/executor/nodeDynamicSeqscan.c
index 657aa5d056..a4eb8a0e9f 100644
--- a/src/backend/executor/nodeDynamicSeqscan.c
+++ b/src/backend/executor/nodeDynamicSeqscan.c
@@ -83,8 +83,6 @@ ExecInitDynamicSeqScan(DynamicSeqScan *node, EState *estate, 
int eflags)
        reloid = exec_rt_fetch(node->seqscan.scanrelid, estate)->relid;
        Assert(OidIsValid(reloid));
 
-       state->firstPartition = true;
-
        /* lastRelOid is used to remap varattno for heterogeneous partitions */
        state->lastRelOid = reloid;
 
@@ -167,29 +165,9 @@ initNextTableToScan(DynamicSeqScanState *node)
                 * the new varnos correspond to
                 */
                node->lastRelOid = *pid;
+               pfree(attMap);
        }
 
-       /*
-        * For the very first partition, the qual of planstate is set to null. 
So, we must
-        * initialize quals, regardless of remapping requirements. For later
-        * partitions, we only initialize quals if a column re-mapping is 
necessary.
-        */
-       if (attMap || node->firstPartition)
-       {
-               node->firstPartition = false;
-               MemoryContextReset(node->partitionMemoryContext);
-               MemoryContext oldCxt = 
MemoryContextSwitchTo(node->partitionMemoryContext);
-
-               /* Initialize child expressions */
-               scanState->ps.qual =
-                       ExecInitQual(scanState->ps.plan->qual, (PlanState *) 
scanState);
-
-               MemoryContextSwitchTo(oldCxt);
-       }
-
-       if (attMap)
-               free_attrmap(attMap);
-
        node->seqScanState = ExecInitSeqScanForPartition(&plan->seqscan, estate,
                                                                                
                         currentRelation);
        return true;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index a10c120e17..44b54dc68f 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1876,11 +1876,6 @@ typedef struct DynamicBitmapHeapScanState
        int                     eflags;
        BitmapHeapScanState *bhsState;
 
-       /*
-        * The first partition requires initialization of expression states,
-        * such as qual, regardless of whether we need to re-map varattno
-        */
-       bool            firstPartition;
        /*
         * lastRelOid is the last relation that corresponds to the
         * varattno mapping of qual and target list. Each time we open a new 
partition, we will
@@ -2183,11 +2178,6 @@ typedef struct DynamicSeqScanState
        int                     eflags;
        SeqScanState *seqScanState;
 
-       /*
-        * The first partition requires initialization of expression states,
-        * such as qual and targetlist, regardless of whether we need to re-map 
varattno
-        */
-       bool            firstPartition;
        /*
         * lastRelOid is the last relation that corresponds to the
         * varattno mapping of qual and target list. Each time we open a new 
partition, we will
diff --git a/src/test/regress/expected/gporca.out 
b/src/test/regress/expected/gporca.out
index c8f00b52d5..9d33dc28b0 100644
--- a/src/test/regress/expected/gporca.out
+++ b/src/test/regress/expected/gporca.out
@@ -11442,7 +11442,7 @@ SELECT * FROM ds_part, non_part2 WHERE ds_part.c = 
non_part2.e AND non_part2.f =
 ---+---+---+---+---
 (0 rows)
 
-explain SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND 
non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1);
+explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e 
AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1);
                                                  QUERY PLAN                    
                              
 
-------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  
(cost=10000000004.33..10000000039.45 rows=7 width=20)
diff --git a/src/test/regress/expected/gporca_optimizer.out 
b/src/test/regress/expected/gporca_optimizer.out
index 974e7f831d..900dd368e4 100644
--- a/src/test/regress/expected/gporca_optimizer.out
+++ b/src/test/regress/expected/gporca_optimizer.out
@@ -11527,7 +11527,7 @@ SELECT * FROM ds_part, non_part2 WHERE ds_part.c = 
non_part2.e AND non_part2.f =
 ---+---+---+---+---
 (0 rows)
 
-explain SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND 
non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1);
+explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e 
AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1);
                                                      QUERY PLAN                
                                     
 
--------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1324481.18 rows=1 
width=20)
diff --git a/src/test/regress/expected/qp_dropped_cols.out 
b/src/test/regress/expected/qp_dropped_cols.out
index 923c7a79e6..df61f4b7e6 100644
--- a/src/test/regress/expected/qp_dropped_cols.out
+++ b/src/test/regress/expected/qp_dropped_cols.out
@@ -14940,6 +14940,90 @@ SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE 
a>42 and z<42;
 ---+---+---
 (0 rows)
 
+-- Exchange partitions with dropped columns and check subplan with attribute 
remapping into dynamicSeqscan
+-- start_ignore
+DROP TABLE if exists ds_main;
+NOTICE:  table "ds_main" does not exist, skipping
+DROP TABLE if exists ds_part1;
+NOTICE:  table "ds_part1" does not exist, skipping
+DROP TABLE if exists non_part1;
+NOTICE:  table "non_part1" does not exist, skipping
+DROP TABLE if exists non_part2;
+NOTICE:  table "non_part2" does not exist, skipping
+-- end_ignore
+CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) 
END (10) EVERY (2), DEFAULT PARTITION deflt);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE ds_part1 (a int, a1 int, b int, c int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE non_part1 (c INT);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE non_part2 (e INT, f INT);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+SET optimizer_enforce_subplans TO ON;
+-- drop columns to get attribute remapping
+ALTER TABLE ds_part1 drop column a1;
+ALTER TABLE ds_main exchange partition for (1) with table ds_part1;
+INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i;
+INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i;
+INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i;
+analyze ds_main;
+analyze non_part1;
+analyze non_part2;
+EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = 
non_part2.e AND a IN ( SELECT a FROM non_part1) order by a;
+                                            QUERY PLAN                         
                    
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: ds_main_1_prt_2.a
+   ->  Sort
+         Sort Key: ds_main_1_prt_2.a
+         ->  HashAggregate
+               Group Key: (RowIdExpr)
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Hash Key: (RowIdExpr)
+                     ->  Nested Loop
+                           ->  Broadcast Motion 3:3  (slice3; segments: 3)
+                                 ->  Hash Join
+                                       Hash Cond: (ds_main_1_prt_2.c = 
non_part2.e)
+                                       ->  Append
+                                             Partition Selectors: $0
+                                             ->  Seq Scan on ds_main_1_prt_2
+                                                   Filter: (a IS NOT NULL)
+                                             ->  Seq Scan on ds_main_1_prt_3
+                                                   Filter: (a IS NOT NULL)
+                                             ->  Seq Scan on ds_main_1_prt_4
+                                                   Filter: (a IS NOT NULL)
+                                             ->  Seq Scan on ds_main_1_prt_5
+                                                   Filter: (a IS NOT NULL)
+                                             ->  Seq Scan on ds_main_1_prt_6
+                                                   Filter: (a IS NOT NULL)
+                                             ->  Seq Scan on 
ds_main_1_prt_deflt
+                                                   Filter: (a IS NOT NULL)
+                                       ->  Hash
+                                             ->  Partition Selector (selector 
id: $0)
+                                                   ->  Broadcast Motion 3:3  
(slice4; segments: 3)
+                                                         ->  Seq Scan on 
non_part2
+                           ->  Materialize
+                                 ->  Seq Scan on non_part1
+ Optimizer: Postgres query optimizer
+(33 rows)
+
+SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( 
SELECT a FROM non_part1) order by a;
+ a | b | c  | e  | f  
+---+---+----+----+----
+ 1 | 2 |  3 |  3 |  4
+ 2 | 3 |  4 |  4 |  5
+ 3 | 4 |  5 |  5 |  6
+ 4 | 5 |  6 |  6 |  7
+ 5 | 6 |  7 |  7 |  8
+ 6 | 7 |  8 |  8 |  9
+ 7 | 8 |  9 |  9 | 10
+ 8 | 9 | 10 | 10 | 11
+(8 rows)
+
 -- As of this writing, pg_dump creates an invalid dump for some of the tables
 -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop
 -- the tables, or the pg_upgrade test fails.
diff --git a/src/test/regress/expected/qp_dropped_cols_optimizer.out 
b/src/test/regress/expected/qp_dropped_cols_optimizer.out
index 37155c9dba..be9cc4ceb1 100644
--- a/src/test/regress/expected/qp_dropped_cols_optimizer.out
+++ b/src/test/regress/expected/qp_dropped_cols_optimizer.out
@@ -14944,6 +14944,77 @@ SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE 
a>42 and z<42;
 ---+---+---
 (0 rows)
 
+-- Exchange partitions with dropped columns and check subplan with attribute 
remapping into dynamicSeqscan
+-- start_ignore
+DROP TABLE if exists ds_main;
+NOTICE:  table "ds_main" does not exist, skipping
+DROP TABLE if exists ds_part1;
+NOTICE:  table "ds_part1" does not exist, skipping
+DROP TABLE if exists non_part1;
+NOTICE:  table "non_part1" does not exist, skipping
+DROP TABLE if exists non_part2;
+NOTICE:  table "non_part2" does not exist, skipping
+-- end_ignore
+CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) 
END (10) EVERY (2), DEFAULT PARTITION deflt);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE ds_part1 (a int, a1 int, b int, c int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE non_part1 (c INT);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE TABLE non_part2 (e INT, f INT);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+SET optimizer_enforce_subplans TO ON;
+-- drop columns to get attribute remapping
+ALTER TABLE ds_part1 drop column a1;
+ALTER TABLE ds_main exchange partition for (1) with table ds_part1;
+INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i;
+INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i;
+INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i;
+analyze ds_main;
+analyze non_part1;
+analyze non_part2;
+EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = 
non_part2.e AND a IN ( SELECT a FROM non_part1) order by a;
+                                      QUERY PLAN                               
       
+--------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: ds_main.a
+   ->  Sort
+         Sort Key: ds_main.a
+         ->  Hash Join
+               Hash Cond: (ds_main.c = non_part2.e)
+               ->  Dynamic Seq Scan on ds_main
+                     Number of partitions to scan: 6 (out of 6)
+                     Filter: ((a = a) AND (SubPlan 1))
+                     SubPlan 1
+                       ->  Materialize
+                             ->  Broadcast Motion 1:3  (slice2)
+                                   ->  Limit
+                                         ->  Gather Motion 3:1  (slice3; 
segments: 3)
+                                               ->  Seq Scan on non_part1
+               ->  Hash
+                     ->  Partition Selector (selector id: $0)
+                           ->  Broadcast Motion 3:3  (slice4; segments: 3)
+                                 ->  Seq Scan on non_part2
+ Optimizer: Pivotal Optimizer (GPORCA)
+(20 rows)
+
+SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( 
SELECT a FROM non_part1) order by a;
+ a | b | c  | e  | f  
+---+---+----+----+----
+ 1 | 2 |  3 |  3 |  4
+ 2 | 3 |  4 |  4 |  5
+ 3 | 4 |  5 |  5 |  6
+ 4 | 5 |  6 |  6 |  7
+ 5 | 6 |  7 |  7 |  8
+ 6 | 7 |  8 |  8 |  9
+ 7 | 8 |  9 |  9 | 10
+ 8 | 9 | 10 | 10 | 11
+(8 rows)
+
 -- As of this writing, pg_dump creates an invalid dump for some of the tables
 -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop
 -- the tables, or the pg_upgrade test fails.
diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql
index ae5ada7b5d..b28146c8ab 100644
--- a/src/test/regress/sql/gporca.sql
+++ b/src/test/regress/sql/gporca.sql
@@ -2347,7 +2347,7 @@ analyze ds_part;
 analyze non_part1;
 analyze non_part2;
 SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f 
= 10 AND a IN ( SELECT b + 1 FROM non_part1);
-explain SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND 
non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1);
+explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e 
AND non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1);
 
 SELECT *, a IN ( SELECT b + 1 FROM non_part1) FROM ds_part, non_part2 WHERE 
ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b FROM 
non_part1);
 CREATE INDEX ds_idx ON ds_part(a);
diff --git a/src/test/regress/sql/qp_dropped_cols.sql 
b/src/test/regress/sql/qp_dropped_cols.sql
index 904ec5d90b..a8dbd53964 100644
--- a/src/test/regress/sql/qp_dropped_cols.sql
+++ b/src/test/regress/sql/qp_dropped_cols.sql
@@ -8297,6 +8297,36 @@ SELECT * FROM leaf_with_dropped_cols_index_part1 WHERE 
a>42 and z<42;
 EXPLAIN SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42;
 SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42;
 
+-- Exchange partitions with dropped columns and check subplan with attribute 
remapping into dynamicSeqscan
+-- start_ignore
+DROP TABLE if exists ds_main;
+DROP TABLE if exists ds_part1;
+DROP TABLE if exists non_part1;
+DROP TABLE if exists non_part2;
+-- end_ignore
+
+CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) 
END (10) EVERY (2), DEFAULT PARTITION deflt);
+CREATE TABLE ds_part1 (a int, a1 int, b int, c int);
+CREATE TABLE non_part1 (c INT);
+CREATE TABLE non_part2 (e INT, f INT);
+
+SET optimizer_enforce_subplans TO ON;
+
+-- drop columns to get attribute remapping
+ALTER TABLE ds_part1 drop column a1;
+ALTER TABLE ds_main exchange partition for (1) with table ds_part1;
+
+INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i;
+INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i;
+INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i;
+
+analyze ds_main;
+analyze non_part1;
+analyze non_part2;
+
+EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = 
non_part2.e AND a IN ( SELECT a FROM non_part1) order by a;
+SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( 
SELECT a FROM non_part1) order by a;
+
 -- As of this writing, pg_dump creates an invalid dump for some of the tables
 -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop
 -- the tables, or the pg_upgrade test fails.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to