(cloudberry) 06/17: Ban enums as distribution and partition keys

avamingli Wed, 08 Jan 2025 18:20:29 -0800

This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit a8bb21bd6b36594fc946d9bbe9713a14293beb0d
Author: Andrew Repp <[email protected]>
AuthorDate: Wed Dec 21 14:42:04 2022 -0600

    Ban enums as distribution and partition keys
    
    Enums cannot be safely used for hashed keys.
    See: https://github.com/greenplum-db/gpdb/issues/14198
---
 contrib/btree_gin/expected/enum.out                |  29 ++--
 contrib/btree_gin/sql/enum.sql                     |  25 +--
 src/backend/commands/tablecmds.c                   |   5 +
 src/backend/parser/parse_utilcmd.c                 |   7 +
 src/test/regress/expected/enum.out                 | 170 ++++++++++-----------
 src/test/regress/expected/enum_dist_part_ban.out   |  24 +++
 .../regress/expected/gpdist_legacy_opclasses.out   |  27 ++--
 .../expected/gpdist_legacy_opclasses_optimizer.out |  26 ++--
 src/test/regress/expected/partition_prune.out      |  10 +-
 .../regress/expected/partition_prune_optimizer.out |   6 +-
 src/test/regress/greenplum_schedule                |   3 +
 src/test/regress/sql/enum.sql                      |  22 +--
 src/test/regress/sql/enum_dist_part_ban.sql        |  22 +++
 src/test/regress/sql/gpdist_legacy_opclasses.sql   |   8 +-
 src/test/regress/sql/partition_prune.sql           |   2 +-
 15 files changed, 239 insertions(+), 147 deletions(-)

diff --git a/contrib/btree_gin/expected/enum.out 
b/contrib/btree_gin/expected/enum.out
index c4ac1174ea..2b0ddf11eb 100644
--- a/contrib/btree_gin/expected/enum.out
+++ b/contrib/btree_gin/expected/enum.out
@@ -1,11 +1,14 @@
 set enable_seqscan=off;
 CREATE TYPE rainbow AS ENUM ('r','o','y','g','b','i','v');
 CREATE TABLE test_enum (
+   h int,
    i rainbow
 );
-INSERT INTO test_enum VALUES ('v'),('y'),('r'),('g'),('o'),('i'),('b');
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'h' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO test_enum VALUES (1, 'v'),(2, 'y'),(3, 'r'),(4, 'g'),(5, 'o'),(6, 
'i'),(7, 'b');
 CREATE INDEX idx_enum ON test_enum USING gin (i);
-SELECT * FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
  i 
 ---
  r
@@ -13,7 +16,7 @@ SELECT * FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
  y
 (3 rows)
 
-SELECT * FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
  i 
 ---
  r
@@ -22,13 +25,13 @@ SELECT * FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
  g
 (4 rows)
 
-SELECT * FROM test_enum WHERE i='g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i='g'::rainbow ORDER BY i;
  i 
 ---
  g
 (1 row)
 
-SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
  i 
 ---
  g
@@ -37,7 +40,7 @@ SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
  v
 (4 rows)
 
-SELECT * FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
  i 
 ---
  b
@@ -45,7 +48,7 @@ SELECT * FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
  v
 (3 rows)
 
-explain (costs off) SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+explain (costs off) SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
                      QUERY PLAN                      
 -----------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
@@ -56,11 +59,17 @@ explain (costs off) SELECT * FROM test_enum WHERE 
i>='g'::rainbow ORDER BY i;
                Recheck Cond: (i >= 'g'::rainbow)
                ->  Bitmap Index Scan on idx_enum
                      Index Cond: (i >= 'g'::rainbow)
- Optimizer: Postgres query optimizer
+ Optimizer: Pivotal Optimizer (GPORCA)
 (9 rows)
 
 -- make sure we handle the non-evenly-numbered oid case for enums
 create type e as enum ('0', '2', '3');
 alter type e add value '1' after '0';
-create table t as select (i % 4)::text::e from generate_series(0, 100000) as i;
-create index on t using gin (e);
+CREATE TABLE t (
+   h int,
+   i e
+);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'h' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into t select j, (j % 4)::text::e from generate_series(0, 100000) as j;
+create index on t using gin (i);
diff --git a/contrib/btree_gin/sql/enum.sql b/contrib/btree_gin/sql/enum.sql
index f35162f8f5..9876be88c8 100644
--- a/contrib/btree_gin/sql/enum.sql
+++ b/contrib/btree_gin/sql/enum.sql
@@ -1,25 +1,30 @@
 set enable_seqscan=off;
-
 CREATE TYPE rainbow AS ENUM ('r','o','y','g','b','i','v');
 
 CREATE TABLE test_enum (
+   h int,
    i rainbow
 );
 
-INSERT INTO test_enum VALUES ('v'),('y'),('r'),('g'),('o'),('i'),('b');
+INSERT INTO test_enum VALUES (1, 'v'),(2, 'y'),(3, 'r'),(4, 'g'),(5, 'o'),(6, 
'i'),(7, 'b');
 
 CREATE INDEX idx_enum ON test_enum USING gin (i);
 
-SELECT * FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
-SELECT * FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
-SELECT * FROM test_enum WHERE i='g'::rainbow ORDER BY i;
-SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
-SELECT * FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i='g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+SELECT i FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
 
-explain (costs off) SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+explain (costs off) SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
 
 -- make sure we handle the non-evenly-numbered oid case for enums
 create type e as enum ('0', '2', '3');
 alter type e add value '1' after '0';
-create table t as select (i % 4)::text::e from generate_series(0, 100000) as i;
-create index on t using gin (e);
+
+CREATE TABLE t (
+   h int,
+   i e
+);
+insert into t select j, (j % 4)::text::e from generate_series(0, 100000) as j;
+create index on t using gin (i);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 92f34467d5..f6ee3a0c30 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -20939,6 +20939,11 @@ ComputePartitionAttrs(ParseState *pstate, Relation 
rel, List *partParams, AttrNu
                        }
                }
 
+               if (strategy == PARTITION_STRATEGY_HASH && 
type_is_enum(atttype))
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("cannot use ENUM column 
\"%s\" in PARTITION BY statement for hash partitions", pelem->name)));
+
                /*
                 * Apply collation override if any
                 */
diff --git a/src/backend/parser/parse_utilcmd.c 
b/src/backend/parser/parse_utilcmd.c
index cd2b40109b..e0007c6636 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -3140,6 +3140,13 @@ getPolicyForDistributedBy(DistributedBy *distributedBy, 
TupleDesc tupdesc)
                                        if (strcmp(colname, 
NameStr(attr->attname)) == 0)
                                        {
                                                Oid                     opclass;
+                                               Oid                     typid;
+
+                                               typid = 
getBaseType(attr->atttypid);
+                                               if (type_is_enum(typid))
+                                                       ereport(ERROR,
+                                                                       
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                                        
errmsg("cannot use ENUM column \"%s\" in DISTRIBUTED BY statement", colname)));
 
                                                opclass = 
cdb_get_opclass_for_column_def(dkelem->opclass, attr->atttypid);
 
diff --git a/src/test/regress/expected/enum.out 
b/src/test/regress/expected/enum.out
index 686ab49742..3b8af8008e 100644
--- a/src/test/regress/expected/enum.out
+++ b/src/test/regress/expected/enum.out
@@ -198,71 +198,71 @@ ORDER BY enumsortorder;
 --
 -- Basic table creation, row selection
 --
-CREATE TABLE enumtest (col rainbow);
-INSERT INTO enumtest values ('red'), ('orange'), ('yellow'), ('green');
-COPY enumtest FROM stdin;
+CREATE TABLE enumtest (i int, col rainbow);
+INSERT INTO enumtest values (1, 'red'), (2, 'orange'), (3, 'yellow'), (4, 
'green');
+COPY enumtest (i, col) FROM stdin;
 SELECT * FROM enumtest;
-  col   
---------
- red
- orange
- yellow
- green
- blue
- purple
+ i |  col   
+---+--------
+ 5 | blue
+ 6 | purple
+ 2 | orange
+ 3 | yellow
+ 4 | green
+ 1 | red
 (6 rows)
 
 --
 -- Operators, no index
 --
 SELECT * FROM enumtest WHERE col = 'orange';
-  col   
---------
- orange
+ i |  col   
+---+--------
+ 2 | orange
 (1 row)
 
 SELECT * FROM enumtest WHERE col <> 'orange' ORDER BY col;
-  col   
---------
- red
- yellow
- green
- blue
- purple
+ i |  col   
+---+--------
+ 1 | red
+ 3 | yellow
+ 4 | green
+ 5 | blue
+ 6 | purple
 (5 rows)
 
 SELECT * FROM enumtest WHERE col > 'yellow' ORDER BY col;
-  col   
---------
- green
- blue
- purple
+ i |  col   
+---+--------
+ 4 | green
+ 5 | blue
+ 6 | purple
 (3 rows)
 
 SELECT * FROM enumtest WHERE col >= 'yellow' ORDER BY col;
-  col   
---------
- yellow
- green
- blue
- purple
+ i |  col   
+---+--------
+ 3 | yellow
+ 4 | green
+ 5 | blue
+ 6 | purple
 (4 rows)
 
 SELECT * FROM enumtest WHERE col < 'green' ORDER BY col;
-  col   
---------
- red
- orange
- yellow
+ i |  col   
+---+--------
+ 1 | red
+ 2 | orange
+ 3 | yellow
 (3 rows)
 
 SELECT * FROM enumtest WHERE col <= 'green' ORDER BY col;
-  col   
---------
- red
- orange
- yellow
- green
+ i |  col   
+---+--------
+ 1 | red
+ 2 | orange
+ 3 | yellow
+ 4 | green
 (4 rows)
 
 --
@@ -311,53 +311,53 @@ SET enable_bitmapscan = off;
 --
 CREATE INDEX enumtest_btree ON enumtest USING btree (col);
 SELECT * FROM enumtest WHERE col = 'orange';
-  col   
---------
- orange
+ i |  col   
+---+--------
+ 2 | orange
 (1 row)
 
 SELECT * FROM enumtest WHERE col <> 'orange' ORDER BY col;
-  col   
---------
- red
- yellow
- green
- blue
- purple
+ i |  col   
+---+--------
+ 1 | red
+ 3 | yellow
+ 4 | green
+ 5 | blue
+ 6 | purple
 (5 rows)
 
 SELECT * FROM enumtest WHERE col > 'yellow' ORDER BY col;
-  col   
---------
- green
- blue
- purple
+ i |  col   
+---+--------
+ 4 | green
+ 5 | blue
+ 6 | purple
 (3 rows)
 
 SELECT * FROM enumtest WHERE col >= 'yellow' ORDER BY col;
-  col   
---------
- yellow
- green
- blue
- purple
+ i |  col   
+---+--------
+ 3 | yellow
+ 4 | green
+ 5 | blue
+ 6 | purple
 (4 rows)
 
 SELECT * FROM enumtest WHERE col < 'green' ORDER BY col;
-  col   
---------
- red
- orange
- yellow
+ i |  col   
+---+--------
+ 1 | red
+ 2 | orange
+ 3 | yellow
 (3 rows)
 
 SELECT * FROM enumtest WHERE col <= 'green' ORDER BY col;
-  col   
---------
- red
- orange
- yellow
- green
+ i |  col   
+---+--------
+ 1 | red
+ 2 | orange
+ 3 | yellow
+ 4 | green
 (4 rows)
 
 SELECT min(col) FROM enumtest;
@@ -384,9 +384,9 @@ DROP INDEX enumtest_btree;
 --
 CREATE INDEX enumtest_hash ON enumtest USING hash (col);
 SELECT * FROM enumtest WHERE col = 'orange';
-  col   
---------
- orange
+ i |  col   
+---+--------
+ 2 | orange
 (1 row)
 
 DROP INDEX enumtest_hash;
@@ -537,11 +537,11 @@ DROP FUNCTION echo_me(rainbow);
 --
 -- RI triggers on enum types
 --
-CREATE TABLE enumtest_parent (id rainbow PRIMARY KEY);
-CREATE TABLE enumtest_child (parent rainbow REFERENCES enumtest_parent);
-INSERT INTO enumtest_parent VALUES ('red');
-INSERT INTO enumtest_child VALUES ('red');
-INSERT INTO enumtest_child VALUES ('blue');  -- fail
+CREATE TABLE enumtest_parent (i int PRIMARY KEY, id rainbow);
+CREATE TABLE enumtest_child (i int REFERENCES enumtest_parent, parent rainbow);
+INSERT INTO enumtest_parent VALUES (1, 'red');
+INSERT INTO enumtest_child VALUES (1, 'red');
+INSERT INTO enumtest_child VALUES (2, 'blue');  -- fail
 -- start_ignore
 -- foreign keys are not checked in GPDB, hence these pass.
 -- end_ignore
@@ -550,9 +550,9 @@ DELETE FROM enumtest_parent;  -- fail
 -- cross-type RI should fail
 --
 CREATE TYPE bogus AS ENUM('good', 'bad', 'ugly');
-CREATE TABLE enumtest_bogus_child(parent bogus REFERENCES enumtest_parent);
+CREATE TABLE enumtest_bogus_child(i int, parent bogus REFERENCES 
enumtest_parent);
 ERROR:  foreign key constraint "enumtest_bogus_child_parent_fkey" cannot be 
implemented
-DETAIL:  Key columns "parent" and "id" are of incompatible types: bogus and 
rainbow.
+DETAIL:  Key columns "parent" and "i" are of incompatible types: bogus and 
integer.
 DROP TYPE bogus;
 -- check renaming a value
 ALTER TYPE rainbow RENAME VALUE 'red' TO 'crimson';
diff --git a/src/test/regress/expected/enum_dist_part_ban.out 
b/src/test/regress/expected/enum_dist_part_ban.out
new file mode 100644
index 0000000000..0034bdeb9a
--- /dev/null
+++ b/src/test/regress/expected/enum_dist_part_ban.out
@@ -0,0 +1,24 @@
+-- test that distributing or hash partitioning by an enum field or expression 
is blocked
+CREATE DATABASE ban_enum;
+\c ban_enum
+-- create a test enum
+create type colorEnum as enum ('r', 'g', 'b');
+-- hash partition by enum column name
+create table part (a int, b colorEnum) partition by hash(b);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+ERROR:  cannot use ENUM column "b" in PARTITION BY statement for hash 
partitions
+-- hash partition by enum column expression
+create table part (a int, b colorEnum) partition by hash((b));
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+ERROR:  cannot use ENUM column "(null)" in PARTITION BY statement for hash 
partitions
+-- distribute by enum column
+create table distr (a colorEnum, b int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+ERROR:  cannot use ENUM column "a" in DISTRIBUTED BY statement
+-- clean up database
+drop type colorEnum;
+\c regression
+DROP DATABASE ban_enum;
diff --git a/src/test/regress/expected/gpdist_legacy_opclasses.out 
b/src/test/regress/expected/gpdist_legacy_opclasses.out
index b2839222ac..44c24a2b08 100644
--- a/src/test/regress/expected/gpdist_legacy_opclasses.out
+++ b/src/test/regress/expected/gpdist_legacy_opclasses.out
@@ -296,27 +296,34 @@ explain (costs off) select * from modern_int a inner join 
legacy_domain_over_int
  Optimizer: Postgres query optimizer
 (9 rows)
 
+-- Distributing by enum has been banned, so this test is updated to instead 
distribute by a dummy int column
+-- Banned because in backup/restore scenarios the data will be in the "wrong" 
segment as oids for each enum
+-- entry are re-generated and hashing them will result in arbitrary segment 
assignment.
 create type colors as enum ('red', 'green', 'blue');
-create table legacy_enum(color colors) distributed by(color cdbhash_enum_ops);
-insert into legacy_enum values ('red'), ('green'), ('blue');
+create table legacy_enum(col1 int, color colors) distributed by(col1);
+insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on 
a.color = b.color;
                  QUERY PLAN                  
 ---------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.color = b.color)
-         ->  Seq Scan on legacy_enum a
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: a.color
+               ->  Seq Scan on legacy_enum a
          ->  Hash
-               ->  Seq Scan on legacy_enum b
+               ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                     Hash Key: b.color
+                     ->  Seq Scan on legacy_enum b
  Optimizer: Postgres query optimizer
-(7 rows)
+(11 rows)
 
 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
- color | color 
--------+-------
- blue  | blue
- red   | red
- green | green
+ col1 | color | col1 | color 
+------+-------+------+-------
+    3 | blue  |    3 | blue
+    2 | green |    2 | green
+    1 | red   |    1 | red
 (3 rows)
 
 --
diff --git a/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out 
b/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out
index 99ab75693e..57909dd0bc 100644
--- a/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out
+++ b/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out
@@ -295,27 +295,31 @@ explain (costs off) select * from modern_int a inner join 
legacy_domain_over_int
  Optimizer: Postgres query optimizer
 (9 rows)
 
+-- Distributing by enum has been banned, so this test is updated to instead 
distribute by a dummy int column
+-- Banned because in backup/restore scenarios the data will be in the "wrong" 
segment as oids for each enum
+-- entry are re-generated and hashing them will result in arbitrary segment 
assignment.
 create type colors as enum ('red', 'green', 'blue');
-create table legacy_enum(color colors) distributed by(color cdbhash_enum_ops);
-insert into legacy_enum values ('red'), ('green'), ('blue');
+create table legacy_enum(col1 int, color colors) distributed by(col1);
+insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on 
a.color = b.color;
-                                     QUERY PLAN                                
     
-------------------------------------------------------------------------------------
+                 QUERY PLAN                  
+---------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: ((legacy_enum.color)::anyenum = 
(legacy_enum_1.color)::anyenum)
          ->  Seq Scan on legacy_enum
          ->  Hash
-               ->  Seq Scan on legacy_enum legacy_enum_1
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                     ->  Seq Scan on legacy_enum legacy_enum_1
  Optimizer: Pivotal Optimizer (GPORCA) version 3.41.0
-(7 rows)
+(8 rows)
 
 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
- color | color 
--------+-------
- blue  | blue
- red   | red
- green | green
+ col1 | color | col1 | color 
+------+-------+------+-------
+    2 | green |    2 | green
+    3 | blue  |    3 | blue
+    1 | red   |    1 | red
 (3 rows)
 
 --
diff --git a/src/test/regress/expected/partition_prune.out 
b/src/test/regress/expected/partition_prune.out
index f75642f9af..8d6513f137 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -3770,14 +3770,14 @@ explain (costs off) select * from pph_arrpart where a 
in ('{4, 5}', '{1}');
 drop table pph_arrpart;
 -- enum type list partition key
 create type pp_colors as enum ('green', 'blue', 'black');
-create table pp_enumpart (a pp_colors) partition by list (a);
+create table pp_enumpart (col1 int, a pp_colors) partition by list (a);
 create table pp_enumpart_green partition of pp_enumpart for values in 
('green');
 create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
 explain (costs off) select * from pp_enumpart where a = 'blue';
-                   QUERY PLAN                   
-------------------------------------------------
- Gather Motion 1:1  (slice1; segments: 1)
-   ->  Seq Scan on pp_enumpart_blue pp_enumpart
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on pp_enumpart_blue
          Filter: (a = 'blue'::pp_colors)
  Optimizer: Postgres query optimizer
 (4 rows)
diff --git a/src/test/regress/expected/partition_prune_optimizer.out 
b/src/test/regress/expected/partition_prune_optimizer.out
index 7c9c1bcdd0..42844e5881 100644
--- a/src/test/regress/expected/partition_prune_optimizer.out
+++ b/src/test/regress/expected/partition_prune_optimizer.out
@@ -3581,13 +3581,15 @@ explain (costs off) select * from pph_arrpart where a 
in ('{4, 5}', '{1}');
 drop table pph_arrpart;
 -- enum type list partition key
 create type pp_colors as enum ('green', 'blue', 'black');
-create table pp_enumpart (a pp_colors) partition by list (a);
+create table pp_enumpart (col1 int, a pp_colors) partition by list (a);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table pp_enumpart_green partition of pp_enumpart for values in 
('green');
 create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
 explain (costs off) select * from pp_enumpart where a = 'blue';
                 QUERY PLAN                
 ------------------------------------------
- Gather Motion 1:1  (slice1; segments: 1)
+ Gather Motion 3:1  (slice1; segments: 3)
    ->  Dynamic Seq Scan on pp_enumpart
          Number of partitions to scan: 1 
          Filter: (a = 'blue'::pp_colors)
diff --git a/src/test/regress/greenplum_schedule 
b/src/test/regress/greenplum_schedule
index 8eb7a1d500..5974951943 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -353,4 +353,7 @@ test: dynamic_table
 # DML tests for AO/CO unique indexes.
 test: uao_dml/uao_dml_unique_index_delete_row 
uao_dml/uao_dml_unique_index_delete_column 
uao_dml/uao_dml_unique_index_update_row 
uao_dml/uao_dml_unique_index_update_column
 
+# test that distributing or hash partitioning by an enum field or expression 
is blocked
+test: enum_dist_part_ban
+
 # end of tests
diff --git a/src/test/regress/sql/enum.sql b/src/test/regress/sql/enum.sql
index fef8f994f4..495e3a8034 100644
--- a/src/test/regress/sql/enum.sql
+++ b/src/test/regress/sql/enum.sql
@@ -120,11 +120,11 @@ ORDER BY enumsortorder;
 --
 -- Basic table creation, row selection
 --
-CREATE TABLE enumtest (col rainbow);
-INSERT INTO enumtest values ('red'), ('orange'), ('yellow'), ('green');
-COPY enumtest FROM stdin;
-blue
-purple
+CREATE TABLE enumtest (i int, col rainbow);
+INSERT INTO enumtest values (1, 'red'), (2, 'orange'), (3, 'yellow'), (4, 
'green');
+COPY enumtest (i, col) FROM stdin;
+5      blue
+6      purple
 \.
 SELECT * FROM enumtest;
 
@@ -244,17 +244,17 @@ DROP FUNCTION echo_me(rainbow);
 --
 -- RI triggers on enum types
 --
-CREATE TABLE enumtest_parent (id rainbow PRIMARY KEY);
-CREATE TABLE enumtest_child (parent rainbow REFERENCES enumtest_parent);
-INSERT INTO enumtest_parent VALUES ('red');
-INSERT INTO enumtest_child VALUES ('red');
-INSERT INTO enumtest_child VALUES ('blue');  -- fail
+CREATE TABLE enumtest_parent (i int PRIMARY KEY, id rainbow);
+CREATE TABLE enumtest_child (i int REFERENCES enumtest_parent, parent rainbow);
+INSERT INTO enumtest_parent VALUES (1, 'red');
+INSERT INTO enumtest_child VALUES (1, 'red');
+INSERT INTO enumtest_child VALUES (2, 'blue');  -- fail
 DELETE FROM enumtest_parent;  -- fail
 --
 -- cross-type RI should fail
 --
 CREATE TYPE bogus AS ENUM('good', 'bad', 'ugly');
-CREATE TABLE enumtest_bogus_child(parent bogus REFERENCES enumtest_parent);
+CREATE TABLE enumtest_bogus_child(i int, parent bogus REFERENCES 
enumtest_parent);
 DROP TYPE bogus;
 
 -- check renaming a value
diff --git a/src/test/regress/sql/enum_dist_part_ban.sql 
b/src/test/regress/sql/enum_dist_part_ban.sql
new file mode 100644
index 0000000000..23ff33fc58
--- /dev/null
+++ b/src/test/regress/sql/enum_dist_part_ban.sql
@@ -0,0 +1,22 @@
+-- test that distributing or hash partitioning by an enum field or expression 
is blocked
+
+CREATE DATABASE ban_enum;
+\c ban_enum
+
+-- create a test enum
+create type colorEnum as enum ('r', 'g', 'b');
+
+-- hash partition by enum column name
+create table part (a int, b colorEnum) partition by hash(b);
+
+-- hash partition by enum column expression
+create table part (a int, b colorEnum) partition by hash((b));
+
+-- distribute by enum column
+create table distr (a colorEnum, b int);
+
+
+-- clean up database
+drop type colorEnum;
+\c regression
+DROP DATABASE ban_enum;
diff --git a/src/test/regress/sql/gpdist_legacy_opclasses.sql 
b/src/test/regress/sql/gpdist_legacy_opclasses.sql
index 2170e93c64..394cca51d4 100644
--- a/src/test/regress/sql/gpdist_legacy_opclasses.sql
+++ b/src/test/regress/sql/gpdist_legacy_opclasses.sql
@@ -168,9 +168,13 @@ explain (costs off) select * from legacy_domain_over_int a 
inner join legacy_dom
 explain (costs off) select * from legacy_int a inner join 
legacy_domain_over_int b on a.id = b.id;
 explain (costs off) select * from modern_int a inner join 
legacy_domain_over_int b on a.id = b.id;
 
+
+-- Distributing by enum has been banned, so this test is updated to instead 
distribute by a dummy int column
+-- Banned because in backup/restore scenarios the data will be in the "wrong" 
segment as oids for each enum
+-- entry are re-generated and hashing them will result in arbitrary segment 
assignment.
 create type colors as enum ('red', 'green', 'blue');
-create table legacy_enum(color colors) distributed by(color cdbhash_enum_ops);
-insert into legacy_enum values ('red'), ('green'), ('blue');
+create table legacy_enum(col1 int, color colors) distributed by(col1);
+insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
 
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on 
a.color = b.color;
 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
diff --git a/src/test/regress/sql/partition_prune.sql 
b/src/test/regress/sql/partition_prune.sql
index f9e3b1f201..a496f1a798 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -937,7 +937,7 @@ drop table pph_arrpart;
 
 -- enum type list partition key
 create type pp_colors as enum ('green', 'blue', 'black');
-create table pp_enumpart (a pp_colors) partition by list (a);
+create table pp_enumpart (col1 int, a pp_colors) partition by list (a);
 create table pp_enumpart_green partition of pp_enumpart for values in 
('green');
 create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
 explain (costs off) select * from pp_enumpart where a = 'blue';


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 06/17: Ban enums as distribution and partition keys

Reply via email to