This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 86d9520c08e8f985cf8a120c59fed307a9692f16
Author: wenru yan <[email protected]>
AuthorDate: Mon Aug 15 08:02:20 2022 +0000

    disallow generated columns in distribution key
    
    The generated columns are computed after distribution.
    if generated columns are used as distribution key, they
    will always use null values to compute the distribution
    key value, and it will cause wrong query results.
---
 src/backend/parser/parse_utilcmd.c      | 20 ++++++++++++++++++++
 src/test/regress/expected/generated.out | 15 ++++++++++++---
 src/test/regress/sql/generated.sql      | 10 +++++++---
 3 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/backend/parser/parse_utilcmd.c 
b/src/backend/parser/parse_utilcmd.c
index faefa49e45..cd2b40109b 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2862,6 +2862,12 @@ transformDistributedBy(ParseState *pstate,
                                ColumnDef  *column = (ColumnDef *) 
lfirst(columns);
                                Oid                     typeOid;
 
+                               if (column->generated == 
ATTRIBUTE_GENERATED_STORED)
+                               {
+                                       /* generated columns can't in 
distribution key, skip */
+                                       continue;
+                               }
+
                                typeOid = typenameTypeId(NULL, 
column->typeName);
 
                                /*
@@ -2974,6 +2980,20 @@ transformDistributedBy(ParseState *pstate,
 
                                        if (strcmp(column->colname, colname) == 
0)
                                        {
+                                               if (column->generated == 
ATTRIBUTE_GENERATED_STORED)
+                                               {
+                                                       /* The generated 
columns are computed after distribution.
+                                                        * If generated columns 
are used as distribution key, they
+                                                        * will always use null 
values to compute the distribution
+                                                        * key value, and it 
will cause wrong query results.
+                                                        */
+                                                       ereport(ERROR,
+                                                                       
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                                                        
errmsg("cannot use generated column in distribution key"),
+                                                                        
errdetail("Column \"%s\" is a generated column.",
+                                                                               
                column->colname),
+                                                                        
parser_errposition(pstate, column->location)));
+                                               }
                                                found = true;
                                                break;
                                        }
diff --git a/src/test/regress/expected/generated.out 
b/src/test/regress/expected/generated.out
index d7c06ddb18..7096eec391 100644
--- a/src/test/regress/expected/generated.out
+++ b/src/test/regress/expected/generated.out
@@ -98,6 +98,15 @@ CREATE TABLE gtest_err_8 (a int PRIMARY KEY, b int GENERATED 
BY DEFAULT AS (a *
 ERROR:  for a generated column, GENERATED ALWAYS must be specified
 LINE 1: ...E gtest_err_8 (a int PRIMARY KEY, b int GENERATED BY DEFAULT...
                                                              ^
+-- generated columns in distribution key (not allowed)
+CREATE TABLE gtest_err_9a (a int, b int GENERATED ALWAYS AS (a * 2) STORED) 
DISTRIBUTED BY (b);  -- error
+ERROR:  cannot use generated column in distribution key
+LINE 1: CREATE TABLE gtest_err_9a (a int, b int GENERATED ALWAYS AS ...
+                                          ^
+DETAIL:  Column "b" is a generated column.
+CREATE TABLE gtest_err_9b (a int GENERATED ALWAYS AS (b * 2) STORED, b int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO gtest1 VALUES (1);
 INSERT INTO gtest1 VALUES (2, DEFAULT);  -- ok
 INSERT INTO gtest1 VALUES (3, 33);  -- error
@@ -560,7 +569,7 @@ INSERT INTO gtest22a VALUES (3);
 ERROR:  duplicate key value violates unique constraint "gtest22a_b_key"
 DETAIL:  Key (b)=(1) already exists.
 INSERT INTO gtest22a VALUES (4);
-CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, 
PRIMARY KEY (a, b));
+CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, 
PRIMARY KEY (a, b)) distributed by (a);
 INSERT INTO gtest22b VALUES (2);
 INSERT INTO gtest22b VALUES (2);
 ERROR:  duplicate key value violates unique constraint "gtest22b_pkey"
@@ -655,9 +664,9 @@ INSERT INTO gtest23b VALUES (1);  -- ok
 INSERT INTO gtest23b VALUES (5);  -- error
 DROP TABLE gtest23b;
 DROP TABLE gtest23a;
-CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, 
PRIMARY KEY (y));
+CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, 
PRIMARY KEY (x, y)) distributed by (x);
 INSERT INTO gtest23p VALUES (1), (2), (3);
-CREATE TABLE gtest23q (a int PRIMARY KEY, b int REFERENCES gtest23p (y));
+CREATE TABLE gtest23q (a int PRIMARY KEY, b int, CONSTRAINT fk_gtest FOREIGN 
KEY (a, b) REFERENCES gtest23p (x, y));
 INSERT INTO gtest23q VALUES (1, 2);  -- ok
 -- GPDB doesn't enforce foreign key constraints, so this doesn't error out.
 INSERT INTO gtest23q VALUES (2, 5);  -- error
diff --git a/src/test/regress/sql/generated.sql 
b/src/test/regress/sql/generated.sql
index fc6808df35..6392458370 100644
--- a/src/test/regress/sql/generated.sql
+++ b/src/test/regress/sql/generated.sql
@@ -44,6 +44,10 @@ CREATE TABLE gtest_err_7d (a int PRIMARY KEY, b int 
GENERATED ALWAYS AS (generat
 -- GENERATED BY DEFAULT not allowed
 CREATE TABLE gtest_err_8 (a int PRIMARY KEY, b int GENERATED BY DEFAULT AS (a 
* 2) STORED);
 
+-- generated columns in distribution key (not allowed)
+CREATE TABLE gtest_err_9a (a int, b int GENERATED ALWAYS AS (a * 2) STORED) 
DISTRIBUTED BY (b);  -- error
+CREATE TABLE gtest_err_9b (a int GENERATED ALWAYS AS (b * 2) STORED, b int);
+
 INSERT INTO gtest1 VALUES (1);
 INSERT INTO gtest1 VALUES (2, DEFAULT);  -- ok
 INSERT INTO gtest1 VALUES (3, 33);  -- error
@@ -297,7 +301,7 @@ CREATE TABLE gtest22a (a int PRIMARY KEY, b int GENERATED 
ALWAYS AS (a / 2) STOR
 INSERT INTO gtest22a VALUES (2);
 INSERT INTO gtest22a VALUES (3);
 INSERT INTO gtest22a VALUES (4);
-CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, 
PRIMARY KEY (a, b));
+CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, 
PRIMARY KEY (a, b)) distributed by (a);
 INSERT INTO gtest22b VALUES (2);
 INSERT INTO gtest22b VALUES (2);
 
@@ -337,10 +341,10 @@ INSERT INTO gtest23b VALUES (5);  -- error
 DROP TABLE gtest23b;
 DROP TABLE gtest23a;
 
-CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, 
PRIMARY KEY (y));
+CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, 
PRIMARY KEY (x, y)) distributed by (x);
 INSERT INTO gtest23p VALUES (1), (2), (3);
 
-CREATE TABLE gtest23q (a int PRIMARY KEY, b int REFERENCES gtest23p (y));
+CREATE TABLE gtest23q (a int PRIMARY KEY, b int, CONSTRAINT fk_gtest FOREIGN 
KEY (a, b) REFERENCES gtest23p (x, y));
 INSERT INTO gtest23q VALUES (1, 2);  -- ok
 -- GPDB doesn't enforce foreign key constraints, so this doesn't error out.
 INSERT INTO gtest23q VALUES (2, 5);  -- error


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to