This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 86d9520c08e8f985cf8a120c59fed307a9692f16 Author: wenru yan <[email protected]> AuthorDate: Mon Aug 15 08:02:20 2022 +0000 disallow generated columns in distribution key The generated columns are computed after distribution. if generated columns are used as distribution key, they will always use null values to compute the distribution key value, and it will cause wrong query results. --- src/backend/parser/parse_utilcmd.c | 20 ++++++++++++++++++++ src/test/regress/expected/generated.out | 15 ++++++++++++--- src/test/regress/sql/generated.sql | 10 +++++++--- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index faefa49e45..cd2b40109b 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -2862,6 +2862,12 @@ transformDistributedBy(ParseState *pstate, ColumnDef *column = (ColumnDef *) lfirst(columns); Oid typeOid; + if (column->generated == ATTRIBUTE_GENERATED_STORED) + { + /* generated columns can't in distribution key, skip */ + continue; + } + typeOid = typenameTypeId(NULL, column->typeName); /* @@ -2974,6 +2980,20 @@ transformDistributedBy(ParseState *pstate, if (strcmp(column->colname, colname) == 0) { + if (column->generated == ATTRIBUTE_GENERATED_STORED) + { + /* The generated columns are computed after distribution. + * If generated columns are used as distribution key, they + * will always use null values to compute the distribution + * key value, and it will cause wrong query results. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot use generated column in distribution key"), + errdetail("Column \"%s\" is a generated column.", + column->colname), + parser_errposition(pstate, column->location))); + } found = true; break; } diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out index d7c06ddb18..7096eec391 100644 --- a/src/test/regress/expected/generated.out +++ b/src/test/regress/expected/generated.out @@ -98,6 +98,15 @@ CREATE TABLE gtest_err_8 (a int PRIMARY KEY, b int GENERATED BY DEFAULT AS (a * ERROR: for a generated column, GENERATED ALWAYS must be specified LINE 1: ...E gtest_err_8 (a int PRIMARY KEY, b int GENERATED BY DEFAULT... ^ +-- generated columns in distribution key (not allowed) +CREATE TABLE gtest_err_9a (a int, b int GENERATED ALWAYS AS (a * 2) STORED) DISTRIBUTED BY (b); -- error +ERROR: cannot use generated column in distribution key +LINE 1: CREATE TABLE gtest_err_9a (a int, b int GENERATED ALWAYS AS ... + ^ +DETAIL: Column "b" is a generated column. +CREATE TABLE gtest_err_9b (a int GENERATED ALWAYS AS (b * 2) STORED, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO gtest1 VALUES (1); INSERT INTO gtest1 VALUES (2, DEFAULT); -- ok INSERT INTO gtest1 VALUES (3, 33); -- error @@ -560,7 +569,7 @@ INSERT INTO gtest22a VALUES (3); ERROR: duplicate key value violates unique constraint "gtest22a_b_key" DETAIL: Key (b)=(1) already exists. INSERT INTO gtest22a VALUES (4); -CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, PRIMARY KEY (a, b)); +CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, PRIMARY KEY (a, b)) distributed by (a); INSERT INTO gtest22b VALUES (2); INSERT INTO gtest22b VALUES (2); ERROR: duplicate key value violates unique constraint "gtest22b_pkey" @@ -655,9 +664,9 @@ INSERT INTO gtest23b VALUES (1); -- ok INSERT INTO gtest23b VALUES (5); -- error DROP TABLE gtest23b; DROP TABLE gtest23a; -CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, PRIMARY KEY (y)); +CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, PRIMARY KEY (x, y)) distributed by (x); INSERT INTO gtest23p VALUES (1), (2), (3); -CREATE TABLE gtest23q (a int PRIMARY KEY, b int REFERENCES gtest23p (y)); +CREATE TABLE gtest23q (a int PRIMARY KEY, b int, CONSTRAINT fk_gtest FOREIGN KEY (a, b) REFERENCES gtest23p (x, y)); INSERT INTO gtest23q VALUES (1, 2); -- ok -- GPDB doesn't enforce foreign key constraints, so this doesn't error out. INSERT INTO gtest23q VALUES (2, 5); -- error diff --git a/src/test/regress/sql/generated.sql b/src/test/regress/sql/generated.sql index fc6808df35..6392458370 100644 --- a/src/test/regress/sql/generated.sql +++ b/src/test/regress/sql/generated.sql @@ -44,6 +44,10 @@ CREATE TABLE gtest_err_7d (a int PRIMARY KEY, b int GENERATED ALWAYS AS (generat -- GENERATED BY DEFAULT not allowed CREATE TABLE gtest_err_8 (a int PRIMARY KEY, b int GENERATED BY DEFAULT AS (a * 2) STORED); +-- generated columns in distribution key (not allowed) +CREATE TABLE gtest_err_9a (a int, b int GENERATED ALWAYS AS (a * 2) STORED) DISTRIBUTED BY (b); -- error +CREATE TABLE gtest_err_9b (a int GENERATED ALWAYS AS (b * 2) STORED, b int); + INSERT INTO gtest1 VALUES (1); INSERT INTO gtest1 VALUES (2, DEFAULT); -- ok INSERT INTO gtest1 VALUES (3, 33); -- error @@ -297,7 +301,7 @@ CREATE TABLE gtest22a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a / 2) STOR INSERT INTO gtest22a VALUES (2); INSERT INTO gtest22a VALUES (3); INSERT INTO gtest22a VALUES (4); -CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, PRIMARY KEY (a, b)); +CREATE TABLE gtest22b (a int, b int GENERATED ALWAYS AS (a / 2) STORED, PRIMARY KEY (a, b)) distributed by (a); INSERT INTO gtest22b VALUES (2); INSERT INTO gtest22b VALUES (2); @@ -337,10 +341,10 @@ INSERT INTO gtest23b VALUES (5); -- error DROP TABLE gtest23b; DROP TABLE gtest23a; -CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, PRIMARY KEY (y)); +CREATE TABLE gtest23p (x int, y int GENERATED ALWAYS AS (x * 2) STORED, PRIMARY KEY (x, y)) distributed by (x); INSERT INTO gtest23p VALUES (1), (2), (3); -CREATE TABLE gtest23q (a int PRIMARY KEY, b int REFERENCES gtest23p (y)); +CREATE TABLE gtest23q (a int PRIMARY KEY, b int, CONSTRAINT fk_gtest FOREIGN KEY (a, b) REFERENCES gtest23p (x, y)); INSERT INTO gtest23q VALUES (1, 2); -- ok -- GPDB doesn't enforce foreign key constraints, so this doesn't error out. INSERT INTO gtest23q VALUES (2, 5); -- error --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
