This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 32be1d074ef252399988f77e78db421fa096400a Author: Nihal Jain <[email protected]> AuthorDate: Wed Nov 2 13:43:28 2022 +0530 Fix gpcheckcat partition distribution policy check Earlier gpcheckcat used to error out when root partition has different distribution policy than child partition. But commit a45be43489294386c23c5fa068ce071ec068d0d8 introduces a new statement `ALTER TABLE EXPAND PARTITION PREPARE` which modifies the distribution policy of the leaf partitions to be randomly distributed. Based on this a policy is good if the following conditions are satisfied: - numsegments value is equal for all root, middle and leaf level partitions - if a root is randomly distributed, then all middle, leaf level partitions must also be randomly distributed - if a root is hash distributed, then middle level should be same as root and leaf level partition can be hash on same key or randomly distributed This commit modifies the existing SQL query such that whenever a parent is hash distributed, the child can be randomly distributed if and only if it is a leaf level partition. Also added a few behave test cases for the same. --- gpMgmt/bin/gpcheckcat | 10 +++-- gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature | 45 ++++++++++++++++++++++ .../gpcheckcat/create_multilevel_partition.sql | 14 +++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/gpMgmt/bin/gpcheckcat b/gpMgmt/bin/gpcheckcat index ecdd2d383f..da0d56479b 100755 --- a/gpMgmt/bin/gpcheckcat +++ b/gpMgmt/bin/gpcheckcat @@ -563,8 +563,10 @@ def checkPartitionIntegrity(): err = [] db = connect() - # MPP-11120: check for child partitions with different - # distribution policies. + # Check for the distribution policy of parent and child partitions based on the following conditions: + # 1. If a root is randomly distributed, then all middle, leaf level partitions must also be randomly distributed + # 2. If a root is hash distributed, then middle level should be same as root and + # leaf level partition can be hash on same key or randomly distributed qry = ''' select inhparent::regclass, inhrelid::regclass, pg_get_table_distributedby(inhparent) as dby_parent, @@ -572,7 +574,9 @@ def checkPartitionIntegrity(): from pg_inherits inner join pg_partitioned_table on (pg_inherits.inhparent = pg_partitioned_table.partrelid) where pg_get_table_distributedby(inhrelid) is distinct from pg_get_table_distributedby(inhparent) and not (pg_get_table_distributedby(inhparent) = 'DISTRIBUTED RANDOMLY' and pg_get_table_distributedby(inhrelid) = '') - and not (inhrelid in (select ftrelid from pg_catalog.pg_foreign_table) and pg_get_table_distributedby(inhrelid) = ''); + and not (inhrelid in (select ftrelid from pg_catalog.pg_foreign_table) and pg_get_table_distributedby(inhrelid) = '') + and not (pg_get_table_distributedby(inhparent) like 'DISTRIBUTED BY%' and pg_get_table_distributedby(inhrelid) = 'DISTRIBUTED RANDOMLY' + and (select isleaf from pg_partition_tree(inhparent) where relid = inhrelid)); ''' try: curs = db.query(qry) diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature b/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature index ff8108362d..8645f720db 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature @@ -187,6 +187,51 @@ Feature: gpcheckcat tests Then the user runs "dropdb policy_db" And the path "gpcheckcat.repair.*" is removed from current working directory + Scenario: gpcheckcat should not report when parent is hash distributed and child is randomly distributed and child is a leaf level partition + Given database "policy_db" is dropped and recreated + And the user runs "psql policy_db -f test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql" + And the user runs sql "set allow_system_table_mods=true; update gp_distribution_policy set distkey = '', distclass='' where localoid='sales_1_prt_2_2_prt_asia'::regclass::oid;" in "policy_db" on all the segments + Then psql should return a return code of 0 + When the user runs "gpcheckcat -R part_integrity policy_db" + Then gpcheckcat should return a return code of 0 + And gpcheckcat should not print "child partition\(s\) are distributed differently from the root partition, and must be manually redistributed, for some tables" to stdout + And gpcheckcat should not print "Failed test\(s\) that are not reported here: part_integrity" to stdout + And the user runs "dropdb policy_db" + + Scenario: gpcheckcat should report when parent is hash distributed and child is randomly distributed and child is a middle level partition + Given database "policy_db" is dropped and recreated + And the user runs "psql policy_db -f test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql" + And the user runs sql "set allow_system_table_mods=true; update gp_distribution_policy set distkey = '', distclass='' where localoid='sales_1_prt_2'::regclass::oid;" in "policy_db" on all the segments + Then psql should return a return code of 0 + When the user runs "gpcheckcat -R part_integrity policy_db" + Then gpcheckcat should return a return code of 1 + And gpcheckcat should print "child partition\(s\) are distributed differently from the root partition, and must be manually redistributed, for some tables" to stdout + And gpcheckcat should print "Failed test\(s\) that are not reported here: part_integrity" to stdout + And the user runs "dropdb policy_db" + + Scenario: gpcheckcat should report when parent is randomly distributed and child is hash distributed + Given database "policy_db" is dropped and recreated + And the user runs "psql policy_db -f test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql" + And the user runs sql "set allow_system_table_mods=true; update gp_distribution_policy set distkey = '', distclass='' where localoid='sales'::regclass::oid;" in "policy_db" on all the segments + Then psql should return a return code of 0 + When the user runs "gpcheckcat -R part_integrity policy_db" + Then gpcheckcat should return a return code of 1 + And gpcheckcat should print "child partition\(s\) are distributed differently from the root partition, and must be manually redistributed, for some tables" to stdout + And gpcheckcat should print "Failed test\(s\) that are not reported here: part_integrity" to stdout + And the user runs "dropdb policy_db" + + Scenario: gpcheckcat should not report part_integrity errors from readable external partitions + Given database "policy_db" is dropped and recreated + And the user runs "psql policy_db -c "create table part(a int) partition by list(a); create table p1(a int); create external web table p2_ext (like p1) EXECUTE 'cat something.txt' FORMAT 'TEXT';"" + And the user runs "psql policy_db -c "alter table part attach partition p1 for values in (1); alter table part attach partition p2_ext for values in (2);"" + Then psql should return a return code of 0 + When the user runs "gpcheckcat -R part_integrity policy_db" + Then gpcheckcat should return a return code of 0 + And gpcheckcat should not print "child partition\(s\) have different numsegments value from the root partition" to stdout + And gpcheckcat should not print "child partition\(s\) are distributed differently from the root partition, and must be manually redistributed, for some tables" to stdout + And gpcheckcat should not print "Failed test\(s\) that are not reported here: part_integrity" to stdout + And the user runs "dropdb policy_db" + Scenario: gpcheckcat foreign key check should report missing catalog entries. Also test missing_extraneous for the same case. Given database "fkey_db" is dropped and recreated And the path "gpcheckcat.repair.*" is removed from current working directory diff --git a/gpMgmt/test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql b/gpMgmt/test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql new file mode 100644 index 0000000000..f55d0c6896 --- /dev/null +++ b/gpMgmt/test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql @@ -0,0 +1,14 @@ +CREATE TABLE sales (trans_id int, date date, amount +decimal(9,2), region text) +DISTRIBUTED BY (trans_id) +PARTITION BY RANGE (date) +SUBPARTITION BY LIST (region) +SUBPARTITION TEMPLATE +( SUBPARTITION usa VALUES ('usa'), + SUBPARTITION asia VALUES ('asia'), + SUBPARTITION europe VALUES ('europe'), + DEFAULT SUBPARTITION other_regions) + (START (date '2011-01-01') INCLUSIVE + END (date '2012-01-01') EXCLUSIVE + EVERY (INTERVAL '1 month'), + DEFAULT PARTITION outlying_dates ); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
