This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 32be1d074ef252399988f77e78db421fa096400a
Author: Nihal Jain <[email protected]>
AuthorDate: Wed Nov 2 13:43:28 2022 +0530

    Fix gpcheckcat partition distribution policy check
    
        Earlier gpcheckcat used to error out when root partition has different 
distribution policy than child partition. But commit 
a45be43489294386c23c5fa068ce071ec068d0d8 introduces a new statement `ALTER 
TABLE EXPAND PARTITION PREPARE` which modifies the distribution policy of the 
leaf partitions to be randomly distributed. Based on this a policy is good if 
the following conditions are satisfied:
    
        - numsegments value is equal for all root, middle and leaf level 
partitions
        - if a root is randomly distributed, then all middle, leaf level 
partitions must also be randomly distributed
        - if a root is hash distributed, then middle level should be same as 
root and leaf level partition can be hash on same key or randomly distributed
    
        This commit modifies the existing SQL query such that whenever a parent 
is hash distributed, the child can be randomly distributed if and only if it is 
a leaf level partition.
    
        Also added a few behave test cases for the same.
---
 gpMgmt/bin/gpcheckcat                              | 10 +++--
 gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature   | 45 ++++++++++++++++++++++
 .../gpcheckcat/create_multilevel_partition.sql     | 14 +++++++
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/gpMgmt/bin/gpcheckcat b/gpMgmt/bin/gpcheckcat
index ecdd2d383f..da0d56479b 100755
--- a/gpMgmt/bin/gpcheckcat
+++ b/gpMgmt/bin/gpcheckcat
@@ -563,8 +563,10 @@ def checkPartitionIntegrity():
     err = []
     db = connect()
 
-    # MPP-11120: check for child partitions with different
-    # distribution policies.
+    # Check for the distribution policy of parent and child partitions based 
on the following conditions:
+    #   1. If a root is randomly distributed, then all middle, leaf level 
partitions must also be randomly distributed
+    #   2. If a root is hash distributed, then middle level should be same as 
root and
+    #      leaf level partition can be hash on same key or randomly distributed
     qry = '''
     select inhparent::regclass, inhrelid::regclass,
            pg_get_table_distributedby(inhparent) as dby_parent,
@@ -572,7 +574,9 @@ def checkPartitionIntegrity():
     from pg_inherits inner join pg_partitioned_table on (pg_inherits.inhparent 
= pg_partitioned_table.partrelid)
     where pg_get_table_distributedby(inhrelid) is distinct from 
pg_get_table_distributedby(inhparent)
           and not (pg_get_table_distributedby(inhparent) = 'DISTRIBUTED 
RANDOMLY' and pg_get_table_distributedby(inhrelid) = '')
-          and not (inhrelid in (select ftrelid from 
pg_catalog.pg_foreign_table) and pg_get_table_distributedby(inhrelid) = '');
+          and not (inhrelid in (select ftrelid from 
pg_catalog.pg_foreign_table) and pg_get_table_distributedby(inhrelid) = '')
+          and not (pg_get_table_distributedby(inhparent) like 'DISTRIBUTED 
BY%' and pg_get_table_distributedby(inhrelid) = 'DISTRIBUTED RANDOMLY'
+                   and (select isleaf from pg_partition_tree(inhparent) where 
relid = inhrelid));
     '''
     try:
         curs = db.query(qry)
diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature 
b/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature
index ff8108362d..8645f720db 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature
@@ -187,6 +187,51 @@ Feature: gpcheckcat tests
          Then the user runs "dropdb policy_db"
           And the path "gpcheckcat.repair.*" is removed from current working 
directory
 
+    Scenario: gpcheckcat should not report when parent is hash distributed and 
child is randomly distributed and child is a leaf level partition
+        Given database "policy_db" is dropped and recreated
+        And the user runs "psql policy_db -f 
test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql"
+        And the user runs sql "set allow_system_table_mods=true; update 
gp_distribution_policy set distkey = '', distclass='' where 
localoid='sales_1_prt_2_2_prt_asia'::regclass::oid;" in "policy_db" on all the 
segments
+        Then psql should return a return code of 0
+        When the user runs "gpcheckcat -R part_integrity policy_db"
+        Then gpcheckcat should return a return code of 0
+        And gpcheckcat should not print "child partition\(s\) are distributed 
differently from the root partition, and must be manually redistributed, for 
some tables" to stdout
+        And gpcheckcat should not print "Failed test\(s\) that are not 
reported here: part_integrity" to stdout
+        And the user runs "dropdb policy_db"
+
+    Scenario: gpcheckcat should report when parent is hash distributed and 
child is randomly distributed and child is a middle level partition
+        Given database "policy_db" is dropped and recreated
+        And the user runs "psql policy_db -f 
test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql"
+        And the user runs sql "set allow_system_table_mods=true; update 
gp_distribution_policy set distkey = '', distclass='' where 
localoid='sales_1_prt_2'::regclass::oid;" in "policy_db" on all the segments
+        Then psql should return a return code of 0
+        When the user runs "gpcheckcat -R part_integrity policy_db"
+        Then gpcheckcat should return a return code of 1
+        And gpcheckcat should print "child partition\(s\) are distributed 
differently from the root partition, and must be manually redistributed, for 
some tables" to stdout
+        And gpcheckcat should print "Failed test\(s\) that are not reported 
here: part_integrity" to stdout
+        And the user runs "dropdb policy_db"
+
+    Scenario: gpcheckcat should report when parent is randomly distributed and 
child is hash distributed
+        Given database "policy_db" is dropped and recreated
+        And the user runs "psql policy_db -f 
test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql"
+        And the user runs sql "set allow_system_table_mods=true; update 
gp_distribution_policy set distkey = '', distclass='' where 
localoid='sales'::regclass::oid;" in "policy_db" on all the segments
+        Then psql should return a return code of 0
+        When the user runs "gpcheckcat -R part_integrity policy_db"
+        Then gpcheckcat should return a return code of 1
+        And gpcheckcat should print "child partition\(s\) are distributed 
differently from the root partition, and must be manually redistributed, for 
some tables" to stdout
+        And gpcheckcat should print "Failed test\(s\) that are not reported 
here: part_integrity" to stdout
+        And the user runs "dropdb policy_db"
+
+    Scenario: gpcheckcat should not report part_integrity errors from readable 
external partitions
+        Given database "policy_db" is dropped and recreated
+        And the user runs "psql policy_db -c "create table part(a int) 
partition by list(a); create table p1(a int); create external web table p2_ext 
(like p1) EXECUTE 'cat something.txt' FORMAT 'TEXT';""
+        And the user runs "psql policy_db -c "alter table part attach 
partition p1 for values in (1); alter table part attach partition p2_ext for 
values in (2);""
+        Then psql should return a return code of 0
+        When the user runs "gpcheckcat -R part_integrity policy_db"
+        Then gpcheckcat should return a return code of 0
+        And gpcheckcat should not print "child partition\(s\) have different 
numsegments value from the root partition" to stdout
+        And gpcheckcat should not print "child partition\(s\) are distributed 
differently from the root partition, and must be manually redistributed, for 
some tables" to stdout
+        And gpcheckcat should not print "Failed test\(s\) that are not 
reported here: part_integrity" to stdout
+        And the user runs "dropdb policy_db"
+
     Scenario: gpcheckcat foreign key check should report missing catalog 
entries. Also test missing_extraneous for the same case.
         Given database "fkey_db" is dropped and recreated
         And the path "gpcheckcat.repair.*" is removed from current working 
directory
diff --git 
a/gpMgmt/test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql
 
b/gpMgmt/test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql
new file mode 100644
index 0000000000..f55d0c6896
--- /dev/null
+++ 
b/gpMgmt/test/behave/mgmt_utils/steps/data/gpcheckcat/create_multilevel_partition.sql
@@ -0,0 +1,14 @@
+CREATE TABLE sales (trans_id int, date date, amount
+decimal(9,2), region text)
+DISTRIBUTED BY (trans_id)
+PARTITION BY RANGE (date)
+SUBPARTITION BY LIST (region)
+SUBPARTITION TEMPLATE
+( SUBPARTITION usa VALUES ('usa'),
+  SUBPARTITION asia VALUES ('asia'),
+  SUBPARTITION europe VALUES ('europe'),
+  DEFAULT SUBPARTITION other_regions)
+  (START (date '2011-01-01') INCLUSIVE
+   END (date '2012-01-01') EXCLUSIVE
+   EVERY (INTERVAL '1 month'),
+   DEFAULT PARTITION outlying_dates );


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to