On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote:
> On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote:
> > I'm attaching a counter-proposal to your catalog change, which preserves
> > indisclustered on children of clustered, partitioned indexes, and 
> > invalidates
> > indisclustered when attaching unclustered indexes.
> 
> ..and now propagates CLUSTER ON to child indexes.
> 
> I left this as separate patches to show what I mean and what's new while we
> discuss it.

This fixes some omissions in the previous patch and error in its test cases.

CLUSTER ON recurses to children, since I think a clustered parent index means
that all its child indexes are clustered.  "SET WITHOUT CLUSTER" doesn't have
to recurse to children, but I did it like that for consistency and it avoids
the need to special case InvalidOid.

-- 
Justin
>From 75690aacef0d294e2667bd1091cf647dc9f5d187 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 16:58:42 -0500
Subject: [PATCH v4 1/5] Implement CLUSTER of partitioned table..

This requires either specification of a partitioned index on which to cluster,
or that an partitioned index was previously set clustered.
---
 doc/src/sgml/ref/cluster.sgml         |   6 +
 src/backend/commands/cluster.c        | 167 +++++++++++++++++++-------
 src/bin/psql/tab-complete.c           |   1 +
 src/include/nodes/parsenodes.h        |   5 +-
 src/test/regress/expected/cluster.out |  58 ++++++++-
 src/test/regress/sql/cluster.sql      |  24 +++-
 6 files changed, 208 insertions(+), 53 deletions(-)

diff --git a/doc/src/sgml/ref/cluster.sgml b/doc/src/sgml/ref/cluster.sgml
index b9450e7366..0476cfff72 100644
--- a/doc/src/sgml/ref/cluster.sgml
+++ b/doc/src/sgml/ref/cluster.sgml
@@ -172,6 +172,12 @@ CLUSTER [VERBOSE]
     are periodically reclustered.
    </para>
 
+   <para>
+    Clustering a partitioned table clusters each of its partitions using the
+    index partition of the given partitioned index or (if not specified) the
+    partitioned index marked as clustered.
+   </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 04d12a7ece..391e018bbd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -32,7 +32,9 @@
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
 #include "catalog/toasting.h"
 #include "commands/cluster.h"
 #include "commands/progress.h"
@@ -72,6 +74,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+		Oid indexOid);
+static void cluster_multiple_rels(List *rvs, int options);
 
 
 /*---------------------------------------------------------------------------
@@ -113,7 +118,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 											AccessExclusiveLock,
 											0,
 											RangeVarCallbackOwnsTable, NULL);
-		rel = table_open(tableOid, NoLock);
+		rel = table_open(tableOid, ShareUpdateExclusiveLock);
 
 		/*
 		 * Reject clustering a remote temp table ... their local buffer
@@ -124,14 +129,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("cannot cluster temporary tables of other sessions")));
 
-		/*
-		 * Reject clustering a partitioned table.
-		 */
-		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot cluster a partitioned table")));
-
 		if (stmt->indexname == NULL)
 		{
 			ListCell   *index;
@@ -169,8 +166,32 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		/* close relation, keep lock till commit */
 		table_close(rel, NoLock);
 
-		/* Do the job. */
-		cluster_rel(tableOid, indexOid, stmt->options);
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, stmt->options);
+		}
+		else
+		{
+			List	   *rvs;
+			MemoryContext cluster_context;
+
+			/* Refuse to hold strong locks in a user transaction */
+			PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+			cluster_context = AllocSetContextCreate(PortalContext,
+												"Cluster",
+												ALLOCSET_DEFAULT_SIZES);
+
+			rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+			cluster_multiple_rels(rvs, stmt->options);
+
+			/* Start a new transaction for the cleanup work. */
+			StartTransactionCommand();
+
+			/* Clean up working storage */
+			MemoryContextDelete(cluster_context);
+		}
 	}
 	else
 	{
@@ -180,7 +201,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		 */
 		MemoryContext cluster_context;
 		List	   *rvs;
-		ListCell   *rv;
 
 		/*
 		 * We cannot run this form of CLUSTER inside a user transaction block;
@@ -204,25 +224,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		 */
 		rvs = get_tables_to_cluster(cluster_context);
 
-		/* Commit to get out of starting transaction */
-		PopActiveSnapshot();
-		CommitTransactionCommand();
-
-		/* Ok, now that we've got them all, cluster them one by one */
-		foreach(rv, rvs)
-		{
-			RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
-
-			/* Start a new transaction for each relation. */
-			StartTransactionCommand();
-			/* functions in indexes may want a snapshot set */
-			PushActiveSnapshot(GetTransactionSnapshot());
-			/* Do the job. */
-			cluster_rel(rvtc->tableOid, rvtc->indexOid,
-						stmt->options | CLUOPT_RECHECK);
-			PopActiveSnapshot();
-			CommitTransactionCommand();
-		}
+		cluster_multiple_rels(rvs, stmt->options | CLUOPT_RECHECK_ISCLUSTERED);
 
 		/* Start a new transaction for the cleanup work. */
 		StartTransactionCommand();
@@ -328,9 +330,10 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
 			}
 
 			/*
-			 * Check that the index is still the one with indisclustered set.
+			 * Check that the index is still the one with indisclustered set, if needed.
 			 */
-			if (!get_index_isclustered(indexOid))
+			if ((options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+					!get_index_isclustered(indexOid))
 			{
 				relation_close(OldHeap, AccessExclusiveLock);
 				pgstat_progress_end_command();
@@ -374,8 +377,13 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
 
 	/* Check heap and index are valid to cluster on */
 	if (OidIsValid(indexOid))
+	{
 		check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
 
+		/* Mark the index as clustered */
+		mark_index_clustered(OldHeap, indexOid, true);
+	}
+
 	/*
 	 * Quietly ignore the request if this is a materialized view which has not
 	 * been populated from its query. No harm is done because there is no data
@@ -391,6 +399,14 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
 		return;
 	}
 
+	/* For a partitioned rel, we're done. */
+	if (!RELKIND_HAS_STORAGE(get_rel_relkind(tableOid)))
+	{
+		relation_close(OldHeap, AccessExclusiveLock);
+		pgstat_progress_end_command();
+		return;
+	}
+
 	/*
 	 * All predicate locks on the tuples or pages are about to be made
 	 * invalid, because we move tuples around.  Promote them to relation
@@ -459,6 +475,9 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD
 	 * the worst consequence of following broken HOT chains would be that we
 	 * might put recently-dead tuples out-of-order in the new table, and there
 	 * is little harm in that.)
+	 *
+	 * This also refuses to cluster on an "incomplete" partitioned index
+	 * created with "ON ONLY".
 	 */
 	if (!OldIndex->rd_index->indisvalid)
 		ereport(ERROR,
@@ -483,12 +502,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	Relation	pg_index;
 	ListCell   *index;
 
-	/* Disallow applying to a partitioned table */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("cannot mark index clustered in partitioned table")));
-
 	/*
 	 * If the index is already marked clustered, no need to do anything.
 	 */
@@ -560,10 +573,6 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
 	TransactionId frozenXid;
 	MultiXactId cutoffMulti;
 
-	/* Mark the correct index as clustered */
-	if (OidIsValid(indexOid))
-		mark_index_clustered(OldHeap, indexOid, true);
-
 	/* Remember info about rel before closing OldHeap */
 	relpersistence = OldHeap->rd_rel->relpersistence;
 	is_system_catalog = IsSystemRelation(OldHeap);
@@ -1557,3 +1566,75 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
 	return rvs;
 }
+
+/*
+ * Return a List of tables and associated index, where each index is a
+ * partition of the given index
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+	List		*inhoids;
+	ListCell	*lc;
+	List		*rvs = NIL;
+	MemoryContext	old_context;
+
+	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+
+	foreach(lc, inhoids)
+	{
+		Oid		indexrelid = lfirst_oid(lc);
+		Oid		relid = IndexGetRelation(indexrelid, false);
+		RelToCluster	*rvtc;
+
+		/*
+		 * Partitioned rels are also processed by cluster_rel, to
+		 * call check_index_is_clusterable() and mark_index_clustered().
+		 */
+
+		/*
+		 * We have to build the list in a different memory context so it will
+		 * survive the cross-transaction processing
+		 */
+		old_context = MemoryContextSwitchTo(cluster_context);
+
+		rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rvtc->tableOid = relid;
+		rvtc->indexOid = indexrelid;
+		rvs = lappend(rvs, rvtc);
+
+		MemoryContextSwitchTo(old_context);
+	}
+
+	return rvs;
+}
+
+/* Cluster each relation in a separate transaction */
+static void
+cluster_multiple_rels(List *rvs, int options)
+{
+	ListCell *lc;
+
+	/* Commit to get out of starting transaction */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	/* Ok, now that we've got them all, cluster them one by one */
+	foreach(lc, rvs)
+	{
+		RelToCluster *rvtc = (RelToCluster *) lfirst(lc);
+
+		/* Start a new transaction for each relation. */
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* Do the job. */
+		cluster_rel(rvtc->tableOid, rvtc->indexOid,
+					options | CLUOPT_RECHECK);
+
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+	}
+}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 5238a960f7..07ef7fc1b7 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -588,6 +588,7 @@ static const SchemaQuery Query_for_list_of_clusterables = {
 	.catname = "pg_catalog.pg_class c",
 	.selcondition =
 	"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+	CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
 	CppAsString2(RELKIND_MATVIEW) ")",
 	.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
 	.namespace = "c.relnamespace",
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 53de31f3b1..087b1c7af6 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -3199,8 +3199,9 @@ typedef struct AlterSystemStmt
  */
 typedef enum ClusterOption
 {
-	CLUOPT_RECHECK = 1 << 0,	/* recheck relation state */
-	CLUOPT_VERBOSE = 1 << 1		/* print progress info */
+	CLUOPT_VERBOSE = 1 << 0,	/* print progress info */
+	CLUOPT_RECHECK = 1 << 1,	/* recheck relation state */
+	CLUOPT_RECHECK_ISCLUSTERED = 1 << 2,	/* recheck relation state for indisclustered */
 } ClusterOption;
 
 typedef struct ClusterStmt
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index bdae8fe00c..e4448350e7 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -439,14 +439,62 @@ select * from clstr_temp;
 
 drop table clstr_temp;
 RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+ERROR:  cannot cluster on invalid index "clstrpart_only_idx"
+DROP INDEX clstrpart_only_idx;
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
-ERROR:  cannot mark index clustered in partitioned table
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
 CLUSTER clstrpart USING clstrpart_idx;
-ERROR:  cannot cluster a partitioned table
-DROP TABLE clstrpart;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY 1;
+   relname   | relkind | ?column? 
+-------------+---------+----------
+ clstrpart   | p       | t
+ clstrpart1  | p       | t
+ clstrpart11 | r       | f
+ clstrpart12 | p       | t
+ clstrpart2  | r       | f
+ clstrpart3  | p       | t
+ clstrpart33 | r       | f
+(7 rows)
+
+-- Check that clustering sets new indisclustered:
+SELECT i.indexrelid::regclass::text, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY 1;
+    indexrelid     | relkind | indisclustered 
+-------------------+---------+----------------
+ clstrpart11_a_idx | i       | t
+ clstrpart12_a_idx | I       | t
+ clstrpart1_a_idx  | I       | t
+ clstrpart2_a_idx  | i       | t
+ clstrpart33_a_idx | i       | t
+ clstrpart3_a_idx  | I       | t
+ clstrpart_idx     | I       | t
+(7 rows)
+
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a) CLUSTER
+Number of partitions: 3 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 188183647c..22225dc924 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -196,12 +196,30 @@ drop table clstr_temp;
 
 RESET SESSION AUTHORIZATION;
 
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+DROP INDEX clstrpart_only_idx;
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
 CLUSTER clstrpart USING clstrpart_idx;
-DROP TABLE clstrpart;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY 1;
+-- Check that clustering sets new indisclustered:
+SELECT i.indexrelid::regclass::text, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY 1;
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From a3ab89d138c5b2a65b5b0900910d8e01b8a270ca Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 6 Oct 2020 22:11:12 -0500
Subject: [PATCH v4 2/5] Propagate changes to indisclustered to child/parents

---
 src/backend/commands/cluster.c        | 109 ++++++++++++++++----------
 src/backend/commands/indexcmds.c      |   2 +
 src/test/regress/expected/cluster.out |  46 +++++++++++
 src/test/regress/sql/cluster.sql      |  11 +++
 4 files changed, 125 insertions(+), 43 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 391e018bbd..4f30174ba7 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -73,6 +73,7 @@ static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
+static void set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
 static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
 		Oid indexOid);
@@ -489,66 +490,88 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD
 	index_close(OldIndex, NoLock);
 }
 
+/*
+ * Helper for mark_index_clustered
+ * Mark a single index as clustered or not.
+ * pg_index is passed by caller to avoid repeatedly re-opening it.
+ */
+static void
+set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index)
+{
+	HeapTuple	indexTuple;
+	Form_pg_index indexForm;
+
+	indexTuple = SearchSysCacheCopy1(INDEXRELID,
+									ObjectIdGetDatum(indexOid));
+	if (!HeapTupleIsValid(indexTuple))
+		elog(ERROR, "cache lookup failed for index %u", indexOid);
+	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+	/* this was checked earlier, but let's be real sure */
+	if (isclustered && !indexForm->indisvalid)
+		elog(ERROR, "cannot cluster on invalid index %u", indexOid);
+
+	indexForm->indisclustered = isclustered;
+	CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+	heap_freetuple(indexTuple);
+}
+
 /*
  * mark_index_clustered: mark the specified index as the one clustered on
  *
- * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ * With indexOid == InvalidOid, mark all indexes of rel not-clustered.
+ * Otherwise, mark children of the clustered index as clustered, and parents of
+ * other indexes as unclustered.
+ * We wish to maintain the following properties:
+ * 1) Only one index on a relation can be marked clustered at once
+ * 2) If a partitioned index is clustered, then all its children must be
+ *     clustered.
  */
 void
 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 {
-	HeapTuple	indexTuple;
-	Form_pg_index indexForm;
-	Relation	pg_index;
-	ListCell   *index;
-
-	/*
-	 * If the index is already marked clustered, no need to do anything.
-	 */
-	if (OidIsValid(indexOid))
-	{
-		if (get_index_isclustered(indexOid))
-			return;
-	}
+	ListCell	*lc, *lc2;
+	List		*indexes;
+	Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
+	List		*inh = find_all_inheritors(RelationGetRelid(rel), ShareRowExclusiveLock, NULL);
 
 	/*
 	 * Check each index of the relation and set/clear the bit as needed.
+	 * Iterate over the relation's children rather than the index's children
+	 * since we need to unset cluster for indexes on intermediate children,
+	 * too.
 	 */
-	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-
-	foreach(index, RelationGetIndexList(rel))
+	foreach(lc, inh)
 	{
-		Oid			thisIndexOid = lfirst_oid(index);
-
-		indexTuple = SearchSysCacheCopy1(INDEXRELID,
-										 ObjectIdGetDatum(thisIndexOid));
-		if (!HeapTupleIsValid(indexTuple))
-			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
-		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+		Oid			inhrelid = lfirst_oid(lc);
+		Relation	thisrel = table_open(inhrelid, ShareRowExclusiveLock);
 
-		/*
-		 * Unset the bit if set.  We know it's wrong because we checked this
-		 * earlier.
-		 */
-		if (indexForm->indisclustered)
+		indexes = RelationGetIndexList(thisrel);
+		foreach (lc2, indexes)
 		{
-			indexForm->indisclustered = false;
-			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
-		}
-		else if (thisIndexOid == indexOid)
-		{
-			/* this was checked earlier, but let's be real sure */
-			if (!indexForm->indisvalid)
-				elog(ERROR, "cannot cluster on invalid index %u", indexOid);
-			indexForm->indisclustered = true;
-			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
-		}
+			bool	isclustered;
+			Oid		thisIndexOid = lfirst_oid(lc2);
+			List	*parentoids = get_rel_relispartition(thisIndexOid) ?
+				get_partition_ancestors(thisIndexOid) : NIL;
 
-		InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
-									 InvalidOid, is_internal);
+			/*
+			 * A child of the clustered index must be set clustered;
+			 * indexes which are not children of the clustered index are
+			 * set unclustered
+			 */
+			isclustered = (thisIndexOid == indexOid) ||
+					list_member_oid(parentoids, indexOid);
+			Assert(OidIsValid(indexOid) || !isclustered);
+			set_indisclustered(thisIndexOid, isclustered, pg_index);
+
+			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+										 InvalidOid, is_internal);
+		}
 
-		heap_freetuple(indexTuple);
+		list_free(indexes);
+		table_close(thisrel, ShareRowExclusiveLock);
 	}
+	list_free(inh);
 
 	table_close(pg_index, RowExclusiveLock);
 }
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 54d90c28e7..538055c9a0 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -25,6 +25,7 @@
 #include "catalog/catalog.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
@@ -32,6 +33,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
+#include "commands/cluster.h"
 #include "commands/comment.h"
 #include "commands/dbcommands.h"
 #include "commands/defrem.h"
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index e4448350e7..a9fb9f1021 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -495,6 +495,52 @@ Indexes:
     "clstrpart_idx" btree (a) CLUSTER
 Number of partitions: 3 (Use \d+ to list them.)
 
+-- Test that it recurses to grandchildren:
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a) CLUSTER
+
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a)
+
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a) CLUSTER
+
+-- Check that only one child is marked clustered after marking clustered on a different parent
+CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
+\d clstrpart1
+       Partitioned table "public.clstrpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart FOR VALUES FROM (1) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart1_a_idx" btree (a)
+    "clstrpart1_idx_2" btree (a) CLUSTER
+Number of partitions: 2 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 22225dc924..d15bd51496 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -220,6 +220,17 @@ CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitio
 CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
 CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
 \d clstrpart
+-- Test that it recurses to grandchildren:
+\d clstrpart33
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+\d clstrpart33
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+\d clstrpart33
+-- Check that only one child is marked clustered after marking clustered on a different parent
+CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
+\d clstrpart1
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 491f16c3e2750ccc4a1b57d9f906149f042e30a3 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Nov 2020 18:58:03 -0600
Subject: [PATCH v4 3/5] Invalidate parent indexes

---
 src/backend/commands/cluster.c        | 21 +++++++++++++++++++++
 src/test/regress/expected/cluster.out | 26 ++++++++++++++++++++++++++
 src/test/regress/sql/cluster.sql      |  8 ++++++++
 3 files changed, 55 insertions(+)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 4f30174ba7..35beff6f9f 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -573,6 +573,27 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	}
 	list_free(inh);
 
+	/*
+	 * Set parent of all indexes as unclustered when a rel is unclustered; and,
+	 * when an index is clustered, set parents of all /other/ indexes as
+	 * unclustered.
+	 */
+	indexes = RelationGetIndexList(rel);
+	foreach (lc, indexes)
+	{
+		Oid	thisIndexOid = lfirst_oid(lc);
+
+		if (thisIndexOid == indexOid)
+			continue;
+
+		while (get_rel_relispartition(thisIndexOid))
+		{
+			thisIndexOid = get_partition_parent(thisIndexOid);
+			set_indisclustered(thisIndexOid, false, pg_index);
+		}
+	}
+	list_free(indexes);
+
 	table_close(pg_index, RowExclusiveLock);
 }
 
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index a9fb9f1021..6d88978387 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -541,6 +541,32 @@ Indexes:
     "clstrpart1_idx_2" btree (a) CLUSTER
 Number of partitions: 2 (Use \d+ to list them.)
 
+-- Check that the parent index is marked not clustered after clustering a partition on a different index:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_idx_2;
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
+-- Check that the parent index is marked not clustered after setting a partition not clustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index d15bd51496..c9bb204a93 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -231,6 +231,14 @@ CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
 \d clstrpart1
+-- Check that the parent index is marked not clustered after clustering a partition on a different index:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_idx_2;
+\d clstrpart
+-- Check that the parent index is marked not clustered after setting a partition not clustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 068dde5053d185a7eb94a9b9f744e730dc546d98 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Nov 2020 19:11:41 -0600
Subject: [PATCH v4 4/5] Invalidate parent index cluster on attach

---
 src/backend/commands/indexcmds.c      | 21 +++++++++++++++++++++
 src/test/regress/expected/cluster.out | 14 ++++++++++++++
 src/test/regress/sql/cluster.sql      |  5 +++++
 3 files changed, 40 insertions(+)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 538055c9a0..fd8b2d56d2 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -3775,6 +3775,27 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
 	/* set relispartition correctly on the partition */
 	update_relispartition(partRelid, OidIsValid(parentOid));
 
+	/*
+	 * If the attached index is not clustered, invalidate cluster mark on
+	 * any parents
+	 */
+	if ((OidIsValid(parentOid) && get_index_isclustered(parentOid)) ||
+			get_index_isclustered(partRelid))
+	{
+		Relation indrel;
+
+		/* Make relispartition visible */
+		CommandCounterIncrement();
+
+		indrel = table_open(IndexGetRelation(partRelid, false),
+				ShareUpdateExclusiveLock);
+		mark_index_clustered(indrel,
+				get_index_isclustered(partRelid) ? partRelid : InvalidOid,
+				true);
+		table_close(indrel, ShareUpdateExclusiveLock);
+
+	}
+
 	if (fix_dependencies)
 	{
 		/*
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 6d88978387..f0c962db75 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -567,6 +567,20 @@ Indexes:
     "clstrpart_idx" btree (a)
 Number of partitions: 3 (Use \d+ to list them.)
 
+-- Check that attaching an unclustered index marks the parent unclustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
+ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 4 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index c9bb204a93..ff7ffed6e4 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -239,6 +239,11 @@ CLUSTER clstrpart1 USING clstrpart1_idx_2;
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
 \d clstrpart
+-- Check that attaching an unclustered index marks the parent unclustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
+ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 0f14d0d9ea4c668fbdcbb57e8d4c9078352c4964 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 6 Oct 2020 20:40:18 -0500
Subject: [PATCH v4 5/5] Preserve indisclustered on children of clustered,
 partitioned indexes

Note, this takes a parentIndex, but that wasn't previously used ...
UpdateIndexRelation(Oid indexoid, Oid heapoid, Oid parentIndexId,
---
 src/backend/catalog/index.c           |  2 +-
 src/test/regress/expected/cluster.out | 12 ++++++++++++
 src/test/regress/sql/cluster.sql      |  4 ++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index d1e120e5ae..af1565ec4c 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -603,7 +603,7 @@ UpdateIndexRelation(Oid indexoid,
 	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
 	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
 	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
-	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
+	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(OidIsValid(parentIndexId) && get_index_isclustered(parentIndexId));
 	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
 	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
 	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index f0c962db75..fc642f87d2 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -581,6 +581,18 @@ Indexes:
     "clstrpart_idx" btree (a)
 Number of partitions: 4 (Use \d+ to list them.)
 
+-- Check that new children inherit clustered mark
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40);
+\d clstrpart4
+             Table "public.clstrpart4"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart FOR VALUES FROM (30) TO (40)
+Indexes:
+    "clstrpart4_a_idx" btree (a) CLUSTER
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index ff7ffed6e4..5df338f60d 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -244,6 +244,10 @@ ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
 ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
 \d clstrpart
+-- Check that new children inherit clustered mark
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40);
+\d clstrpart4
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

Reply via email to