@cfbot: rebased
>From 686cd8fc644f1f86d81d7748b66feddd634c4dc8 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 26 Nov 2020 14:37:08 -0600 Subject: [PATCH v10 1/8] pg_dump: make CLUSTER ON a separate dump object..
..since it needs to be restored after any child indexes are restored *and attached*. The order needs to be: 1) restore child and parent index (order doesn't matter); 2) attach child index; 3) set cluster on child and parent index (order doesn't matter); --- src/bin/pg_dump/pg_dump.c | 86 ++++++++++++++++++++++++++-------- src/bin/pg_dump/pg_dump.h | 8 ++++ src/bin/pg_dump/pg_dump_sort.c | 8 ++++ 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 25717ce0e6..9a3044fd8c 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -215,6 +215,7 @@ static void dumpSequence(Archive *fout, const TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, const IndxInfo *indxinfo); static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo); +static void dumpIndexClusterOn(Archive *fout, const IndexClusterInfo *clusterinfo); static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo); @@ -7232,6 +7233,11 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_inddependcollversions; int ntups; + int ncluster = 0; + IndexClusterInfo *clusterinfo; + clusterinfo = (IndexClusterInfo *) + pg_malloc0(numTables * sizeof(IndexClusterInfo)); + for (i = 0; i < numTables; i++) { TableInfo *tbinfo = &tblinfo[i]; @@ -7611,6 +7617,27 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) /* Plain secondary index */ indxinfo[j].indexconstraint = 0; } + + /* Record each table's CLUSTERed index, if any */ + if (indxinfo[j].indisclustered) + { + IndxInfo *index = &indxinfo[j]; + IndexClusterInfo *cluster = &clusterinfo[ncluster]; + + cluster->dobj.objType = DO_INDEX_CLUSTER_ON; + cluster->dobj.catId.tableoid = 0; + cluster->dobj.catId.oid = 0; + AssignDumpId(&cluster->dobj); + cluster->dobj.name = pg_strdup(index->dobj.name); + cluster->dobj.namespace = index->indextable->dobj.namespace; + cluster->index = index; + cluster->indextable = &tblinfo[i]; + + /* The CLUSTER ON depends on its index.. */ + addObjectDependency(&cluster->dobj, index->dobj.dumpId); + + ncluster++; + } } PQclear(res); @@ -10472,6 +10499,9 @@ dumpDumpableObject(Archive *fout, const DumpableObject *dobj) case DO_SUBSCRIPTION: dumpSubscription(fout, (const SubscriptionInfo *) dobj); break; + case DO_INDEX_CLUSTER_ON: + dumpIndexClusterOn(fout, (IndexClusterInfo *) dobj); + break; case DO_PRE_DATA_BOUNDARY: case DO_POST_DATA_BOUNDARY: /* never dumped, nothing to do */ @@ -16721,6 +16751,41 @@ getAttrName(int attrnum, const TableInfo *tblInfo) return NULL; /* keep compiler quiet */ } +/* + * dumpIndexClusterOn + * record that the index is clustered. + */ +static void +dumpIndexClusterOn(Archive *fout, const IndexClusterInfo *clusterinfo) +{ + DumpOptions *dopt = fout->dopt; + TableInfo *tbinfo = clusterinfo->indextable; + char *qindxname; + PQExpBuffer q; + + if (dopt->dataOnly) + return; + + q = createPQExpBuffer(); + qindxname = pg_strdup(fmtId(clusterinfo->dobj.name)); + + /* index name is not qualified in this syntax */ + appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER ON %s;\n", + fmtQualifiedDumpable(tbinfo), qindxname); + + if (clusterinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) + ArchiveEntry(fout, clusterinfo->dobj.catId, clusterinfo->dobj.dumpId, + ARCHIVE_OPTS(.tag = clusterinfo->dobj.name, + .namespace = tbinfo->dobj.namespace->dobj.name, + .owner = tbinfo->rolname, + .description = "INDEX CLUSTER ON", + .section = SECTION_POST_DATA, + .createStmt = q->data)); + + destroyPQExpBuffer(q); + free(qindxname); +} + /* * dumpIndex * write out to fout a user-defined index @@ -16775,16 +16840,6 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo) * similar code in dumpConstraint! */ - /* If the index is clustered, we need to record that. */ - if (indxinfo->indisclustered) - { - appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER", - fmtQualifiedDumpable(tbinfo)); - /* index name is not qualified in this syntax */ - appendPQExpBuffer(q, " ON %s;\n", - qindxname); - } - /* * If the index has any statistics on some of its columns, generate * the associated ALTER INDEX queries. @@ -17111,16 +17166,6 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo) * similar code in dumpIndex! */ - /* If the index is clustered, we need to record that. */ - if (indxinfo->indisclustered) - { - appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER", - fmtQualifiedDumpable(tbinfo)); - /* index name is not qualified in this syntax */ - appendPQExpBuffer(q, " ON %s;\n", - fmtId(indxinfo->dobj.name)); - } - /* If the index defines identity, we need to record that. */ if (indxinfo->indisreplident) { @@ -18626,6 +18671,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, break; case DO_INDEX: case DO_INDEX_ATTACH: + case DO_INDEX_CLUSTER_ON: case DO_STATSEXT: case DO_REFRESH_MATVIEW: case DO_TRIGGER: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 5340843081..af916632db 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -54,6 +54,7 @@ typedef enum DO_ATTRDEF, DO_INDEX, DO_INDEX_ATTACH, + DO_INDEX_CLUSTER_ON, DO_STATSEXT, DO_RULE, DO_TRIGGER, @@ -387,6 +388,13 @@ typedef struct _indxInfo DumpId indexconstraint; } IndxInfo; +typedef struct _indexClusterInfo +{ + DumpableObject dobj; + TableInfo *indextable; /* link to table the index is for */ + IndxInfo *index; /* link to index itself */ +} IndexClusterInfo; + typedef struct _indexAttachInfo { DumpableObject dobj; diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index 46461fb6a1..dd5b233196 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -75,6 +75,7 @@ enum dbObjectTypePriorities PRIO_CONSTRAINT, PRIO_INDEX, PRIO_INDEX_ATTACH, + PRIO_INDEX_CLUSTER_ON, PRIO_STATSEXT, PRIO_RULE, PRIO_TRIGGER, @@ -108,6 +109,7 @@ static const int dbObjectTypePriority[] = PRIO_ATTRDEF, /* DO_ATTRDEF */ PRIO_INDEX, /* DO_INDEX */ PRIO_INDEX_ATTACH, /* DO_INDEX_ATTACH */ + PRIO_INDEX_CLUSTER_ON, /* DO_INDEX_CLUSTER_ON */ PRIO_STATSEXT, /* DO_STATSEXT */ PRIO_RULE, /* DO_RULE */ PRIO_TRIGGER, /* DO_TRIGGER */ @@ -136,6 +138,7 @@ static const int dbObjectTypePriority[] = PRIO_PUBLICATION, /* DO_PUBLICATION */ PRIO_PUBLICATION_REL, /* DO_PUBLICATION_REL */ PRIO_SUBSCRIPTION /* DO_SUBSCRIPTION */ + }; StaticAssertDecl(lengthof(dbObjectTypePriority) == (DO_SUBSCRIPTION + 1), @@ -1348,6 +1351,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "INDEX ATTACH %s (ID %d)", obj->name, obj->dumpId); return; + case DO_INDEX_CLUSTER_ON: + snprintf(buf, bufsize, + "INDEX CLUSTER ON %s (ID %d)", + obj->name, obj->dumpId); + return; case DO_STATSEXT: snprintf(buf, bufsize, "STATISTICS %s (ID %d OID %u)", -- 2.17.0
>From 3d846ff4f4f311ade1e8c7afe1e7481249ca6d89 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sun, 7 Jun 2020 16:58:42 -0500 Subject: [PATCH v10 2/8] Implement CLUSTER of partitioned table.. This requires either specification of a partitioned index on which to cluster, or that an partitioned index was previously set clustered. VACUUM (including vacuum full) has recursed into partition hierarchies since partitions were introduced in v10 (3c3bb9933). --- doc/src/sgml/ref/cluster.sgml | 7 ++ src/backend/commands/cluster.c | 173 +++++++++++++++++++------- src/bin/psql/tab-complete.c | 1 + src/include/commands/cluster.h | 1 + src/test/regress/expected/cluster.out | 58 ++++++++- src/test/regress/sql/cluster.sql | 24 +++- 6 files changed, 209 insertions(+), 55 deletions(-) diff --git a/doc/src/sgml/ref/cluster.sgml b/doc/src/sgml/ref/cluster.sgml index 0d9720fd8e..17509b35eb 100644 --- a/doc/src/sgml/ref/cluster.sgml +++ b/doc/src/sgml/ref/cluster.sgml @@ -197,6 +197,13 @@ CLUSTER [VERBOSE] in the <structname>pg_stat_progress_cluster</structname> view. See <xref linkend="cluster-progress-reporting"/> for details. </para> + + <para> + Clustering a partitioned table clusters each of its partitions using the + partition of the specified partitioned index or (if not specified) the + partitioned index marked as clustered. + </para> + </refsect1> <refsect1> diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 096a06f7b3..0c08ac56dc 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -32,7 +32,9 @@ #include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_inherits.h" #include "catalog/toasting.h" #include "commands/cluster.h" #include "commands/defrem.h" @@ -73,6 +75,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); static List *get_tables_to_cluster(MemoryContext cluster_context); +static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, + Oid indexOid); +static void cluster_multiple_rels(List *rvs, int options); /*--------------------------------------------------------------------------- @@ -135,7 +140,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) AccessExclusiveLock, 0, RangeVarCallbackOwnsTable, NULL); - rel = table_open(tableOid, NoLock); + rel = table_open(tableOid, ShareUpdateExclusiveLock); /* * Reject clustering a remote temp table ... their local buffer @@ -146,14 +151,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster temporary tables of other sessions"))); - /* - * Reject clustering a partitioned table. - */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot cluster a partitioned table"))); - if (stmt->indexname == NULL) { ListCell *index; @@ -189,10 +186,34 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) } /* close relation, keep lock till commit */ - table_close(rel, NoLock); + table_close(rel, ShareUpdateExclusiveLock); - /* Do the job. */ - cluster_rel(tableOid, indexOid, ¶ms); + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + /* Do the job. */ + cluster_rel(tableOid, indexOid, ¶ms); + } + else + { + List *rvs; + MemoryContext cluster_context; + + /* Refuse to hold strong locks in a user transaction */ + PreventInTransactionBlock(isTopLevel, "CLUSTER"); + + cluster_context = AllocSetContextCreate(PortalContext, + "Cluster", + ALLOCSET_DEFAULT_SIZES); + + rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid); + cluster_multiple_rels(rvs, params.options); + + /* Start a new transaction for the cleanup work. */ + StartTransactionCommand(); + + /* Clean up working storage */ + MemoryContextDelete(cluster_context); + } } else { @@ -202,7 +223,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) */ MemoryContext cluster_context; List *rvs; - ListCell *rv; /* * We cannot run this form of CLUSTER inside a user transaction block; @@ -225,28 +245,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) * cluster_context. */ rvs = get_tables_to_cluster(cluster_context); - - /* Commit to get out of starting transaction */ - PopActiveSnapshot(); - CommitTransactionCommand(); - - /* Ok, now that we've got them all, cluster them one by one */ - foreach(rv, rvs) - { - RelToCluster *rvtc = (RelToCluster *) lfirst(rv); - ClusterParams cluster_params = params; - - /* Start a new transaction for each relation. */ - StartTransactionCommand(); - /* functions in indexes may want a snapshot set */ - PushActiveSnapshot(GetTransactionSnapshot()); - /* Do the job. */ - cluster_params.options |= CLUOPT_RECHECK; - cluster_rel(rvtc->tableOid, rvtc->indexOid, - &cluster_params); - PopActiveSnapshot(); - CommitTransactionCommand(); - } + cluster_multiple_rels(rvs, params.options | CLUOPT_RECHECK_ISCLUSTERED); /* Start a new transaction for the cleanup work. */ StartTransactionCommand(); @@ -352,9 +351,10 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) } /* - * Check that the index is still the one with indisclustered set. + * Check that the index is still the one with indisclustered set, if needed. */ - if (!get_index_isclustered(indexOid)) + if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 && + !get_index_isclustered(indexOid)) { relation_close(OldHeap, AccessExclusiveLock); pgstat_progress_end_command(); @@ -398,8 +398,13 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) /* Check heap and index are valid to cluster on */ if (OidIsValid(indexOid)) + { check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock); + /* Mark the index as clustered */ + mark_index_clustered(OldHeap, indexOid, true); + } + /* * Quietly ignore the request if this is a materialized view which has not * been populated from its query. No harm is done because there is no data @@ -415,6 +420,14 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) return; } + /* For a partitioned rel, we're done. */ + if (!RELKIND_HAS_STORAGE(get_rel_relkind(tableOid))) + { + relation_close(OldHeap, AccessExclusiveLock); + pgstat_progress_end_command(); + return; + } + /* * All predicate locks on the tuples or pages are about to be made * invalid, because we move tuples around. Promote them to relation @@ -483,6 +496,9 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD * the worst consequence of following broken HOT chains would be that we * might put recently-dead tuples out-of-order in the new table, and there * is little harm in that.) + * + * This also refuses to cluster on an "incomplete" partitioned index + * created with "ON ONLY". */ if (!OldIndex->rd_index->indisvalid) ereport(ERROR, @@ -507,12 +523,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) Relation pg_index; ListCell *index; - /* Disallow applying to a partitioned table */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot mark index clustered in partitioned table"))); - /* * If the index is already marked clustered, no need to do anything. */ @@ -584,10 +594,6 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose) TransactionId frozenXid; MultiXactId cutoffMulti; - /* Mark the correct index as clustered */ - if (OidIsValid(indexOid)) - mark_index_clustered(OldHeap, indexOid, true); - /* Remember info about rel before closing OldHeap */ relpersistence = OldHeap->rd_rel->relpersistence; is_system_catalog = IsSystemRelation(OldHeap); @@ -1582,3 +1588,76 @@ get_tables_to_cluster(MemoryContext cluster_context) return rvs; } + +/* + * Return a List of tables and associated index, where each index is a + * partition of the given index + */ +static List * +get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid) +{ + List *inhoids; + ListCell *lc; + List *rvs = NIL; + MemoryContext old_context; + + inhoids = find_all_inheritors(indexOid, NoLock, NULL); + + /* + * We have to build the list in a different memory context so it will + * survive the cross-transaction processing + */ + old_context = MemoryContextSwitchTo(cluster_context); + + foreach(lc, inhoids) + { + Oid indexrelid = lfirst_oid(lc); + Oid relid = IndexGetRelation(indexrelid, false); + RelToCluster *rvtc; + + /* + * Partitioned rels (including the top indexOid) are included, + * so as to be processed by cluster_rel, which calls + * check_index_is_clusterable() and mark_index_clustered(). + */ + rvtc = (RelToCluster *) palloc(sizeof(RelToCluster)); + rvtc->tableOid = relid; + rvtc->indexOid = indexrelid; + rvs = lappend(rvs, rvtc); + } + + MemoryContextSwitchTo(old_context); + return rvs; +} + +/* Cluster each relation in a separate transaction */ +static void +cluster_multiple_rels(List *rvs, int options) +{ + ListCell *lc; + + /* Commit to get out of starting transaction */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* Ok, now that we've got them all, cluster them one by one */ + foreach(lc, rvs) + { + RelToCluster *rvtc = (RelToCluster *) lfirst(lc); + ClusterParams cluster_params = { .options = options, }; + + /* Start a new transaction for each relation. */ + StartTransactionCommand(); + + /* functions in indexes may want a snapshot set */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* Do the job. */ + cluster_params.options |= CLUOPT_RECHECK; + cluster_rel(rvtc->tableOid, rvtc->indexOid, + &cluster_params); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } +} diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index a053bc1e45..3fd192b8d2 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -588,6 +588,7 @@ static const SchemaQuery Query_for_list_of_clusterables = { .catname = "pg_catalog.pg_class c", .selcondition = "c.relkind IN (" CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_PARTITIONED_TABLE) ", " CppAsString2(RELKIND_MATVIEW) ")", .viscondition = "pg_catalog.pg_table_is_visible(c.oid)", .namespace = "c.relnamespace", diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h index a941f2accd..c30ca01726 100644 --- a/src/include/commands/cluster.h +++ b/src/include/commands/cluster.h @@ -22,6 +22,7 @@ /* flag bits for ClusterParams->flags */ #define CLUOPT_RECHECK 0x01 /* recheck relation state */ #define CLUOPT_VERBOSE 0x02 /* print progress info */ +#define CLUOPT_RECHECK_ISCLUSTERED 0x04 /* recheck relation state for indisclustered */ /* options for CLUSTER */ typedef struct ClusterParams diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index e46a66952f..8fb496434e 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -444,14 +444,62 @@ DROP TABLE clustertest; CREATE TABLE clustertest (f1 int PRIMARY KEY); CLUSTER clustertest USING clustertest_pkey; CLUSTER clustertest; --- Check that partitioned tables cannot be clustered +-- Check that partitioned tables can be clustered CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a); +CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a); +CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10); +CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a); +CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30); +CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a); +CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT; +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a); +CLUSTER clstrpart USING clstrpart_only_idx; -- fails +ERROR: cannot cluster on invalid index "clstrpart_only_idx" +DROP INDEX clstrpart_only_idx; CREATE INDEX clstrpart_idx ON clstrpart (a); -ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; -ERROR: cannot mark index clustered in partitioned table +-- Check that clustering sets new relfilenodes: +CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; CLUSTER clstrpart USING clstrpart_idx; -ERROR: cannot cluster a partitioned table -DROP TABLE clstrpart; +CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C"; + relname | level | relkind | ?column? +-------------+-------+---------+---------- + clstrpart | 0 | p | t + clstrpart1 | 1 | p | t + clstrpart11 | 2 | r | f + clstrpart12 | 2 | p | t + clstrpart2 | 1 | r | f + clstrpart3 | 1 | p | t + clstrpart33 | 2 | r | f +(7 rows) + +-- Check that clustering sets new indisclustered: +SELECT relname, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY relname COLLATE "C"; + relname | relkind | indisclustered +-------------------+---------+---------------- + clstrpart11_a_idx | i | t + clstrpart12_a_idx | I | t + clstrpart1_a_idx | I | t + clstrpart2_a_idx | i | t + clstrpart33_a_idx | i | t + clstrpart3_a_idx | I | t + clstrpart_idx | I | t +(7 rows) + +CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned +CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs +CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) CLUSTER +Number of partitions: 3 (Use \d+ to list them.) + -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index aee9cf83e0..57fc661863 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -202,12 +202,30 @@ CREATE TABLE clustertest (f1 int PRIMARY KEY); CLUSTER clustertest USING clustertest_pkey; CLUSTER clustertest; --- Check that partitioned tables cannot be clustered +-- Check that partitioned tables can be clustered CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a); +CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a); +CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10); +CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a); +CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30); +CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a); +CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT; +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a); +CLUSTER clstrpart USING clstrpart_only_idx; -- fails +DROP INDEX clstrpart_only_idx; CREATE INDEX clstrpart_idx ON clstrpart (a); -ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +-- Check that clustering sets new relfilenodes: +CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; CLUSTER clstrpart USING clstrpart_idx; -DROP TABLE clstrpart; +CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C"; +-- Check that clustering sets new indisclustered: +SELECT relname, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY relname COLLATE "C"; +CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned +CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs +CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf +\d clstrpart -- Test CLUSTER with external tuplesorting -- 2.17.0
>From c2c0faeed2612aa0d947058e31fc53e54dacd7e6 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Mon, 8 Feb 2021 20:45:26 -0600 Subject: [PATCH v10 3/8] f! progress reporting.. This follows the precedent of pg_stat_progress_create_index, which simultaneously shows the progress of 1) the leaf partition, and 2) the overall progress as partitions_done / partitions_total. This also includes current_child_index_relid, but the create_index view doesn't, so maybe this shouldn't, either. --- doc/src/sgml/monitoring.sgml | 21 +++++++++++ src/backend/catalog/system_views.sql | 4 +- src/backend/commands/cluster.c | 55 ++++++++++++++++------------ src/backend/commands/vacuum.c | 5 +++ src/include/commands/progress.h | 3 ++ src/test/regress/expected/rules.out | 4 +- 6 files changed, 67 insertions(+), 25 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 56018745c8..28493c7931 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -6293,6 +6293,27 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, is <literal>rebuilding index</literal>. </para></entry> </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>total_partitions</structfield> <type>bigint</type> + </para> + When clustering a partitioned table, this column is set to the total + number of partitions to be clustered. + <para> + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>done_partitions</structfield> <type>bigint</type> + </para> + <para> + When clustering a partitioned table, this column is set to the number + of partitions which have been clustered. + </para></entry> + </row> + </tbody> </tgroup> </table> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5f2541d316..0256809f06 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1142,7 +1142,9 @@ CREATE VIEW pg_stat_progress_cluster AS S.param5 AS heap_tuples_written, S.param6 AS heap_blks_total, S.param7 AS heap_blks_scanned, - S.param8 AS index_rebuild_count + S.param8 AS index_rebuild_count, + S.param18 AS partitions_total, + S.param19 AS partitions_done FROM pg_stat_get_progress_info('CLUSTER') AS S LEFT JOIN pg_database D ON S.datid = D.oid; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 0c08ac56dc..7d8d1d8a69 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -77,7 +77,7 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, static List *get_tables_to_cluster(MemoryContext cluster_context); static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid); -static void cluster_multiple_rels(List *rvs, int options); +static void cluster_multiple_rels(List *rvs, int options, bool ispartitioned); /*--------------------------------------------------------------------------- @@ -188,16 +188,26 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) /* close relation, keep lock till commit */ table_close(rel, ShareUpdateExclusiveLock); + pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid); if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) { /* Do the job. */ + pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, + PROGRESS_CLUSTER_COMMAND_CLUSTER); cluster_rel(tableOid, indexOid, ¶ms); + pgstat_progress_end_command(); } else { List *rvs; MemoryContext cluster_context; + int64 progress_val[2]; + const int progress_index[2] = { + PROGRESS_CLUSTER_COMMAND, + PROGRESS_CLUSTER_PARTITIONS_TOTAL, + }; + /* Refuse to hold strong locks in a user transaction */ PreventInTransactionBlock(isTopLevel, "CLUSTER"); @@ -206,7 +216,15 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) ALLOCSET_DEFAULT_SIZES); rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid); - cluster_multiple_rels(rvs, params.options); + + /* Report command and number of partitions */ + progress_val[0] = PROGRESS_CLUSTER_COMMAND_CLUSTER; + /* XXX: rvs includes the parent, which is not a "partition" */ + progress_val[1] = list_length(rvs); + + pgstat_progress_update_multi_param(2, progress_index, progress_val); + cluster_multiple_rels(rvs, params.options, true); + pgstat_progress_end_command(); /* Start a new transaction for the cleanup work. */ StartTransactionCommand(); @@ -245,7 +263,11 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) * cluster_context. */ rvs = get_tables_to_cluster(cluster_context); - cluster_multiple_rels(rvs, params.options | CLUOPT_RECHECK_ISCLUSTERED); + + pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, + PROGRESS_CLUSTER_COMMAND_CLUSTER); + cluster_multiple_rels(rvs, params.options | CLUOPT_RECHECK_ISCLUSTERED, false); + pgstat_progress_end_command(); /* Start a new transaction for the cleanup work. */ StartTransactionCommand(); @@ -282,14 +304,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) /* Check for user-requested abort. */ CHECK_FOR_INTERRUPTS(); - pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid); - if (OidIsValid(indexOid)) - pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, - PROGRESS_CLUSTER_COMMAND_CLUSTER); - else - pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, - PROGRESS_CLUSTER_COMMAND_VACUUM_FULL); - /* * We grab exclusive access to the target rel and index for the duration * of the transaction. (This is redundant for the single-transaction @@ -300,10 +314,7 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) /* If the table has gone away, we can skip processing it */ if (!OldHeap) - { - pgstat_progress_end_command(); return; - } /* * Since we may open a new transaction for each relation, we have to check @@ -319,7 +330,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) if (!pg_class_ownercheck(tableOid, GetUserId())) { relation_close(OldHeap, AccessExclusiveLock); - pgstat_progress_end_command(); return; } @@ -334,7 +344,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) if (RELATION_IS_OTHER_TEMP(OldHeap)) { relation_close(OldHeap, AccessExclusiveLock); - pgstat_progress_end_command(); return; } @@ -346,7 +355,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid))) { relation_close(OldHeap, AccessExclusiveLock); - pgstat_progress_end_command(); return; } @@ -357,7 +365,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) !get_index_isclustered(indexOid)) { relation_close(OldHeap, AccessExclusiveLock); - pgstat_progress_end_command(); return; } } @@ -416,7 +423,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) !RelationIsPopulated(OldHeap)) { relation_close(OldHeap, AccessExclusiveLock); - pgstat_progress_end_command(); return; } @@ -424,7 +430,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) if (!RELKIND_HAS_STORAGE(get_rel_relkind(tableOid))) { relation_close(OldHeap, AccessExclusiveLock); - pgstat_progress_end_command(); return; } @@ -440,8 +445,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) rebuild_relation(OldHeap, indexOid, verbose); /* NB: rebuild_relation does table_close() on OldHeap */ - - pgstat_progress_end_command(); } /* @@ -1632,9 +1635,10 @@ get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid) /* Cluster each relation in a separate transaction */ static void -cluster_multiple_rels(List *rvs, int options) +cluster_multiple_rels(List *rvs, int options, bool ispartitioned) { ListCell *lc; + off_t childnum = 0; /* Commit to get out of starting transaction */ PopActiveSnapshot(); @@ -1657,6 +1661,11 @@ cluster_multiple_rels(List *rvs, int options) cluster_rel(rvtc->tableOid, rvtc->indexOid, &cluster_params); + if (ispartitioned) + pgstat_progress_update_param(PROGRESS_CLUSTER_PARTITIONS_DONE, + ++childnum); + + PopActiveSnapshot(); CommitTransactionCommand(); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 662aff04b4..1d8a7ca774 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -37,6 +37,7 @@ #include "catalog/pg_namespace.h" #include "commands/cluster.h" #include "commands/defrem.h" +#include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -1937,7 +1938,11 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) cluster_params.options |= CLUOPT_VERBOSE; /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ + pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, relid); + pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, + PROGRESS_CLUSTER_COMMAND_VACUUM_FULL); cluster_rel(relid, InvalidOid, &cluster_params); + pgstat_progress_end_command(); } else table_relation_vacuum(onerel, params, vac_strategy); diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index c6b139d57d..d85781ae0b 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -60,6 +60,9 @@ #define PROGRESS_CLUSTER_TOTAL_HEAP_BLKS 5 #define PROGRESS_CLUSTER_HEAP_BLKS_SCANNED 6 #define PROGRESS_CLUSTER_INDEX_REBUILD_COUNT 7 +/* Need to avoid the CREATE INDEX params */ +#define PROGRESS_CLUSTER_PARTITIONS_TOTAL 17 +#define PROGRESS_CLUSTER_PARTITIONS_DONE 18 /* Phases of cluster (as advertised via PROGRESS_CLUSTER_PHASE) */ #define PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP 1 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 9b59a7b4a5..a8bcc599ac 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1943,7 +1943,9 @@ pg_stat_progress_cluster| SELECT s.pid, s.param5 AS heap_tuples_written, s.param6 AS heap_blks_total, s.param7 AS heap_blks_scanned, - s.param8 AS index_rebuild_count + s.param8 AS index_rebuild_count, + s.param18 AS partitions_total, + s.param19 AS partitions_done FROM (pg_stat_get_progress_info('CLUSTER'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_copy| SELECT s.pid, -- 2.17.0
>From c98512d58e64f98ae87558da594d379759df80e1 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Tue, 6 Oct 2020 22:11:12 -0500 Subject: [PATCH v10 4/8] Propagate changes to indisclustered to child/parents --- src/backend/commands/cluster.c | 110 ++++++++++++++++---------- src/backend/commands/indexcmds.c | 2 + src/test/regress/expected/cluster.out | 46 +++++++++++ src/test/regress/sql/cluster.sql | 11 +++ 4 files changed, 126 insertions(+), 43 deletions(-) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 7d8d1d8a69..dd8014c206 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -74,6 +74,7 @@ static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); +static void set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index); static List *get_tables_to_cluster(MemoryContext cluster_context); static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid); @@ -513,66 +514,89 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD index_close(OldIndex, NoLock); } +/* + * Helper for mark_index_clustered + * Mark a single index as clustered or not. + * pg_index is passed by caller to avoid repeatedly re-opening it. + */ +static void +set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index) +{ + HeapTuple indexTuple; + Form_pg_index indexForm; + + indexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(indexOid)); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexOid); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + /* this was checked earlier, but let's be real sure */ + if (isclustered && !indexForm->indisvalid) + elog(ERROR, "cannot cluster on invalid index %u", indexOid); + + indexForm->indisclustered = isclustered; + CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); + heap_freetuple(indexTuple); +} + /* * mark_index_clustered: mark the specified index as the one clustered on * - * With indexOid == InvalidOid, will mark all indexes of rel not-clustered. + * With indexOid == InvalidOid, mark all indexes of rel not-clustered. + * Otherwise, mark children of the clustered index as clustered, and parents of + * other indexes as unclustered. + * We wish to maintain the following properties: + * 1) Only one index on a relation can be marked clustered at once + * 2) If a partitioned index is clustered, then all its children must be + * clustered. */ void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) { - HeapTuple indexTuple; - Form_pg_index indexForm; - Relation pg_index; - ListCell *index; - - /* - * If the index is already marked clustered, no need to do anything. - */ - if (OidIsValid(indexOid)) - { - if (get_index_isclustered(indexOid)) - return; - } + ListCell *lc, *lc2; + List *indexes; + Relation pg_index = table_open(IndexRelationId, RowExclusiveLock); + List *inh = find_all_inheritors(RelationGetRelid(rel), + ShareRowExclusiveLock, NULL); /* * Check each index of the relation and set/clear the bit as needed. + * Iterate over the relation's children rather than the index's children + * since we need to unset cluster for indexes on intermediate children, + * too. */ - pg_index = table_open(IndexRelationId, RowExclusiveLock); - - foreach(index, RelationGetIndexList(rel)) + foreach(lc, inh) { - Oid thisIndexOid = lfirst_oid(index); - - indexTuple = SearchSysCacheCopy1(INDEXRELID, - ObjectIdGetDatum(thisIndexOid)); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", thisIndexOid); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + Oid inhrelid = lfirst_oid(lc); + Relation thisrel = table_open(inhrelid, ShareRowExclusiveLock); - /* - * Unset the bit if set. We know it's wrong because we checked this - * earlier. - */ - if (indexForm->indisclustered) + indexes = RelationGetIndexList(thisrel); + foreach (lc2, indexes) { - indexForm->indisclustered = false; - CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); - } - else if (thisIndexOid == indexOid) - { - /* this was checked earlier, but let's be real sure */ - if (!indexForm->indisvalid) - elog(ERROR, "cannot cluster on invalid index %u", indexOid); - indexForm->indisclustered = true; - CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); - } + bool isclustered; + Oid thisIndexOid = lfirst_oid(lc2); + List *parentoids = get_rel_relispartition(thisIndexOid) ? + get_partition_ancestors(thisIndexOid) : NIL; - InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0, - InvalidOid, is_internal); + /* + * A child of the clustered index must be set clustered; + * indexes which are not children of the clustered index are + * set unclustered + */ + isclustered = (thisIndexOid == indexOid) || + list_member_oid(parentoids, indexOid); + Assert(OidIsValid(indexOid) || !isclustered); + set_indisclustered(thisIndexOid, isclustered, pg_index); + + InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0, + InvalidOid, is_internal); + } - heap_freetuple(indexTuple); + list_free(indexes); + table_close(thisrel, ShareRowExclusiveLock); } + list_free(inh); table_close(pg_index, RowExclusiveLock); } diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 166374cc0c..7d176c1062 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -25,6 +25,7 @@ #include "catalog/catalog.h" #include "catalog/index.h" #include "catalog/indexing.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "catalog/pg_constraint.h" #include "catalog/pg_inherits.h" @@ -32,6 +33,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" +#include "commands/cluster.h" #include "commands/comment.h" #include "commands/dbcommands.h" #include "commands/defrem.h" diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index 8fb496434e..a2af5c15ec 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -500,6 +500,52 @@ Indexes: "clstrpart_idx" btree (a) CLUSTER Number of partitions: 3 (Use \d+ to list them.) +-- Test that it recurses to grandchildren: +\d clstrpart33 + Table "public.clstrpart33" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart3 DEFAULT +Indexes: + "clstrpart33_a_idx" btree (a) CLUSTER + +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +\d clstrpart33 + Table "public.clstrpart33" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart3 DEFAULT +Indexes: + "clstrpart33_a_idx" btree (a) + +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +\d clstrpart33 + Table "public.clstrpart33" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart3 DEFAULT +Indexes: + "clstrpart33_a_idx" btree (a) CLUSTER + +-- Check that only one child is marked clustered after marking clustered on a different parent +CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a); +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2; +\d clstrpart1 + Partitioned table "public.clstrpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart FOR VALUES FROM (1) TO (10) +Partition key: RANGE (a) +Indexes: + "clstrpart1_a_idx" btree (a) + "clstrpart1_idx_2" btree (a) CLUSTER +Number of partitions: 2 (Use \d+ to list them.) + -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index 57fc661863..3b7fa3142f 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -226,6 +226,17 @@ CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitio CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf \d clstrpart +-- Test that it recurses to grandchildren: +\d clstrpart33 +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +\d clstrpart33 +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +\d clstrpart33 +-- Check that only one child is marked clustered after marking clustered on a different parent +CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a); +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2; +\d clstrpart1 -- Test CLUSTER with external tuplesorting -- 2.17.0
>From 6badf17a18b344cc1be6dcae195478011889ee43 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 5 Nov 2020 18:58:03 -0600 Subject: [PATCH v10 5/8] Invalidate parent indexes --- src/backend/commands/cluster.c | 21 +++++++++++++++++++++ src/test/regress/expected/cluster.out | 26 ++++++++++++++++++++++++++ src/test/regress/sql/cluster.sql | 8 ++++++++ 3 files changed, 55 insertions(+) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index dd8014c206..1b8ff911b0 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -598,6 +598,27 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) } list_free(inh); + /* + * Set parent of all indexes as unclustered when a rel is unclustered; and, + * when an index is clustered, set parents of all /other/ indexes as + * unclustered. + */ + indexes = RelationGetIndexList(rel); + foreach (lc, indexes) + { + Oid thisIndexOid = lfirst_oid(lc); + + if (thisIndexOid == indexOid) + continue; + + while (get_rel_relispartition(thisIndexOid)) + { + thisIndexOid = get_partition_parent(thisIndexOid, true); + set_indisclustered(thisIndexOid, false, pg_index); + } + } + list_free(indexes); + table_close(pg_index, RowExclusiveLock); } diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index a2af5c15ec..c316c41905 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -546,6 +546,32 @@ Indexes: "clstrpart1_idx_2" btree (a) CLUSTER Number of partitions: 2 (Use \d+ to list them.) +-- Check that the parent index is marked not clustered after clustering a partition on a different index: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CLUSTER clstrpart1 USING clstrpart1_idx_2; +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) +Number of partitions: 3 (Use \d+ to list them.) + +-- Check that the parent index is marked not clustered after setting a partition not clustered: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +ALTER TABLE clstrpart1 SET WITHOUT CLUSTER; +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) +Number of partitions: 3 (Use \d+ to list them.) + -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index 3b7fa3142f..1cc7c1aaca 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -237,6 +237,14 @@ CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a); ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2; \d clstrpart1 +-- Check that the parent index is marked not clustered after clustering a partition on a different index: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CLUSTER clstrpart1 USING clstrpart1_idx_2; +\d clstrpart +-- Check that the parent index is marked not clustered after setting a partition not clustered: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +ALTER TABLE clstrpart1 SET WITHOUT CLUSTER; +\d clstrpart -- Test CLUSTER with external tuplesorting -- 2.17.0
>From 8bb301a35c7319967a204e962a88c47ce15138d5 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 5 Nov 2020 19:11:41 -0600 Subject: [PATCH v10 6/8] Invalidate parent index cluster on attach --- src/backend/commands/indexcmds.c | 21 +++++++++++++++++++++ src/test/regress/expected/cluster.out | 14 ++++++++++++++ src/test/regress/sql/cluster.sql | 5 +++++ 3 files changed, 40 insertions(+) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 7d176c1062..bf13bed627 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -4110,6 +4110,27 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid) /* set relispartition correctly on the partition */ update_relispartition(partRelid, OidIsValid(parentOid)); + /* + * If the attached index is not clustered, invalidate cluster mark on + * any parents + */ + if ((OidIsValid(parentOid) && get_index_isclustered(parentOid)) || + get_index_isclustered(partRelid)) + { + Relation indrel; + + /* Make relispartition visible */ + CommandCounterIncrement(); + + indrel = table_open(IndexGetRelation(partRelid, false), + ShareUpdateExclusiveLock); + mark_index_clustered(indrel, + get_index_isclustered(partRelid) ? partRelid : InvalidOid, + true); + table_close(indrel, ShareUpdateExclusiveLock); + + } + if (fix_dependencies) { /* diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index c316c41905..846975fc06 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -572,6 +572,20 @@ Indexes: "clstrpart_idx" btree (a) Number of partitions: 3 (Use \d+ to list them.) +-- Check that attaching an unclustered index marks the parent unclustered: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES); +ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50); +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) +Number of partitions: 4 (Use \d+ to list them.) + -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index 1cc7c1aaca..ee14e7079f 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -245,6 +245,11 @@ CLUSTER clstrpart1 USING clstrpart1_idx_2; ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; ALTER TABLE clstrpart1 SET WITHOUT CLUSTER; \d clstrpart +-- Check that attaching an unclustered index marks the parent unclustered: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES); +ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50); +\d clstrpart -- Test CLUSTER with external tuplesorting -- 2.17.0
>From f0ffdeb4b8fc950003e3426e83ed512f79828442 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Tue, 6 Oct 2020 20:40:18 -0500 Subject: [PATCH v10 7/8] Preserve indisclustered on children of clustered, partitioned indexes Note, this takes a parentIndex, but that wasn't previously used ... UpdateIndexRelation(Oid indexoid, Oid heapoid, Oid parentIndexId, --- src/backend/catalog/index.c | 3 ++- src/test/regress/expected/cluster.out | 12 ++++++++++++ src/test/regress/sql/cluster.sql | 4 ++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index a628b3281c..8e97d88c32 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -615,7 +615,8 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion); values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate); - values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false); + values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(OidIsValid(parentIndexId) && + get_index_isclustered(parentIndexId)); values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid); values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false); values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready); diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index 846975fc06..9429482980 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -586,6 +586,18 @@ Indexes: "clstrpart_idx" btree (a) Number of partitions: 4 (Use \d+ to list them.) +-- Check that new children inherit clustered mark +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40); +\d clstrpart4 + Table "public.clstrpart4" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart FOR VALUES FROM (30) TO (40) +Indexes: + "clstrpart4_a_idx" btree (a) CLUSTER + -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index ee14e7079f..af0849904d 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -250,6 +250,10 @@ ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES); ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50); \d clstrpart +-- Check that new children inherit clustered mark +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40); +\d clstrpart4 -- Test CLUSTER with external tuplesorting -- 2.17.0
>From a183a1ad79d00e25dfe4912e6c170631b7a0258c Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Wed, 25 Nov 2020 17:34:07 -0600 Subject: [PATCH v10 8/8] pg_dump: partitioned index depend on its partitions This is required for restoring clustered parent index, which is marked INVALID until indexes have been built on all its child tables, and it's prohibited to CLUSTER ON an INVALID index See also: 8cff4f5348d075e063100071013f00a900c32b0f --- src/backend/commands/tablecmds.c | 6 +++--- src/bin/pg_dump/common.c | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 88a68a4697..aaf955458c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -18153,6 +18153,9 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl) table_close(idxRel, RowExclusiveLock); } + /* make sure we see the validation we just did */ + CommandCounterIncrement(); + /* * If this index is in turn a partition of a larger index, validating it * might cause the parent to become valid also. Try that. @@ -18164,9 +18167,6 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl) Relation parentIdx, parentTbl; - /* make sure we see the validation we just did */ - CommandCounterIncrement(); - parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false); parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false); parentIdx = relation_open(parentIdxId, AccessExclusiveLock); diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 1a261a5545..cdfba058fc 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -429,6 +429,12 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables) attachinfo[k].parentIdx = parentidx; attachinfo[k].partitionIdx = index; + /* + * We want dependencies from parent to partition (so that the + * partition index is created first) + */ + addObjectDependency(&parentidx->dobj, index->dobj.dumpId); + /* * We must state the DO_INDEX_ATTACH object's dependencies * explicitly, since it will not match anything in pg_depend. @@ -446,6 +452,8 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables) */ addObjectDependency(&attachinfo[k].dobj, index->dobj.dumpId); addObjectDependency(&attachinfo[k].dobj, parentidx->dobj.dumpId); + // addObjectDependency(&parentidx->dobj, attachinfo[k].dobj.dumpId); + addObjectDependency(&attachinfo[k].dobj, index->indextable->dobj.dumpId); addObjectDependency(&attachinfo[k].dobj, -- 2.17.0