Thanks for looking.
On Sun, Aug 09, 2020 at 02:00:09PM +0900, Michael Paquier wrote:
> > exactly what's needed.
>
> For now, I would recommend to focus first on 0001 to add support for
> partitioned tables and indexes to REINDEX. CIC is much more
> complicated btw, but I am not entering in the details now.
>
> + /* Avoid erroring out */
> if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
> {
> This comment does not help, and actually this becomes incorrect as
> reindex for this relkind becomes supported once 0001 is done.
I made a minimal change to avoid forgetting to eventually change that part.
> ReindexPartitionedRel() fails to consider the concurrent case here for
> partition indexes and tables, as reindex_index()/reindex_relation()
> are the APIs used in the non-concurrent case. Once you consider the
> concurrent case correctly, we also need to be careful with partitions
> that have a temporary persistency (note that we don't allow partition
> trees to mix persistency types, all partitions have to be temporary or
> permanent).
Fixed.
> I think that you are right to make the entry point to handle
> partitioned index in ReindexIndex() and partitioned table in
> ReindexTable(), but the structure of the patch should be different:
> - The second portion of ReindexMultipleTables() should be moved into a
> separate routine, taking in input a list of relation OIDs. This needs
> to be extended a bit so as reindex_index() gets called for an index
> relkind if the relpersistence is temporary or if we have a
> non-concurrent reindex. The idea is that we finish with a single code
> path able to work on a list of relations. And your patch adds more of
> that as of ReindexPartitionedRel().
It's a good idea.
> - We should *not* handle directly partitioned index and/or table in
> ReindexRelationConcurrently() to not complicate the logic where we
> gather all the indexes of a table/matview. So I think that the list
> of partition indexes/tables to work on should be built directly in
> ReindexIndex() and ReindexTable(), and then this should call the
> second part of ReindexMultipleTables() refactored in the previous
> point.
I think I addressed these mostly as you intended.
> This way, each partition index gets done individually in its
> own transaction. For a partition table, all indexes of this partition
> are rebuilt in the same set of transactions. For the concurrent case,
> we have already reindex_concurrently_swap that it able to switch the
> dependencies of two indexes within a partition tree, so we can rely on
> that so as a failure in the middle of the operation never leaves the
> a partition structure in an inconsistent state.
--
Justin
>From 25486d56303de512368f322c87528c2be29af0de Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Sun, 7 Jun 2020 15:59:54 -0500
Subject: [PATCH v5 1/3] Implement REINDEX of partitioned tables/indexes
---
doc/src/sgml/ref/reindex.sgml | 5 -
src/backend/catalog/index.c | 8 +-
src/backend/commands/indexcmds.c | 137 ++++++++++++++-------
src/backend/tcop/utility.c | 6 +-
src/include/commands/defrem.h | 6 +-
src/test/regress/expected/create_index.out | 18 +--
src/test/regress/sql/create_index.sql | 13 +-
7 files changed, 122 insertions(+), 71 deletions(-)
diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml
index aac5d5be23..e2e32b3ba0 100644
--- a/doc/src/sgml/ref/reindex.sgml
+++ b/doc/src/sgml/ref/reindex.sgml
@@ -258,11 +258,6 @@ REINDEX [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] { IN
can always reindex anything.
</para>
- <para>
- Reindexing partitioned tables or partitioned indexes is not supported.
- Each individual partition can be reindexed separately instead.
- </para>
-
<refsect2 id="sql-reindex-concurrently" xreflabel="Rebuilding Indexes Concurrently">
<title>Rebuilding Indexes Concurrently</title>
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 1be27eec52..f69f027890 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3660,12 +3660,8 @@ reindex_relation(Oid relid, int flags, int options)
*/
rel = table_open(relid, ShareLock);
- /*
- * This may be useful when implemented someday; but that day is not today.
- * For now, avoid erroring out when called in a multi-table context
- * (REINDEX SCHEMA) and happen to come across a partitioned table. The
- * partitions may be reindexed on their own anyway.
- */
+ /* Skip partitioned tables if called in a multi-table context. The
+ * partitions are reindexed on their own. */
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
ereport(WARNING,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 2baca12c5f..68d75916ae 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -88,7 +88,10 @@ static List *ChooseIndexColumnNames(List *indexElems);
static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
Oid relId, Oid oldRelId, void *arg);
static bool ReindexRelationConcurrently(Oid relationOid, int options);
-static void ReindexPartitionedIndex(Relation parentIdx);
+
+static List *PartitionedLeaves(Oid reloid);
+static void ReindexMultipleRelsWorker(List *relids, int options,
+ bool concurrent);
static void update_relispartition(Oid relationId, bool newval);
static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
@@ -2420,11 +2423,10 @@ ChooseIndexColumnNames(List *indexElems)
* Recreate a specific index.
*/
void
-ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
+ReindexIndex(RangeVar *indexRelation, int options, bool concurrent, bool isTopLevel)
{
struct ReindexIndexCallbackState state;
Oid indOid;
- Relation irel;
char persistence;
/*
@@ -2445,22 +2447,13 @@ ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
RangeVarCallbackForReindexIndex,
&state);
- /*
- * Obtain the current persistence of the existing index. We already hold
- * lock on the index.
- */
- irel = index_open(indOid, NoLock);
-
- if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+ persistence = get_rel_persistence(indOid);
+ if (get_rel_relkind(indOid) == RELKIND_PARTITIONED_INDEX)
{
- ReindexPartitionedIndex(irel);
- return;
+ List *relids = PartitionedLeaves(indOid);
+ ReindexMultipleRelsWorker(relids, options, concurrent);
}
-
- persistence = irel->rd_rel->relpersistence;
- index_close(irel, NoLock);
-
- if (concurrent && persistence != RELPERSISTENCE_TEMP)
+ else if (concurrent && persistence != RELPERSISTENCE_TEMP)
ReindexRelationConcurrently(indOid, options);
else
reindex_index(indOid, false, persistence,
@@ -2542,7 +2535,7 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation,
* Recreate all indexes of a table (and of its toast table, if any)
*/
Oid
-ReindexTable(RangeVar *relation, int options, bool concurrent)
+ReindexTable(RangeVar *relation, int options, bool concurrent, bool isTopLevel)
{
Oid heapOid;
bool result;
@@ -2560,7 +2553,12 @@ ReindexTable(RangeVar *relation, int options, bool concurrent)
0,
RangeVarCallbackOwnsTable, NULL);
- if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
+ if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
+ {
+ List *relids = PartitionedLeaves(heapOid);
+ ReindexMultipleRelsWorker(relids, options, concurrent);
+ }
+ else if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
{
result = ReindexRelationConcurrently(heapOid, options);
@@ -2604,7 +2602,6 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
MemoryContext private_context;
MemoryContext old;
List *relids = NIL;
- ListCell *l;
int num_keys;
bool concurrent_warning = false;
@@ -2688,11 +2685,8 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
* Only regular tables and matviews can have indexes, so ignore any
* other kind of relation.
*
- * It is tempting to also consider partitioned tables here, but that
- * has the problem that if the children are in the same schema, they
- * would be processed twice. Maybe we could have a separate list of
- * partitioned tables, and expand that afterwards into relids,
- * ignoring any duplicates.
+ * Partitioned tables/indexes are skipped but matching leaf
+ * partitions are processed.
*/
if (classtuple->relkind != RELKIND_RELATION &&
classtuple->relkind != RELKIND_MATVIEW)
@@ -2755,22 +2749,59 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
table_endscan(scan);
table_close(relationRelation, AccessShareLock);
- /* Now reindex each rel in a separate transaction */
+ ReindexMultipleRelsWorker(relids, options, concurrent);
+ MemoryContextDelete(private_context);
+}
+
+/* Reindex each rel in a separate transaction */
+static void
+ReindexMultipleRelsWorker(List *relids, int options, bool concurrent)
+{
+ ListCell *l;
+
+ /*
+ * This cannot run inside a user transaction block; if
+ * we were inside a transaction, then its commit- and
+ * start-transaction-command calls would not have the
+ * intended effect!
+ */
+ // PreventInTransactionBlock(isTopLevel,
+ // "REINDEX of partitioned relations"); // XXX
+
PopActiveSnapshot();
CommitTransactionCommand();
+
foreach(l, relids)
{
Oid relid = lfirst_oid(l);
+ char relkind;
StartTransactionCommand();
+
/* functions in indexes may want a snapshot set */
PushActiveSnapshot(GetTransactionSnapshot());
+ relkind = get_rel_relkind(relid);
+
+ if (relkind == RELKIND_PARTITIONED_INDEX ||
+ relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ abort();
+ // ReindexPartitionedRel(relid, options, concurrent, true);
+ }
+
if (concurrent && get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
{
+ /* Handles concurrent indexing of both indexes and tables */
(void) ReindexRelationConcurrently(relid, options);
/* ReindexRelationConcurrently() does the verbose output */
}
+ else if (relkind == RELKIND_INDEX)
+ {
+ reindex_index(relid, false, get_rel_persistence(relid),
+ options | REINDEXOPT_REPORT_PROGRESS);
+ PopActiveSnapshot();
+ }
else
{
bool result;
@@ -2792,8 +2823,6 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
CommitTransactionCommand();
}
StartTransactionCommand();
-
- MemoryContextDelete(private_context);
}
@@ -2805,8 +2834,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
* view. For tables and materialized views, all its indexes will be rebuilt,
* excluding invalid indexes and any indexes used in exclusion constraints,
* but including its associated toast table indexes. For indexes, the index
- * itself will be rebuilt. If 'relationOid' belongs to a partitioned table
- * then we issue a warning to mention these are not yet supported.
+ * itself will be rebuilt.
*
* The locks taken on parent tables and involved indexes are kept until the
* transaction is committed, at which point a session lock is taken on each
@@ -3010,13 +3038,6 @@ ReindexRelationConcurrently(Oid relationOid, int options)
MemoryContextSwitchTo(oldcontext);
break;
}
- case RELKIND_PARTITIONED_TABLE:
- /* see reindex_relation() */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
- get_rel_name(relationOid))));
- return false;
default:
/* Return error if type of relation is not supported */
ereport(ERROR,
@@ -3478,17 +3499,41 @@ ReindexRelationConcurrently(Oid relationOid, int options)
}
/*
- * ReindexPartitionedIndex
- * Reindex each child of the given partitioned index.
- *
- * Not yet implemented.
+ * PartitionedLeaves
+ * Return a list of leaf partitions to be reindexed.
*/
-static void
-ReindexPartitionedIndex(Relation parentIdx)
+static List *
+PartitionedLeaves(Oid reloid)
{
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("REINDEX is not yet implemented for partitioned indexes")));
+ MemoryContext oldcontext, reindex_context;
+ List *inhoids;
+ ListCell *lc;
+
+ /*
+ * Create list of children in longlived context, since we will process
+ * each child in a separate transaction
+ */
+ reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(reindex_context);
+ inhoids = find_all_inheritors(reloid, NoLock, NULL);
+
+ /*
+ * We have a full list of direct and indirect children, so remove from
+ * the list any partitioned relations (including the rel itself) and
+ * handle the leaves directly.
+ */
+ foreach (lc, inhoids)
+ {
+ Oid relid = lfirst_oid(lc);
+ char relkind = get_rel_relkind(relid);
+ if (relkind == RELKIND_PARTITIONED_INDEX ||
+ relkind == RELKIND_PARTITIONED_TABLE)
+ inhoids = foreach_delete_current(inhoids, lc);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ return inhoids;
}
/*
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 9b0c376c8c..fd6bc65c18 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -926,10 +926,12 @@ standard_ProcessUtility(PlannedStmt *pstmt,
switch (stmt->kind)
{
case REINDEX_OBJECT_INDEX:
- ReindexIndex(stmt->relation, stmt->options, stmt->concurrent);
+ ReindexIndex(stmt->relation, stmt->options,
+ stmt->concurrent, isTopLevel);
break;
case REINDEX_OBJECT_TABLE:
- ReindexTable(stmt->relation, stmt->options, stmt->concurrent);
+ ReindexTable(stmt->relation, stmt->options,
+ stmt->concurrent, isTopLevel);
break;
case REINDEX_OBJECT_SCHEMA:
case REINDEX_OBJECT_SYSTEM:
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index c26a102b17..df32f5b201 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -34,8 +34,10 @@ extern ObjectAddress DefineIndex(Oid relationId,
bool check_not_in_use,
bool skip_build,
bool quiet);
-extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent);
-extern Oid ReindexTable(RangeVar *relation, int options, bool concurrent);
+extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent,
+ bool isTopLevel);
+extern Oid ReindexTable(RangeVar *relation, int options, bool concurrent,
+ bool isTopLevel);
extern void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
int options, bool concurrent);
extern char *makeObjectName(const char *name1, const char *name2,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index e3e6634d7e..61b4a30db4 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2196,18 +2196,20 @@ SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_ind
concur_reindex_part_index_0_2 | concur_reindex_part_index_0 | 2
(5 rows)
--- REINDEX fails for partitioned indexes
+-- REINDEX for partitioned indexes
+REINDEX INDEX concur_reindex_part_index;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index;
+REINDEX INDEX concur_reindex_part_index_0;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index_0;
REINDEX INDEX concur_reindex_part_index_10;
-ERROR: REINDEX is not yet implemented for partitioned indexes
REINDEX INDEX CONCURRENTLY concur_reindex_part_index_10;
-ERROR: REINDEX is not yet implemented for partitioned indexes
--- REINDEX is a no-op for partitioned tables
+-- REINDEX for partitioned tables
+REINDEX TABLE concur_reindex_part_10;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+REINDEX TABLE concur_reindex_part_0;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_0;
REINDEX TABLE concur_reindex_part_10;
-WARNING: REINDEX of partitioned tables is not yet implemented, skipping "concur_reindex_part_10"
-NOTICE: table "concur_reindex_part_10" has no indexes to reindex
REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
-WARNING: REINDEX of partitioned tables is not yet implemented, skipping "concur_reindex_part_10"
-NOTICE: table "concur_reindex_part_10" has no indexes that can be reindexed concurrently
SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
ORDER BY relid, level;
relid | parentrelid | level
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index f3667bacdc..a751dd5caa 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -903,12 +903,21 @@ CREATE INDEX concur_reindex_part_index_0_2 ON ONLY concur_reindex_part_0_2 (c1);
ALTER INDEX concur_reindex_part_index_0 ATTACH PARTITION concur_reindex_part_index_0_2;
SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
ORDER BY relid, level;
--- REINDEX fails for partitioned indexes
+-- REINDEX for partitioned indexes
+REINDEX INDEX concur_reindex_part_index;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index;
+REINDEX INDEX concur_reindex_part_index_0;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index_0;
REINDEX INDEX concur_reindex_part_index_10;
REINDEX INDEX CONCURRENTLY concur_reindex_part_index_10;
--- REINDEX is a no-op for partitioned tables
+-- REINDEX for partitioned tables
REINDEX TABLE concur_reindex_part_10;
REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+REINDEX TABLE concur_reindex_part_0;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_0;
+REINDEX TABLE concur_reindex_part_10;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+
SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
ORDER BY relid, level;
-- REINDEX should preserve dependencies of partition tree.
--
2.17.0
>From a40c1bc3d63323a9417374bb7467b8c04d011912 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v5 2/3] Allow CREATE INDEX CONCURRENTLY on partitioned table
Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.
---
doc/src/sgml/ref/create_index.sgml | 9 ---
src/backend/commands/indexcmds.c | 104 +++++++++++++++++++------
src/test/regress/expected/indexing.out | 57 +++++++++++++-
src/test/regress/sql/indexing.sql | 16 +++-
4 files changed, 148 insertions(+), 38 deletions(-)
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 33aa64e81d..c780dc9547 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -657,15 +657,6 @@ Indexes:
cannot.
</para>
- <para>
- Concurrent builds for indexes on partitioned tables are currently not
- supported. However, you may concurrently build the index on each
- partition individually and then finally create the partitioned index
- non-concurrently in order to reduce the time where writes to the
- partitioned table will be locked out. In this case, building the
- partitioned index is a metadata only operation.
- </para>
-
</refsect2>
</refsect1>
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 68d75916ae..cdf31a47d8 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -655,17 +655,6 @@ DefineIndex(Oid relationId,
partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
if (partitioned)
{
- /*
- * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
- * the error is thrown also for temporary tables. Seems better to be
- * consistent, even though we could do it on temporary table because
- * we're not actually doing it concurrently.
- */
- if (stmt->concurrent)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot create index on partitioned table \"%s\" concurrently",
- RelationGetRelationName(rel))));
if (stmt->excludeOpNames)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1100,6 +1089,9 @@ DefineIndex(Oid relationId,
if (pd->nparts != 0)
flags |= INDEX_CREATE_INVALID;
}
+ else if (concurrent && OidIsValid(parentIndexId))
+ /* Initial concurrent build of index partition is invalid */
+ flags |= INDEX_CREATE_INVALID;
if (stmt->deferrable)
constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1152,8 +1144,17 @@ DefineIndex(Oid relationId,
*/
if (!stmt->relation || stmt->relation->inh)
{
+ /*
+ * Need to close the relation before recursing into children, so
+ * copy needed data into a longlived context.
+ */
+
+ MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+ ALLOCSET_DEFAULT_SIZES);
+ MemoryContext oldcontext = MemoryContextSwitchTo(ind_context);
PartitionDesc partdesc = RelationGetPartitionDesc(rel);
int nparts = partdesc->nparts;
+ char *relname = pstrdup(RelationGetRelationName(rel));
Oid *part_oids = palloc(sizeof(Oid) * nparts);
bool invalidate_parent = false;
TupleDesc parentDesc;
@@ -1163,8 +1164,10 @@ DefineIndex(Oid relationId,
nparts);
memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+ parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+ table_close(rel, NoLock);
+ MemoryContextSwitchTo(oldcontext);
- parentDesc = RelationGetDescr(rel);
opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
for (i = 0; i < numberOfKeyAttributes; i++)
opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1199,18 +1202,20 @@ DefineIndex(Oid relationId,
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot create unique index on partitioned table \"%s\"",
- RelationGetRelationName(rel)),
+ relname),
errdetail("Table \"%s\" contains partitions that are foreign tables.",
- RelationGetRelationName(rel))));
+ relname)));
table_close(childrel, lockmode);
continue;
}
+ oldcontext = MemoryContextSwitchTo(ind_context);
childidxs = RelationGetIndexList(childrel);
attmap =
build_attrmap_by_name(RelationGetDescr(childrel),
parentDesc);
+ MemoryContextSwitchTo(oldcontext);
foreach(cell, childidxs)
{
@@ -1336,10 +1341,17 @@ DefineIndex(Oid relationId,
createdConstraintId,
is_alter_table, check_rights, check_not_in_use,
skip_build, quiet);
+ if (concurrent)
+ {
+ PushActiveSnapshot(GetTransactionSnapshot());
+ invalidate_parent = true;
+ }
}
- pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
- i + 1);
+ /* For concurrent build, this is a catalog-only stage */
+ if (!concurrent)
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+ i + 1);
free_attrmap(attmap);
}
@@ -1366,23 +1378,72 @@ DefineIndex(Oid relationId,
table_close(pg_index, RowExclusiveLock);
heap_freetuple(newtup);
}
+ } else
+ table_close(rel, NoLock);
+
+ if (concurrent)
+ {
+ List *childs;
+ ListCell *lc;
+ int npart = 0;
+
+ /* Reindex invalid child indexes created earlier */
+ MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+ ALLOCSET_DEFAULT_SIZES);
+ MemoryContext oldcontext = MemoryContextSwitchTo(ind_context);
+ childs = find_inheritance_children(indexRelationId, NoLock);
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Make the catalog changes visible to get_partition_parent */
+ CommandCounterIncrement();
+
+ foreach (lc, childs)
+ {
+ Oid indexrelid = lfirst_oid(lc);
+ Relation cldidx;
+ bool isvalid;
+
+ if (!OidIsValid(parentIndexId))
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+ npart++);
+
+ cldidx = index_open(indexrelid, ShareUpdateExclusiveLock);
+ isvalid = cldidx->rd_index->indisvalid;
+ index_close(cldidx, NoLock);
+ if (isvalid)
+ continue;
+
+ /* This may be a partitioned index, which is fine too */
+ ReindexRelationConcurrently(indexrelid, 0);
+ PushActiveSnapshot(GetTransactionSnapshot());
+ }
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * CIC needs to mark a partitioned index as VALID, which itself
+ * requires setting READY, which is unset for CIC (even though
+ * it's meaningless for an index without storage).
+ */
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+ CommandCounterIncrement();
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
}
/*
* Indexes on partitioned tables are not themselves built, so we're
* done here.
*/
- table_close(rel, NoLock);
if (!OidIsValid(parentIndexId))
pgstat_progress_end_command();
return address;
}
+ table_close(rel, NoLock);
if (!concurrent)
{
- /* Close the heap and we're done, in the non-concurrent case */
- table_close(rel, NoLock);
-
/* If this is the top-level index, we're done. */
if (!OidIsValid(parentIndexId))
pgstat_progress_end_command();
@@ -1390,10 +1451,9 @@ DefineIndex(Oid relationId,
return address;
}
- /* save lockrelid and locktag for below, then close rel */
+ /* save lockrelid and locktag for below */
heaprelid = rel->rd_lockInfo.lockRelId;
SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
- table_close(rel, NoLock);
/*
* For a concurrent build, it's important to make the catalog entries
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f78865ef81..1ae58e110f 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,60 @@ select relname, relkind, relhassubclass, inhparent::regclass
(8 rows)
drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR: cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR: could not create unique index "idxpart2_a_idx2"
+DETAIL: Key (a)=(10) is duplicated.
+\d idxpart
+ Partitioned table "public.idxpart"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition key: RANGE (a)
+Indexes:
+ "idxpart_a_idx" btree (a)
+ "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+ Partitioned table "public.idxpart1"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+ "idxpart1_a_idx" btree (a)
+ "idxpart1_a_idx1" btree (a)
+ "idxpart1_a_idx2" UNIQUE, btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+ Table "public.idxpart2"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+ "idxpart2_a_idx" btree (a)
+ "idxpart2_a_idx1" btree (a)
+ "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+
drop table idxpart;
-- Verify bugfix with query on indexed partitioned table with no partitions
-- https://postgr.es/m/[email protected]
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 35d159f41b..d908ee6d17 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,20 @@ select relname, relkind, relhassubclass, inhparent::regclass
where relname like 'idxpart%' order by relname;
drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
drop table idxpart;
-- Verify bugfix with query on indexed partitioned table with no partitions
--
2.17.0
>From 6da1017ce19902cd26b714bcdb6538dc2e128601 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Sun, 7 Jun 2020 16:58:42 -0500
Subject: [PATCH v5 3/3] Implement CLUSTER of partitioned table..
This requires either specification of a partitioned index on which to cluster,
or that an partitioned index was previously set clustered.
---
src/backend/commands/cluster.c | 139 +++++++++++++++++++-------
src/bin/psql/tab-complete.c | 1 +
src/test/regress/expected/cluster.out | 23 ++++-
src/test/regress/sql/cluster.sql | 12 ++-
4 files changed, 133 insertions(+), 42 deletions(-)
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 04d12a7ece..7409c17b0b 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -33,6 +33,7 @@
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
#include "catalog/toasting.h"
#include "commands/cluster.h"
#include "commands/progress.h"
@@ -72,6 +73,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
bool verbose, bool *pSwapToastByContent,
TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+ Oid indexOid);
+static void cluster_multiple_rels(List *rvs, int options);
/*---------------------------------------------------------------------------
@@ -124,14 +128,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster temporary tables of other sessions")));
- /*
- * Reject clustering a partitioned table.
- */
- if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot cluster a partitioned table")));
-
if (stmt->indexname == NULL)
{
ListCell *index;
@@ -169,8 +165,37 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
/* close relation, keep lock till commit */
table_close(rel, NoLock);
- /* Do the job. */
- cluster_rel(tableOid, indexOid, stmt->options);
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ /* Do the job. */
+ cluster_rel(tableOid, indexOid, stmt->options);
+ } else {
+ List *rvs;
+ MemoryContext cluster_context;
+
+ /* Check index directly since cluster_rel isn't called for partitioned table */
+ check_index_is_clusterable(rel, indexOid, true, AccessExclusiveLock);
+
+ /* Refuse to hold strong locks in a user transaction */
+ PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+ cluster_context = AllocSetContextCreate(PortalContext,
+ "Cluster",
+ ALLOCSET_DEFAULT_SIZES);
+
+ rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+ cluster_multiple_rels(rvs, stmt->options);
+
+ /* Start a new transaction for the cleanup work. */
+ StartTransactionCommand();
+
+ rel = table_open(tableOid, ShareUpdateExclusiveLock);
+ mark_index_clustered(rel, indexOid, true);
+ table_close(rel, NoLock);
+
+ /* Clean up working storage */
+ MemoryContextDelete(cluster_context);
+ }
}
else
{
@@ -180,7 +205,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
*/
MemoryContext cluster_context;
List *rvs;
- ListCell *rv;
/*
* We cannot run this form of CLUSTER inside a user transaction block;
@@ -204,25 +228,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
*/
rvs = get_tables_to_cluster(cluster_context);
- /* Commit to get out of starting transaction */
- PopActiveSnapshot();
- CommitTransactionCommand();
-
- /* Ok, now that we've got them all, cluster them one by one */
- foreach(rv, rvs)
- {
- RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
-
- /* Start a new transaction for each relation. */
- StartTransactionCommand();
- /* functions in indexes may want a snapshot set */
- PushActiveSnapshot(GetTransactionSnapshot());
- /* Do the job. */
- cluster_rel(rvtc->tableOid, rvtc->indexOid,
- stmt->options | CLUOPT_RECHECK);
- PopActiveSnapshot();
- CommitTransactionCommand();
- }
+ cluster_multiple_rels(rvs, stmt->options);
/* Start a new transaction for the cleanup work. */
StartTransactionCommand();
@@ -483,12 +489,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
Relation pg_index;
ListCell *index;
- /* Disallow applying to a partitioned table */
- if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot mark index clustered in partitioned table")));
-
/*
* If the index is already marked clustered, no need to do anything.
*/
@@ -1557,3 +1557,70 @@ get_tables_to_cluster(MemoryContext cluster_context)
return rvs;
}
+
+/*
+ * Return a List of tables and associated index, where each index is a
+ * partition of the given index
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+ List *inhoids;
+ ListCell *lc;
+ List *rvs = NIL;
+
+ MemoryContext old_context = MemoryContextSwitchTo(cluster_context);
+
+ inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+ foreach(lc, inhoids)
+ {
+ Oid indexrelid = lfirst_oid(lc);
+ Oid relid = IndexGetRelation(indexrelid, false);
+ RelToCluster *rvtc;
+
+ /*
+ * We have a full list of direct and indirect children, so skip
+ * partitioned tables and just handle their children.
+ */
+ if (get_rel_relkind(relid) == RELKIND_PARTITIONED_TABLE)
+ continue;
+
+ rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+ rvtc->tableOid = relid;
+ rvtc->indexOid = indexrelid;
+ rvs = lappend(rvs, rvtc);
+ }
+
+ MemoryContextSwitchTo(old_context);
+ return rvs;
+}
+
+/* Cluster each relation in a separate transaction */
+static void
+cluster_multiple_rels(List *rvs, int options)
+{
+ ListCell *lc;
+
+ /* Commit to get out of starting transaction */
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+
+ /* Ok, now that we've got them all, cluster them one by one */
+ foreach(lc, rvs)
+ {
+ RelToCluster *rvtc = (RelToCluster *) lfirst(lc);
+
+ /* Start a new transaction for each relation. */
+ StartTransactionCommand();
+
+ /* functions in indexes may want a snapshot set */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /* Do the job. */
+ cluster_rel(rvtc->tableOid, rvtc->indexOid,
+ options | CLUOPT_RECHECK);
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ }
+}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index c4af40bfa9..e68808218a 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -587,6 +587,7 @@ static const SchemaQuery Query_for_list_of_clusterables = {
.catname = "pg_catalog.pg_class c",
.selcondition =
"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+ CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
CppAsString2(RELKIND_MATVIEW) ")",
.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
.namespace = "c.relnamespace",
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index bdae8fe00c..21b28fde6e 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -439,13 +439,28 @@ select * from clstr_temp;
drop table clstr_temp;
RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
-ERROR: cannot mark index clustered in partitioned table
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
CLUSTER clstrpart USING clstrpart_idx;
-ERROR: cannot cluster a partitioned table
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
+ Partitioned table "public.clstrpart"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+Partition key: RANGE (a)
+Indexes:
+ "clstrpart_idx" btree (a) CLUSTER
+Number of partitions: 2 (Use \d+ to list them.)
+
DROP TABLE clstrpart;
-- Test CLUSTER with external tuplesorting
create table clstr_4 as select * from tenk1;
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 188183647c..4a76848213 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -196,11 +196,19 @@ drop table clstr_temp;
RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
CLUSTER clstrpart USING clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
DROP TABLE clstrpart;
-- Test CLUSTER with external tuplesorting
--
2.17.0