On Mon, Feb 15, 2021 at 10:07:05PM +0300, Anastasia Lubennikova wrote: > 5) Speaking of documentation, I think we need to add a paragraph about CIC > on partitioned indexes which will explain that invalid indexes may appear > and what user should do to fix them.
I'm not sure that's needed - the handling of invalid indexes is already documented in general, for non-partitioned indexes. -- Justin
>From fb60da3c0fac8f1699a6caeea57476770c66576d Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sat, 6 Jun 2020 17:42:23 -0500 Subject: [PATCH 1/5] Allow CREATE INDEX CONCURRENTLY on partitioned table Note, this effectively reverts 050098b14, so take care to not reintroduce the bug it fixed. --- doc/src/sgml/ref/create_index.sgml | 9 -- src/backend/commands/indexcmds.c | 143 ++++++++++++++++++------- src/test/regress/expected/indexing.out | 60 ++++++++++- src/test/regress/sql/indexing.sql | 18 +++- 4 files changed, 176 insertions(+), 54 deletions(-) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 965dcf472c..7c75119d78 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -686,15 +686,6 @@ Indexes: cannot. </para> - <para> - Concurrent builds for indexes on partitioned tables are currently not - supported. However, you may concurrently build the index on each - partition individually and then finally create the partitioned index - non-concurrently in order to reduce the time where writes to the - partitioned table will be locked out. In this case, building the - partitioned index is a metadata only operation. 
- </para> - </refsect2> </refsect1> diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 8bc652ecd3..9ab1a66971 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -68,6 +68,7 @@ /* non-export function prototypes */ +static void reindex_invalid_child_indexes(Oid indexRelationId); static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts); static void CheckPredicate(Expr *predicate); static void ComputeIndexAttrs(IndexInfo *indexInfo, @@ -680,17 +681,6 @@ DefineIndex(Oid relationId, partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; if (partitioned) { - /* - * Note: we check 'stmt->concurrent' rather than 'concurrent', so that - * the error is thrown also for temporary tables. Seems better to be - * consistent, even though we could do it on temporary table because - * we're not actually doing it concurrently. - */ - if (stmt->concurrent) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot create index on partitioned table \"%s\" concurrently", - RelationGetRelationName(rel)))); if (stmt->excludeOpNames) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -1128,6 +1118,11 @@ DefineIndex(Oid relationId, if (pd->nparts != 0) flags |= INDEX_CREATE_INVALID; } + else if (concurrent && OidIsValid(parentIndexId)) + { + /* If concurrent, initially build index partitions as "invalid" */ + flags |= INDEX_CREATE_INVALID; + } if (stmt->deferrable) constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE; @@ -1183,6 +1178,14 @@ DefineIndex(Oid relationId, partdesc = RelationGetPartitionDesc(rel); if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0) { + /* + * Need to close the relation before recursing into children, so + * copy needed data into a longlived context. 
+ */ + + MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldcontext = MemoryContextSwitchTo(ind_context); int nparts = partdesc->nparts; Oid *part_oids = palloc(sizeof(Oid) * nparts); bool invalidate_parent = false; @@ -1193,8 +1196,10 @@ DefineIndex(Oid relationId, nparts); memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + parentDesc = CreateTupleDescCopy(RelationGetDescr(rel)); + table_close(rel, NoLock); + MemoryContextSwitchTo(oldcontext); - parentDesc = RelationGetDescr(rel); opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes); for (i = 0; i < numberOfKeyAttributes; i++) opfamOids[i] = get_opclass_family(classObjectId[i]); @@ -1237,10 +1242,12 @@ DefineIndex(Oid relationId, continue; } + oldcontext = MemoryContextSwitchTo(ind_context); childidxs = RelationGetIndexList(childrel); attmap = build_attrmap_by_name(RelationGetDescr(childrel), parentDesc); + MemoryContextSwitchTo(oldcontext); foreach(cell, childidxs) { @@ -1311,10 +1318,14 @@ DefineIndex(Oid relationId, */ if (!found) { - IndexStmt *childStmt = copyObject(stmt); + IndexStmt *childStmt; bool found_whole_row; ListCell *lc; + oldcontext = MemoryContextSwitchTo(ind_context); + childStmt = copyObject(stmt); + MemoryContextSwitchTo(oldcontext); + /* * We can't use the same index name for the child index, * so clear idxname to let the recursive invocation choose @@ -1366,10 +1377,18 @@ DefineIndex(Oid relationId, createdConstraintId, is_alter_table, check_rights, check_not_in_use, skip_build, quiet); + if (concurrent) + { + PopActiveSnapshot(); + PushActiveSnapshot(GetTransactionSnapshot()); + invalidate_parent = true; + } } - pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, - i + 1); + /* For concurrent build, this is a catalog-only stage */ + if (!concurrent) + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + i + 1); free_attrmap(attmap); } @@ -1379,41 +1398,33 @@ DefineIndex(Oid 
relationId, * invalid, this is incorrect, so update our row to invalid too. */ if (invalidate_parent) - { - Relation pg_index = table_open(IndexRelationId, RowExclusiveLock); - HeapTuple tup, - newtup; - - tup = SearchSysCache1(INDEXRELID, - ObjectIdGetDatum(indexRelationId)); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for index %u", - indexRelationId); - newtup = heap_copytuple(tup); - ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false; - CatalogTupleUpdate(pg_index, &tup->t_self, newtup); - ReleaseSysCache(tup); - table_close(pg_index, RowExclusiveLock); - heap_freetuple(newtup); - } - } + index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID); + } else + table_close(rel, NoLock); /* * Indexes on partitioned tables are not themselves built, so we're * done here. */ - table_close(rel, NoLock); if (!OidIsValid(parentIndexId)) + { + if (concurrent) + reindex_invalid_child_indexes(indexRelationId); + pgstat_progress_end_command(); + } + return address; } - if (!concurrent) + if (!concurrent || OidIsValid(parentIndexId)) { - /* Close the heap and we're done, in the non-concurrent case */ - table_close(rel, NoLock); + /* + * We're done if this is the top-level index, + * or the catalog-only phase of a partition built concurrently + */ - /* If this is the top-level index, we're done. 
*/ + table_close(rel, NoLock); if (!OidIsValid(parentIndexId)) pgstat_progress_end_command(); @@ -1626,6 +1637,62 @@ DefineIndex(Oid relationId, return address; } +/* Reindex invalid child indexes created earlier */ +static void +reindex_invalid_child_indexes(Oid indexRelationId) +{ + ListCell *lc; + int npart = 0; + ReindexParams params = { + .options = REINDEXOPT_CONCURRENTLY + }; + + MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldcontext; + List *childs = find_inheritance_children(indexRelationId, ShareLock); + List *partitions = NIL; + + PreventInTransactionBlock(true, "REINDEX INDEX"); + + foreach (lc, childs) + { + Oid partoid = lfirst_oid(lc); + + /* XXX: need to retrofit progress reporting into it */ + // pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + // npart++); + + if (get_index_isvalid(partoid) || + !RELKIND_HAS_STORAGE(get_rel_relkind(partoid))) + continue; + + /* Save partition OID */ + oldcontext = MemoryContextSwitchTo(ind_context); + partitions = lappend_oid(partitions, partoid); + MemoryContextSwitchTo(oldcontext); + } + + /* + * Process each partition listed in a separate transaction. Note that + * this commits and then starts a new transaction immediately. + * XXX: since this is done in 2*N transactions, it could just as well + * call ReindexRelationConcurrently directly + */ + ReindexMultipleInternal(partitions, &params); + + /* + * CIC needs to mark a partitioned index as VALID, which itself + * requires setting READY, which is unset for CIC (even though + * it's meaningless for an index without storage). + * This must be done only while holding a lock which precludes adding + * partitions. + * See also: validatePartitionedIndex().
+ */ + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); + CommandCounterIncrement(); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); +} /* * CheckMutability diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index c93f4470c9..f04abc6897 100644 --- a/src/test/regress/expected/indexing.out +++ b/src/test/regress/expected/indexing.out @@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass (8 rows) drop table idxpart; --- Some unsupported features +-- CIC on partitioned table create table idxpart (a int, b int, c text) partition by range (a); -create table idxpart1 partition of idxpart for values from (0) to (10); -create index concurrently on idxpart (a); -ERROR: cannot create index on partitioned table "idxpart" concurrently +create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a); +create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a); +create table idxpart111 partition of idxpart11 default partition by range(a); +create table idxpart1111 partition of idxpart111 default partition by range(a); +create table idxpart2 partition of idxpart for values from (10) to (20); +insert into idxpart2 values(10),(10); -- not unique +create index concurrently on idxpart (a); -- partitioned +create index concurrently on idxpart1 (a); -- partitioned and partition +create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves +create index concurrently on idxpart2 (a); -- leaf +create unique index concurrently on idxpart (a); -- partitioned, unique failure +ERROR: could not create unique index "idxpart2_a_idx2_ccnew" +DETAIL: Key (a)=(10) is duplicated. 
+\d idxpart + Partitioned table "public.idxpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition key: RANGE (a) +Indexes: + "idxpart_a_idx" btree (a) + "idxpart_a_idx1" UNIQUE, btree (a) INVALID +Number of partitions: 2 (Use \d+ to list them.) + +\d idxpart1 + Partitioned table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Partition key: RANGE (a) +Indexes: + "idxpart1_a_idx" btree (a) INVALID + "idxpart1_a_idx1" btree (a) + "idxpart1_a_idx2" UNIQUE, btree (a) INVALID +Number of partitions: 1 (Use \d+ to list them.) + +\d idxpart2 + Table "public.idxpart2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (10) TO (20) +Indexes: + "idxpart2_a_idx" btree (a) + "idxpart2_a_idx1" btree (a) + "idxpart2_a_idx2" UNIQUE, btree (a) INVALID + "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID + drop table idxpart; -- Verify bugfix with query on indexed partitioned table with no partitions -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql index 42f398b67c..3d4b6e9bc9 100644 --- a/src/test/regress/sql/indexing.sql +++ b/src/test/regress/sql/indexing.sql @@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass where relname like 'idxpart%' order by relname; drop table idxpart; --- Some unsupported features +-- CIC on partitioned table create table idxpart (a int, b int, c text) partition by range (a); -create table idxpart1 partition of idxpart for values from (0) to (10); -create index concurrently on idxpart 
(a); +create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a); +create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a); +create table idxpart111 partition of idxpart11 default partition by range(a); +create table idxpart1111 partition of idxpart111 default partition by range(a); +create table idxpart2 partition of idxpart for values from (10) to (20); +insert into idxpart2 values(10),(10); -- not unique +create index concurrently on idxpart (a); -- partitioned +create index concurrently on idxpart1 (a); -- partitioned and partition +create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves +create index concurrently on idxpart2 (a); -- leaf +create unique index concurrently on idxpart (a); -- partitioned, unique failure +\d idxpart +\d idxpart1 +\d idxpart2 drop table idxpart; -- Verify bugfix with query on indexed partitioned table with no partitions -- 2.17.0
>From 0dfaaf20b8333b816ab2d4501675e9a7dd0fc436 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sun, 14 Feb 2021 18:31:43 -0600 Subject: [PATCH 2/5] f! progress reporting --- src/backend/commands/indexcmds.c | 33 +++++++------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 9ab1a66971..8f4eab22eb 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1647,40 +1647,20 @@ reindex_invalid_child_indexes(Oid indexRelationId) .options = REINDEXOPT_CONCURRENTLY }; - MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", - ALLOCSET_DEFAULT_SIZES); - MemoryContext oldcontext; - List *childs = find_inheritance_children(indexRelationId, ShareLock); - List *partitions = NIL; - PreventInTransactionBlock(true, "REINDEX INDEX"); - foreach (lc, childs) + foreach (lc, find_inheritance_children(indexRelationId, ShareLock)) { Oid partoid = lfirst_oid(lc); - /* XXX: need to retrofit progress reporting into it */ - // pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, - // npart++); - - if (get_index_isvalid(partoid) || - !RELKIND_HAS_STORAGE(get_rel_relkind(partoid))) - continue; + if (!get_index_isvalid(partoid) && + RELKIND_HAS_STORAGE(get_rel_relkind(partoid))) + ReindexRelationConcurrently(partoid, &params); - /* Save partition OID */ - oldcontext = MemoryContextSwitchTo(ind_context); - partitions = lappend_oid(partitions, partoid); - MemoryContextSwitchTo(oldcontext); + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + npart++); } - /* - * Process each partition listed in a separate transaction. Note that - * this commits and then starts a new transaction immediately.
- * XXX: since this is done in 2*N transactions, it could just as well - * call ReindexRelationConcurrently directly - */ - ReindexMultipleInternal(partitions, &params); - /* * CIC needs to mark a partitioned index as VALID, which itself * requires setting READY, which is unset for CIC (even though @@ -1689,6 +1669,7 @@ reindex_invalid_child_indexes(Oid indexRelationId) * partitions. * See also: validatePartitionedIndex(). */ + CommandCounterIncrement(); index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); CommandCounterIncrement(); index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); -- 2.17.0
>From 2a24e8bbc6f23d3e688d72a652ba0bd0dd2dc4af Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Fri, 30 Oct 2020 16:23:02 -0500 Subject: [PATCH 3/5] WIP: Add SKIPVALID flag for more integration XXX: this breaks progress reporting? --- src/backend/commands/indexcmds.c | 36 +++++++++++++++----------------- src/include/catalog/index.h | 1 + 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 8f4eab22eb..e54314e9a4 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1637,40 +1637,33 @@ DefineIndex(Oid relationId, return address; } -/* Reindex invalid child indexes created earlier */ +/* + * Reindex invalid child indexes created earlier thereby validating + * the parent index. + */ static void reindex_invalid_child_indexes(Oid indexRelationId) { - ListCell *lc; - int npart = 0; ReindexParams params = { - .options = REINDEXOPT_CONCURRENTLY + .options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID }; - PreventInTransactionBlock(true, "REINDEX INDEX"); - - foreach (lc, find_inheritance_children(indexRelationId, ShareLock)) - { - Oid partoid = lfirst_oid(lc); - - if (!get_index_isvalid(partoid) && - RELKIND_HAS_STORAGE(get_rel_relkind(partoid))) - ReindexRelationConcurrently(partoid, &params); - - pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, - npart++); - } - /* * CIC needs to mark a partitioned index as VALID, which itself * requires setting READY, which is unset for CIC (even though * it's meaningless for an index without storage). * This must be done only while holding a lock which precludes adding * partitions. - * See also: validatePartitionedIndex(). */ CommandCounterIncrement(); index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); + + /* + * Process each partition listed in a separate transaction. Note that + * this commits and then starts a new transaction immediately.
+ */ + ReindexPartitions(indexRelationId, &params, true); + CommandCounterIncrement(); index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); } @@ -3094,6 +3087,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel) if (!RELKIND_HAS_STORAGE(partkind)) continue; + /* Skip valid indexes, if requested */ + if ((params->options & REINDEXOPT_SKIPVALID) != 0 && + get_index_isvalid(partoid)) + continue; + Assert(partkind == RELKIND_INDEX || partkind == RELKIND_RELATION); diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index e22d506436..994fe94fa1 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -42,6 +42,7 @@ typedef struct ReindexParams #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */ #define REINDEXOPT_MISSING_OK 0x04 /* skip missing relations */ #define REINDEXOPT_CONCURRENTLY 0x08 /* concurrent mode */ +#define REINDEXOPT_SKIPVALID 0x10 /* skip valid indexes */ /* state info for validate_index bulkdelete callback */ typedef struct ValidateIndexState -- 2.17.0
>From 2b6fedc7cc9f3a0dfe522d789ffed414a799605c Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Fri, 30 Oct 2020 23:52:31 -0500 Subject: [PATCH 4/5] ReindexPartitions() to set indisvalid.. Something like this should probably have been included in a6642b3ae060976b42830b7dc8f29ec190ab05e4 See also 71a05b223, which mentioned the absence of any way to validate an index. --- src/backend/commands/indexcmds.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index e54314e9a4..99508b0d36 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1652,8 +1652,6 @@ reindex_invalid_child_indexes(Oid indexRelationId) * CIC needs to mark a partitioned index as VALID, which itself * requires setting READY, which is unset for CIC (even though * it's meaningless for an index without storage). - * This must be done only while holding a lock which precludes adding - * partitions. */ CommandCounterIncrement(); index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); @@ -1663,9 +1661,6 @@ reindex_invalid_child_indexes(Oid indexRelationId) * this commits and then starts a new transaction immediately. */ ReindexPartitions(indexRelationId, &params, true); - - CommandCounterIncrement(); - index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); } /* @@ -3107,6 +3102,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel) */ ReindexMultipleInternal(partitions, params); + /* + * If indexes exist on all of the partitioned table's children, and we + * just reindexed them, then we know they're valid, and so can mark the + * parent index as valid. + * This handles the case of CREATE INDEX CONCURRENTLY. + * See also: validatePartitionedIndex().
+ */ + if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX + && !get_index_isvalid(relid)) + { + Oid tableoid = IndexGetRelation(relid, false); + List *child_tables = find_all_inheritors(tableoid, ShareLock, NULL); + + /* Both lists include their parent relation as well as any intermediate partitioned rels */ + if (list_length(inhoids) == list_length(child_tables)) + index_set_state_flags(relid, INDEX_CREATE_SET_VALID); + } + /* * Clean up working storage --- note we must do this after * StartTransactionCommand, else we might be trying to delete the active -- 2.17.0