On Mon, Mar 27, 2023 at 01:28:24PM +0300, Alexander Pyhalov wrote:
> Justin Pryzby писал 2023-03-26 17:51:
> > On Sun, Dec 04, 2022 at 01:09:35PM -0600, Justin Pryzby wrote:
> > > This currently handles partitions with a loop around the whole CIC
> > > implementation, which means that things like WaitForLockers() happen
> > > once for each index, the same as REINDEX CONCURRENTLY on a partitioned
> > > table.  Contrast that with ReindexRelationConcurrently(), which handles
> > > all the indexes on a table in one pass by looping around indexes within
> > > each phase.
> > 
> > Rebased over the progress reporting fix (27f5c712b).
> > 
> > I added a list of (intermediate) partitioned tables, rather than looping
> > over the list of inheritors again, to save calling rel_get_relkind().
> > 
> > I think this patch is done.
> 
> Overall looks good to me. However, I think that using 'partitioned' as list
> of partitioned index oids in DefineIndex() is a bit misleading - we've just
> used it as boolean, specifying if we are dealing with a partitioned
> relation.

Right.  This is also rebased on 8c852ba9a4 (Allow some exclusion
constraints on partitions).

-- 
Justin
>From 3f60cbdd12b67115f86854ff60a4009028b8b99f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

https://www.postgresql.org/message-id/flat/20201031063117.gf3...@telsasoft.com
---
 doc/src/sgml/ddl.sgml                  |   4 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 201 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out | 127 +++++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 +++-
 5 files changed, 297 insertions(+), 75 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 58aaa691c6a..afa982154a8 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4161,9 +4161,7 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      so that they are applied automatically to the entire hierarchy.
      This is very
      convenient, as not only will the existing partitions become indexed, but
-     also any partitions that are created in the future will.  One limitation is
-     that it's not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier when creating such a partitioned index.  To avoid long lock
+     also any partitions that are created in the future will.  To avoid long lock
      times, it is possible to use <command>CREATE INDEX ON ONLY</command>
      the partitioned table; such an index is marked invalid, and the partitions
      do not get the index applied automatically.  The indexes on partitions can
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 40986aa502f..b05102efdaf 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while build an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index baf3e6e57a5..dfe64052b81 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -92,6 +92,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(List *colnames);
 static List *ChooseIndexColumnNames(List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -555,7 +560,6 @@ DefineIndex(Oid relationId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -563,12 +567,10 @@ DefineIndex(Oid relationId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -699,20 +701,6 @@ DefineIndex(Oid relationId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1102,10 +1090,6 @@ DefineIndex(Oid relationId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1170,6 +1154,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1519,21 +1508,7 @@ DefineIndex(Oid relationId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1545,37 +1520,49 @@ DefineIndex(Oid relationId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1586,6 +1573,114 @@ DefineIndex(Oid relationId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(relationId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				continue;
+			}
+
+			rel = table_open(relationId, ShareUpdateExclusiveLock);
+			heaprelid = rel->rd_lockInfo.lockRelId;
+			table_close(rel, ShareUpdateExclusiveLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			tabrelid = IndexGetRelation(indrelid, false);
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid relationId,
+							  Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1781,10 +1876,6 @@ DefineIndex(Oid relationId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 2be8ffa7ec4..aefa203b14f 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index b69c41832ca..5ddeaf1c613 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

Reply via email to