On 2021-Apr-23, Alvaro Herrera wrote:
> I think the patch I posted was too simple. I think a real fix requires
> us to keep track of exactly in what way the partdesc is outdated, so
> that we can compare to the current situation in deciding to use that
> partdesc or build a new one. For example, we could keep track of a list
> of OIDs of detached partitions (and it simplifies things a lot if allow
> only a single partition in this situation, because it's either zero OIDs
> or one OID); or we can save the Xmin of the pg_inherits tuple for the
> detached partition (and we can compare that Xmin to our current active
> snapshot and decide whether to use that partdesc or not).
>
> I'll experiment with this a little more and propose a patch later today.
This (POC-quality) seems to do the trick.
(I restored the API of find_inheritance_children, because it was getting
a little obnoxious. I haven't thought this through but I think we
should do something like it.)
> I don't think it's too much of a problem to state that you need to
> finalize one detach before you can do the next one. After all, with
> regular detach, you'd have the tables exclusively locked so you can't do
> them in parallel anyway. (It also increases the chances that people
> will finalize detach operations that went unnoticed.)
I haven't added a mechanism to verify this; but with asserts on, this
patch will crash if you have more than one. I think the behavior is not
necessarily sane with asserts off, since you'll get an arbitrary
detach-Xmin assigned to the partdesc, depending on catalog scan order.
--
Álvaro Herrera Valdivia, Chile
"El hombre nunca sabe de lo que es capaz hasta que lo intenta" (C. Dickens)
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c
index 6447b52854..e3ee72c0b7 100644
--- a/src/backend/catalog/pg_inherits.c
+++ b/src/backend/catalog/pg_inherits.c
@@ -63,8 +63,16 @@ typedef struct SeenRelsEntry
* committed to the active snapshot.
*/
List *
-find_inheritance_children(Oid parentrelId, bool omit_detached,
- LOCKMODE lockmode, bool *detached_exist)
+find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
+{
+ return find_inheritance_children_extended(parentrelId, true, lockmode,
+ NULL, NULL);
+}
+
+List *
+find_inheritance_children_extended(Oid parentrelId, bool omit_detached,
+ LOCKMODE lockmode, bool *detached_exist,
+ TransactionId *detached_xmin)
{
List *list = NIL;
Relation relation;
@@ -132,7 +140,15 @@ find_inheritance_children(Oid parentrelId, bool omit_detached,
snap = GetActiveSnapshot();
if (!XidInMVCCSnapshot(xmin, snap))
+ {
+ /* XXX only one detached partition allowed */
+ if (detached_xmin)
+ {
+ Assert(*detached_xmin == InvalidTransactionId);
+ *detached_xmin = xmin;
+ }
continue;
+ }
}
}
@@ -247,8 +263,7 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
ListCell *lc;
/* Get the direct children of this rel */
- currentchildren = find_inheritance_children(currentrel, true,
- lockmode, NULL);
+ currentchildren = find_inheritance_children(currentrel, lockmode);
/*
* Add to the queue only those children not already seen. This avoids
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 7d00f4eb25..dd931d09af 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -3507,7 +3507,7 @@ renameatt_internal(Oid myrelid,
* expected_parents will only be 0 if we are not already recursing.
*/
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, true, NoLock, NULL) != NIL)
+ find_inheritance_children(myrelid, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited column \"%s\" must be renamed in child tables too",
@@ -3706,7 +3706,7 @@ rename_constraint_internal(Oid myrelid,
else
{
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, true, NoLock, NULL) != NIL)
+ find_inheritance_children(myrelid, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited constraint \"%s\" must be renamed in child tables too",
@@ -6580,7 +6580,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
*/
if (colDef->identity &&
recurse &&
- find_inheritance_children(myrelid, true, NoLock, NULL) != NIL)
+ find_inheritance_children(myrelid, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("cannot recursively add identity column to table that has child tables")));
@@ -6826,7 +6826,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
* use find_all_inheritors to do it in one pass.
*/
children =
- find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL);
+ find_inheritance_children(RelationGetRelid(rel), lockmode);
/*
* If we are told not to recurse, there had better not be any child
@@ -7689,7 +7689,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs
* resulting state can be properly dumped and restored.
*/
if (!recurse &&
- find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL))
+ find_inheritance_children(RelationGetRelid(rel), lockmode))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
@@ -8297,7 +8297,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
* use find_all_inheritors to do it in one pass.
*/
children =
- find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL);
+ find_inheritance_children(RelationGetRelid(rel), lockmode);
if (children)
{
@@ -8785,7 +8785,7 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
* use find_all_inheritors to do it in one pass.
*/
children =
- find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL);
+ find_inheritance_children(RelationGetRelid(rel), lockmode);
/*
* Check if ONLY was specified with ALTER TABLE. If so, allow the
@@ -11318,8 +11318,7 @@ ATExecDropConstraint(Relation rel, const char *constrName,
* use find_all_inheritors to do it in one pass.
*/
if (!is_no_inherit_constraint)
- children = find_inheritance_children(RelationGetRelid(rel), true,
- lockmode, NULL);
+ children = find_inheritance_children(RelationGetRelid(rel), lockmode);
else
children = NIL;
@@ -11703,8 +11702,7 @@ ATPrepAlterColumnType(List **wqueue,
}
}
else if (!recursing &&
- find_inheritance_children(RelationGetRelid(rel), true,
- NoLock, NULL) != NIL)
+ find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("type of inherited column \"%s\" must be changed in child tables too",
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index d8393aa4de..f305f8bc0f 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -1141,8 +1141,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
ListCell *l;
List *idxs = NIL;
- idxs = find_inheritance_children(indexOid, true,
- ShareRowExclusiveLock, NULL);
+ idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock);
foreach(l, idxs)
childTbls = lappend_oid(childTbls,
IndexGetRelation(lfirst_oid(l),
diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c
index 2305dff407..73fa0459a1 100644
--- a/src/backend/partitioning/partdesc.c
+++ b/src/backend/partitioning/partdesc.c
@@ -78,11 +78,33 @@ RelationGetPartitionDesc(Relation rel, bool omit_detached)
* If relcache has a partition descriptor, use that. However, we can only
* do so when we are asked to include all partitions including detached;
* and also when we know that there are no detached partitions.
+ *
+ * If there is no active snapshot, detached partitions aren't omitted
+ * either, so we can use the cached descriptor too in that case.
*/
if (likely(rel->rd_partdesc &&
- (!rel->rd_partdesc->detached_exist || !omit_detached)))
+ (!rel->rd_partdesc->detached_exist || !omit_detached ||
+ !ActiveSnapshotSet())))
return rel->rd_partdesc;
+ /*
+ * If we're asked to omit detached partitions, we may be able to use a
+ * cached version of it, if it was built with the same conditions that
+ * we see now.
+ */
+ if (omit_detached &&
+ rel->rd_partdesc_nodetached &&
+ TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin) &&
+ ActiveSnapshotSet())
+ {
+ Snapshot activesnap;
+
+ activesnap = GetActiveSnapshot();
+
+ if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap))
+ return rel->rd_partdesc_nodetached;
+ }
+
return RelationBuildPartitionDesc(rel, omit_detached);
}
@@ -117,6 +139,7 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
Oid *oids = NULL;
bool *is_leaf = NULL;
bool detached_exist;
+ TransactionId detached_xmin;
ListCell *cell;
int i,
nparts;
@@ -132,8 +155,11 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
* some well-defined point in time.
*/
detached_exist = false;
- inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached,
- NoLock, &detached_exist);
+ detached_xmin = InvalidTransactionId;
+ inhoids = find_inheritance_children_extended(RelationGetRelid(rel),
+ omit_detached, NoLock,
+ &detached_exist,
+ &detached_xmin);
nparts = list_length(inhoids);
/* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
@@ -282,36 +308,29 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
/*
* We have a fully valid partdesc. Reparent it so that it has the right
- * lifespan, and if appropriate put it into the relation's relcache entry.
+ * lifespan.
*/
- if (omit_detached && detached_exist)
+ MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+
+ /*
+ * But first, a kluge: if there's an old rd_pdcxt, it contains an old
+ * partition descriptor that may still be referenced somewhere.
+ * Preserve it, while not leaking it, by reattaching it as a child
+ * context of the new rd_pdcxt. Eventually it will get dropped by
+ * either RelationClose or RelationClearRelation.
+ */
+ if (rel->rd_pdcxt != NULL)
+ MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
+ rel->rd_pdcxt = new_pdcxt;
+
+ /* Store it into relcache */
+ if (omit_detached && detached_exist && ActiveSnapshotSet())
{
- /*
- * A transient partition descriptor is only good for the current
- * statement, so make it a child of the current portal's context.
- */
- MemoryContextSetParent(new_pdcxt, PortalContext);
+ rel->rd_partdesc_nodetached = partdesc;
+ rel->rd_partdesc_nodetached_xmin = detached_xmin;
}
else
{
- /*
- * This partdesc goes into relcache.
- */
-
- MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
-
- /*
- * But first, a kluge: if there's an old rd_pdcxt, it contains an old
- * partition descriptor that may still be referenced somewhere.
- * Preserve it, while not leaking it, by reattaching it as a child
- * context of the new rd_pdcxt. Eventually it will get dropped by
- * either RelationClose or RelationClearRelation.
- */
- if (rel->rd_pdcxt != NULL)
- MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
- rel->rd_pdcxt = new_pdcxt;
-
- /* Store it into relcache */
rel->rd_partdesc = partdesc;
}
diff --git a/src/include/catalog/pg_inherits.h b/src/include/catalog/pg_inherits.h
index 4d28ede5a6..f47925588a 100644
--- a/src/include/catalog/pg_inherits.h
+++ b/src/include/catalog/pg_inherits.h
@@ -50,8 +50,10 @@ DECLARE_INDEX(pg_inherits_parent_index, 2187, on pg_inherits using btree(inhpare
#define InheritsParentIndexId 2187
-extern List *find_inheritance_children(Oid parentrelId, bool omit_detached,
- LOCKMODE lockmode, bool *detached_exist);
+extern List *find_inheritance_children(Oid parentrelId, LOCKMODE lockmode);
+extern List *find_inheritance_children_extended(Oid parentrelId, bool omit_detached,
+ LOCKMODE lockmode, bool *detached_exist, TransactionId *detached_xmin);
+
extern List *find_all_inheritors(Oid parentrelId, LOCKMODE lockmode,
List **parents);
extern bool has_subclass(Oid relationId);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 34e25eb597..6e5a5bf4a7 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -127,6 +127,9 @@ typedef struct RelationData
/* data managed by RelationGetPartitionDesc: */
PartitionDesc rd_partdesc; /* partition descriptor, or NULL */
+ PartitionDesc rd_partdesc_nodetached; /* partition descriptor */
+ TransactionId rd_partdesc_nodetached_xmin; /* xmin of partdesc */
+
MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */
/* data managed by RelationGetPartitionQual: */