Hi,

On 2019-03-05 23:07:21 -0800, Andres Freund wrote:
> My next steps are:
> - final polish & push the basic DDL and pg_dump patches

Done and pushed. Some collation dependent fallout, I'm hoping I've just
fixed that.

> - cleanup & polish the ON CONFLICT refactoring

Here's a cleaned up version of that patch.  David, Alvaro, you also
played in that area, any objections? I think this makes that part of the
code easier to read actually. Robert, thanks for looking at that patch
already.

Greetings,

Andres Freund
>From e4caa7de3006370f52b4dafe204d45f9d99fa5a4 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Wed, 6 Mar 2019 11:30:33 -0800
Subject: [PATCH v16] Don't reuse slots between root and partition in ON
 CONFLICT ... UPDATE.

Until now the the slot to store the conflicting tuple, and the result
of the ON CONFLICT SET, where reused between partitions. That
necessitated changing slots descriptor when switching partitions.

Besides the overhead of switching descriptors on a slot (which
requires memory allocations and prevents JITing), that's importantly
also problematic for tableam. There individual partitions might belong
to different tableams, needing different kinds of slots.

In passing also fix ExecOnConflictUpdate to clear the existing slot at
exit. Otherwise that slot could continue to hold a pin till the query
ends, which could be far too long if the input data set is large, and
there's no further conflicts. While previously also problematic, it's
now more important as there will be more such slots when partitioned.

Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n...@alap3.anarazel.de
---
 src/backend/executor/execPartition.c   | 52 +++++++++++++------
 src/backend/executor/nodeModifyTable.c | 70 +++++++++-----------------
 src/include/nodes/execnodes.h          |  5 +-
 3 files changed, 64 insertions(+), 63 deletions(-)

diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index e41801662b3..4491ee69912 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -723,28 +723,55 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 		if (node->onConflictAction == ONCONFLICT_UPDATE)
 		{
 			TupleConversionMap *map;
+			TupleDesc	leaf_desc;
 
 			map = leaf_part_rri->ri_PartitionInfo->pi_RootToPartitionMap;
+			leaf_desc = RelationGetDescr(leaf_part_rri->ri_RelationDesc);
 
 			Assert(node->onConflictSet != NIL);
 			Assert(rootResultRelInfo->ri_onConflict != NULL);
 
+			leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
+
+			/*
+			 * Need a separate existing slot for each partition, as the
+			 * partition could be of a different AM, even if the tuple
+			 * descriptors match.
+			 */
+			leaf_part_rri->ri_onConflict->oc_Existing =
+				ExecInitExtraTupleSlot(mtstate->ps.state,
+									   leaf_desc,
+									   &TTSOpsBufferHeapTuple);
+
 			/*
 			 * If the partition's tuple descriptor matches exactly the root
-			 * parent (the common case), we can simply re-use the parent's ON
+			 * parent (the common case), we can re-use most of the parent's ON
 			 * CONFLICT SET state, skipping a bunch of work.  Otherwise, we
 			 * need to create state specific to this partition.
 			 */
 			if (map == NULL)
-				leaf_part_rri->ri_onConflict = rootResultRelInfo->ri_onConflict;
+			{
+				/*
+				 * It's safe to reuse these from the partition root, as we
+				 * only process one tuple at a time (therefore we won't
+				 * overwrite needed data in slots), and the results of
+				 * projections are independent of the underlying
+				 * storage. Projections and where clauses themselves don't
+				 * store state / are independent of the underlying storage.
+				 */
+				leaf_part_rri->ri_onConflict->oc_ProjSlot =
+					rootResultRelInfo->ri_onConflict->oc_ProjSlot;
+				leaf_part_rri->ri_onConflict->oc_ProjInfo =
+					rootResultRelInfo->ri_onConflict->oc_ProjInfo;
+				leaf_part_rri->ri_onConflict->oc_WhereClause =
+					rootResultRelInfo->ri_onConflict->oc_WhereClause;
+			}
 			else
 			{
 				List	   *onconflset;
 				TupleDesc	tupDesc;
 				bool		found_whole_row;
 
-				leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
-
 				/*
 				 * Translate expressions in onConflictSet to account for
 				 * different attribute numbers.  For that, map partition
@@ -778,20 +805,17 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 				/* Finally, adjust this tlist to match the partition. */
 				onconflset = adjust_partition_tlist(onconflset, map);
 
-				/*
-				 * Build UPDATE SET's projection info.  The user of this
-				 * projection is responsible for setting the slot's tupdesc!
-				 * We set aside a tupdesc that's good for the common case of a
-				 * partition that's tupdesc-equal to the partitioned table;
-				 * partitions of different tupdescs must generate their own.
-				 */
+				/* create the tuple slot for the UPDATE SET projection */
 				tupDesc = ExecTypeFromTL(onconflset);
-				ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
+				leaf_part_rri->ri_onConflict->oc_ProjSlot =
+					ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
+										   &TTSOpsVirtual);
+
+				/* build UPDATE SET projection state */
 				leaf_part_rri->ri_onConflict->oc_ProjInfo =
 					ExecBuildProjectionInfo(onconflset, econtext,
-											mtstate->mt_conflproj,
+											leaf_part_rri->ri_onConflict->oc_ProjSlot,
 											&mtstate->ps, partrelDesc);
-				leaf_part_rri->ri_onConflict->oc_ProjTupdesc = tupDesc;
 
 				/*
 				 * If there is a WHERE clause, initialize state where it will
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 14d25fd2aa8..b9bd86ff8fd 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1304,6 +1304,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 	ExprContext *econtext = mtstate->ps.ps_ExprContext;
 	Relation	relation = resultRelInfo->ri_RelationDesc;
 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
+	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
 	HeapTupleData tuple;
 	HeapUpdateFailureData hufd;
 	LockTupleMode lockmode;
@@ -1413,7 +1414,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 	ExecCheckHeapTupleVisible(estate, &tuple, buffer);
 
 	/* Store target's existing tuple in the state's dedicated slot */
-	ExecStoreBufferHeapTuple(&tuple, mtstate->mt_existing, buffer);
+	ExecStorePinnedBufferHeapTuple(&tuple, existing, buffer);
 
 	/*
 	 * Make tuple and any needed join variables available to ExecQual and
@@ -1422,13 +1423,13 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 	 * has been made to reference INNER_VAR in setrefs.c, but there is no
 	 * other redirection.
 	 */
-	econtext->ecxt_scantuple = mtstate->mt_existing;
+	econtext->ecxt_scantuple = existing;
 	econtext->ecxt_innertuple = excludedSlot;
 	econtext->ecxt_outertuple = NULL;
 
 	if (!ExecQual(onConflictSetWhere, econtext))
 	{
-		ReleaseBuffer(buffer);
+		ExecClearTuple(existing);	/* see return below */
 		InstrCountFiltered1(&mtstate->ps, 1);
 		return true;			/* done with the tuple */
 	}
@@ -1451,7 +1452,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 		 * INSERT or UPDATE path.
 		 */
 		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
-							 mtstate->mt_existing,
+							 existing,
 							 mtstate->ps.state);
 	}
 
@@ -1469,11 +1470,17 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 
 	/* Execute UPDATE with projection */
 	*returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
-							mtstate->mt_conflproj, planSlot,
+							resultRelInfo->ri_onConflict->oc_ProjSlot,
+							planSlot,
 							&mtstate->mt_epqstate, mtstate->ps.state,
 							canSetTag);
 
-	ReleaseBuffer(buffer);
+	/*
+	 * Clear out existing tuple, as there might not be another conflict among
+	 * the next input rows. Don't want to hold resources till the end of the
+	 * query.
+	 */
+	ExecClearTuple(existing);
 	return true;
 }
 
@@ -1633,7 +1640,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
 						ResultRelInfo *targetRelInfo,
 						TupleTableSlot *slot)
 {
-	ModifyTable *node;
 	ResultRelInfo *partrel;
 	PartitionRoutingInfo *partrouteinfo;
 	TupleConversionMap *map;
@@ -1698,19 +1704,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
 		slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
 	}
 
-	/* Initialize information needed to handle ON CONFLICT DO UPDATE. */
-	Assert(mtstate != NULL);
-	node = (ModifyTable *) mtstate->ps.plan;
-	if (node->onConflictAction == ONCONFLICT_UPDATE)
-	{
-		Assert(mtstate->mt_existing != NULL);
-		ExecSetSlotDescriptor(mtstate->mt_existing,
-							  RelationGetDescr(partrel->ri_RelationDesc));
-		Assert(mtstate->mt_conflproj != NULL);
-		ExecSetSlotDescriptor(mtstate->mt_conflproj,
-							  partrel->ri_onConflict->oc_ProjTupdesc);
-	}
-
 	return slot;
 }
 
@@ -2319,43 +2312,28 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		econtext = mtstate->ps.ps_ExprContext;
 		relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
 
-		/*
-		 * Initialize slot for the existing tuple.  If we'll be performing
-		 * tuple routing, the tuple descriptor to use for this will be
-		 * determined based on which relation the update is actually applied
-		 * to, so we don't set its tuple descriptor here.
-		 */
-		mtstate->mt_existing =
-			ExecInitExtraTupleSlot(mtstate->ps.state,
-								   mtstate->mt_partition_tuple_routing ?
-								   NULL : relationDesc, &TTSOpsBufferHeapTuple);
-
 		/* carried forward solely for the benefit of explain */
 		mtstate->mt_excludedtlist = node->exclRelTlist;
 
 		/* create state for DO UPDATE SET operation */
 		resultRelInfo->ri_onConflict = makeNode(OnConflictSetState);
 
-		/*
-		 * Create the tuple slot for the UPDATE SET projection.
-		 *
-		 * Just like mt_existing above, we leave it without a tuple descriptor
-		 * in the case of partitioning tuple routing, so that it can be
-		 * changed by ExecPrepareTupleRouting.  In that case, we still save
-		 * the tupdesc in the parent's state: it can be reused by partitions
-		 * with an identical descriptor to the parent.
-		 */
+		/* initialize slot for the existing tuple */
+		resultRelInfo->ri_onConflict->oc_Existing =
+			ExecInitExtraTupleSlot(mtstate->ps.state, relationDesc,
+								   &TTSOpsBufferHeapTuple);
+
+		/* create the tuple slot for the UPDATE SET projection */
 		tupDesc = ExecTypeFromTL((List *) node->onConflictSet);
-		mtstate->mt_conflproj =
-			ExecInitExtraTupleSlot(mtstate->ps.state,
-								   mtstate->mt_partition_tuple_routing ?
-								   NULL : tupDesc, &TTSOpsHeapTuple);
-		resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc;
+		resultRelInfo->ri_onConflict->oc_ProjSlot =
+			ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
+								   &TTSOpsVirtual);
 
 		/* build UPDATE SET projection state */
 		resultRelInfo->ri_onConflict->oc_ProjInfo =
 			ExecBuildProjectionInfo(node->onConflictSet, econtext,
-									mtstate->mt_conflproj, &mtstate->ps,
+									resultRelInfo->ri_onConflict->oc_ProjSlot,
+									&mtstate->ps,
 									relationDesc);
 
 		/* initialize state to evaluate the WHERE clause, if any */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 996d872c562..6a5411eba8c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -377,8 +377,9 @@ typedef struct OnConflictSetState
 {
 	NodeTag		type;
 
+	TupleTableSlot *oc_Existing;	/* slot to store existing target tuple in */
+	TupleTableSlot *oc_ProjSlot;	/* CONFLICT ... SET ... projection target */
 	ProjectionInfo *oc_ProjInfo;	/* for ON CONFLICT DO UPDATE SET */
-	TupleDesc	oc_ProjTupdesc; /* TupleDesc for the above projection */
 	ExprState  *oc_WhereClause; /* state for the WHERE clause */
 } OnConflictSetState;
 
@@ -1109,9 +1110,7 @@ typedef struct ModifyTableState
 	List	  **mt_arowmarks;	/* per-subplan ExecAuxRowMark lists */
 	EPQState	mt_epqstate;	/* for evaluating EvalPlanQual rechecks */
 	bool		fireBSTriggers; /* do we need to fire stmt triggers? */
-	TupleTableSlot *mt_existing;	/* slot to store existing target tuple in */
 	List	   *mt_excludedtlist;	/* the excluded pseudo relation's tlist  */
-	TupleTableSlot *mt_conflproj;	/* CONFLICT ... SET ... projection target */
 
 	/*
 	 * Slot for storing tuples in the root partitioned table's rowtype during
-- 
2.21.0.dirty

Reply via email to