From 47e87e416dc67b1a22b7189388b897ceda76e1fa Mon Sep 17 00:00:00 2001
From: amitlan <amitlangote09@gmail.com>
Date: Tue, 25 May 2021 22:48:47 +0900
Subject: [PATCH v5 1/2] ExecFindPartition: cache last used partition

---
 src/backend/executor/execPartition.c | 252 +++++++++++++++++++--------
 1 file changed, 183 insertions(+), 69 deletions(-)

diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 606c920b06..dd812ae3fc 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -133,6 +133,16 @@ struct PartitionTupleRouting
  *		routing it through this table). A NULL value is stored if no tuple
  *		conversion is required.
  *
+ * savedPartResultInfo
+ *		If non-NULL, ResultRelInfo for the partition that was most recently
+ *		chosen as the routing target; ExecFindPartition() checks if the
+ *		same one can be used for the current row before applying the tuple-
+ *		routing algorithm to it.
+ *
+ * savedPartDispatchInfo
+ *		If non-NULL, PartitionDispatch for the sub-partitioned partition
+ *		that was most recently chosen as the routing target.
+ *
  * indexes
  *		Array of partdesc->nparts elements.  For leaf partitions the index
  *		corresponds to the partition's ResultRelInfo in the encapsulating
@@ -150,6 +160,8 @@ typedef struct PartitionDispatchData
 	PartitionDesc partdesc;
 	TupleTableSlot *tupslot;
 	AttrMap    *tupmap;
+	ResultRelInfo *savedPartResultInfo;
+	PartitionDispatch savedPartDispatchInfo;
 	int			indexes[FLEXIBLE_ARRAY_MEMBER];
 }			PartitionDispatchData;
 
@@ -234,6 +246,87 @@ ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
 	return proute;
 }
 
+/*
+ * SavePartitionForNextTuple
+ *		Cache the partition just chosen under 'dispatch' so that the next
+ *		tuple routed through this parent can try it first.  The reuse test
+ *		itself is in CanUseSavedPartitionForTuple().
+ *
+ * Nothing is cached unless the parent is range- or list-partitioned; those
+ * are the strategies where consecutive rows plausibly share a partition.
+ */
+static inline void
+SavePartitionForNextTuple(PartitionDispatch dispatch,
+						  ResultRelInfo *partInfo,
+						  PartitionDispatch dispatchInfo)
+{
+	if (dispatch->key->strategy != PARTITION_STRATEGY_RANGE &&
+		dispatch->key->strategy != PARTITION_STRATEGY_LIST)
+		return;
+
+	dispatch->savedPartDispatchInfo = dispatchInfo;
+	dispatch->savedPartResultInfo = partInfo;
+}
+
+/*
+ * CanUseSavedPartitionForTuple
+ *		Does the partition cached in 'dispatch' accept the tuple in
+ *		'rootslot'?  Decided by running the cached partition's partition
+ *		constraint, so a hit lets the caller skip the partition search in
+ *		get_partition_for_tuple().  False if nothing is cached.
+ */
+static inline bool
+CanUseSavedPartitionForTuple(PartitionDispatch dispatch,
+							 TupleTableSlot *rootslot,
+							 EState *estate)
+{
+	ResultRelInfo *saved = dispatch->savedPartResultInfo;
+	TupleTableSlot *checkslot = rootslot;
+
+	/* Nothing remembered from an earlier tuple. */
+	if (saved == NULL)
+		return false;
+
+	/*
+	 * ExecPartitionCheck() expects the tuple in the partition's own
+	 * layout, whereas 'rootslot' holds it in the root parent's layout.
+	 * Convert through the root-to-partition map if the partition has
+	 * one; otherwise the root slot can be checked as is.
+	 */
+	if (saved->ri_RootToPartitionMap != NULL)
+	{
+		AttrMap    *attrmap = saved->ri_RootToPartitionMap->attrMap;
+
+		checkslot = execute_attr_map_slot(attrmap, rootslot,
+										  saved->ri_PartitionTupleSlot);
+	}
+
+	return ExecPartitionCheck(saved, checkslot, estate, false);
+}
+
+/*
+ * ConvertTupleToPartition
+ *		Return 'slot' re-laid-out for sub-partitioned table 'dispatch'.
+ */
+static inline TupleTableSlot *
+ConvertTupleToPartition(PartitionDispatch dispatch,
+						TupleTableSlot *slot,
+						TupleTableSlot *parent_slot)
+{
+	/* Without a dedicated slot there is nothing to convert. */
+	if (dispatch->tupslot == NULL)
+		return slot;
+
+	Assert(dispatch->tupmap != NULL);
+	slot = execute_attr_map_slot(dispatch->tupmap, slot, dispatch->tupslot);
+
+	/* The previous parent's copy is no longer needed; release it. */
+	if (parent_slot != NULL)
+		ExecClearTuple(parent_slot);
+
+	return slot;
+}
+
 /*
  * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
  * the tuple contained in *slot should belong to.
@@ -292,6 +385,35 @@ ExecFindPartition(ModifyTableState *mtstate,
 		CHECK_FOR_INTERRUPTS();
 
 		rel = dispatch->reldesc;
+
+		if (CanUseSavedPartitionForTuple(dispatch, rootslot, estate))
+		{
+			/* If the saved partition is a leaf partition, just return it. */
+			if (dispatch->savedPartDispatchInfo == NULL)
+			{
+				/* Restore ecxt's scantuple before returning. */
+				ecxt->ecxt_scantuple = ecxt_scantuple_saved;
+				MemoryContextSwitchTo(oldcxt);
+				return dispatch->savedPartResultInfo;
+			}
+			else
+			{
+				/*
+				 * Saved partition is sub-partitioned, so continue the loop to
+				 * find the next level partition.
+				 */
+				myslot = dispatch->tupslot;
+				dispatch = dispatch->savedPartDispatchInfo;
+				slot = ConvertTupleToPartition(dispatch, slot, myslot);
+				continue;
+			}
+		}
+		else
+		{
+			dispatch->savedPartResultInfo = rri = NULL;
+			dispatch->savedPartDispatchInfo = NULL;
+		}
+
 		partdesc = dispatch->partdesc;
 
 		/*
@@ -331,16 +453,10 @@ ExecFindPartition(ModifyTableState *mtstate,
 		if (is_leaf)
 		{
 			/*
-			 * We've reached the leaf -- hurray, we're done.  Look to see if
-			 * we've already got a ResultRelInfo for this partition.
+			 * We've reached the leaf -- hurray, we're done.  Build the
+			 * ResultRelInfo for this partition if not already done.
 			 */
-			if (likely(dispatch->indexes[partidx] >= 0))
-			{
-				/* ResultRelInfo already built */
-				Assert(dispatch->indexes[partidx] < proute->num_partitions);
-				rri = proute->partitions[dispatch->indexes[partidx]];
-			}
-			else
+			if (unlikely(dispatch->indexes[partidx] < 0))
 			{
 				/*
 				 * If the partition is known in the owning ModifyTableState
@@ -370,65 +486,50 @@ ExecFindPartition(ModifyTableState *mtstate,
 												rootResultRelInfo, partidx);
 				}
 			}
+
+			Assert(dispatch->indexes[partidx] < proute->num_partitions);
+			rri = proute->partitions[dispatch->indexes[partidx]];
 			Assert(rri != NULL);
 
+			SavePartitionForNextTuple(dispatch, rri, NULL);
+
 			/* Signal to terminate the loop */
 			dispatch = NULL;
 		}
 		else
 		{
+			PartitionDispatch subdispatch;
+
 			/*
-			 * Partition is a sub-partitioned table; get the PartitionDispatch
+			 * Partition is a sub-partitioned table; get the PartitionDispatch.
+			 * Build it if not already done, passing the current one in as the
+			 * parent PartitionDispatch.
 			 */
-			if (likely(dispatch->indexes[partidx] >= 0))
-			{
-				/* Already built. */
-				Assert(dispatch->indexes[partidx] < proute->num_dispatch);
-
-				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
-
-				/*
-				 * Move down to the next partition level and search again
-				 * until we find a leaf partition that matches this tuple
-				 */
-				dispatch = pd[dispatch->indexes[partidx]];
-			}
-			else
-			{
-				/* Not yet built. Do that now. */
-				PartitionDispatch subdispatch;
-
-				/*
-				 * Create the new PartitionDispatch.  We pass the current one
-				 * in as the parent PartitionDispatch
-				 */
+			if (unlikely(dispatch->indexes[partidx] < 0))
 				subdispatch = ExecInitPartitionDispatchInfo(estate,
 															proute,
 															partdesc->oids[partidx],
 															dispatch, partidx,
 															mtstate->rootResultRelInfo);
-				Assert(dispatch->indexes[partidx] >= 0 &&
-					   dispatch->indexes[partidx] < proute->num_dispatch);
-
-				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
-				dispatch = subdispatch;
-			}
+			Assert(dispatch->indexes[partidx] >= 0 &&
+				   dispatch->indexes[partidx] < proute->num_dispatch);
 
 			/*
-			 * Convert the tuple to the new parent's layout, if different from
-			 * the previous parent.
+			 * Move down to the next partition level and search again
+			 * until we find a leaf partition that matches this tuple
 			 */
-			if (dispatch->tupslot)
-			{
-				AttrMap    *map = dispatch->tupmap;
-				TupleTableSlot *tempslot = myslot;
+			subdispatch = pd[dispatch->indexes[partidx]];
+			rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
 
-				myslot = dispatch->tupslot;
-				slot = execute_attr_map_slot(map, slot, myslot);
+			/*
+			 * Save both the PartitionDispatch and the ResultRelInfo of
+			 * this partition to consider reusing for the next tuple.
+			 */
+			SavePartitionForNextTuple(dispatch, rri, subdispatch);
 
-				if (tempslot != NULL)
-					ExecClearTuple(tempslot);
-			}
+			myslot = dispatch->tupslot;
+			dispatch = subdispatch;
+			slot = ConvertTupleToPartition(dispatch, slot, myslot);
 		}
 
 		/*
@@ -858,27 +959,11 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 	return leaf_part_rri;
 }
 
-/*
- * ExecInitRoutingInfo
- *		Set up information needed for translating tuples between root
- *		partitioned table format and partition format, and keep track of it
- *		in PartitionTupleRouting.
- */
-static void
-ExecInitRoutingInfo(ModifyTableState *mtstate,
-					EState *estate,
-					PartitionTupleRouting *proute,
-					PartitionDispatch dispatch,
-					ResultRelInfo *partRelInfo,
-					int partidx,
-					bool is_borrowed_rel)
+static inline void
+InitRootToPartitionMap(ResultRelInfo *partRelInfo,
+					   ResultRelInfo *rootRelInfo,
+					   EState *estate)
 {
-	ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo;
-	MemoryContext oldcxt;
-	int			rri_index;
-
-	oldcxt = MemoryContextSwitchTo(proute->memcxt);
-
 	/*
 	 * Set up a tuple conversion map to convert a tuple routed to the
 	 * partition from the parent's type to the partition's.
@@ -907,6 +992,30 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
 	}
 	else
 		partRelInfo->ri_PartitionTupleSlot = NULL;
+}
+
+/*
+ * ExecInitRoutingInfo
+ *		Set up information needed for translating tuples between root
+ *		partitioned table format and partition format, and keep track of it
+ *		in PartitionTupleRouting.
+ */
+static void
+ExecInitRoutingInfo(ModifyTableState *mtstate,
+					EState *estate,
+					PartitionTupleRouting *proute,
+					PartitionDispatch dispatch,
+					ResultRelInfo *partRelInfo,
+					int partidx,
+					bool is_borrowed_rel)
+{
+	ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo;
+	MemoryContext oldcxt;
+	int			rri_index;
+
+	oldcxt = MemoryContextSwitchTo(proute->memcxt);
+
+	InitRootToPartitionMap(partRelInfo, rootRelInfo, estate);
 
 	/*
 	 * If the partition is a foreign table, let the FDW init itself for
@@ -1051,6 +1160,9 @@ ExecInitPartitionDispatchInfo(EState *estate,
 		pd->tupslot = NULL;
 	}
 
+	pd->savedPartResultInfo = NULL;
+	pd->savedPartDispatchInfo = NULL;
+
 	/*
 	 * Initialize with -1 to signify that the corresponding partition's
 	 * ResultRelInfo or PartitionDispatch has not been created yet.
@@ -1094,6 +1206,8 @@ ExecInitPartitionDispatchInfo(EState *estate,
 		ResultRelInfo *rri = makeNode(ResultRelInfo);
 
 		InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
+		/* The map is needed in CanUseSavedPartitionForTuple(). */
+		InitRootToPartitionMap(rri, rootResultRelInfo, estate);
 		proute->nonleaf_partitions[dispatchidx] = rri;
 	}
 	else
-- 
2.24.1

