Simon Riggs wrote:
> On Tue, 2010-04-13 at 21:09 +0300, Heikki Linnakangas wrote:
>> A quick fix would be to check if there's any entries in the hash table
>> before scanning it. That would eliminate the overhead when there's no
>> in-progress transactions in the master. But as soon as there's even one,
>> the overhead comes back.
> 
> Any fix should be fairly quick because of the way its modularised - with
> something like this in mind.
> 
> I'll try a circular buffer implementation, with fastpath.

I started experimenting with a sorted array based implementation on
Tuesday but got carried away with other stuff. I now got back to that
and cleaned it up.

How does the attached patch look like? It's probably similar to what you
had in mind.

-- 
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 88169b4..4a04051 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -64,8 +64,10 @@ typedef struct ProcArrayStruct
 	int			numProcs;		/* number of valid procs entries */
 	int			maxProcs;		/* allocated size of procs array */
 
-	int			numKnownAssignedXids;	/* current number of known assigned
-										 * xids */
+	int			firstKnownAssigned;		/* location of first valid known
+										 * assigned xid in the array */
+	int			lastKnownAssigned;		/* location of last valid known
+										 * assigned xid in the array */
 	int			maxKnownAssignedXids;	/* allocated size of known assigned
 										 * xids */
 
@@ -87,7 +89,8 @@ static ProcArrayStruct *procArray;
 /*
  * Bookkeeping for tracking emulated transactions in recovery
  */
-static HTAB *KnownAssignedXidsHash;
+static TransactionId *KnownAssignedXidsArray;
+static bool *KnownAssignedXidsValidArray;
 static TransactionId latestObservedXid = InvalidTransactionId;
 
 /*
@@ -201,28 +204,33 @@ CreateSharedProcArray(void)
 		/* Normal processing */
 		procArray->numProcs = 0;
 		procArray->maxProcs = PROCARRAY_MAXPROCS;
-		procArray->numKnownAssignedXids = 0;
 		procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
+		procArray->firstKnownAssignedXid = 0;
+		procArray->lastKnownAssignedXid = 0;
 		procArray->lastOverflowedXid = InvalidTransactionId;
 	}
 
 	if (XLogRequestRecoveryConnections)
 	{
-		/* Create or attach to the KnownAssignedXids hash table */
-		HASHCTL		info;
-
-		MemSet(&info, 0, sizeof(info));
-		info.keysize = sizeof(TransactionId);
-		info.entrysize = sizeof(TransactionId);
-		info.hash = tag_hash;
-
-		KnownAssignedXidsHash = ShmemInitHash("KnownAssignedXids Hash",
-											  TOTAL_MAX_CACHED_SUBXIDS,
-											  TOTAL_MAX_CACHED_SUBXIDS,
-											  &info,
-											  HASH_ELEM | HASH_FUNCTION);
-		if (!KnownAssignedXidsHash)
-			elog(FATAL, "could not initialize known assigned xids hash table");
+		Size size;
+		/* Create or attach to the KnownAssignedXids arrays */
+		size = mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS);
+		KnownAssignedXidsArray = ShmemInitStruct("KnownAssignedXidsArray",
+												 size,
+												 &found);
+		if (!KnownAssignedXidsArray)
+			elog(FATAL, "could not initialize known assigned xids array");
+		if (!found)
+			MemSet(KnownAssignedXidsArray, 0, size);
+
+		size = mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS);
+		KnownAssignedXidsValidArray = ShmemInitStruct("KnownAssignedXidsValidArray",
+													  size,
+													  &found);
+		if (!KnownAssignedXidsValidArray)
+			elog(FATAL, "could not initialize known assigned xids used array");
+		if (!found)
+			MemSet(KnownAssignedXidsValidArray, 0, size);
 	}
 }
 
@@ -2162,7 +2170,7 @@ DisplayXidCache(void)
  *
  * During recovery we do not fret too much about the distinction between
  * top-level xids and subtransaction xids. We hold both together in
- * a hash table called KnownAssignedXids. In backends, this is copied into
+ * an array called KnownAssignedXids. In backends, this is copied into
  * snapshots in GetSnapshotData(), taking advantage
  * of the fact that XidInMVCCSnapshot() doesn't care about the distinction
  * either. Subtransaction xids are effectively treated as top-level xids
@@ -2207,7 +2215,7 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
 	 * We can see WAL records before the running-xacts snapshot that contain
 	 * XIDs that are not in the running-xacts snapshot, but that we know to
 	 * have finished before the running-xacts snapshot was taken. Don't waste
-	 * precious shared memory by keeping them in the hash table.
+	 * precious shared memory by keeping them in the array.
 	 *
 	 * We can also see WAL records before the running-xacts snapshot that
 	 * contain XIDs that are not in the running-xacts snapshot for a different
@@ -2330,24 +2338,30 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
 /*
  * Private module functions to manipulate KnownAssignedXids
  *
- * There are 3 main users of the KnownAssignedXids data structure:
+ * We use a fixed-size sorted array in shared memory to keep track of XIDs
+ * that we consider as in-progress in the master at this time. This data
+ * structure is called KnownAssignedXids, and it has 4 main users:
  *
- *	 * backends taking snapshots
+ *	 * backends taking snapshots, copying all XIDs in the array
+ *	 * backends checking if an XID exists in the array
  *	 * startup process adding new knownassigned xids
  *	 * startup process removing xids as transactions end
  *
- * If we make KnownAssignedXids a simple sorted array then the first two
- * operations are fast, but the last one is at least O(N). If we make
- * KnownAssignedXids a hash table then the last two operations are fast,
- * though we have to do more work at snapshot time. Doing more work at
- * commit could slow down taking snapshots anyway because of lwlock
- * contention. Scanning the hash table is O(N) on the max size of the array,
- * so performs poorly in comparison when we have very low numbers of
- * write transactions to process. But at least it is constant overhead
- * and a sequential memory scan will utilise hardware memory readahead
- * to give much improved performance. In any case the emphasis must be on
- * having the standby process changes quickly so that it can provide
- * high availability. So we choose to implement as a hash table.
+ * Typically, there is only a few entries in the array, compared to the
+ * maximum size, so we keep track of the first and last valid entry in the
+ * array to make scanning it quick. That also makes it quick to add entries
+ * to the end. XIDs are assigned in monotonically increasing order, so new
+ * entries always go to the end.
+ *
+ * When an entry is removed, it's merely marked as invalid, but left in
+ * place in the array. There is a second array of booleans,
+ * KnownAssignedXidsValidArray, which keeps track of which entries in the
+ * KnownAssignedXidsArray are valid.
+ *
+ * Because we leave the entry in place when an XID is marked as removed, the
+ * array will eventually fill up. When an entry needs to be added and there
+ * is no room for it, the array is compacted by copying all valid entries to
+ * the beginning of the array, removing all invalid entries.
  */
 
 /*
@@ -2358,41 +2372,49 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
 static void
 KnownAssignedXidsAdd(TransactionId *xids, int nxids)
 {
-	TransactionId *result;
-	bool		found;
 	int			i;
 
 	for (i = 0; i < nxids; i++)
 	{
-		Assert(TransactionIdIsValid(xids[i]));
+		TransactionId xid = xids[i];
 
-		elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xids[i]);
+		Assert(TransactionIdIsValid(xid));
 
-		procArray->numKnownAssignedXids++;
-		if (procArray->numKnownAssignedXids > procArray->maxKnownAssignedXids)
-		{
-			KnownAssignedXidsDisplay(LOG);
-			LWLockRelease(ProcArrayLock);
-			elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
-		}
+		elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xid);
 
-		result = (TransactionId *) hash_search(KnownAssignedXidsHash, &xids[i], HASH_ENTER,
-											   &found);
+		Assert(procArray->lastKnownAssignedXid == 0 ||
+			   TransactionIdFollows(xid, KnownAssignedXidsArray[procArray->lastKnownAssignedXid - 1]));
 
-		if (!result)
+		/* Compact if necessary */
+		if (procArray->lastKnownAssignedXid == procArray->maxKnownAssignedXids)
 		{
-			LWLockRelease(ProcArrayLock);
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of shared memory")));
-		}
+			int j;
+			int k;
 
-		if (found)
-		{
-			KnownAssignedXidsDisplay(LOG);
-			LWLockRelease(ProcArrayLock);
-			elog(ERROR, "found duplicate KnownAssignedXid %u", xids[i]);
+			k = 0;
+			for (j = procArray->firstKnownAssignedXid; j < procArray->lastKnownAssignedXid; j++)
+			{
+				if (KnownAssignedXidsValidArray[j])
+				{
+					KnownAssignedXidsArray[k] = KnownAssignedXidsArray[j];
+					KnownAssignedXidsValidArray[k] = true;
+					k++;
+				}
+			}
+			procArray->firstKnownAssignedXid = 0;
+			procArray->lastKnownAssignedXid = k;
+
+			if (procArray->lastKnownAssignedXid == procArray->maxKnownAssignedXids)
+			{
+				KnownAssignedXidsDisplay(LOG);
+				LWLockRelease(ProcArrayLock);
+				elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
+			}
 		}
+
+		KnownAssignedXidsArray[procArray->lastKnownAssignedXid] = xid;
+		KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid] = true;
+		procArray->lastKnownAssignedXid++;
 	}
 }
 
@@ -2404,10 +2426,21 @@ KnownAssignedXidsAdd(TransactionId *xids, int nxids)
 static bool
 KnownAssignedXidsExist(TransactionId xid)
 {
-	bool		found;
+	int low = procArray->firstKnownAssignedXid;
+	int high = procArray->lastKnownAssignedXid - 1;
 
-	(void) hash_search(KnownAssignedXidsHash, &xid, HASH_FIND, &found);
-	return found;
+	while (low <= high)
+	{
+		int middle = low + (high - low) / 2;
+				
+		if (xid == KnownAssignedXidsArray[middle])
+			return true;
+		else if (xid > KnownAssignedXidsArray[middle])
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+	return false;
 }
 
 /*
@@ -2418,17 +2451,37 @@ KnownAssignedXidsExist(TransactionId xid)
 static void
 KnownAssignedXidsRemove(TransactionId xid)
 {
-	bool		found;
+	int low = procArray->firstKnownAssignedXid;
+	int high = procArray->lastKnownAssignedXid - 1;
 
 	Assert(TransactionIdIsValid(xid));
 
 	elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
 
-	(void) hash_search(KnownAssignedXidsHash, &xid, HASH_REMOVE, &found);
-
-	if (found)
-		procArray->numKnownAssignedXids--;
-	Assert(procArray->numKnownAssignedXids >= 0);
+	while (low <= high)
+	{
+		int middle = low + (high - low) / 2;
+				
+		if (xid == KnownAssignedXidsArray[middle])
+		{
+			KnownAssignedXidsValidArray[middle] = false;
+			if (middle == procArray->lastKnownAssignedXid - 1)
+			{
+				while (procArray->lastKnownAssignedXid >= 0 && !KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid - 1])
+					procArray->lastKnownAssignedXid--;
+			}
+			if (middle == procArray->firstKnownAssignedXid)
+			{
+				while (procArray->firstKnownAssignedXid < procArray->lastKnownAssignedXid && !KnownAssignedXidsValidArray[procArray->firstKnownAssignedXid])
+					procArray->firstKnownAssignedXid++;
+			}
+			return;
+		}
+		else if (xid > KnownAssignedXidsArray[middle])
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
 
 	/*
 	 * We can fail to find an xid if the xid came from a subtransaction that
@@ -2466,26 +2519,30 @@ static int
 KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
 							   TransactionId xmax)
 {
-	HASH_SEQ_STATUS status;
-	TransactionId *knownXid;
 	int			count = 0;
+	int			i;
 
-	hash_seq_init(&status, KnownAssignedXidsHash);
-	while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
+	for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
 	{
+		TransactionId xid = KnownAssignedXidsArray[i];
+
+		if (!KnownAssignedXidsValidArray[i])
+			continue;
+
 		/*
-		 * Filter out anything higher than xmax
+		 * Filter out anything higher than xmax. The array is sorted so
+		 * we can stop as soon as we find one that's too big
 		 */
-		if (TransactionIdPrecedes(xmax, *knownXid))
-			continue;
+		if (TransactionIdPrecedes(xmax, xid))
+			break;
 
-		*xarray = *knownXid;
+		*xarray = xid;
 		xarray++;
 		count++;
 
 		/* update xmin if required */
-		if (TransactionIdPrecedes(*knownXid, *xmin))
-			*xmin = *knownXid;
+		if (TransactionIdPrecedes(xid, *xmin))
+			*xmin = xid;
 	}
 
 	return count;
@@ -2500,34 +2557,34 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
 static void
 KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
 {
-	TransactionId *knownXid;
-	HASH_SEQ_STATUS status;
+	int i;
 
-	if (TransactionIdIsValid(xid))
-		elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);
-	else
+	if (!TransactionIdIsValid(xid))
+	{
 		elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
+		procArray->firstKnownAssignedXid = procArray->lastKnownAssignedXid = 0;
+		return;
+	}
+	elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);
 
-	hash_seq_init(&status, KnownAssignedXidsHash);
-	while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
+	for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
 	{
-		TransactionId removeXid = *knownXid;
-		bool		found;
+		TransactionId removeXid = KnownAssignedXidsArray[i];
+		if (!KnownAssignedXidsValidArray[i])
+			continue;
 
 		if (!TransactionIdIsValid(xid) || TransactionIdPrecedes(removeXid, xid))
 		{
 			if (keepPreparedXacts && StandbyTransactionIdIsPrepared(removeXid))
 				continue;
-			else
-			{
-				(void) hash_search(KnownAssignedXidsHash, &removeXid,
-								   HASH_REMOVE, &found);
-				if (found)
-					procArray->numKnownAssignedXids--;
-				Assert(procArray->numKnownAssignedXids >= 0);
-			}
+			KnownAssignedXidsValidArray[i] = false;
 		}
 	}
+	while (procArray->lastKnownAssignedXid >= 0 && !KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid - 1])
+		procArray->lastKnownAssignedXid--;
+
+	while (procArray->firstKnownAssignedXid < procArray->lastKnownAssignedXid && !KnownAssignedXidsValidArray[procArray->firstKnownAssignedXid])
+		procArray->firstKnownAssignedXid++;
 }
 
 /*
@@ -2538,26 +2595,21 @@ KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
 static void
 KnownAssignedXidsDisplay(int trace_level)
 {
-	HASH_SEQ_STATUS status;
-	TransactionId *knownXid;
 	StringInfoData buf;
-	TransactionId *xids;
 	int			nxids;
 	int			i;
 
-	xids = palloc(sizeof(TransactionId) * TOTAL_MAX_CACHED_SUBXIDS);
-	nxids = 0;
-
-	hash_seq_init(&status, KnownAssignedXidsHash);
-	while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
-		xids[nxids++] = *knownXid;
-
-	qsort(xids, nxids, sizeof(TransactionId), xidComparator);
-
 	initStringInfo(&buf);
 
-	for (i = 0; i < nxids; i++)
-		appendStringInfo(&buf, "%u ", xids[i]);
+	nxids = 0;
+	for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
+	{
+		if (KnownAssignedXidsValidArray[i])
+		{
+			appendStringInfo(&buf, "%u ", KnownAssignedXidsArray[i]);
+			nxids++;
+		}
+	}
 
 	elog(trace_level, "%d KnownAssignedXids %s", nxids, buf.data);
 
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index c5c1228..eee0d58 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -203,7 +203,7 @@
  * Enable debugging print statements for WAL-related operations; see
  * also the wal_debug GUC var.
  */
-/* #define WAL_DEBUG */
+#define WAL_DEBUG
 
 /*
  * Enable tracing of resource consumption during sort operations;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to