From b1857cf7f60484d7e8409a60f106939875cfb632 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@enterprisedb.com>
Date: Mon, 10 Apr 2017 14:03:12 +1200
Subject: [PATCH] Fix isolation tester performance under CLOBBER_CACHE_ALWAYS.

Commit 4deb4138 added an isolation test for SERIALIZABLE DEFERRABLE.  It
included a change to a query used internally by the isolation tester which
turned out to perform terribly under CLOBBER_CACHE_ALWAYS.  Move the new
logic into C code, and while doing that also improve the performance of the
existing logic.  Package as an undocumented non-public function
pg_isolation_test_session_is_blocked().
---
 src/backend/storage/lmgr/predicate.c      | 48 +++++++++++++++++++++
 src/backend/utils/adt/lockfuncs.c         | 72 +++++++++++++++++++++++++++++++
 src/include/catalog/pg_proc.h             |  2 +
 src/include/storage/predicate_internals.h |  2 +
 src/test/isolation/isolationtester.c      | 12 +-----
 5 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 10bac71e94b..032fae36361 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1556,6 +1556,54 @@ GetSafeSnapshot(Snapshot origSnapshot)
 }
 
 /*
+ * If the given process is currently blocked in GetSafeSnapshot, write the
+ * process IDs of all processes that it is waiting for into caller-supplied
+ * buffer 'output'.  The list is truncated at 'output_size', and the number of
+ * PIDs written into the output buffer is returned.  Return zero if the given
+ * not currently blocked in GetSafeSnapshot or unknown.
+ */
+int
+GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
+{
+	SERIALIZABLEXACT *sxact;
+	int			num_written = 0;
+
+	LWLockAcquire(SerializableXactHashLock, LW_SHARED);
+
+	/* Find blocked_pid's SERIALIZABLEXACT by linear search. */
+	for (sxact = FirstPredXact(); sxact != NULL; sxact = NextPredXact(sxact))
+	{
+		if (sxact->pid == blocked_pid)
+			break;
+	}
+
+	/* Did we find it, and is it currently waiting in GetSafeSnapshot? */
+	if (sxact != NULL && SxactIsDeferrableWaiting(sxact))
+	{
+		RWConflict possibleUnsafeConflict;
+
+		/* Traverse the list of possible unsafe conflicts collecting PIDs. */
+		possibleUnsafeConflict = (RWConflict)
+			SHMQueueNext(&sxact->possibleUnsafeConflicts,
+						 &sxact->possibleUnsafeConflicts,
+						 offsetof(RWConflictData, inLink));
+
+		while (num_written < output_size && possibleUnsafeConflict != NULL)
+		{
+			output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
+			possibleUnsafeConflict = (RWConflict)
+				SHMQueueNext(&sxact->possibleUnsafeConflicts,
+							 &possibleUnsafeConflict->inLink,
+							 offsetof(RWConflictData, inLink));
+		}
+	}
+
+	LWLockRelease(SerializableXactHashLock);
+
+	return num_written;
+}
+
+/*
  * Acquire a snapshot that can be used for the current transaction.
  *
  * Make sure we have a SERIALIZABLEXACT reference in MySerializableXact.
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 63f956e6708..726ba73d427 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -516,6 +516,78 @@ pg_blocking_pids(PG_FUNCTION_ARGS)
 										  sizeof(int32), true, 'i'));
 }
 
+/*
+ * Check if a backend is waiting for a heavyweight lock or a safe snapshot.
+ * Use a whitelist of PIDs for backends involved in an isolation test, as a
+ * way of ignoring locks held incidentally by autovacuum.
+ *
+ * This is an undocumented function intended for use by the isolation tester,
+ * and may change in future releases as required for testing purposes.
+ */
+Datum
+pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
+{
+	int			blocked_pid = PG_GETARG_INT32(0);
+	ArrayType  *interesting_pids = PG_GETARG_ARRAYTYPE_P(1);
+	ArrayType  *blocking_pids;
+	int			num_interesting_pids;
+	int			num_blocking_pids;
+	int			dummy;
+	int			i,
+				j;
+
+	/*
+	 * Get the PIDs of all sessions blocking the given session's attempt to
+	 * acquire heavyweight locks.
+	 */
+	blocking_pids =
+		DatumGetArrayTypeP(DirectFunctionCall1(pg_blocking_pids, blocked_pid));
+
+	/*
+	 * Check if any of these are in the list of interesting PIDs, being the
+	 * sessions that the isolation tester is running.  We don't use
+	 * "arrayoverlaps" here, because it would lead to cache lookups and one of
+	 * our goals is to run quickly under CLOBBER_CACHE_ALWAYS.  We expect
+	 * blocking_pids to be usually empty and otherwise a very small number in
+	 * the isolation tester cases, so make that the outer loop of a naive
+	 * search for a match.  We use knowledge about the layout of int[]
+	 * inspired by contrib/intarray rather than going through the polymorphic
+	 * array interface, after sanity checking the types.
+	 */
+
+	Assert(ARR_NDIM(blocking_pids) == 1);
+	Assert(ARR_ELEMTYPE(blocking_pids) == INT4OID);
+	num_blocking_pids = ArrayGetNItems(1, ARR_DIMS(blocking_pids));
+
+	/* Tolerate zero-dimensional arrays cast from "'{}'::int[]". */
+	Assert(ARR_NDIM(interesting_pids) == 0 || ARR_NDIM(interesting_pids) == 1);
+	Assert(ARR_ELEMTYPE(interesting_pids) == INT4OID);
+	num_interesting_pids = ArrayGetNItems(ARR_NDIM(interesting_pids),
+										  ARR_DIMS(interesting_pids));
+	if (ARR_HASNULL(interesting_pids) && array_contains_nulls(interesting_pids))
+		elog(ERROR, "array must not contain nulls");
+
+#define RAWINTS(x) ((int32 *) (ARR_DATA_PTR((x))))
+
+	for (i = 0; i < num_blocking_pids; ++i)
+		for (j = 0; j < num_interesting_pids; ++j)
+			if (RAWINTS(blocking_pids)[i] == RAWINTS(interesting_pids)[j])
+				PG_RETURN_BOOL(true);
+
+	/*
+	 * Check if blocked_pid is waiting for a safe snapshot.  We could in
+	 * theory also check the resulting array of blocker PIDs against the
+	 * interesting PIDs whitelist, but since there is no danger of autovacuum
+	 * blocking GetSafeSnapshot there seems to be no point in expending cycles
+	 * on allocating a buffer and searching for overlap there too: it's
+	 * sufficient for the isolation tester's purposes to use a single element
+	 * buffer and check if the number of safe snapshot blockers is non-zero.
+	 */
+	if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0)
+		PG_RETURN_BOOL(true);
+
+	PG_RETURN_BOOL(false);
+}
 
 /*
  * Functions for manipulating advisory locks
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 643838bb054..58ad255d14b 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3140,6 +3140,8 @@ DATA(insert OID = 1371 (  pg_lock_status   PGNSP PGUID 12 1 1000 0 0 f f f f t t
 DESCR("view system lock information");
 DATA(insert OID = 2561 (  pg_blocking_pids PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 1007 "23" _null_ _null_ _null_ _null_ _null_ pg_blocking_pids _null_ _null_ _null_ ));
 DESCR("get array of PIDs of sessions blocking specified backend PID");
+DATA(insert OID = 3378 (  pg_isolation_test_session_is_blocked PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 16 "23 1007" _null_ _null_ _null_ _null_ _null_ pg_isolation_test_session_is_blocked _null_ _null_ _null_ ));
+DESCR("is the given backend PID waiting for a safe snapshot, or for one of the given PIDs for a lock?");
 DATA(insert OID = 1065 (  pg_prepared_xact PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 0 0 2249 "" "{28,25,1184,26,26}" "{o,o,o,o,o}" "{transaction,gid,prepared,ownerid,dbid}" _null_ _null_ pg_prepared_xact _null_ _null_ _null_ ));
 DESCR("view two-phase transactions");
 DATA(insert OID = 3819 (  pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ _null_ pg_get_multixact_members _null_ _null_ _null_ ));
diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h
index 408d94cc7a5..4b185c1eacd 100644
--- a/src/include/storage/predicate_internals.h
+++ b/src/include/storage/predicate_internals.h
@@ -474,5 +474,7 @@ typedef struct TwoPhasePredicateRecord
  * locking internals.
  */
 extern PredicateLockData *GetPredicateLockStatusData(void);
+extern int GetSafeSnapshotBlockingPids(int blocked_pid,
+									   int *output, int output_size);
 
 #endif   /* PREDICATE_INTERNALS_H */
diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c
index 4d18710bdfd..30ebb06beae 100644
--- a/src/test/isolation/isolationtester.c
+++ b/src/test/isolation/isolationtester.c
@@ -224,20 +224,12 @@ main(int argc, char **argv)
 	 */
 	initPQExpBuffer(&wait_query);
 	appendPQExpBufferStr(&wait_query,
-						 "SELECT pg_catalog.pg_blocking_pids($1) && '{");
+						 "SELECT pg_catalog.pg_isolation_test_session_is_blocked($1, '{");
 	/* The spec syntax requires at least one session; assume that here. */
 	appendPQExpBufferStr(&wait_query, backend_pids[1]);
 	for (i = 2; i < nconns; i++)
 		appendPQExpBuffer(&wait_query, ",%s", backend_pids[i]);
-	appendPQExpBufferStr(&wait_query, "}'::integer[]");
-
-	/* Also detect certain wait events. */
-	appendPQExpBufferStr(&wait_query,
-						 " OR EXISTS ("
-						 "  SELECT * "
-						 "  FROM pg_catalog.pg_stat_activity "
-						 "  WHERE pid = $1 "
-						 "  AND wait_event IN ('SafeSnapshot'))");
+	appendPQExpBufferStr(&wait_query, "}'::integer[])");
 
 	res = PQprepare(conns[0], PREP_WAITING, wait_query.data, 0, NULL);
 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
-- 
2.12.2

