The shm_mq code handles blocking mode and non-blocking mode
asymmetrically in a couple of places, with the unfortunate result that
if you are using non-blocking mode, and your counterparty dies before
attaching the queue, operations on the queue continue to return
SHM_MQ_WOULD_BLOCK instead of, as they should, returning
SHM_MQ_DETACHED.  The attached patch fixes the problem.  Thanks to my
colleague Rushabh Lathia for helping track this down.

(There's are some further bugs in this area outside the shm_mq code
... but I'm still trying to figure out exactly what they are and what
we should do about them.  This much, however, seems clear-cut.)

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/storage/ipc/shm_mq.c b/src/backend/storage/ipc/shm_mq.c
index 80956ce..61f9298 100644
--- a/src/backend/storage/ipc/shm_mq.c
+++ b/src/backend/storage/ipc/shm_mq.c
@@ -142,6 +142,8 @@ static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mq, Size nbytes,
 				  const void *data, bool nowait, Size *bytes_written);
 static shm_mq_result shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed,
 					 bool nowait, Size *nbytesp, void **datap);
+static bool shm_mq_counterparty_gone(volatile shm_mq *mq,
+						 BackgroundWorkerHandle *handle);
 static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
 					 BackgroundWorkerHandle *handle);
 static uint64 shm_mq_get_bytes_read(volatile shm_mq *mq, bool *detached);
@@ -499,6 +501,8 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
 	{
 		if (nowait)
 		{
+			if (shm_mq_counterparty_gone(mq, mqh->mqh_handle))
+				return SHM_MQ_DETACHED;
 			if (shm_mq_get_sender(mq) == NULL)
 				return SHM_MQ_WOULD_BLOCK;
 		}
@@ -794,6 +798,11 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data,
 			 */
 			if (nowait)
 			{
+				if (shm_mq_counterparty_gone(mq, mqh->mqh_handle))
+				{
+					*bytes_written = sent;
+					return SHM_MQ_DETACHED;
+				}
 				if (shm_mq_get_receiver(mq) == NULL)
 				{
 					*bytes_written = sent;
@@ -948,6 +957,45 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
 }
 
 /*
+ * Test whether a counterparty who may not even be alive yet is definitely gone.
+ */
+static bool
+shm_mq_counterparty_gone(volatile shm_mq *mq, BackgroundWorkerHandle *handle)
+{
+	bool	detached;
+	pid_t	pid;
+
+	/* Acquire the lock just long enough to check the pointer. */
+	SpinLockAcquire(&mq->mq_mutex);
+	detached = mq->mq_detached;
+	SpinLockRelease(&mq->mq_mutex);
+
+	/* If the queue has been detached, counterparty is definitely gone. */
+	if (detached)
+		return true;
+
+	/* If there's a handle, check worker status. */
+	if (handle != NULL)
+	{
+		BgwHandleStatus status;
+
+		/* Check for unexpected worker death. */
+		status = GetBackgroundWorkerPid(handle, &pid);
+		if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED)
+		{
+			/* Mark it detached, just to make it official. */
+			SpinLockAcquire(&mq->mq_mutex);
+			mq->mq_detached = true;
+			SpinLockRelease(&mq->mq_mutex);
+			return true;
+		}
+	}
+
+	/* Counterparty is not definitively gone. */
+	return false;
+}
+
+/*
  * This is used when a process is waiting for its counterpart to attach to the
  * queue.  We exit when the other process attaches as expected, or, if
  * handle != NULL, when the referenced background process or the postmaster
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to