The shm_mq code handles blocking mode and non-blocking mode
asymmetrically in a couple of places, with the unfortunate result that
if you are using non-blocking mode, and your counterparty dies before
attaching the queue, operations on the queue continue to return
SHM_MQ_WOULD_BLOCK instead of, as they should, returning
SHM_MQ_DETACHED. The attached patch fixes the problem. Thanks to my
colleague Rushabh Lathia for helping track this down.
(There are some further bugs in this area outside the shm_mq code
... but I'm still trying to figure out exactly what they are and what
we should do about them. This much, however, seems clear-cut.)
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/storage/ipc/shm_mq.c b/src/backend/storage/ipc/shm_mq.c
index 80956ce..61f9298 100644
--- a/src/backend/storage/ipc/shm_mq.c
+++ b/src/backend/storage/ipc/shm_mq.c
@@ -142,6 +142,8 @@ static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mq, Size nbytes,
const void *data, bool nowait, Size *bytes_written);
static shm_mq_result shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed,
bool nowait, Size *nbytesp, void **datap);
+static bool shm_mq_counterparty_gone(volatile shm_mq *mq,
+ BackgroundWorkerHandle *handle);
static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
BackgroundWorkerHandle *handle);
static uint64 shm_mq_get_bytes_read(volatile shm_mq *mq, bool *detached);
@@ -499,6 +501,8 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
{
if (nowait)
{
+ if (shm_mq_counterparty_gone(mq, mqh->mqh_handle))
+ return SHM_MQ_DETACHED;
if (shm_mq_get_sender(mq) == NULL)
return SHM_MQ_WOULD_BLOCK;
}
@@ -794,6 +798,11 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data,
*/
if (nowait)
{
+ if (shm_mq_counterparty_gone(mq, mqh->mqh_handle))
+ {
+ *bytes_written = sent;
+ return SHM_MQ_DETACHED;
+ }
if (shm_mq_get_receiver(mq) == NULL)
{
*bytes_written = sent;
@@ -948,6 +957,45 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
}
/*
+ * Test whether a counterparty who may not even be alive yet is definitely gone.
+ */
+static bool
+shm_mq_counterparty_gone(volatile shm_mq *mq, BackgroundWorkerHandle *handle)
+{
+ bool detached;
+ pid_t pid;
+
+ /* Acquire the lock just long enough to read the detached flag. */
+ SpinLockAcquire(&mq->mq_mutex);
+ detached = mq->mq_detached;
+ SpinLockRelease(&mq->mq_mutex);
+
+ /* If the queue has been detached, counterparty is definitely gone. */
+ if (detached)
+ return true;
+
+ /* If there's a handle, check worker status. */
+ if (handle != NULL)
+ {
+ BgwHandleStatus status;
+
+ /* Any status other than started or not-yet-started means it is gone. */
+ status = GetBackgroundWorkerPid(handle, &pid);
+ if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED)
+ {
+ /* Mark the queue detached, just to make it official. */
+ SpinLockAcquire(&mq->mq_mutex);
+ mq->mq_detached = true;
+ SpinLockRelease(&mq->mq_mutex);
+ return true;
+ }
+ }
+
+ /* Counterparty is not definitely gone. */
+ return false;
+}
+
+/*
* This is used when a process is waiting for its counterpart to attach to the
* queue. We exit when the other process attaches as expected, or, if
* handle != NULL, when the referenced background process or the postmaster
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers