I wrote: > Robert Haas <robertmh...@gmail.com> writes: >> No opinion without seeing what you propose to change.
> OK, will put out a proposal. I began with the intention of making no non-cosmetic changes, but then I started to wonder why ConditionVariablePrepareToSleep bothers with a !proclist_contains test, when the calling process surely ought not be in the list -- or any other list -- since it wasn't prepared to sleep. And that led me down a different rabbit hole ending in the conclusion that proclist.h could stand some improvements too. I do not like the fact that it's impossible to tell whether a proclist_node is in any proclist or not. Initially, a proclist_node contains zeroes which is a distinguishable state, but proclist_delete_offset resets it to next = prev = INVALID_PGPROCNO which looks the same as a node that's in a singleton list. We should have it reset to the initial state of zeroes instead, and then we can add assertions to proclist_push_xxx that the supplied node is not already in a list. Hence, I propose the first attached patch which tightens things up in proclist.h and then removes the !proclist_contains test in ConditionVariablePrepareToSleep; the assertion in proclist_push_tail supersedes that. The second attached patch is the cosmetic changes I want to make in condition_variable.c/.h. I still think that we ought to change the Asserts on cv_sleep_target in ConditionVariablePrepareToSleep and ConditionVariableSleep to be full test-and-elog tests. Those are checking a global correctness property ("global" meaning "interactions between completely unrelated modules can break this"), and they'd be extremely cheap compared to the rest of what those functions are doing, so I think insisting that they be Asserts is penny wise and pound foolish. Anybody besides Robert want to vote on that? Another loose end that I'm seeing here is that while a process waiting on a condition variable will respond to a cancel or die interrupt, it will not notice postmaster death. This seems unwise to me. I think we should adjust the WaitLatch call to include WL_POSTMASTER_DEATH as a wake condition and just do a summary proc_exit(1) if it sees that. I'd even argue that that is a back-patchable bug fix. regards, tom lane
diff --git a/src/include/storage/proclist_types.h b/src/include/storage/proclist_types.h index 237fb76..f4dac10 100644 --- a/src/include/storage/proclist_types.h +++ b/src/include/storage/proclist_types.h @@ -16,7 +16,12 @@ #define PROCLIST_TYPES_H /* - * A node in a list of processes. + * A node in a doubly-linked list of processes. The link fields contain + * the 0-based PGPROC indexes of the next and previous process, or + * INVALID_PGPROCNO in the next-link of the last node and the prev-link + * of the first node. A node that is currently not in any list + * should have next == prev == 0; this is not a possible state for a node + * that is in a list, because we disallow circularity. */ typedef struct proclist_node { @@ -25,7 +30,8 @@ typedef struct proclist_node } proclist_node; /* - * Head of a doubly-linked list of PGPROCs, identified by pgprocno. + * Header of a doubly-linked list of PGPROCs, identified by pgprocno. + * An empty list is represented by head == tail == INVALID_PGPROCNO. */ typedef struct proclist_head { @@ -42,4 +48,4 @@ typedef struct proclist_mutable_iter int next; /* pgprocno of the next PGPROC */ } proclist_mutable_iter; -#endif +#endif /* PROCLIST_TYPES_H */ diff --git a/src/include/storage/proclist.h b/src/include/storage/proclist.h index 4e25b47..59a478e 100644 --- a/src/include/storage/proclist.h +++ b/src/include/storage/proclist.h @@ -42,7 +42,7 @@ proclist_is_empty(proclist_head *list) /* * Get a pointer to a proclist_node inside a given PGPROC, given a procno and - * an offset. + * the proclist_node field's offset within struct PGPROC. */ static inline proclist_node * proclist_node_get(int procno, size_t node_offset) @@ -53,13 +53,15 @@ proclist_node_get(int procno, size_t node_offset) } /* - * Insert a node at the beginning of a list. + * Insert a process at the beginning of a list. */ static inline void proclist_push_head_offset(proclist_head *list, int procno, size_t node_offset) { proclist_node *node = proclist_node_get(procno, node_offset); + Assert(node->next == 0 && node->prev == 0); + if (list->head == INVALID_PGPROCNO) { Assert(list->tail == INVALID_PGPROCNO); @@ -79,13 +81,15 @@ proclist_push_head_offset(proclist_head *list, int procno, size_t node_offset) } /* - * Insert a node at the end of a list. + * Insert a process at the end of a list. */ static inline void proclist_push_tail_offset(proclist_head *list, int procno, size_t node_offset) { proclist_node *node = proclist_node_get(procno, node_offset); + Assert(node->next == 0 && node->prev == 0); + if (list->tail == INVALID_PGPROCNO) { Assert(list->head == INVALID_PGPROCNO); @@ -105,30 +109,38 @@ proclist_push_tail_offset(proclist_head *list, int procno, size_t node_offset) } /* - * Delete a node. The node must be in the list. + * Delete a process from a list --- it must be in the list! */ static inline void proclist_delete_offset(proclist_head *list, int procno, size_t node_offset) { proclist_node *node = proclist_node_get(procno, node_offset); + Assert(node->next != 0 || node->prev != 0); + if (node->prev == INVALID_PGPROCNO) + { + Assert(list->head == procno); list->head = node->next; + } else proclist_node_get(node->prev, node_offset)->next = node->next; if (node->next == INVALID_PGPROCNO) + { + Assert(list->tail == procno); list->tail = node->prev; + } else proclist_node_get(node->next, node_offset)->prev = node->prev; - node->next = node->prev = INVALID_PGPROCNO; + node->next = node->prev = 0; } /* - * Check if a node is currently in a list. It must be known that the node is - * not in any _other_ proclist that uses the same proclist_node, so that the - * only possibilities are that it is in this list or none. + * Check if a process is currently in a list. It must be known that the + * process is not in any _other_ proclist that uses the same proclist_node, + * so that the only possibilities are that it is in this list or none. */ static inline bool proclist_contains_offset(proclist_head *list, int procno, @@ -136,27 +148,26 @@ proclist_contains_offset(proclist_head *list, int procno, { proclist_node *node = proclist_node_get(procno, node_offset); - /* - * If this is not a member of a proclist, then the next and prev pointers - * should be 0. Circular lists are not allowed so this condition is not - * confusable with a real pgprocno 0. - */ + /* If it's not in any list, it's definitely not in this one. */ if (node->prev == 0 && node->next == 0) return false; - /* If there is a previous node, then this node must be in the list. */ - if (node->prev != INVALID_PGPROCNO) - return true; - /* - * There is no previous node, so the only way this node can be in the list - * is if it's the head node. + * It must, in fact, be in this list. Ideally, in assert-enabled builds, + * we'd verify that. But since this function is typically used while + * holding a spinlock, crawling the whole list is unacceptable. However, + * we can verify matters in O(1) time when the node is a list head or + * tail, and that seems worth doing, since in practice that should often + * be enough to catch mistakes. */ - return list->head == procno; + Assert(node->prev != INVALID_PGPROCNO || list->head == procno); + Assert(node->next != INVALID_PGPROCNO || list->tail == procno); + + return true; } /* - * Remove and return the first node from a list (there must be one). + * Remove and return the first process from a list (there must be one). */ static inline PGPROC * proclist_pop_head_node_offset(proclist_head *list, size_t node_offset) @@ -205,4 +216,4 @@ proclist_pop_head_node_offset(proclist_head *list, size_t node_offset) proclist_node_get((iter).cur, \ offsetof(PGPROC, link_member))->next) -#endif +#endif /* PROCLIST_H */ diff --git a/src/backend/storage/lmgr/condition_variable.c b/src/backend/storage/lmgr/condition_variable.c index 60234db..e3bc034 100644 --- a/src/backend/storage/lmgr/condition_variable.c +++ b/src/backend/storage/lmgr/condition_variable.c @@ -86,8 +86,7 @@ ConditionVariablePrepareToSleep(ConditionVariable *cv) /* Add myself to the wait queue. */ SpinLockAcquire(&cv->mutex); - if (!proclist_contains(&cv->wakeup, pgprocno, cvWaitLink)) - proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink); + proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink); SpinLockRelease(&cv->mutex); }
diff --git a/src/backend/storage/lmgr/condition_variable.c b/src/backend/storage/lmgr/condition_variable.c index e3bc034..0b9d676 100644 --- a/src/backend/storage/lmgr/condition_variable.c +++ b/src/backend/storage/lmgr/condition_variable.c @@ -43,11 +43,17 @@ ConditionVariableInit(ConditionVariable *cv) } /* - * Prepare to wait on a given condition variable. This can optionally be - * called before entering a test/sleep loop. Alternatively, the call to - * ConditionVariablePrepareToSleep can be omitted. The only advantage of - * calling ConditionVariablePrepareToSleep is that it avoids an initial - * double-test of the user's predicate in the case that we need to wait. + * Prepare to wait on a given condition variable. + * + * This can optionally be called before entering a test/sleep loop. + * Doing so is more efficient if we'll need to sleep at least once. + * However, if the first test of the exit condition is likely to succeed, + * it's more efficient to omit the ConditionVariablePrepareToSleep call. + * See comments in ConditionVariableSleep for more detail. + * + * Only one condition variable can be used at a time, ie, + * ConditionVariableCancelSleep must be called before any attempt is made + * to sleep on a different condition variable. */ void ConditionVariablePrepareToSleep(ConditionVariable *cv) @@ -79,8 +85,8 @@ ConditionVariablePrepareToSleep(ConditionVariable *cv) cv_sleep_target = cv; /* - * Reset my latch before adding myself to the queue and before entering - * the caller's predicate loop. + * Reset my latch before adding myself to the queue, to ensure that we + * don't miss a wakeup that occurs immediately. */ ResetLatch(MyLatch); @@ -90,20 +96,21 @@ ConditionVariablePrepareToSleep(ConditionVariable *cv) SpinLockRelease(&cv->mutex); } -/*-------------------------------------------------------------------------- - * Wait for the given condition variable to be signaled. This should be - * called in a predicate loop that tests for a specific exit condition and - * otherwise sleeps, like so: +/* + * Wait for the given condition variable to be signaled. * - * ConditionVariablePrepareToSleep(cv); [optional] + * This should be called in a predicate loop that tests for a specific exit + * condition and otherwise sleeps, like so: + * + * ConditionVariablePrepareToSleep(cv); // optional * while (condition for which we are waiting is not true) * ConditionVariableSleep(cv, wait_event_info); * ConditionVariableCancelSleep(); * - * Supply a value from one of the WaitEventXXX enums defined in pgstat.h to - * control the contents of pg_stat_activity's wait_event_type and wait_event - * columns while waiting. - *-------------------------------------------------------------------------*/ + * wait_event_info should be a value from one of the WaitEventXXX enums + * defined in pgstat.h. This controls the contents of pg_stat_activity's + * wait_event_type and wait_event columns while waiting. + */ void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info) { @@ -113,13 +120,14 @@ ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info) /* * If the caller didn't prepare to sleep explicitly, then do so now and * return immediately. The caller's predicate loop should immediately - * call again if its exit condition is not yet met. This initial spurious - * return can be avoided by calling ConditionVariablePrepareToSleep(cv) + * call again if its exit condition is not yet met. This will result in + * the exit condition being tested twice before we first sleep. The extra + * test can be prevented by calling ConditionVariablePrepareToSleep(cv) * first. Whether it's worth doing that depends on whether you expect the - * condition to be met initially, in which case skipping the prepare - * allows you to skip manipulation of the wait list, or not met initially, - * in which case preparing first allows you to skip a spurious test of the - * caller's exit condition. + * condition to be met initially, in which case skipping the prepare is + * recommended because it avoids manipulations of the wait list, or not + * met initially, in which case preparing first is better because it + * avoids one extra test of the exit condition. */ if (cv_sleep_target == NULL) { @@ -130,7 +138,7 @@ ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info) /* Any earlier condition variable sleep must have been canceled. */ Assert(cv_sleep_target == cv); - while (!done) + do { CHECK_FOR_INTERRUPTS(); @@ -140,18 +148,23 @@ ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info) */ WaitEventSetWait(cv_wait_event_set, -1, &event, 1, wait_event_info); - /* Reset latch before testing whether we can return. */ + /* Reset latch before examining the state of the wait list. */ ResetLatch(MyLatch); /* * If this process has been taken out of the wait list, then we know - * that is has been signaled by ConditionVariableSignal. We put it - * back into the wait list, so we don't miss any further signals while - * the caller's loop checks its condition. If it hasn't been taken - * out of the wait list, then the latch must have been set by - * something other than ConditionVariableSignal; though we don't - * guarantee not to return spuriously, we'll avoid these obvious - * cases. + * that it has been signaled by ConditionVariableSignal (or + * ConditionVariableBroadcast), so we should return to the caller. But + * that doesn't guarantee that the exit condition is met, only that we + * ought to check it. So we must put the process back into the wait + * list, to ensure we don't miss any additional wakeup occurring while + * the caller checks its exit condition. We can take ourselves out of + * the wait list only when the caller calls + * ConditionVariableCancelSleep. + * + * If we're still in the wait list, then the latch must have been set + * by something other than ConditionVariableSignal; though we don't + * guarantee not to return spuriously, we'll avoid this obvious case. */ SpinLockAcquire(&cv->mutex); if (!proclist_contains(&cv->wakeup, MyProc->pgprocno, cvWaitLink)) @@ -160,13 +173,17 @@ ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info) proclist_push_tail(&cv->wakeup, MyProc->pgprocno, cvWaitLink); } SpinLockRelease(&cv->mutex); - } + } while (!done); } /* - * Cancel any pending sleep operation. We just need to remove ourselves - * from the wait queue of any condition variable for which we have previously - * prepared a sleep. + * Cancel any pending sleep operation. + * + * We just need to remove ourselves from the wait queue of any condition + * variable for which we have previously prepared a sleep. + * + * Do nothing if nothing is pending; this allows this function to be called + * during transaction abort to clean up any unfinished CV sleep. */ void ConditionVariableCancelSleep(void) diff --git a/src/include/storage/condition_variable.h b/src/include/storage/condition_variable.h index c7afbbc..7dac477 100644 --- a/src/include/storage/condition_variable.h +++ b/src/include/storage/condition_variable.h @@ -27,30 +27,33 @@ typedef struct { - slock_t mutex; - proclist_head wakeup; + slock_t mutex; /* spinlock protecting the wakeup list */ + proclist_head wakeup; /* list of wake-able processes */ } ConditionVariable; /* Initialize a condition variable. */ -extern void ConditionVariableInit(ConditionVariable *); +extern void ConditionVariableInit(ConditionVariable *cv); /* * To sleep on a condition variable, a process should use a loop which first * checks the condition, exiting the loop if it is met, and then calls * ConditionVariableSleep. Spurious wakeups are possible, but should be - * infrequent. After exiting the loop, ConditionVariableCancelSleep should + * infrequent. After exiting the loop, ConditionVariableCancelSleep must * be called to ensure that the process is no longer in the wait list for - * the condition variable. + * the condition variable. Only one condition variable can be used at a + * time, ie, ConditionVariableCancelSleep must be called before any attempt + * is made to sleep on a different condition variable. */ -extern void ConditionVariableSleep(ConditionVariable *, uint32 wait_event_info); +extern void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info); extern void ConditionVariableCancelSleep(void); /* - * The use of this function is optional and not necessary for correctness; - * for efficiency, it should be called prior entering the loop described above - * if it is thought that the condition is unlikely to hold immediately. + * Optionally, ConditionVariablePrepareToSleep can be called before entering + * the test-and-sleep loop described above. Doing so is more efficient if + * at least one sleep is needed, whereas not doing so is more efficient when + * no sleep is needed because the test condition is true the first time. */ -extern void ConditionVariablePrepareToSleep(ConditionVariable *); +extern void ConditionVariablePrepareToSleep(ConditionVariable *cv); /* Wake up a single waiter (via signal) or all waiters (via broadcast). */ extern void ConditionVariableSignal(ConditionVariable *cv);