As discussed on the thread "Spinlocks and compiler/memory barriers", now that we've made the spinlock primitives function as compiler barriers (we think), it should be possible to remove volatile qualifiers from many places in the source code. The attached patch does this in lwlock.c. If the changes in commit 0709b7ee72e4bc71ad07b7120acd117265ab51d0 (and follow-on commits) are correct and complete, applying this shouldn't break anything, while possibly giving the compiler room to optimize things better than it does today.
However, demonstrating the necessity of that commit for these changes seems to be non-trivial. I tried applying this patch and reverting commits 5b26278822c69dd76ef89fd50ecc7cdba9c3f035, b4c28d1b92c81941e4fc124884e51a7c110316bf, and 0709b7ee72e4bc71ad07b7120acd117265ab51d0 on a PPC64 POWER8 box with a whopping 192 hardware threads (thanks, IBM!). I then ran the regression tests repeatedly, and I ran several long pgbench runs with as many as 350 concurrent clients. No failures. So I'm posting this patch in the hope that others can help. The relevant tests are: 1. If you apply this patch to master and run tests of whatever kind strikes your fancy, does anything break under high concurrency? If it does, then the above commits weren't enough to make this safe on your platform. 2. If you apply this patch to master, revert the commits mentioned above, and again run tests, does anything now break? If it does (but the first tests were OK), then that shows that those commits did something useful on your platform. The change to make spinlocks act as compiler barriers provides the foundation for, hopefully, a cleaner code base and easier future work on scalabilty and performance projects, so help is much appreciated. Thanks, -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 7c96da5..66fb2e4 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -112,7 +112,7 @@ static lwlock_stats lwlock_stats_dummy; bool Trace_lwlocks = false; inline static void -PRINT_LWDEBUG(const char *where, const volatile LWLock *lock) +PRINT_LWDEBUG(const char *where, const LWLock *lock) { if (Trace_lwlocks) elog(LOG, "%s(%s %d): excl %d shared %d head %p rOK %d", @@ -406,9 +406,7 @@ LWLock * LWLockAssign(void) { LWLock *result; - - /* use volatile pointer to prevent code rearrangement */ - volatile int *LWLockCounter; + int *LWLockCounter; LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int)); SpinLockAcquire(ShmemLock); @@ -429,9 +427,7 @@ int LWLockNewTrancheId(void) { int result; - - /* use volatile pointer to prevent code rearrangement */ - volatile int *LWLockCounter; + int *LWLockCounter; LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int)); SpinLockAcquire(ShmemLock); @@ -511,9 +507,8 @@ LWLockAcquireWithVar(LWLock *l, uint64 *valptr, uint64 val) /* internal function to implement LWLockAcquire and LWLockAcquireWithVar */ static inline bool -LWLockAcquireCommon(LWLock *l, LWLockMode mode, uint64 *valptr, uint64 val) +LWLockAcquireCommon(LWLock *lock, LWLockMode mode, uint64 *valptr, uint64 val) { - volatile LWLock *lock = l; PGPROC *proc = MyProc; bool retry = false; bool result = true; @@ -525,7 +520,7 @@ LWLockAcquireCommon(LWLock *l, LWLockMode mode, uint64 *valptr, uint64 val) PRINT_LWDEBUG("LWLockAcquire", lock); #ifdef LWLOCK_STATS - lwstats = get_lwlock_stats_entry(l); + lwstats = get_lwlock_stats_entry(lock); /* Count lock acquisition attempts */ if (mode == LW_EXCLUSIVE) @@ -642,13 +637,13 @@ LWLockAcquireCommon(LWLock *l, LWLockMode mode, uint64 *valptr, uint64 val) * so that the lock manager or signal manager will see the received * signal when it next waits. */ - LOG_LWDEBUG("LWLockAcquire", T_NAME(l), T_ID(l), "waiting"); + LOG_LWDEBUG("LWLockAcquire", T_NAME(lock), T_ID(lock), "waiting"); #ifdef LWLOCK_STATS lwstats->block_count++; #endif - TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode); + TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), mode); for (;;) { @@ -659,9 +654,9 @@ LWLockAcquireCommon(LWLock *l, LWLockMode mode, uint64 *valptr, uint64 val) extraWaits++; } - TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode); + TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), mode); - LOG_LWDEBUG("LWLockAcquire", T_NAME(l), T_ID(l), "awakened"); + LOG_LWDEBUG("LWLockAcquire", T_NAME(lock), T_ID(lock), "awakened"); /* Now loop back and try to acquire lock again. */ retry = true; @@ -675,10 +670,10 @@ LWLockAcquireCommon(LWLock *l, LWLockMode mode, uint64 *valptr, uint64 val) /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); - TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(l), T_ID(l), mode); + TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), mode); /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = l; + held_lwlocks[num_held_lwlocks++] = lock; /* * Fix the process wait semaphore's count for any absorbed wakeups. @@ -697,9 +692,8 @@ LWLockAcquireCommon(LWLock *l, LWLockMode mode, uint64 *valptr, uint64 val) * If successful, cancel/die interrupts are held off until lock release. */ bool -LWLockConditionalAcquire(LWLock *l, LWLockMode mode) +LWLockConditionalAcquire(LWLock *lock, LWLockMode mode) { - volatile LWLock *lock = l; bool mustwait; PRINT_LWDEBUG("LWLockConditionalAcquire", lock); @@ -747,14 +741,16 @@ LWLockConditionalAcquire(LWLock *l, LWLockMode mode) { /* Failed to get lock, so release interrupt holdoff */ RESUME_INTERRUPTS(); - LOG_LWDEBUG("LWLockConditionalAcquire", T_NAME(l), T_ID(l), "failed"); - TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(l), T_ID(l), mode); + LOG_LWDEBUG("LWLockConditionalAcquire", + T_NAME(lock), T_ID(lock), "failed"); + TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), + T_ID(lock), mode); } else { /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = l; - TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(l), T_ID(l), mode); + held_lwlocks[num_held_lwlocks++] = lock; + TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), T_ID(lock), mode); } return !mustwait; @@ -775,9 +771,8 @@ LWLockConditionalAcquire(LWLock *l, LWLockMode mode) * wake up, observe that their records have already been flushed, and return. */ bool -LWLockAcquireOrWait(LWLock *l, LWLockMode mode) +LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) { - volatile LWLock *lock = l; PGPROC *proc = MyProc; bool mustwait; int extraWaits = 0; @@ -788,7 +783,7 @@ LWLockAcquireOrWait(LWLock *l, LWLockMode mode) PRINT_LWDEBUG("LWLockAcquireOrWait", lock); #ifdef LWLOCK_STATS - lwstats = get_lwlock_stats_entry(l); + lwstats = get_lwlock_stats_entry(lock); #endif /* Ensure we will have room to remember the lock */ @@ -855,13 +850,14 @@ LWLockAcquireOrWait(LWLock *l, LWLockMode mode) * Wait until awakened. Like in LWLockAcquire, be prepared for bogus * wakups, because we share the semaphore with ProcWaitForSignal. */ - LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "waiting"); + LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), + "waiting"); #ifdef LWLOCK_STATS lwstats->block_count++; #endif - TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode); + TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), mode); for (;;) { @@ -872,9 +868,10 @@ LWLockAcquireOrWait(LWLock *l, LWLockMode mode) extraWaits++; } - TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode); + TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), mode); - LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "awakened"); + LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), + "awakened"); } else { @@ -892,14 +889,16 @@ LWLockAcquireOrWait(LWLock *l, LWLockMode mode) { /* Failed to get lock, so release interrupt holdoff */ RESUME_INTERRUPTS(); - LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "failed"); - TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(l), T_ID(l), mode); + LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), "failed"); + TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), T_ID(lock), + mode); } else { /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = l; - TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(l), T_ID(l), mode); + held_lwlocks[num_held_lwlocks++] = lock; + TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), T_ID(lock), + mode); } return !mustwait; @@ -924,10 +923,8 @@ LWLockAcquireOrWait(LWLock *l, LWLockMode mode) * in shared mode, returns 'true'. */ bool -LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) +LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval) { - volatile LWLock *lock = l; - volatile uint64 *valp = valptr; PGPROC *proc = MyProc; int extraWaits = 0; bool result = false; @@ -938,7 +935,7 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) PRINT_LWDEBUG("LWLockWaitForVar", lock); #ifdef LWLOCK_STATS - lwstats = get_lwlock_stats_entry(l); + lwstats = get_lwlock_stats_entry(lock); #endif /* LWLOCK_STATS */ /* @@ -981,7 +978,7 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) } else { - value = *valp; + value = *valptr; if (value != oldval) { result = false; @@ -1023,13 +1020,14 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) * so that the lock manager or signal manager will see the received * signal when it next waits. */ - LOG_LWDEBUG("LWLockWaitForVar", T_NAME(l), T_ID(l), "waiting"); + LOG_LWDEBUG("LWLockWaitForVar", T_NAME(lock), T_ID(lock), "waiting"); #ifdef LWLOCK_STATS lwstats->block_count++; #endif - TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), LW_EXCLUSIVE); + TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), + LW_EXCLUSIVE); for (;;) { @@ -1040,9 +1038,10 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) extraWaits++; } - TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), LW_EXCLUSIVE); + TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), + LW_EXCLUSIVE); - LOG_LWDEBUG("LWLockWaitForVar", T_NAME(l), T_ID(l), "awakened"); + LOG_LWDEBUG("LWLockWaitForVar", T_NAME(lock), T_ID(lock), "awakened"); /* Now loop back and check the status of the lock again. */ } @@ -1050,7 +1049,7 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); - TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(l), T_ID(l), LW_EXCLUSIVE); + TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), LW_EXCLUSIVE); /* * Fix the process wait semaphore's count for any absorbed wakeups. @@ -1078,10 +1077,8 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval) * The caller must be holding the lock in exclusive mode. */ void -LWLockUpdateVar(LWLock *l, uint64 *valptr, uint64 val) +LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val) { - volatile LWLock *lock = l; - volatile uint64 *valp = valptr; PGPROC *head; PGPROC *proc; PGPROC *next; @@ -1093,7 +1090,7 @@ LWLockUpdateVar(LWLock *l, uint64 *valptr, uint64 val) Assert(lock->exclusive == 1); /* Update the lock's value */ - *valp = val; + *valptr = val; /* * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken @@ -1139,9 +1136,8 @@ LWLockUpdateVar(LWLock *l, uint64 *valptr, uint64 val) * LWLockRelease - release a previously acquired lock */ void -LWLockRelease(LWLock *l) +LWLockRelease(LWLock *lock) { - volatile LWLock *lock = l; PGPROC *head; PGPROC *proc; int i; @@ -1154,11 +1150,11 @@ LWLockRelease(LWLock *l) */ for (i = num_held_lwlocks; --i >= 0;) { - if (l == held_lwlocks[i]) + if (lock == held_lwlocks[i]) break; } if (i < 0) - elog(ERROR, "lock %s %d is not held", T_NAME(l), T_ID(l)); + elog(ERROR, "lock %s %d is not held", T_NAME(lock), T_ID(lock)); num_held_lwlocks--; for (; i < num_held_lwlocks; i++) held_lwlocks[i] = held_lwlocks[i + 1]; @@ -1238,14 +1234,15 @@ LWLockRelease(LWLock *l) /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); - TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(l), T_ID(l)); + TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock), T_ID(lock)); /* * Awaken any waiters I removed from the queue. */ while (head != NULL) { - LOG_LWDEBUG("LWLockRelease", T_NAME(l), T_ID(l), "release waiter"); + LOG_LWDEBUG("LWLockRelease", T_NAME(lock), T_ID(lock), + "release waiter"); proc = head; head = proc->lwWaitLink; proc->lwWaitLink = NULL;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers