Hi,
On 2014-06-26 23:01:10 +0200, Andres Freund wrote:
> I think we should rework things so that we fall back to
> pg_write_barrier(), (*((volatile slock_t *) (lock)) = 0) instead of what
> we have right now.
> That'd require making barrier.h independent of s_lock.h, but I think
> that'd be a pretty clear improvement. One open question is what happens
> with SpinlockRelease() when barriers are implemented using spinlocks
> and SpinlockRelease() itself needs a barrier.
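I.e. the fallback S_UNLOCK would look roughly like this (sketch only,
mirroring what the patch below adds as the generic case):

#define S_UNLOCK(lock) \
do \
{ \
	pg_write_barrier(); \
	(*((volatile slock_t *) (lock)) = 0); \
} while (0)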
Too tired to think about this any further today. Here's my current
state:
* barrier.h's spinlock implementation moved to s_lock.c, losing the
  s_lock.h include. That requires an S_UNLOCK variant that doesn't
  itself use a barrier; I've coined it S_UNLOCKED_UNLOCK (see the
  sketch below).
* s_lock.h now includes barrier.h to implement the generic S_UNLOCK
safely.
* gcc x86-64 had a superfluous "cc" clobber. Likely copied from the
  32-bit version, which does additional operations.
* PPC was missing a compiler barrier.
* alpha was missing a "cc" clobber.
* mips was missing a compiler barrier and a "cc" clobber.
* I have no idea how to fix pa-risc's S_UNLOCK for !gcc. The referenced
  spinlock paper calls an external function to avoid reordering.
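
To make the SpinlockRelease() circularity concrete, here's a minimal
standalone sketch (not the actual tree: types simplified, S_LOCK stubbed
with a GCC builtin) of why the barrier fallback needs the barrier-less
unlock:

typedef volatile int slock_t;

static slock_t dummy_spinlock;

void pg_memory_barrier_impl(void);

/* platform without native barriers: the write barrier falls back, too */
#define pg_write_barrier() pg_memory_barrier_impl()

/* stub: acquire with barrier semantics via a GCC builtin */
#define S_LOCK(lock) \
	do { while (__sync_lock_test_and_set((lock), 1)) ; } while (0)

/* generic unlock: write barrier, then the releasing store */
#define S_UNLOCK(lock) \
	do { pg_write_barrier(); *((volatile slock_t *) (lock)) = 0; } while (0)

/* unlock without barrier semantics, for the barrier fallback only */
#define S_UNLOCKED_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)

void
pg_memory_barrier_impl(void)
{
	S_LOCK(&dummy_spinlock);
	/* S_UNLOCK here would call pg_write_barrier() and recurse forever */
	S_UNLOCKED_UNLOCK(&dummy_spinlock);
}

With the patch the real definitions live in s_lock.h and s_lock.c; the
point is only that the releasing store inside the fallback has to be
the plain one.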
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index efe1b43..5052718 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -187,6 +187,23 @@ update_spins_per_delay(int shared_spins_per_delay)
return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
}
+/*
+ * Memory barrier implementation based on acquiring and releasing a
+ * spinlock. See barrier.h for details.
+ */
+#ifdef PG_SIMULATE_MEMORY_BARRIER
+void
+pg_memory_barrier_impl(void)
+{
+ S_LOCK(&dummy_spinlock);
+#ifdef S_UNLOCKED_UNLOCK
+ S_UNLOCKED_UNLOCK(&dummy_spinlock);
+#else
+ S_UNLOCK(&dummy_spinlock);
+#endif
+}
+#endif /* PG_SIMULATE_MEMORY_BARRIER */
+
/*
* Various TAS implementations that cannot live in s_lock.h as no inline
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index 9b71744..03a4fe8 100644
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -25,6 +25,7 @@
#include "access/xlog.h"
#include "miscadmin.h"
#include "replication/walsender.h"
+#include "storage/barrier.h"
#include "storage/lwlock.h"
#include "storage/pg_sema.h"
#include "storage/spin.h"
diff --git a/src/include/storage/barrier.h b/src/include/storage/barrier.h
index bc61de0..e5692ac 100644
--- a/src/include/storage/barrier.h
+++ b/src/include/storage/barrier.h
@@ -13,10 +13,6 @@
#ifndef BARRIER_H
#define BARRIER_H
-#include "storage/s_lock.h"
-
-extern slock_t dummy_spinlock;
-
/*
* A compiler barrier need not (and preferably should not) emit any actual
* machine code, but must act as an optimization fence: the compiler must not
@@ -155,10 +151,14 @@ extern slock_t dummy_spinlock;
* spinlock acquire-and-release would be equivalent to a full memory barrier.
* For example, I'm not sure that Itanium's acq and rel add up to a full
* fence. But all of our actual implementations seem OK in this regard.
+ *
+ * The actual implementation is in s_lock.c so we can include barrier.h from
+ * s_lock.h. Yes, this will make things even slower, but who cares.
*/
#if !defined(pg_memory_barrier)
-#define pg_memory_barrier() \
- do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
+#define PG_SIMULATE_MEMORY_BARRIER
+extern void pg_memory_barrier_impl(void);
+#define pg_memory_barrier() pg_memory_barrier_impl()
#endif
/*
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index ba4dfe1..2732054 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -19,7 +19,8 @@
* Should return number of "delays"; see s_lock.c
*
* void S_UNLOCK(slock_t *lock)
- * Unlock a previously acquired lock.
+ * Unlock a previously acquired lock. Needs to imply a compiler
+ * barrier and a write memory barrier.
*
* bool S_LOCK_FREE(slock_t *lock)
* Tests if the lock is free. Returns TRUE if free, FALSE if locked.
@@ -39,6 +40,7 @@
* Atomic test-and-set instruction. Attempt to acquire the lock,
* but do *not* wait. Returns 0 if successful, nonzero if unable
* to acquire the lock.
+ * Needs to imply a compiler barrier and a read memory barrier.
*
* int TAS_SPIN(slock_t *lock)
* Like TAS(), but this version is used when waiting for a lock
@@ -94,6 +96,8 @@
#ifndef S_LOCK_H
#define S_LOCK_H
+#include "storage/barrier.h"
+
#ifdef HAVE_SPINLOCKS /* skip spinlocks if requested */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
@@ -224,7 +228,7 @@ tas(volatile slock_t *lock)
" xchgb %0,%1 \n"
: "+q"(_res), "+m"(*lock)
:
-: "memory", "cc");
+: "memory");
return (int) _res;
}
@@ -478,14 +482,14 @@ tas(volatile slock_t *lock)
#define S_UNLOCK(lock) \
do \
{ \
- __asm__ __volatile__ (" lwsync \n"); \
+ __asm__ __volatile__ (" lwsync \n":::"memory"); \
*((volatile slock_t *) (lock)) = 0; \
} while (0)
#else
#define S_UNLOCK(lock) \
do \
{ \
- __asm__ __volatile__ (" sync \n"); \
+ __asm__ __volatile__ (" sync \n":::"memory"); \
*((volatile slock_t *) (lock)) = 0; \
} while (0)
#endif /* USE_PPC_LWSYNC */
@@ -542,7 +546,7 @@ tas(volatile slock_t *lock)
"1: \n"
: "=&r"(_res), "+m"(*lock)
: "r"(lock)
-: "memory");
+: "memory", "cc");
return _res;
}
@@ -580,14 +584,14 @@ tas(volatile slock_t *lock)
"3: \n"
: "=&r"(_res), "+m"(*lock)
:
-: "memory", "0");
+: "memory", "0", "cc");
return (int) _res;
}
#define S_UNLOCK(lock) \
do \
{\
- __asm__ __volatile__ (" mb \n"); \
+ __asm__ __volatile__ (" mb \n":::"memory"); \
*((volatile slock_t *) (lock)) = 0; \
} while (0)
@@ -624,7 +628,7 @@ tas(volatile slock_t *lock)
" .set pop "
: "=&r" (_res), "=&r" (_tmp), "+R" (*_l)
:
-: "memory");
+: "memory", "cc");
return _res;
}
@@ -638,7 +642,10 @@ do \
" .set noreorder \n" \
" .set nomacro \n" \
" sync \n" \
- " .set pop "); \
+ " .set pop "
+:
+:
+: "memory"); \
*((volatile slock_t *) (lock)) = 0; \
} while (0)
@@ -680,7 +687,7 @@ tas(volatile slock_t *lock)
" xor #1,%0 \n"
: "=z"(_res), "+m"(*lock)
: "r"(lock)
-: "memory", "t");
+: "memory", "t", "cc");
return _res;
}
@@ -786,12 +793,13 @@ tas(volatile slock_t *lock)
" ldcwx 0(0,%2),%0 \n"
: "=r"(lockval), "+m"(*lockword)
: "r"(lockword)
-: "memory");
+: "memory", "cc");
return (lockval == 0);
}
#endif /* __GNUC__ */
+/* XXX: no barrier semantics */
#define S_UNLOCK(lock) (*TAS_ACTIVE_WORD(lock) = -1)
#define S_INIT_LOCK(lock) \
@@ -827,6 +835,7 @@ tas(volatile slock_t *lock)
typedef unsigned int slock_t;
#include <ia64/sys/inline.h>
+/* xchg implies acquire semantics */
#define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
/* On IA64, it's a win to use a non-locking test before the xchg proper */
#define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock))
@@ -942,7 +951,17 @@ extern int tas_sema(volatile slock_t *lock);
#endif /* S_LOCK_FREE */
#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
+#define S_UNLOCK(lock) \
+ do \
+ { \
+ pg_write_barrier(); \
+ (*((volatile slock_t *) (lock)) = 0); \
+ } while (0)
+/*
+ * Version without an implied memory barrier, only to be used in the
+ * spinlock-based fallback for memory barriers.
+ */
+#define S_UNLOCKED_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
#endif /* S_UNLOCK */
#if !defined(S_INIT_LOCK)
@@ -964,6 +983,8 @@ extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or
#define TAS_SPIN(lock) TAS(lock)
#endif /* TAS_SPIN */
+/* spinlock used to enforce barrier semantics */
+extern slock_t dummy_spinlock;
/*
* Platform-independent out-of-line support routines