> It's still unproven whether it'd be an improvement, but you could expect to prove it one way or the other with a well-defined amount of testing.
I've hacked the code to use adaptive pthread mutexes instead of spinlocks. see attached patch. The patch is for the git head, but it can easily be applied for 9.1.3, which is what I did for my tests. This had disastrous effects on Solaris because it does not use anything similar to futexes for PTHREAD_PROCESS_SHARED mutexes (only the _PRIVATE mutexes do without syscalls for the simple case). But I was surprised to see that it works relatively well on linux. Here's a glimpse of my results: hacked code 9.1.3: -bash-4.1$ rsync -av --delete /tmp/test_template_data/ ../data/ ; /usr/bin/time ./postgres -D ../data -p 55502 & ppid=$! ; pid=$(pgrep -P $ppid ) ; sleep 15 ; ./pgbench -c 768 -t 20 -j 128 -p 55502 postgres ; kill $pid sending incremental file list ... ransaction type: TPC-B (sort of) scaling factor: 10 query mode: simple number of clients: 768 number of threads: 128 number of transactions per client: 20 number of transactions actually processed: 15360/15360 tps = 476.873261 (including connections establishing) tps = 485.964355 (excluding connections establishing) LOG: received smart shutdown request LOG: autovacuum launcher shutting down -bash-4.1$ LOG: shutting down LOG: database system is shut down 210.58user 78.88system 0:50.64elapsed 571%CPU (0avgtext+0avgdata 1995968maxresident)k 0inputs+1153872outputs (0major+2464649minor)pagefaults 0swaps original code (vanilla build on amd64) 9.1.3: -bash-4.1$ rsync -av --delete /tmp/test_template_data/ ../data/ ; /usr/bin/time ./postgres -D ../data -p 55502 & ppid=$! ; pid=$(pgrep -P $ppid ) ; sleep 15 ; ./pgbench -c 768 -t 20 -j 128 -p 55502 postgres ; kill $pid sending incremental file list ... 
transaction type: TPC-B (sort of) scaling factor: 10 query mode: simple number of clients: 768 number of threads: 128 number of transactions per client: 20 number of transactions actually processed: 15360/15360 tps = 499.993685 (including connections establishing) tps = 510.410883 (excluding connections establishing) LOG: received smart shutdown request -bash-4.1$ LOG: autovacuum launcher shutting down LOG: shutting down LOG: database system is shut down 196.21user 71.38system 0:47.99elapsed 557%CPU (0avgtext+0avgdata 1360800maxresident)k 0inputs+1147904outputs (0major+2375965minor)pagefaults 0swaps config: -bash-4.1$ egrep '^[a-z]' /tmp/test_template_data/postgresql.conf max_connections = 1800 # (change requires restart) shared_buffers = 10GB # min 128kB temp_buffers = 64MB # min 800kB work_mem = 256MB # min 64kB,d efault 1MB maintenance_work_mem = 2GB # min 1MB, default 16MB bgwriter_delay = 10ms # 10-10000ms between rounds bgwriter_lru_maxpages = 1000 # 0-1000 max buffers written/round bgwriter_lru_multiplier = 10.0 # 0-10.0 multipler on buffers scanned/round wal_level = hot_standby # minimal, archive, or hot_standby wal_buffers = 64MB # min 32kB, -1 sets based on shared_buffers commit_delay = 10000 # range 0-100000, in microseconds datestyle = 'iso, mdy' lc_messages = 'en_US.UTF-8' # locale for system error message lc_monetary = 'en_US.UTF-8' # locale for monetary formatting lc_numeric = 'en_US.UTF-8' # locale for number formatting lc_time = 'en_US.UTF-8' # locale for time formatting default_text_search_config = 'pg_catalog.english' seq_page_cost = 1.0 # measured on an arbitrary scale random_page_cost = 1.5 # same scale as above (default: 4.0) cpu_tuple_cost = 0.005 cpu_index_tuple_cost = 0.0025 cpu_operator_cost = 0.0001 effective_cache_size = 192GB So it looks like using pthread_mutexes could at least be an option on Linux. Using futexes directly could be even cheaper. 
As a side note, it looks like I have not expressed myself clearly: I did not intend to suggest replacing proven, working code (which is probably the best you can get for some platforms) with POSIX calls. I apologize for the provocative question. Regarding the actual production issue, I did not manage to synthetically provoke the saturation we are seeing in production using pgbench - I could not even get anywhere near the production load. So I cannot currently test whether reducing the amount of spinning and waking up exactly one waiter (which is what linux/nptl pthread_mutex_unlock does) would solve/mitigate the production issue I am working on, and I'd highly appreciate any pointers in this direction. Cheers, Nils
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c index bc8d89f..a45fdf6 100644 --- a/src/backend/storage/lmgr/s_lock.c +++ b/src/backend/storage/lmgr/s_lock.c @@ -20,6 +20,8 @@ #include "storage/s_lock.h" +#ifndef USE_PTHREAD_SLOCK + slock_t dummy_spinlock; static int spins_per_delay = DEFAULT_SPINS_PER_DELAY; @@ -200,9 +202,95 @@ update_spins_per_delay(int shared_spins_per_delay) * because the definitions for these are split between this file and s_lock.h. */ +#endif /* !USE_PTHREAD_SLOCK */ #ifdef HAVE_SPINLOCKS /* skip spinlocks if requested */ +#ifdef USE_PTHREAD_SLOCK +#include <pthread.h> +void +posix_lock(volatile slock_t *lock, const char *file, int line) +{ + struct slock *s_lock = (slock_t *)lock; + int ret; + + /* XXX error recovery ! */ + + do { + ret = pthread_mutex_lock(&s_lock->mutex); + } while (ret != 0); + s_lock->held = 1; +} + +void +posix_unlock(volatile slock_t *lock, const char *file, int line) +{ + struct slock *s_lock = (slock_t *)lock; + int ret; + + /* XXX error recovery ! 
*/ + + s_lock->held = 0; + do { + ret = pthread_mutex_unlock(&s_lock->mutex); + } while (ret != 0); +} + +void +posix_init(volatile slock_t *lock) +{ + struct slock *s_lock = (slock_t *)lock; + pthread_mutexattr_t attr; + int ret; + + ret = pthread_mutexattr_init(&attr); + Assert(ret == 0); + +#ifdef PTHREAD_MUTEX_ADAPTIVE_NP + (void)pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +#else + (void)pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); +#endif + +#ifdef USE_PRIO_INHERIT + /* + * looks like this incurs significant syscall overhead on linux + */ + (void)pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT); +#endif + + ret = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + Assert(ret == 0); + + s_lock->held = 0; + + ret = pthread_mutex_init(&s_lock->mutex, &attr); + Assert(ret == 0); + + pthread_mutexattr_destroy(&attr); +} + +int +posix_lock_free(volatile slock_t *lock) +{ + struct slock *s_lock = (slock_t *)lock; + + return (s_lock->held == 0); +} + +#if 0 +void +posix_lock_free(slock_t *lock) +{ + struct slock *s_lock = lock; + int ret; + + ret = pthread_mutex_destroy(&s_lock->mutex); + Assert(ret == 0); +} +#endif + +#else /* !USE_PTHREAD_SLOCK */ #if defined(__GNUC__) @@ -282,6 +370,7 @@ tas_dummy() /* really means: extern int tas(slock_t } #endif /* sun3 */ #endif /* not __GNUC__ */ +#endif /* USE_PTHREAD_SLOCK */ #endif /* HAVE_SPINLOCKS */ diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index d4a783f..200af8f 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -97,6 +97,30 @@ #ifdef HAVE_SPINLOCKS /* skip spinlocks if requested */ +#ifdef USE_PTHREAD_SLOCK +#include <pthread.h> +struct slock { + pthread_mutex_t mutex; + int held; +}; + +typedef struct slock slock_t; + +extern void posix_lock(volatile slock_t *lock, const char *file, int line); +extern void posix_unlock(volatile slock_t *lock, const char *file, int line); +extern void posix_init(volatile slock_t 
*lock); +extern int posix_lock_free(volatile slock_t *lock); + +#define S_LOCK(lock) posix_lock((lock), __FILE__, __LINE__) +#define S_UNLOCK(lock) posix_unlock((lock), __FILE__, __LINE__) +#define S_INIT_LOCK(lock) posix_init(lock) +#define S_LOCK_FREE(lock) posix_lock_free(lock) +#define SPIN_DELAY() ((void) 0) + +#define set_spins_per_delay(s) ((void) 0) +#define update_spins_per_delay(s) (0) +#define DEFAULT_SPINS_PER_DELAY 0 +#else /* !USE_PTHREAD_SLOCK */ #if defined(__GNUC__) || defined(__INTEL_COMPILER) /************************************************************************* @@ -947,7 +971,7 @@ spin_delay(void) #error PostgreSQL does not have native spinlock support on this platform. To continue the compilation, rerun configure using --disable-spinlocks. However, performance will be poor. Please report this to pgsql-b...@postgresql.org. #endif - +#endif /* USE_PTHREAD_SLOCK */ #else /* !HAVE_SPINLOCKS */ @@ -968,7 +992,6 @@ extern int tas_sema(volatile slock_t *lock); #define S_INIT_LOCK(lock) s_init_lock_sema(lock) #define TAS(lock) tas_sema(lock) - #endif /* HAVE_SPINLOCKS */ @@ -996,6 +1019,8 @@ extern int tas_sema(volatile slock_t *lock); #define S_INIT_LOCK(lock) S_UNLOCK(lock) #endif /* S_INIT_LOCK */ +#ifndef USE_PTHREAD_SLOCK + #if !defined(SPIN_DELAY) #define SPIN_DELAY() ((void) 0) #endif /* SPIN_DELAY */ @@ -1004,6 +1029,7 @@ extern int tas_sema(volatile slock_t *lock); extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or * s_lock.c */ + #define TAS(lock) tas(lock) #endif /* TAS */ @@ -1022,5 +1048,6 @@ extern void s_lock(volatile slock_t *lock, const char *file, int line); extern void set_spins_per_delay(int shared_spins_per_delay); extern int update_spins_per_delay(int shared_spins_per_delay); +#endif /* !USE_PTHREAD_SLOCK */ #endif /* S_LOCK_H */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers