Tom Lane wrote:

Is there an easy way to find out which LWLock is contended?



Not from oprofile output, as far as I can think. I've suspected for
some time that the BufMgrLock is a major bottleneck, but have no proof.


Mark ran a DBT-2 test run with the attached statistics patch applied. The patch collects stats about all lightweight locks and dumps them during shutdown. The hottest locks are:

Lock                         Acquire   %contention   sleep calls
8(WALInsertLock)       8679205 0.030410        263934
1(LockMgrLock)       64089418        0.079783        5113215
5(SInvalLock)       68396470        0.001298        88812
0(BufMgrLock)       246307425       0.120293        29629089

The lock numbers are from 7.4, i.e. without the patch that removes ShmemIndexLock. I've checked that 8 is really WALInsertLock in the assembly output.

The scary part from the system perspective is the 35 million context switches that were generated by the BufMgrLock and the LockMgrLock. I remember there were patches that tried other algorithms instead of the simple LRU for the buffer manager. Has anyone tried to change the locking of the buffer manager?

The effect of padding the lightweight locks to a full cacheline appears to be negligible: With the padding, there were around 4 million performance monitor hits on the 'lock xchg' instructions. Without it (test run 300), there were 4.2 million hits.

The complete data is at

http://developer.osdl.org/markw/dbt2-pgsql/303/

The db log with the lock stats is at http://developer.osdl.org/markw/dbt2-pgsql/303/db/log

(Warning: 6.9 MB)

--
   Manfred
Index: src/backend/storage/lmgr/lwlock.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/lmgr/lwlock.c,v
retrieving revision 1.19
diff -u -r1.19 lwlock.c
--- src/backend/storage/lmgr/lwlock.c   20 Dec 2003 17:31:21 -0000      1.19
+++ src/backend/storage/lmgr/lwlock.c   27 Dec 2003 22:51:36 -0000
@@ -36,6 +36,11 @@
        PGPROC     *head;                       /* head of list of waiting PGPROCs */
        PGPROC     *tail;                       /* tail of list of waiting PGPROCs */
        /* tail is undefined when head is NULL */
+       unsigned long long stat_acquire_total;
+       unsigned long long stat_acquire_fail;
+       unsigned long long stat_release_total;
+       unsigned long long stat_release_wakeup;
+       int             fill[20];
 } LWLock;
 
 /*
@@ -159,6 +164,10 @@
                lock->shared = 0;
                lock->head = NULL;
                lock->tail = NULL;
+               lock->stat_acquire_total = 0;
+               lock->stat_acquire_fail = 0;
+               lock->stat_release_total = 0;
+               lock->stat_release_wakeup = 0;
        }
 
        /*
@@ -245,6 +254,10 @@
                if (retry)
                        lock->releaseOK = true;
 
+               lock->stat_acquire_total++;
+               if (retry)
+                       lock->stat_acquire_fail++;
+
                /* If I can get the lock, do so quickly. */
                if (mode == LW_EXCLUSIVE)
                {
@@ -440,6 +453,7 @@
                Assert(lock->shared > 0);
                lock->shared--;
        }
+       lock->stat_release_total++;
 
        /*
         * See if I need to awaken any waiters.  If I released a non-last
@@ -477,6 +491,8 @@
                }
        }
 
+       if (head)
+               lock->stat_release_wakeup++;
        /* We are done updating shared state of the lock itself. */
        SpinLockRelease_NoHoldoff(&lock->mutex);
 
@@ -517,5 +533,19 @@
                HOLD_INTERRUPTS();              /* match the upcoming RESUME_INTERRUPTS */
 
                LWLockRelease(held_lwlocks[num_held_lwlocks - 1]);
+       }
+}
+
+void LWLockPrintStats(void);
+void
+LWLockPrintStats(void)
+{
+       int i;
+       for (i=0;i<LWLockCounter[0];i++) {
+               volatile LWLock *lock = LWLockArray + i;
+               elog(LOG, "Lock %d): acquire_total %Ld acquire_fail %Ld release_total %Ld release_wakeup %Ld\n",
+                        i,
+                        lock->stat_acquire_total, lock->stat_acquire_fail,
+                        lock->stat_release_total, lock->stat_release_wakeup);
        }
 }
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/postmaster/postmaster.c,v
retrieving revision 1.353
diff -u -r1.353 postmaster.c
--- src/backend/postmaster/postmaster.c 25 Dec 2003 03:52:51 -0000      1.353
+++ src/backend/postmaster/postmaster.c 27 Dec 2003 22:51:38 -0000
@@ -1701,7 +1701,7 @@
        errno = save_errno;
 }
 
-
+void LWLockPrintStats(void);
 
 /*
  * pmdie -- signal handler for processing various postmaster signals.
@@ -1733,6 +1733,7 @@
                        Shutdown = SmartShutdown;
                        ereport(LOG,
                                        (errmsg("received smart shutdown request")));
+                       LWLockPrintStats();
                        if (DLGetHead(BackendList)) /* let reaper() handle this */
                                break;
 
@@ -1766,6 +1767,7 @@
                                break;
                        ereport(LOG,
                                        (errmsg("received fast shutdown request")));
+                       LWLockPrintStats();
                        if (DLGetHead(BackendList)) /* let reaper() handle this */
                        {
                                Shutdown = FastShutdown;
@@ -1812,6 +1814,7 @@
                                kill(BgWriterPID, SIGQUIT);
                        ereport(LOG,
                                        (errmsg("received immediate shutdown request")));
+                       LWLockPrintStats();
                        if (ShutdownPID > 0)
                                kill(ShutdownPID, SIGQUIT);
                        if (StartupPID > 0)
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to