On 09/15/2015 03:51 PM, Jesper Pedersen wrote:
It
would be nice to get a better sense of how *long* we block on various
locks. It's hard to tell whether some other lock might have fewer
blocking events but for a much longer average duration.
I did a run with the attached patch, recording the time from
LWLockQueueSelf() until the lock is acquired.
Doing block_time / block_count basically only shows "main 0" -- it's
called "unassigned:0"; it also shows up in the max exclusive report.
Where it is coming from is another question, since it shouldn't be in use.
Due to the overhead of gettimeofday() I think that _v1 is better to
consider.
Best regards,
Jesper
*** /tmp/K1hW5V_lwlock.c 2015-09-16 09:53:26.682803203 -0400
--- src/backend/storage/lmgr/lwlock.c 2015-09-16 08:59:11.232712749 -0400
***************
*** 92,97 ****
--- 92,98 ----
#include "utils/memutils.h"
#ifdef LWLOCK_STATS
+ #include <sys/time.h>
#include "utils/hsearch.h"
#endif
***************
*** 163,176 ****
--- 164,190 ----
{
lwlock_stats_key key;
int sh_acquire_count;
+ int sh_acquire_max;
int ex_acquire_count;
+ int ex_acquire_max;
int block_count;
+ double block_time;
+ double timestamp;
int dequeue_self_count;
int spin_delay_count;
+ int max_waiters;
} lwlock_stats;
static HTAB *lwlock_stats_htab;
static lwlock_stats lwlock_stats_dummy;
+
+ static double
+ GetTimeInMillis()
+ {
+ struct timeval t;
+ gettimeofday(&t, NULL);
+ return (t.tv_sec + (t.tv_usec / 1000000.0)) * 1000.0;
+ }
#endif
#ifdef LOCK_DEBUG
***************
*** 297,308 ****
while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
{
! fprintf(stderr,
! "PID %d lwlock %s %d: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
! MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
! lwstats->key.instance, lwstats->sh_acquire_count,
! lwstats->ex_acquire_count, lwstats->block_count,
! lwstats->spin_delay_count, lwstats->dequeue_self_count);
}
LWLockRelease(&MainLWLockArray[0].lock);
--- 311,332 ----
while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
{
! if (lwstats->key.tranche == 0 && lwstats->key.instance < NUM_INDIVIDUAL_LWLOCKS)
! fprintf(stderr,
! "PID %d lwlock %s: shacq %u shmax %u exacq %u exmax %u blk %u blktime %u spindelay %u dequeue self %u maxw %u\n",
! MyProcPid, MainLWLockNames[lwstats->key.instance],
! lwstats->sh_acquire_count, lwstats->sh_acquire_max,
! lwstats->ex_acquire_count, lwstats->ex_acquire_max,
! lwstats->block_count, (unsigned int)lwstats->block_time, lwstats->spin_delay_count, lwstats->dequeue_self_count,
! lwstats->max_waiters);
! else
! fprintf(stderr,
! "PID %d lwlock %s %d: shacq %u shmax %u exacq %u exmax %u blk %u blktime %u spindelay %u dequeue self %u maxw %u\n",
! MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
! lwstats->key.instance, lwstats->sh_acquire_count, lwstats->sh_acquire_max,
! lwstats->ex_acquire_count, lwstats->ex_acquire_max,
! lwstats->block_count, (unsigned int)lwstats->block_time, lwstats->spin_delay_count, lwstats->dequeue_self_count,
! lwstats->max_waiters);
}
LWLockRelease(&MainLWLockArray[0].lock);
***************
*** 330,339 ****
--- 354,368 ----
if (!found)
{
lwstats->sh_acquire_count = 0;
+ lwstats->sh_acquire_max = 0;
lwstats->ex_acquire_count = 0;
+ lwstats->ex_acquire_max = 0;
lwstats->block_count = 0;
+ lwstats->block_time = 0.0;
+ lwstats->timestamp = 0.0;
lwstats->dequeue_self_count = 0;
lwstats->spin_delay_count = 0;
+ lwstats->max_waiters = 0;
}
return lwstats;
}
***************
*** 774,779 ****
--- 803,811 ----
LWLockQueueSelf(LWLock *lock, LWLockMode mode)
{
#ifdef LWLOCK_STATS
+ bool include;
+ int counter, size;
+ dlist_iter iter;
lwlock_stats *lwstats;
lwstats = get_lwlock_stats_entry(lock);
***************
*** 791,797 ****
--- 823,870 ----
elog(PANIC, "queueing for lock while waiting on another one");
#ifdef LWLOCK_STATS
+ lwstats->timestamp = GetTimeInMillis();
lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
+
+ /*
+ * We scan the list of waiters from the back in order to find
+ * out how many of the same lock type are waiting for a lock.
+ * Similar types have the potential to be grouped together.
+ *
+ * We also count the number of waiters, including ourself.
+ */
+ include = true;
+ size = 1;
+ counter = 1;
+
+ dlist_reverse_foreach(iter, &lock->waiters)
+ {
+ if (include)
+ {
+ PGPROC *waiter = dlist_container(PGPROC, lwWaitLink, iter.cur);
+
+ if (waiter->lwWaitMode == mode)
+ counter += 1;
+ else
+ include = false;
+ }
+
+ size += 1;
+ }
+
+ if (mode == LW_EXCLUSIVE || mode == LW_WAIT_UNTIL_FREE)
+ {
+ if (counter > lwstats->ex_acquire_max)
+ lwstats->ex_acquire_max = counter;
+ }
+ else if (mode == LW_SHARED)
+ {
+ if (counter > lwstats->sh_acquire_max)
+ lwstats->sh_acquire_max = counter;
+ }
+
+ if (size > lwstats->max_waiters)
+ lwstats->max_waiters = size;
#else
SpinLockAcquire(&lock->mutex);
#endif
***************
*** 943,951 ****
--- 1016,1032 ----
#ifdef LWLOCK_STATS
/* Count lock acquisition attempts */
if (mode == LW_EXCLUSIVE)
+ {
lwstats->ex_acquire_count++;
+ if (lwstats->ex_acquire_max == 0)
+ lwstats->ex_acquire_max = 1;
+ }
else
+ {
lwstats->sh_acquire_count++;
+ if (lwstats->sh_acquire_max == 0)
+ lwstats->sh_acquire_max = 1;
+ }
#endif /* LWLOCK_STATS */
/*
***************
*** 1021,1026 ****
--- 1102,1110 ----
LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
LWLockDequeueSelf(lock);
+ #ifdef LWLOCK_STATS
+ lwstats->block_time += (GetTimeInMillis() - lwstats->timestamp);
+ #endif
break;
}
***************
*** 1068,1073 ****
--- 1152,1160 ----
LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
+ #ifdef LWLOCK_STATS
+ lwstats->block_time += (GetTimeInMillis() - lwstats->timestamp);
+ #endif
/* Now loop back and try to acquire lock again. */
result = false;
}
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers