Hi all,
I've updated the pg_stat_lwlocks patch to work with
the latest PostgreSQL Git code.
This patch provides:
  pg_stat_lwlocks          New system view to show lwlock statistics.
  pg_stat_get_lwlocks()    New function to retrieve lwlock statistics.
  pg_stat_reset_lwlocks()  New function to reset lwlock statistics.
Please try it out.
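
For example, to exercise the new objects, something like
(output will vary by workload):
--------------------------------------------------
-- top 5 lwlocks by total wait time since the last reset
SELECT * FROM pg_stat_lwlocks ORDER BY time_ms DESC LIMIT 5;

-- clear the counters before a fresh measurement run
SELECT pg_stat_reset_lwlocks();
--------------------------------------------------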
Regards,
2012/06/26 5:29, Satoshi Nagayasu wrote:
> Hi all,
>
> I've been working on a new system view, pg_stat_lwlocks, to observe
> LWLock, and just completed my 'proof-of-concept' code that can work
> with version 9.1.
>
> Now, I'd like to know the possibility of this feature for future
> release.
>
> With this patch, a DBA can easily pinpoint lwlock bottlenecks.
> --------------------------------------------------
> postgres=# SELECT * FROM pg_stat_lwlocks ORDER BY time_ms DESC LIMIT 10;
>  lwlockid | calls  | waits | time_ms
> ----------+--------+-------+---------
>        49 | 193326 | 32096 |   23688
>         8 |   3305 |   133 |    1335
>         2 |     21 |     0 |       0
>         4 | 135188 |     0 |       0
>         5 |  57935 |     0 |       0
>         6 |    141 |     0 |       0
>         7 |  24580 |     1 |       0
>         3 |   3282 |     0 |       0
>         1 |     41 |     0 |       0
>         9 |      3 |     0 |       0
> (10 rows)
>
> postgres=#
> --------------------------------------------------
>
> In this view,
> 'lwlockid' is the LWLockId used in the backends.
> 'calls' is the number of times LWLockAcquire() was called.
> 'waits' is the number of times LWLockAcquire() had to wait
> before acquiring the lock.
> 'time_ms' is the total time, in milliseconds, that LWLockAcquire()
> spent waiting on the lwlock.
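>
> For instance, the contention rate and the average wait time per
> blocked acquisition can be derived with a query like this (just
> a sketch):
> --------------------------------------------------
> SELECT lwlockid,
>        waits::float8 / NULLIF(calls, 0)   AS wait_ratio,
>        time_ms::float8 / NULLIF(waits, 0) AS avg_wait_ms
>   FROM pg_stat_lwlocks
>  ORDER BY time_ms DESC;
> --------------------------------------------------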
>
> And lwlocks that use an LWLockId range, such as BufMappingLock or
> LockMgrLock, are grouped and summed up into a single record.
> For example, lwlockid 49 in the above view represents LockMgrLock
> statistics.
>
> Now, I know there are some considerations.
>
> (1) Performance
>
> I've measured LWLock performance both with and without the patch,
> and confirmed that the patch has no measurable impact on LWLock
> performance in this test.
>
> pgbench scores with the patch:
> tps = 900.906658 (excluding connections establishing)
> tps = 908.528422 (excluding connections establishing)
> tps = 903.900977 (excluding connections establishing)
> tps = 910.470595 (excluding connections establishing)
> tps = 909.685396 (excluding connections establishing)
>
> pgbench scores without the patch:
> tps = 909.096785 (excluding connections establishing)
> tps = 894.868712 (excluding connections establishing)
> tps = 910.074669 (excluding connections establishing)
> tps = 904.022770 (excluding connections establishing)
> tps = 895.673830 (excluding connections establishing)
>
> Of course, this experiment was not I/O bound, and the cache hit ratio
> was >99.9%.
>
> (2) Memory space
>
> In this patch, I added three new uint64 members to the LWLock
> structure to collect statistics.
>
> This means those members must be held in shared memory,
> but I'm not sure whether that's appropriate.
>
> I think another possible option is holding those statistics
> values in local (backend) process memory and sending them through
> the statistics collector process (like other statistics values).
>
> (3) LWLock names (or labels)
>
> Right now, the pg_stat_lwlocks view shows the raw LWLockId, which
> makes it hard for a DBA to determine the actual lock type.
>
> So, I want to show LWLock names (or labels), like 'WALWriteLock'
> or 'LockMgrLock', but how should I implement it?
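>
> In the meantime, one possible SQL-side workaround is to hand-label
> the IDs known for a given build, e.g. (the numbering below matches
> the example above and is build-dependent):
> --------------------------------------------------
> SELECT CASE lwlockid
>          WHEN 49 THEN 'LockMgrLock'  -- FirstLockMgrLock in this build
>          ELSE lwlockid::text
>        END AS lock_name,
>        calls, waits, time_ms
>   FROM pg_stat_lwlocks;
> --------------------------------------------------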
>
> Any comments?
>
> Regards,
--
Satoshi Nagayasu <[email protected]>
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7cc1d41..f832b45 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -658,6 +658,14 @@ CREATE VIEW pg_stat_bgwriter AS
pg_stat_get_buf_alloc() AS buffers_alloc,
pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
+CREATE VIEW pg_stat_lwlocks AS
+ SELECT
+ S.lwlockid,
+ S.calls,
+ S.waits,
+ S.time_ms
+ FROM pg_stat_get_lwlocks() AS S;
+
CREATE VIEW pg_user_mappings AS
SELECT
U.oid AS umid,
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 95d4b37..2a2c197 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -32,6 +32,7 @@
#include "storage/proc.h"
#include "storage/spin.h"
+#include <sys/time.h>
/* We use the ShmemLock spinlock to protect LWLockAssign */
extern slock_t *ShmemLock;
@@ -46,6 +47,11 @@ typedef struct LWLock
PGPROC *head; /* head of list of waiting PGPROCs */
PGPROC *tail; /* tail of list of waiting PGPROCs */
/* tail is undefined when head is NULL */
+
+ /* statistics stuff */
+ uint64 calls;
+ uint64 waits;
+ uint64 time_ms;
} LWLock;
/*
@@ -287,6 +293,9 @@ CreateLWLocks(void)
lock->lock.shared = 0;
lock->lock.head = NULL;
lock->lock.tail = NULL;
+ lock->lock.calls = 0;
+ lock->lock.waits = 0;
+ lock->lock.time_ms = 0;
}
/*
@@ -342,8 +351,10 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
PGPROC *proc = MyProc;
bool retry = false;
int extraWaits = 0;
+ struct timeval wait_start,wait_done;
PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
+ lock->calls++;
#ifdef LWLOCK_STATS
/* Set up local count state first time through in a given process */
@@ -467,6 +478,8 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
#endif
TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);
+ lock->waits++;
+ gettimeofday(&wait_start, NULL);
for (;;)
{
@@ -478,6 +491,8 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
}
TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);
+ gettimeofday(&wait_done, NULL);
+ lock->time_ms += (wait_done.tv_sec-wait_start.tv_sec)*1000 + (wait_done.tv_usec-wait_start.tv_usec)/1000;
LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");
@@ -879,3 +894,48 @@ LWLockHeldByMe(LWLockId lockid)
}
return false;
}
+
+void
+lwlock_get_stat(LWLockId lockid, uint64 *calls, uint64 *waits, uint64 *time_ms)
+{
+ volatile LWLock *lock = &(LWLockArray[lockid].lock);
+
+ SpinLockAcquire(&lock->mutex);
+ *calls = lock->calls;
+ *waits = lock->waits;
+ *time_ms = lock->time_ms;
+ SpinLockRelease(&lock->mutex);
+}
+
+void
+lwlock_get_stat_sum(LWLockId start, LWLockId end, uint64 *calls, uint64 *waits, uint64 *time_ms)
+{
+ LWLockId lockid;
+
+ *calls = 0;
+ *waits = 0;
+ *time_ms = 0;
+
+ for (lockid=start ; lockid<=end ; lockid++)
+ {
+ uint64 calls2, waits2, time_ms2;
+
+ lwlock_get_stat(lockid, &calls2, &waits2, &time_ms2);
+
+ *calls += calls2;
+ *waits += waits2;
+ *time_ms += time_ms2;
+ }
+}
+
+void
+lwlock_reset_stat(LWLockId lockid)
+{
+ volatile LWLock *lock = &(LWLockArray[lockid].lock);
+
+ SpinLockAcquire(&lock->mutex);
+ lock->calls = 0;
+ lock->waits = 0;
+ lock->time_ms = 0;
+ SpinLockRelease(&lock->mutex);
+}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7c0705a..b4e453f 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -117,6 +117,9 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_lwlocks(PG_FUNCTION_ARGS);
+extern Datum pg_stat_reset_lwlocks(PG_FUNCTION_ARGS);
+
/* Global bgwriter statistics, from bgwriter.c */
extern PgStat_MsgBgWriter bgwriterStats;
@@ -1700,3 +1703,109 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
+
+Datum
+pg_stat_get_lwlocks(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ TupleDesc tupdesc;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ tupdesc = CreateTemplateTupleDesc(4, false);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lockid",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "calls",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "waits",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "time_ms",
+ INT8OID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+ funcctx->max_calls = NumLWLocks();
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ Datum values[4];
+ bool nulls[4];
+ HeapTuple tuple;
+ LWLockId lockid;
+ uint64 calls,waits,time_ms;
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ lockid = funcctx->call_cntr;
+
+ if ( lockid<FirstBufMappingLock )
+ {
+ lwlock_get_stat(lockid, &calls, &waits, &time_ms);
+ }
+ else if ( FirstBufMappingLock<=lockid && lockid<FirstLockMgrLock )
+ {
+ lwlock_get_stat_sum(FirstBufMappingLock, FirstLockMgrLock-1,
+ &calls, &waits, &time_ms);
+ funcctx->call_cntr = FirstLockMgrLock - 1;
+ }
+ else if ( FirstLockMgrLock<=lockid && lockid<FirstPredicateLockMgrLock )
+ {
+ lwlock_get_stat_sum(FirstLockMgrLock, FirstPredicateLockMgrLock-1,
+ &calls, &waits, &time_ms);
+ funcctx->call_cntr = FirstPredicateLockMgrLock - 1;
+ }
+ else if ( FirstPredicateLockMgrLock<=lockid && lockid<NumFixedLWLocks )
+ {
+ lwlock_get_stat_sum(FirstPredicateLockMgrLock, NumFixedLWLocks-1,
+ &calls, &waits, &time_ms);
+ funcctx->call_cntr = NumFixedLWLocks - 1;
+ }
+ else if ( NumFixedLWLocks<=lockid && lockid<NumLWLocks() )
+ {
+ lwlock_get_stat_sum(NumFixedLWLocks, NumLWLocks()-1,
+ &calls, &waits, &time_ms);
+ funcctx->call_cntr = NumLWLocks() - 1;
+ }
+
+ values[0] = Int64GetDatum(lockid);
+ values[1] = Int64GetDatum(calls);
+ values[2] = Int64GetDatum(waits);
+ values[3] = Int64GetDatum(time_ms);
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+ else
+ {
+ SRF_RETURN_DONE(funcctx);
+ }
+}
+
+Datum
+pg_stat_reset_lwlocks(PG_FUNCTION_ARGS)
+{
+ LWLockId lockid;
+
+ for (lockid=0 ; lockid<NumLWLocks() ; lockid++)
+ {
+ lwlock_reset_stat(lockid);
+ }
+
+ PG_RETURN_VOID();
+}
+
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bee7154..85d2a7f 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2602,6 +2602,10 @@ DATA(insert OID = 1936 ( pg_stat_get_backend_idset PGNSP PGUID 12 1 100 0 0 f
DESCR("statistics: currently active backend IDs");
DATA(insert OID = 2022 ( pg_stat_get_activity PGNSP PGUID 12 1 100 0 0 f f f f f t s 1 0 2249 "23" "{23,26,23,26,25,25,25,16,1184,1184,1184,1184,869,25,23}" "{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o}" "{pid,datid,pid,usesysid,application_name,state,query,waiting,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port}" _null_ pg_stat_get_activity _null_ _null_ _null_ ));
DESCR("statistics: information about currently active backends");
+DATA(insert OID = 3764 ( pg_stat_get_lwlocks PGNSP PGUID 12 1 100 0 0 f f f f f t s 0 0 2249 "" "{20,20,20,20}" "{o,o,o,o}" "{lwlockid,calls,waits,time_ms}" _null_ pg_stat_get_lwlocks _null_ _null_ _null_ ));
+DESCR("statistics: light-weight lock statistics");
+DATA(insert OID = 3765 ( pg_stat_reset_lwlocks PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 2278 "" _null_ _null_ _null_ _null_ pg_stat_reset_lwlocks _null_ _null_ _null_ ));
+DESCR("statistics: reset light-weight lock statistics");
DATA(insert OID = 3099 ( pg_stat_get_wal_senders PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{23,25,25,25,25,25,23,25}" "{o,o,o,o,o,o,o,o}" "{pid,state,sent_location,write_location,flush_location,replay_location,sync_priority,sync_state}" _null_ pg_stat_get_wal_senders _null_ _null_ _null_ ));
DESCR("statistics: information about currently active replication");
DATA(insert OID = 2026 ( pg_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_backend_pid _null_ _null_ _null_ ));
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 82d8ec4..41c4259 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -119,4 +119,8 @@ extern void CreateLWLocks(void);
extern void RequestAddinLWLocks(int n);
+extern void lwlock_get_stat(LWLockId, uint64 *, uint64 *, uint64 *);
+extern void lwlock_get_stat_sum(LWLockId, LWLockId, uint64 *, uint64 *, uint64 *);
+extern void lwlock_reset_stat(LWLockId);
+
#endif /* LWLOCK_H */