This patch adds counters and views to monitor hot standby generated
recovery conflicts. It extends the pg_stat_database view with one
column with the total number of conflicts, and also creates a new view
pg_stat_database_conflicts that contains a breakdown of exactly what
caused the conflicts.
Documentation is still pending, but comments are of course appreciated in the meantime ;)
--
Magnus Hagander
Me: http://www.hagander.net/
Work: http://www.redpill-linpro.com/
*** a/src/backend/catalog/system_views.sql
--- b/src/backend/catalog/system_views.sql
***************
*** 502,508 **** CREATE VIEW pg_stat_database AS
pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched,
pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted,
pg_stat_get_db_tuples_updated(D.oid) AS tup_updated,
! pg_stat_get_db_tuples_deleted(D.oid) AS tup_deleted
FROM pg_database D;
CREATE VIEW pg_stat_user_functions AS
--- 502,521 ----
pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched,
pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted,
pg_stat_get_db_tuples_updated(D.oid) AS tup_updated,
! pg_stat_get_db_tuples_deleted(D.oid) AS tup_deleted,
! pg_stat_get_db_conflict_all(D.oid) AS conflicts
! FROM pg_database D;
!
! CREATE VIEW pg_stat_database_conflicts AS -- one row per database: recovery conflict counts broken down by cause
! SELECT
! D.oid AS datid,
! D.datname AS datname,
! pg_stat_get_db_conflict_database(D.oid) AS confl_database,
! pg_stat_get_db_conflict_tablespace(D.oid) AS confl_tablespace,
! pg_stat_get_db_conflict_lock(D.oid) AS confl_lock,
! pg_stat_get_db_conflict_snapshot(D.oid) AS confl_snapshot,
! pg_stat_get_db_conflict_bufferpin(D.oid) AS confl_bufferpin,
! pg_stat_get_db_conflict_startup_deadlock(D.oid) AS confl_deadlock
FROM pg_database D;
CREATE VIEW pg_stat_user_functions AS
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 57,62 ****
--- 57,63 ----
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
+ #include "storage/procsignal.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
***************
*** 278,283 **** static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
--- 279,285 ----
static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
+ static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
/* ------------------------------------------------------------
***************
*** 1314,1319 **** pgstat_report_analyze(Relation rel, bool adopt_counts,
--- 1316,1340 ----
pgstat_send(&msg, sizeof(msg));
}
+ /* --------
+ * pgstat_report_recovery_conflict() -
+ *
+ * Tell the collector about a Hot Standby recovery conflict.
+ *
+ * "reason" is copied unchanged into the message; the collector-side
+ * handler compares it against the PROCSIG_RECOVERY_CONFLICT_* values.
+ * The message is tagged with the current database (MyDatabaseId).
+ * Nothing is sent when the stats socket is invalid or when
+ * pgstat_track_counts is off.
+ * --------
+ */
+ void
+ pgstat_report_recovery_conflict(int reason)
+ {
+ PgStat_MsgRecoveryConflict msg;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
+ msg.m_databaseid = MyDatabaseId;
+ msg.m_reason = reason;
+ pgstat_send(&msg, sizeof(msg));
+ }
/* ----------
* pgstat_ping() -
***************
*** 3053,3058 **** PgstatCollectorMain(int argc, char *argv[])
--- 3074,3083 ----
pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
break;
+ case PGSTAT_MTYPE_RECOVERYCONFLICT:
+ pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len);
+ break;
+
default:
break;
}
***************
*** 3129,3134 **** pgstat_get_db_entry(Oid databaseid, bool create)
--- 3154,3165 ----
result->n_tuples_updated = 0;
result->n_tuples_deleted = 0;
result->last_autovac_time = 0;
+ result->n_conflict_database = 0;
+ result->n_conflict_tablespace = 0;
+ result->n_conflict_lock = 0;
+ result->n_conflict_snapshot = 0;
+ result->n_conflict_bufferpin = 0;
+ result->n_conflict_startup_deadlock = 0;
memset(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(Oid);
***************
*** 4204,4209 **** pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
--- 4235,4275 ----
}
/* ----------
+ * pgstat_recv_recoveryconflict() -
+ *
+ * Process a RECOVERYCONFLICT message.
+ *
+ * Increments the per-database counter selected by msg->m_reason. The
+ * database entry is fetched with create = true. A reason that matches
+ * none of the cases below is not counted. "len" is not used here.
+ * ----------
+ */
+ static void
+ pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
+ {
+ PgStat_StatDBEntry *dbentry;
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ switch (msg->m_reason)
+ {
+ case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ dbentry->n_conflict_database++;
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ dbentry->n_conflict_tablespace++;
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ dbentry->n_conflict_lock++;
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ dbentry->n_conflict_snapshot++;
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ dbentry->n_conflict_bufferpin++;
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ dbentry->n_conflict_startup_deadlock++;
+ break;
+ }
+ }
+
+ /* ----------
* pgstat_recv_funcstat() -
*
* Count what the backend has done.
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 2903,2917 **** ProcessInterrupts(void)
--- 2903,2923 ----
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating autovacuum process due to administrator command")));
else if (RecoveryConflictPending && RecoveryConflictRetryable)
+ {
+ pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict()));
+ }
else if (RecoveryConflictPending)
+ {
+ pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict()));
+ }
else
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
***************
*** 2956,2961 **** ProcessInterrupts(void)
--- 2962,2968 ----
RecoveryConflictPending = false;
DisableNotifyInterrupt();
DisableCatchupInterrupt();
+ pgstat_report_recovery_conflict(RecoveryConflictReason);
if (DoingCommandRead)
ereport(FATAL,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
*** a/src/backend/utils/adt/pgstatfuncs.c
--- b/src/backend/utils/adt/pgstatfuncs.c
***************
*** 71,76 **** extern Datum pg_stat_get_db_tuples_fetched(PG_FUNCTION_ARGS);
--- 71,83 ----
extern Datum pg_stat_get_db_tuples_inserted(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_tuples_updated(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_database(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS);
***************
*** 1130,1135 **** pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS)
--- 1137,1252 ----
}
Datum
+ pg_stat_get_db_conflict_database(PG_FUNCTION_ARGS) /* returns the n_conflict_database counter of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_database);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
+ pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS) /* returns the n_conflict_tablespace counter of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_tablespace);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
+ pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS) /* returns the n_conflict_lock counter of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_lock);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
+ pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS) /* returns the n_conflict_snapshot counter of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_snapshot);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
+ pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS) /* returns the n_conflict_bufferpin counter of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_bufferpin);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
+ pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS) /* returns the n_conflict_startup_deadlock counter of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_startup_deadlock);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
+ pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS) /* returns the sum of all six n_conflict_* counters of one database as int64 */
+ {
+ Oid dbid = PG_GETARG_OID(0); /* argument 0: OID of the database to report on */
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0; /* no stats entry for this database: report zero */
+ else
+ result = (int64) (dbentry->n_conflict_database + /* total = every per-cause counter added together */
+ dbentry->n_conflict_tablespace +
+ dbentry->n_conflict_lock +
+ dbentry->n_conflict_snapshot +
+ dbentry->n_conflict_bufferpin +
+ dbentry->n_conflict_startup_deadlock);
+
+ PG_RETURN_INT64(result);
+ }
+
+ Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints);
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 3109,3114 **** DATA(insert OID = 2761 ( pg_stat_get_db_tuples_updated PGNSP PGUID 12 1 0 0 f f
--- 3109,3128 ----
DESCR("statistics: tuples updated in database");
DATA(insert OID = 2762 ( pg_stat_get_db_tuples_deleted PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_tuples_deleted _null_ _null_ _null_ ));
DESCR("statistics: tuples deleted in database");
+ DATA(insert OID = 3065 ( pg_stat_get_db_conflict_database PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_database _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by drop database");
+ DATA(insert OID = 3066 ( pg_stat_get_db_conflict_tablespace PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_tablespace _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by drop tablespace");
+ DATA(insert OID = 3067 ( pg_stat_get_db_conflict_lock PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_lock _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by relation lock");
+ DATA(insert OID = 3068 ( pg_stat_get_db_conflict_snapshot PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_snapshot _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by snapshot expiry");
+ DATA(insert OID = 3069 ( pg_stat_get_db_conflict_bufferpin PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_bufferpin _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by shared buffer pin");
+ DATA(insert OID = 3070 ( pg_stat_get_db_conflict_startup_deadlock PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_startup_deadlock _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by buffer deadlock");
+ DATA(insert OID = 3071 ( pg_stat_get_db_conflict_all PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_all _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database");
DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ ));
DESCR("statistics: number of timed checkpoints started by the bgwriter");
DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 1 0 0 f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints _null_ _null_ _null_ ));
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
***************
*** 45,51 **** typedef enum StatMsgType
PGSTAT_MTYPE_ANALYZE,
PGSTAT_MTYPE_BGWRITER,
PGSTAT_MTYPE_FUNCSTAT,
! PGSTAT_MTYPE_FUNCPURGE
} StatMsgType;
/* ----------
--- 45,52 ----
PGSTAT_MTYPE_ANALYZE,
PGSTAT_MTYPE_BGWRITER,
PGSTAT_MTYPE_FUNCSTAT,
! PGSTAT_MTYPE_FUNCPURGE,
! PGSTAT_MTYPE_RECOVERYCONFLICT
} StatMsgType;
/* ----------
***************
*** 364,369 **** typedef struct PgStat_MsgBgWriter
--- 365,381 ----
PgStat_Counter m_buf_alloc;
} PgStat_MsgBgWriter;
+ /* ----------
+ * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
+ * ----------
+ */
+ typedef struct PgStat_MsgRecoveryConflict
+ {
+ PgStat_MsgHdr m_hdr;
+
+ Oid m_databaseid; /* database whose conflict counters the receiver updates */
+ int m_reason; /* conflict cause; the receiver compares it to PROCSIG_RECOVERY_CONFLICT_* */
+ } PgStat_MsgRecoveryConflict;
/* ----------
* PgStat_FunctionCounts The actual per-function counts kept by a backend
***************
*** 460,465 **** typedef union PgStat_Msg
--- 472,478 ----
PgStat_MsgBgWriter msg_bgwriter;
PgStat_MsgFuncstat msg_funcstat;
PgStat_MsgFuncpurge msg_funcpurge;
+ PgStat_MsgRecoveryConflict msg_recoveryconflict;
} PgStat_Msg;
***************
*** 490,495 **** typedef struct PgStat_StatDBEntry
--- 503,515 ----
PgStat_Counter n_tuples_updated;
PgStat_Counter n_tuples_deleted;
TimestampTz last_autovac_time;
+ PgStat_Counter n_conflict_database;
+ PgStat_Counter n_conflict_tablespace;
+ PgStat_Counter n_conflict_lock;
+ PgStat_Counter n_conflict_snapshot;
+ PgStat_Counter n_conflict_bufferpin;
+ PgStat_Counter n_conflict_startup_deadlock;
+
/*
* tables and functions must be last in the struct, because we don't write
***************
*** 689,694 **** extern void pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
--- 709,716 ----
extern void pgstat_report_analyze(Relation rel, bool adopt_counts,
PgStat_Counter livetuples, PgStat_Counter deadtuples);
+ extern void pgstat_report_recovery_conflict(int reason);
+
extern void pgstat_initialize(void);
extern void pgstat_bestart(void);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers