On Wed, Oct 22, 2014 at 12:50 AM, Brightwell, Adam <adam.brightw...@crunchydatasolutions.com> wrote: > Though, I would think that the general desire would be to keep the patch > relevant ONLY to the necessary changes. I would not qualify making those > types of changes as relevant, IMHO. I do think this is potential for > cleanup, however, I would suspect that would be best done in a separate > patch. But again, I'd defer to a committer whether such changes are even > necessary/acceptable.
I have been looking at this patch, and I think that it is a mistake to count the .ready files present in archive_status when calling pg_stat_get_archiver(). If there are many files waiting to be archived, this penalizes the run time of this function, and the application behind relying on those results, not to mention that actually the loop used to count the .ready files is a copy of what is in pgarch.c. Hence I think that we should simply count them in pgarch_readyXlog, and then return a value back to pgarch_ArchiverCopyLoop, value that could be decremented by 1 each time a file is successfully archived to keep the stats as precise as possible, and let the information know useful information when archiver process is within a single loop process of pgarch_ArchiverCopyLoop. This way, we just need to extend PgStat_MsgArchiver with a new counter to track this number. The attached patch, based on v2 sent previously, does so. Thoughts? -- Michael
From e12a1aff3f1b423da5277cccf2a76ec09318567a Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@otacoo.com> Date: Tue, 18 Nov 2014 16:30:23 +0900 Subject: [PATCH] Track number of files marked as ready for archiving in pg_stat_archiver This number of files is directly tracked by the archiver process that then reports the number it finds to the stat machinery. Note that when archiver marks a file as successfully archived, it decrements by one the number of files waiting to be archived, giving more precise information to the user. --- doc/src/sgml/monitoring.sgml | 5 +++++ src/backend/catalog/system_views.sql | 1 + src/backend/postmaster/pgarch.c | 33 +++++++++++++++++++-------------- src/backend/postmaster/pgstat.c | 6 +++++- src/backend/utils/adt/pgstatfuncs.c | 21 +++++++++++++++------ src/include/catalog/pg_proc.h | 2 +- src/include/pgstat.h | 5 ++++- src/test/regress/expected/rules.out | 3 ++- 8 files changed, 52 insertions(+), 24 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index b29e5e6..4f4ac73 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -870,6 +870,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser <entry>Time of the last failed archival operation</entry> </row> <row> + <entry><structfield>ready_count</></entry> + <entry><type>bigint</type></entry> + <entry>Number of files waiting to be archived</entry> + </row> + <row> <entry><structfield>stats_reset</></entry> <entry><type>timestamp with time zone</type></entry> <entry>Time at which these statistics were last reset</entry> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index a819952..195769c 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -729,6 +729,7 @@ CREATE VIEW pg_stat_archiver AS s.failed_count, s.last_failed_wal, s.last_failed_time, + s.ready_count, s.stats_reset FROM pg_stat_get_archiver() s; diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 6a5c5b0..7f5b813 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -100,7 +100,7 @@ static void pgarch_waken_stop(SIGNAL_ARGS); static void pgarch_MainLoop(void); static void pgarch_ArchiverCopyLoop(void); static bool pgarch_archiveXlog(char *xlog); -static bool pgarch_readyXlog(char *xlog); +static int64 pgarch_readyXlog(char *xlog); static void pgarch_archiveDone(char *xlog); @@ -440,6 +440,7 @@ static void pgarch_ArchiverCopyLoop(void) { char xlog[MAX_XFN_CHARS + 1]; + int64 ready_count; /* * loop through all xlogs with archive_status of .ready and archive @@ -447,7 +448,7 @@ pgarch_ArchiverCopyLoop(void) * some backend will add files onto the list of those that need archiving * while we are still copying earlier archives */ - while (pgarch_readyXlog(xlog)) + while ((ready_count = pgarch_readyXlog(xlog)) != 0) { int failures = 0; @@ -488,11 +489,16 @@ pgarch_ArchiverCopyLoop(void) pgarch_archiveDone(xlog); /* + * File has been archived, reducing by one the entries waiting + * to be archived. + */ + ready_count--; + + /* * Tell the collector about the WAL file that we successfully * archived */ - pgstat_send_archiver(xlog, false); - + pgstat_send_archiver(xlog, ready_count, false); break; /* out of inner retry loop */ } else @@ -501,7 +507,7 @@ pgarch_ArchiverCopyLoop(void) * Tell the collector about the WAL file that we failed to * archive */ - pgstat_send_archiver(xlog, true); + pgstat_send_archiver(xlog, ready_count, true); if (++failures >= NUM_ARCHIVE_RETRIES) { @@ -668,7 +674,8 @@ pgarch_archiveXlog(char *xlog) * No notification is set that file archiving is now in progress, so * this would need to be extended if multiple concurrent archival * tasks were created. If a failure occurs, we will completely - * re-copy the file at the next available opportunity. + * re-copy the file at the next available opportunity. This function + * returns the number of files counted as in ready state. * * It is important that we return the oldest, so that we archive xlogs * in order that they were written, for two reasons: @@ -682,7 +689,7 @@ pgarch_archiveXlog(char *xlog) * higher priority for archiving. This seems okay, or at least not * obviously worth changing. */ -static bool +static int64 pgarch_readyXlog(char *xlog) { /* @@ -695,7 +702,7 @@ pgarch_readyXlog(char *xlog) char newxlog[MAX_XFN_CHARS + 6 + 1]; DIR *rldir; struct dirent *rlde; - bool found = false; + int64 ready_count = 0; snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); rldir = AllocateDir(XLogArchiveStatusDir); @@ -714,27 +721,25 @@ pgarch_readyXlog(char *xlog) strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen && strcmp(rlde->d_name + basenamelen, ".ready") == 0) { - if (!found) - { + if (ready_count == 0) strcpy(newxlog, rlde->d_name); - found = true; - } else { if (strcmp(rlde->d_name, newxlog) < 0) strcpy(newxlog, rlde->d_name); } + ready_count++; } } FreeDir(rldir); - if (found) + if (ready_count > 0) { /* truncate off the .ready */ newxlog[strlen(newxlog) - 6] = '\0'; strcpy(xlog, newxlog); } - return found; + return ready_count; } /* diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index c7f41a5..2e9b276 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3088,10 +3088,12 @@ pgstat_send(void *msg, int len) * ---------- */ void -pgstat_send_archiver(const char *xlog, bool failed) +pgstat_send_archiver(const char *xlog, int64 ready_count, bool failed) { PgStat_MsgArchiver msg; + Assert(ready_count >= 0); + /* * Prepare and send the message */ @@ -3099,6 +3101,7 @@ pgstat_send_archiver(const char *xlog, bool failed) msg.m_failed = failed; strncpy(msg.m_xlog, xlog, sizeof(msg.m_xlog)); msg.m_timestamp = GetCurrentTimestamp(); + msg.m_ready_count = ready_count; pgstat_send(&msg, sizeof(msg)); } @@ -5000,6 +5003,7 @@ pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len) sizeof(archiverStats.last_archived_wal)); archiverStats.last_archived_timestamp = msg->m_timestamp; } + archiverStats.ready_count = msg->m_ready_count; } /* ---------- diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index d621a68..37fd7d2 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -15,11 +15,13 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/xlog_internal.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/ip.h" #include "miscadmin.h" #include "pgstat.h" +#include "storage/fd.h" #include "utils/builtins.h" #include "utils/inet.h" #include "utils/timestamp.h" @@ -1737,8 +1739,8 @@ Datum pg_stat_get_archiver(PG_FUNCTION_ARGS) { TupleDesc tupdesc; - Datum values[7]; - bool nulls[7]; + Datum values[8]; + bool nulls[8]; PgStat_ArchiverStats *archiver_stats; /* Initialise values and NULL flags arrays */ @@ -1746,7 +1748,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS) MemSet(nulls, 0, sizeof(nulls)); /* Initialise attributes information in the tuple descriptor */ - tupdesc = CreateTemplateTupleDesc(7, false); + tupdesc = CreateTemplateTupleDesc(8, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal", @@ -1759,7 +1761,9 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS) TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time", TIMESTAMPTZOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset", + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "ready_count", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); @@ -1790,10 +1794,15 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS) else values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp); - if (archiver_stats->stat_reset_timestamp == 0) + if (archiver_stats->ready_count == 0) nulls[6] = true; else - values[6] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp); + values[6] = Int64GetDatum(archiver_stats->ready_count); + + if (archiver_stats->stat_reset_timestamp == 0) + nulls[7] = true; + else + values[7] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp); /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum( diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 5d4e889..28e3b46 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2788,7 +2788,7 @@ DATA(insert OID = 2844 ( pg_stat_get_db_blk_read_time PGNSP PGUID 12 1 0 0 0 f DESCR("statistics: block read time, in msec"); DATA(insert OID = 2845 ( pg_stat_get_db_blk_write_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 701 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_blk_write_time _null_ _null_ _null_ )); DESCR("statistics: block write time, in msec"); -DATA(insert OID = 3195 ( pg_stat_get_archiver PGNSP PGUID 12 1 0 0 0 f f f f f f s 0 0 2249 "" "{20,25,1184,20,25,1184,1184}" "{o,o,o,o,o,o,o}" "{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,stats_reset}" _null_ pg_stat_get_archiver _null_ _null_ _null_ )); +DATA(insert OID = 3195 ( pg_stat_get_archiver PGNSP PGUID 12 1 0 0 0 f f f f f f s 0 0 2249 "" "{20,25,1184,20,25,1184,20,1184}" "{o,o,o,o,o,o,o,o}" "{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,ready_count,stats_reset}" _null_ pg_stat_get_archiver _null_ _null_ _null_ )); DESCR("statistics: information about WAL archiver"); DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ )); DESCR("statistics: number of timed checkpoints started by the bgwriter"); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 0892533..55d046d 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -377,6 +377,7 @@ typedef struct PgStat_MsgArchiver { PgStat_MsgHdr m_hdr; bool m_failed; /* Failed attempt */ + uint64 m_ready_count; char m_xlog[MAX_XFN_CHARS + 1]; TimestampTz m_timestamp; } PgStat_MsgArchiver; @@ -650,6 +651,7 @@ typedef struct PgStat_ArchiverStats PgStat_Counter failed_count; /* failed archival attempts */ char last_failed_wal[MAX_XFN_CHARS + 1]; /* WAL file involved in * last failure */ + int64 ready_count; /* files ready to be archived */ TimestampTz last_failed_timestamp; /* last archival failure time */ TimestampTz stat_reset_timestamp; } PgStat_ArchiverStats; @@ -934,7 +936,8 @@ extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, void *recdata, uint32 len); -extern void pgstat_send_archiver(const char *xlog, bool failed); +extern void pgstat_send_archiver(const char *xlog, int64 ready_count, + bool failed); extern void pgstat_send_bgwriter(void); /* ---------- diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c79b45c..5eaf138 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1686,8 +1686,9 @@ pg_stat_archiver| SELECT s.archived_count, s.failed_count, s.last_failed_wal, s.last_failed_time, + s.ready_count, s.stats_reset - FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, stats_reset); + FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, ready_count, stats_reset); pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_checkpoint_write_time() AS checkpoint_write_time, -- 2.1.3
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers