> I still don't see the need for two functions LogStartupProgress and
> LogStartupProgressComplete. Most of the code is duplicate. I think we
> can just do it with a single function something like [1]:

Initially I had written a common function for these 2. You can see
that in the earlier version of the patch. Later separated it since it
was too much for one function. If others also agree to make it common,
I can make that change.

> Why isn't there a
> LogStartupProgressComplete(STARTUP_PROCESS_OP_REDO)? Is it because of
> the below existing log message?
> ereport(LOG,
> (errmsg("redo done at %X/%X system usage: %s",
> LSN_FORMAT_ARGS(ReadRecPtr),
> pg_rusage_show(&ru0))));

Yes. Adding another log message makes it redundant.

>  I think it should be, "," after occurred instead of "."
> + * elapsed or not. TRUE if timeout occurred, FALSE otherwise.
> instead of
> + * elapsed or not. TRUE if timeout occurred. FALSE otherwise.

Fixed.

> I think walkdir() should only call LogStartupProgress(FSYNC_IN_PROGRESS, 
> path);
> when action == datadir_fsync_fname.

I agree and fixed it.

> ResetUnloggedRelations() is calling
> ResetUnloggedRelationsInTablespaceDir("base", op);
> before calling InitStartupProgress().

Fixed.

> This shows StartupXLOG() calling ResetUnloggedRelations() twice.
> Should they both be shown ?  If so, maybe they should be distinguished as 
> here:
>
>        elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
>                 (op & UNLOGGED_RELATION_CLEANUP) != 0,
>                 (op & UNLOGGED_RELATION_INIT) != 0);

Fixed. Added separate codes to distinguish.

Please find the patch attached.




On Wed, Jul 21, 2021 at 6:43 PM Justin Pryzby <pry...@telsasoft.com> wrote:
>
> I think walkdir() should only call LogStartupProgress(FSYNC_IN_PROGRESS, 
> path);
> when action == datadir_fsync_fname.
>
> ResetUnloggedRelations() is calling
> ResetUnloggedRelationsInTablespaceDir("base", op);
> before calling InitStartupProgress().
>
> This shows StartupXLOG() calling ResetUnloggedRelations() twice.
> Should they both be shown ?  If so, maybe they should be distinguished as 
> here:
>
>         elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
>                  (op & UNLOGGED_RELATION_CLEANUP) != 0,
>                  (op & UNLOGGED_RELATION_INIT) != 0);
>
> On Wed, Jul 21, 2021 at 12:52:24PM +0530, Nitin Jadhav wrote:
> > 2021-07-20 18:47:32.046 IST [102230] LOG:  listening on IPv4 address 
> > "127.0.0.1", port 5445
> > 2021-07-20 18:47:32.048 IST [102230] LOG:  listening on Unix socket 
> > "/tmp/.s.PGSQL.5445"
> > 2021-07-20 18:47:32.051 IST [102234] LOG:  database system was interrupted; 
> > last known up at 2021-07-20 18:46:27 IST
> > 2021-07-20 18:47:32.060 IST [102234] LOG:  data directory sync (fsync) 
> > complete after 0.00 s
> > 2021-07-20 18:47:32.060 IST [102234] LOG:  database system was not properly 
> > shut down; automatic recovery in progress
> > 2021-07-20 18:47:32.063 IST [102234] LOG:  unlogged relations reset after 
> > 0.00 s
> > 2021-07-20 18:47:32.063 IST [102234] LOG:  redo starts at 0/14EF418
> > 2021-07-20 18:47:33.063 IST [102234] LOG:  redo in progress, elapsed time: 
> > 1.00 s, current LSN: 0/5C13D28
> > 2021-07-20 18:47:34.063 IST [102234] LOG:  redo in progress, elapsed time: 
> > 2.00 s, current LSN: 0/A289160
> > 2021-07-20 18:47:35.063 IST [102234] LOG:  redo in progress, elapsed time: 
> > 3.00 s, current LSN: 0/EE2DE10
> > 2021-07-20 18:47:35.563 IST [102234] LOG:  invalid record length at 
> > 0/115C63E0: wanted 24, got 0
> > 2021-07-20 18:47:35.563 IST [102234] LOG:  redo done at 0/115C63B8 system 
> > usage: CPU: user: 3.58 s, system: 0.14 s, elapsed: 3.50 s
> > 2021-07-20 18:47:35.564 IST [102234] LOG:  unlogged relations reset after 
> > 0.00 s
> > 2021-07-20 18:47:35.706 IST [102230] LOG:  database system is ready to 
> > accept connections
>
> --
> Justin
From 85c0e711cc48bcf2f76ed8ed0257af3ecf2de306 Mon Sep 17 00:00:00 2001
From: Nitin <nitin.jadhav@enterprisedb.com>
Date: Fri, 23 Jul 2021 15:46:56 +0530
Subject: [PATCH] Shows the progress of the operations performed during startup
 process. The interval between each progress update is configurable and it
 should be mentioned in seconds

---
 src/backend/access/transam/xlog.c             | 178 ++++++++++++++++++++++++++
 src/backend/postmaster/startup.c              |   1 +
 src/backend/storage/file/fd.c                 |  13 ++
 src/backend/storage/file/reinit.c             |  18 +++
 src/backend/utils/misc/guc.c                  |  13 +-
 src/backend/utils/misc/postgresql.conf.sample |   6 +
 src/include/access/xlog.h                     |  18 +++
 src/include/utils/timeout.h                   |   1 +
 8 files changed, 247 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3479402..4f05205 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -79,6 +79,7 @@
 #include "utils/relmapper.h"
 #include "utils/pg_rusage.h"
 #include "utils/snapmgr.h"
+#include "utils/timeout.h"
 #include "utils/timestamp.h"
 
 extern uint32 bootstrap_data_checksum_version;
@@ -397,6 +398,23 @@ static bool doRequestWalReceiverReply;
  */
 static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
 
+/*
+ * Start timestamp of the operation that occur during startup process.
+ */
+static TimestampTz startupProcessOpStartTime;
+
+/*
+ * Indicates whether the startup progress interval mentioned by the user is
+ * elapsed or not. TRUE if timeout occurred, FALSE otherwise.
+ */
+static bool logStartupProgressTimeout;
+
+/*
+ * The interval between each progress update of the operations that occur
+ * during startup process.
+ */
+int	log_startup_progress_interval;
+
 /*----------
  * Shared-memory data structures for XLOG control
  *
@@ -7351,6 +7369,8 @@ StartupXLOG(void)
 					(errmsg("redo starts at %X/%X",
 							LSN_FORMAT_ARGS(ReadRecPtr))));
 
+			InitStartupProgress();
+
 			/*
 			 * main redo apply loop
 			 */
@@ -7358,6 +7378,8 @@ StartupXLOG(void)
 			{
 				bool		switchedTLI = false;
 
+				LogStartupProgress(STARTUP_PROCESS_OP_REDO, NULL);
+
 #ifdef WAL_DEBUG
 				if (XLOG_DEBUG ||
 					(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
@@ -13021,3 +13043,159 @@ XLogRequestWalReceiverReply(void)
 {
 	doRequestWalReceiverReply = true;
 }
+
+/*
+ * Schedule a wakeup call for logging the progress of startup process.
+ */
+void
+LogStartupProgressTimeoutHandler(void)
+{
+	logStartupProgressTimeout = true;
+}
+
+/*
+ * Sets the start timestamp of the current operation and also enables the
+ * timeout for logging the progress of startup process.
+ */
+void
+InitStartupProgress(void)
+{
+	/*
+	 * If not called from startup process then just return without
+	 * initialization.
+	 */
+	if (!AmStartupProcess())
+		return;
+
+	startupProcessOpStartTime = GetCurrentTimestamp();
+	enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT,
+						 (log_startup_progress_interval * 1000));
+}
+
+/*
+ * Logs the progress of the operations performed during startup process.
+ */
+void
+LogStartupProgress(StartupProcessOp operation, const char *path)
+{
+	long        secs;
+	int         usecs;
+	int			elapsed_ms;
+	int			interval_in_ms;
+
+	/* If not called from startup process then this feature is of no use. */
+	if (!AmStartupProcess())
+		return;
+
+	/* If the feature is disabled, then no need to proceed further. */
+	if (log_startup_progress_interval < 0)
+		return;
+
+	/* If the timeout has not occurred, then no need to log the details. */
+	if (!logStartupProgressTimeout)
+		return;
+
+	/* Timeout has occurred. */
+	TimestampDifference(startupProcessOpStartTime,
+						GetCurrentTimestamp(),
+						&secs, &usecs);
+
+	/*
+	 * Enable the timer for the next log message based on the time that current
+	 * log message timer was supposed to fire.
+	 */
+	elapsed_ms = (secs * 1000) + (usecs / 1000);
+	interval_in_ms = log_startup_progress_interval * 1000;
+	enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT,
+						 (interval_in_ms - (elapsed_ms % interval_in_ms)));
+
+	switch(operation)
+	{
+		case STARTUP_PROCESS_OP_SYNCFS:
+			ereport(LOG,
+					(errmsg("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		case STARTUP_PROCESS_OP_FSYNC:
+			ereport(LOG,
+					(errmsg("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		case STARTUP_PROCESS_OP_REDO:
+			ereport(LOG,
+					(errmsg("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+							secs, (usecs / 10000), LSN_FORMAT_ARGS(ReadRecPtr))));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		default:
+			ereport(ERROR,
+					(errmsg("unrecognized operation (%d) in startup progress update",
+							operation)));
+			break;
+	}
+
+	logStartupProgressTimeout = false;
+}
+
+/*
+ * Logs the completion of the operation performed during startup process and
+ * disables the timeout used for logging the progress.
+ */
+void
+LogStartupProgressComplete(StartupProcessOp operation)
+{
+	long        secs;
+	int         usecs;
+
+	/* If not called from startup process then this feature is of no use. */
+	if (!AmStartupProcess())
+		return;
+
+	/* If the feature is disabled, then no need to proceed further. */
+	if (log_startup_progress_interval < 0)
+		return;
+
+	TimestampDifference(startupProcessOpStartTime,
+						GetCurrentTimestamp(),
+						&secs, &usecs);
+
+	switch(operation)
+	{
+		case STARTUP_PROCESS_OP_SYNCFS:
+			ereport(LOG,
+					(errmsg("data directory sync (syncfs) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		case STARTUP_PROCESS_OP_FSYNC:
+			ereport(LOG,
+					(errmsg("data directory sync (fsync) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (init) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (cleanup) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		default:
+			ereport(ERROR,
+					(errmsg("unrecognized operation (%d) in startup progress update",
+							operation)));
+			break;
+	}
+
+	disable_timeout(LOG_STARTUP_PROGRESS_TIMEOUT, false);
+}
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 69077bd..08c271c 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -232,6 +232,7 @@ StartupProcessMain(void)
 	RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
 	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
+	RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler);
 
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 5d5e8ae..49ecbbd 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -100,6 +100,7 @@
 #include "storage/ipc.h"
 #include "utils/guc.h"
 #include "utils/resowner_private.h"
+#include "utils/timeout.h"
 
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
@@ -3334,6 +3335,8 @@ do_syncfs(const char *path)
 {
 	int			fd;
 
+	LogStartupProgress(STARTUP_PROCESS_OP_SYNCFS, path);
+
 	fd = OpenTransientFile(path, O_RDONLY);
 	if (fd < 0)
 	{
@@ -3410,6 +3413,8 @@ SyncDataDirectory(void)
 		DIR		   *dir;
 		struct dirent *de;
 
+		InitStartupProgress();
+
 		/*
 		 * On Linux, we don't have to open every single file one by one.  We
 		 * can use syncfs() to sync whole filesystems.  We only expect
@@ -3436,6 +3441,8 @@ SyncDataDirectory(void)
 		/* If pg_wal is a symlink, process that too. */
 		if (xlog_is_symlink)
 			do_syncfs("pg_wal");
+
+		LogStartupProgressComplete(STARTUP_PROCESS_OP_SYNCFS);
 		return;
 	}
 #endif							/* !HAVE_SYNCFS */
@@ -3452,6 +3459,8 @@ SyncDataDirectory(void)
 	walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
 #endif
 
+	InitStartupProgress();
+
 	/*
 	 * Now we do the fsync()s in the same order.
 	 *
@@ -3465,6 +3474,8 @@ SyncDataDirectory(void)
 	if (xlog_is_symlink)
 		walkdir("pg_wal", datadir_fsync_fname, false, LOG);
 	walkdir("pg_tblspc", datadir_fsync_fname, true, LOG);
+
+	LogStartupProgressComplete(STARTUP_PROCESS_OP_FSYNC);
 }
 
 /*
@@ -3505,6 +3516,8 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
+		LogStartupProgress(STARTUP_PROCESS_OP_FSYNC, path);
+
 		switch (get_dirent_type(subpath, de, process_symlinks, elevel))
 		{
 			case PGFILETYPE_REG:
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 40c758d..ba18996 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -16,12 +16,14 @@
 
 #include <unistd.h>
 
+#include "access/xlog.h"
 #include "common/relpath.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
 #include "storage/reinit.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
+#include "utils/timeout.h"
 
 static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
 												  int op);
@@ -65,6 +67,8 @@ ResetUnloggedRelations(int op)
 								   ALLOCSET_DEFAULT_SIZES);
 	oldctx = MemoryContextSwitchTo(tmpctx);
 
+	InitStartupProgress();
+
 	/*
 	 * First process unlogged files in pg_default ($PGDATA/base)
 	 */
@@ -75,6 +79,7 @@ ResetUnloggedRelations(int op)
 	 */
 	spc_dir = AllocateDir("pg_tblspc");
 
+
 	while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
 	{
 		if (strcmp(spc_de->d_name, ".") == 0 ||
@@ -86,6 +91,11 @@ ResetUnloggedRelations(int op)
 		ResetUnloggedRelationsInTablespaceDir(temp_path, op);
 	}
 
+	if (op & UNLOGGED_RELATION_INIT)
+		LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT);
+	else if (op & UNLOGGED_RELATION_CLEANUP)
+		LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP);
+
 	FreeDir(spc_dir);
 
 	/*
@@ -136,6 +146,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
 		snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
 				 tsdirname, de->d_name);
+
+		if (op & UNLOGGED_RELATION_INIT)
+			LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT,
+							   dbspace_path);
+		else if (op & UNLOGGED_RELATION_CLEANUP)
+			LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP,
+							   dbspace_path);
+
 		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
 	}
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a2e0f8d..d0ea7de 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -679,7 +679,6 @@ static char *recovery_target_lsn_string;
 /* should be static, but commands/variable.c needs to get at this */
 char	   *role_string;
 
-
 /*
  * Displayable names for context types (enum GucContext)
  *
@@ -3546,6 +3545,18 @@ static struct config_int ConfigureNamesInt[] =
 		check_client_connection_check_interval, NULL, NULL
 	},
 
+	{
+		{"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+			gettext_noop("Sets the time interval between each progress update "
+						 "of the startup process."),
+			gettext_noop("0 logs all messages. -1 turns this feature off."),
+			GUC_UNIT_S,
+		},
+		&log_startup_progress_interval,
+		-1, -1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b242a7f..901e4ba 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -523,6 +523,12 @@
 					# are logged regardless of their duration; 1.0 logs all
 					# statements from all transactions, 0.0 never logs
 
+log_startup_progress_interval = 10	# Sets the time interval between each
+									# progress update of the startup process.
+									# -1 disables the feature, 0 logs all
+									# messages, > 0 indicates the interval in
+									# seconds.
+
 # - What to Log -
 
 #debug_print_parse = off
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index ccfcf43..f17d91b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -132,6 +132,7 @@ extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
 extern bool wal_receiver_create_temp_slot;
 extern bool track_wal_io_timing;
+extern int  log_startup_progress_interval;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
@@ -295,6 +296,18 @@ typedef enum WALAvailability
 	WALAVAIL_REMOVED			/* WAL segment has been removed */
 } WALAvailability;
 
+/*
+ * Codes of the operations performed during startup process
+ */
+typedef enum StartupProcessOp
+{
+	STARTUP_PROCESS_OP_SYNCFS,
+	STARTUP_PROCESS_OP_FSYNC,
+	STARTUP_PROCESS_OP_REDO,
+	STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT,
+	STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP
+} StartupProcessOp;
+
 struct XLogRecData;
 
 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
@@ -399,6 +412,11 @@ extern void do_pg_abort_backup(int code, Datum arg);
 extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
 
+extern void InitStartupProgress(void);
+extern void LogStartupProgress(StartupProcessOp operation, const char *path);
+extern void LogStartupProgressComplete(StartupProcessOp operation);
+extern void LogStartupProgressTimeoutHandler(void);
+
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_SIGNAL_FILE	"recovery.signal"
 #define STANDBY_SIGNAL_FILE		"standby.signal"
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 93e6a69..ef892dd 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -33,6 +33,7 @@ typedef enum TimeoutId
 	IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
 	IDLE_SESSION_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
+	LOG_STARTUP_PROGRESS_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */
-- 
1.8.3.1

Reply via email to