Re: [HACKERS] WAL Restore process during recovery

Simon Riggs Thu, 19 Jan 2012 11:18:06 -0800

On Tue, Jan 17, 2012 at 6:52 AM, Fujii Masao <masao.fu...@gmail.com> wrote:
> On Mon, Jan 16, 2012 at 2:06 AM, Simon Riggs <si...@2ndquadrant.com> wrote:
>> WALRestore process asynchronously executes restore_command while
>> recovery continues working.
>>
>> Overlaps downloading of next WAL file to reduce time delays in file
>> based archive recovery.
>>
>> Handles cases of file-only and streaming/file correctly.
>
> Though I've not reviewed the patch deeply yet, I observed the following
> two problems when I tested the patch.
>
> When I set up streaming replication + archive (i.e., restore_command is set)
> and started the standby, I got the following error:
>
>    FATAL:  all AuxiliaryProcs are in use
>    LOG:  walrestore process (PID 18839) exited with exit code 1


Fixed and better documented.

> When I started an archive recovery without setting restore_command,
> it successfully finished.

Not sure exactly what you mean, but I fixed a bug that might be
something you're seeing.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ce659ec..469e6d6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -40,6 +40,7 @@
 #include "pgstat.h"
 #include "postmaster/bgwriter.h"
 #include "postmaster/startup.h"
+#include "postmaster/walrestore.h"
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
@@ -187,7 +188,6 @@ static bool InArchiveRecovery = false;
 static bool restoredFromArchive = false;
 
 /* options taken from recovery.conf for archive recovery */
-static char *recoveryRestoreCommand = NULL;
 static char *recoveryEndCommand = NULL;
 static char *archiveCleanupCommand = NULL;
 static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
@@ -575,8 +575,8 @@ bool reachedConsistency = false;
 
 static bool InRedo = false;
 
-/* Have we launched bgwriter during recovery? */
-static bool bgwriterLaunched = false;
+/* Have we launched background procs during archive recovery yet? */
+static bool ArchRecoveryBgProcsActive = false;
 
 /*
  * Information logged when we detect a change in one of the parameters
@@ -632,8 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
 			 bool randAccess);
 static int	emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
 static void XLogFileClose(void);
-static bool RestoreArchivedFile(char *path, const char *xlogfname,
-					const char *recovername, off_t expectedSize);
 static void ExecuteRecoveryCommand(char *command, char *commandName,
 					   bool failOnerror);
 static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -2706,19 +2704,47 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
 
 	XLogFileName(xlogfname, tli, log, seg);
 
+#define TMPRECOVERYXLOG	"RECOVERYXLOG"
+
 	switch (source)
 	{
 		case XLOG_FROM_ARCHIVE:
+			/*
+			 * Check to see if the WALRestore process has already put the
+			 * next file in place while we were working. If so, use that.
+			 * If not, get it ourselves. This makes it easier to handle
+			 * initial state before the WALRestore is active, and also
+			 * handles the stop/start logic correctly when we have both
+			 * streaming and file based replication active.
+			 *
+			 * We queue up the next task for WALRestore after we've begun to
+			 * use this file later in XLogFileRead().
+			 *
+			 * If the WALRestore process is still active, the lock wait makes
+			 * us wait, which is just as if we were executing the command
+			 * ourselves and so doesn't alter the logic elsewhere.
+			 */
+			if (XLogFileIsNowFullyRestored(tli, log, seg))
+			{
+				snprintf(path, MAXPGPATH, XLOGDIR "/%s", TMPRECOVERYXLOG);
+				restoredFromArchive = true;
+				break;
+			}
+
 			/* Report recovery progress in PS display */
 			snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
 					 xlogfname);
 			set_ps_display(activitymsg, false);
 
 			restoredFromArchive = RestoreArchivedFile(path, xlogfname,
-													  "RECOVERYXLOG",
+													  TMPRECOVERYXLOG,
 													  XLogSegSize);
+
 			if (!restoredFromArchive)
+			{
+				LWLockRelease(WALRestoreCommandLock);
 				return -1;
+			}
 			break;
 
 		case XLOG_FROM_PG_XLOG:
@@ -2748,18 +2774,42 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
 		if (stat(xlogfpath, &statbuf) == 0)
 		{
 			if (unlink(xlogfpath) != 0)
+			{
+				LWLockRelease(WALRestoreCommandLock);
 				ereport(FATAL,
 						(errcode_for_file_access(),
 						 errmsg("could not remove file \"%s\": %m",
 								xlogfpath)));
+			}
 			reload = true;
 		}
 
 		if (rename(path, xlogfpath) < 0)
+		{
+			LWLockRelease(WALRestoreCommandLock);
 			ereport(ERROR,
 				(errcode_for_file_access(),
 				 errmsg("could not rename file \"%s\" to \"%s\": %m",
 						path, xlogfpath)));
+		}
+
+		/*
+		 * Make sure we recover from the new filename, so we can reuse the
+		 * temporary filename for asynchronous restore actions.
+		 */
+		strcpy(path, xlogfpath);
+
+		/*
+		 * Tell the WALRestore process to get the next file now.
+		 * Hopefully it will be ready for use in time for the next call the
+		 * Startup process makes to XLogFileRead().
+		 *
+		 * It might seem like we should do that earlier but then there is a
+		 * race condition that might lead to replacing RECOVERYXLOG with
+		 * another file before we've copied it.
+		 */
+		SetNextWALRestoreLogSeg(tli, log, seg);
+		LWLockRelease(WALRestoreCommandLock);
 
 		/*
 		 * If the existing segment was replaced, since walsenders might have
@@ -2911,8 +2961,11 @@ XLogFileClose(void)
  * For fixed-size files, the caller may pass the expected size as an
  * additional crosscheck on successful recovery.  If the file size is not
  * known, set expectedSize = 0.
+ *
+ * Must be called with WALRestoreCommandLock held; if the function returns,
+ * the lock is still held on exit.
  */
-static bool
+bool
 RestoreArchivedFile(char *path, const char *xlogfname,
 					const char *recovername, off_t expectedSize)
 {
@@ -2929,7 +2982,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	uint32		restartSeg;
 
 	/* In standby mode, restore_command might not be supplied */
-	if (recoveryRestoreCommand == NULL)
+	if (strlen(GetRecoveryRestoreCommand()) == 0)
 		goto not_available;
 
 	/*
@@ -2963,18 +3016,24 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	if (stat(xlogpath, &stat_buf) != 0)
 	{
 		if (errno != ENOENT)
+		{
+			LWLockRelease(WALRestoreCommandLock);
 			ereport(FATAL,
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m",
 							xlogpath)));
+		}
 	}
 	else
 	{
 		if (unlink(xlogpath) != 0)
+		{
+			LWLockRelease(WALRestoreCommandLock);
 			ereport(FATAL,
 					(errcode_for_file_access(),
 					 errmsg("could not remove file \"%s\": %m",
 							xlogpath)));
+		}
 	}
 
 	/*
@@ -3013,7 +3072,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	endp = xlogRestoreCmd + MAXPGPATH - 1;
 	*endp = '\0';
 
-	for (sp = recoveryRestoreCommand; *sp; sp++)
+	for (sp = GetRecoveryRestoreCommand(); *sp; sp++)
 	{
 		if (*sp == '%')
 		{
@@ -3059,21 +3118,29 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	}
 	*dp = '\0';
 
-	ereport(DEBUG3,
+	ereport(DEBUG2,
 			(errmsg_internal("executing restore command \"%s\"",
 							 xlogRestoreCmd)));
 
 	/*
-	 * Check signals before restore command and reset afterwards.
+	 * Set in_restore_command to tell the signal handler that we should exit
+	 * right away on SIGTERM. We know that we're at a safe point to do that.
+	 * Check if we had already received the signal, so that we don't miss a
+	 * shutdown request received just before this.
 	 */
-	PreRestoreCommand();
+	in_restore_command = true;
+	if (startup_shutdown_requested || walrestore_shutdown_requested)
+	{
+		LWLockRelease(WALRestoreCommandLock);
+		proc_exit(1);
+	}
 
 	/*
 	 * Copy xlog from archival storage to XLOGDIR
 	 */
 	rc = system(xlogRestoreCmd);
 
-	PostRestoreCommand();
+	in_restore_command = false;
 
 	if (rc == 0)
 	{
@@ -3102,7 +3169,10 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 				if (StandbyMode && stat_buf.st_size < expectedSize)
 					elevel = DEBUG1;
 				else
+				{
+					LWLockRelease(WALRestoreCommandLock);
 					elevel = FATAL;
+				}
 				ereport(elevel,
 						(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
 								xlogfname,
@@ -3123,10 +3193,13 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 		{
 			/* stat failed */
 			if (errno != ENOENT)
+			{
+				LWLockRelease(WALRestoreCommandLock);
 				ereport(FATAL,
 						(errcode_for_file_access(),
 						 errmsg("could not stat file \"%s\": %m",
 								xlogpath)));
+			}
 		}
 	}
 
@@ -3158,10 +3231,18 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	 * too.
 	 */
 	if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+	{
+		LWLockRelease(WALRestoreCommandLock);
 		proc_exit(1);
+	}
 
 	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
 
+	/*
+	 * If signaled we will immediately issue a FATAL error so drop the lock
+	 */
+	if (signaled)
+		LWLockRelease(WALRestoreCommandLock);
 	ereport(signaled ? FATAL : DEBUG2,
 		(errmsg("could not restore file \"%s\" from archive: return code %d",
 				xlogfname, rc)));
@@ -4203,7 +4284,9 @@ readTimeLineHistory(TimeLineID targetTLI)
 	if (InArchiveRecovery)
 	{
 		TLHistoryFileName(histfname, targetTLI);
+		LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE);
 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+		LWLockRelease(WALRestoreCommandLock);
 	}
 	else
 		TLHistoryFilePath(path, targetTLI);
@@ -4292,7 +4375,9 @@ existsTimeLineHistory(TimeLineID probeTLI)
 	if (InArchiveRecovery)
 	{
 		TLHistoryFileName(histfname, probeTLI);
+		LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE);
 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+		LWLockRelease(WALRestoreCommandLock);
 	}
 	else
 		TLHistoryFilePath(path, probeTLI);
@@ -4453,7 +4538,9 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
 	if (InArchiveRecovery)
 	{
 		TLHistoryFileName(histfname, parentTLI);
+		LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE);
 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+		LWLockRelease(WALRestoreCommandLock);
 	}
 	else
 		TLHistoryFilePath(path, parentTLI);
@@ -5299,10 +5386,10 @@ readRecoveryCommandFile(void)
 	{
 		if (strcmp(item->name, "restore_command") == 0)
 		{
-			recoveryRestoreCommand = pstrdup(item->value);
+			SetRecoveryRestoreCommand(pstrdup(item->value));
 			ereport(DEBUG2,
 					(errmsg_internal("restore_command = '%s'",
-									 recoveryRestoreCommand)));
+									 GetRecoveryRestoreCommand())));
 		}
 		else if (strcmp(item->name, "recovery_end_command") == 0)
 		{
@@ -5455,7 +5542,7 @@ readRecoveryCommandFile(void)
 	 */
 	if (StandbyMode)
 	{
-		if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL)
+		if (PrimaryConnInfo == NULL && strlen(GetRecoveryRestoreCommand()) == 0)
 			ereport(WARNING,
 					(errmsg("recovery command file \"%s\" specified neither primary_conninfo nor restore_command",
 							RECOVERY_COMMAND_FILE),
@@ -5463,7 +5550,7 @@ readRecoveryCommandFile(void)
 	}
 	else
 	{
-		if (recoveryRestoreCommand == NULL)
+		if (strlen(GetRecoveryRestoreCommand()) == 0)
 			ereport(FATAL,
 					(errmsg("recovery command file \"%s\" must specify restore_command when standby mode is not enabled",
 							RECOVERY_COMMAND_FILE)));
@@ -6432,7 +6519,7 @@ StartupXLOG(void)
 			PublishStartupProcessInformation();
 			SetForwardFsyncRequests();
 			SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
-			bgwriterLaunched = true;
+			ArchRecoveryBgProcsActive = true;
 		}
 
 		/*
@@ -6795,7 +6882,7 @@ StartupXLOG(void)
 		 * the rule that TLI only changes in shutdown checkpoints, which
 		 * allows some extra error checking in xlog_redo.
 		 */
-		if (bgwriterLaunched)
+		if (ArchRecoveryBgProcsActive)
 			RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
 							  CHECKPOINT_IMMEDIATE |
 							  CHECKPOINT_WAIT);
@@ -9640,7 +9727,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
 		 * Request a restartpoint if we've replayed too much
 		 * xlog since the last one.
 		 */
-		if (StandbyMode && bgwriterLaunched)
+		if (StandbyMode && ArchRecoveryBgProcsActive)
 		{
 			if (XLogCheckpointNeeded(readId, readSeg))
 			{
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index e3ae92d..81a8cb3 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -30,6 +30,7 @@
 #include "nodes/makefuncs.h"
 #include "postmaster/bgwriter.h"
 #include "postmaster/startup.h"
+#include "postmaster/walrestore.h"
 #include "postmaster/walwriter.h"
 #include "replication/walreceiver.h"
 #include "storage/bufmgr.h"
@@ -319,6 +320,9 @@ AuxiliaryProcessMain(int argc, char *argv[])
 			case CheckpointerProcess:
 				statmsg = "checkpointer process";
 				break;
+			case WalRestoreProcess:
+				statmsg = "wal restore process";
+				break;
 			case WalWriterProcess:
 				statmsg = "wal writer process";
 				break;
@@ -424,6 +428,11 @@ AuxiliaryProcessMain(int argc, char *argv[])
 			CheckpointerMain();
 			proc_exit(1);		/* should never return */
 
+		case WalRestoreProcess:
+			/* don't set signals, wal restore has its own agenda */
+			WalRestoreMain();
+			proc_exit(1);		/* should never return */
+
 		case WalWriterProcess:
 			/* don't set signals, walwriter has its own agenda */
 			InitXLOGAccess();
diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile
index 3056b09..349e722 100644
--- a/src/backend/postmaster/Makefile
+++ b/src/backend/postmaster/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \
-	startup.o syslogger.o walwriter.o checkpointer.o
+	startup.o syslogger.o walrestore.o walwriter.o checkpointer.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index ad0c17a..15684c0 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -210,6 +210,7 @@ static pid_t StartupPID = 0,
 			BgWriterPID = 0,
 			CheckpointerPID = 0,
 			WalWriterPID = 0,
+			WalRestorePID = 0,
 			WalReceiverPID = 0,
 			AutoVacPID = 0,
 			PgArchPID = 0,
@@ -470,6 +471,7 @@ static void ShmemBackendArrayRemove(Backend *bn);
 #define StartCheckpointer()		StartChildProcess(CheckpointerProcess)
 #define StartWalWriter()		StartChildProcess(WalWriterProcess)
 #define StartWalReceiver()		StartChildProcess(WalReceiverProcess)
+#define StartWalRestore()		StartChildProcess(WalRestoreProcess)
 
 /* Macros to check exit status of a child process */
 #define EXIT_STATUS_0(st)  ((st) == 0)
@@ -2060,6 +2062,8 @@ SIGHUP_handler(SIGNAL_ARGS)
 			signal_child(WalWriterPID, SIGHUP);
 		if (WalReceiverPID != 0)
 			signal_child(WalReceiverPID, SIGHUP);
+		if (WalRestorePID != 0)
+			signal_child(WalRestorePID, SIGHUP);
 		if (AutoVacPID != 0)
 			signal_child(AutoVacPID, SIGHUP);
 		if (PgArchPID != 0)
@@ -2170,6 +2174,8 @@ pmdie(SIGNAL_ARGS)
 				signal_child(StartupPID, SIGTERM);
 			if (WalReceiverPID != 0)
 				signal_child(WalReceiverPID, SIGTERM);
+			if (WalRestorePID != 0)
+				signal_child(WalRestorePID, SIGTERM);
 			if (BgWriterPID != 0)
 				signal_child(BgWriterPID, SIGTERM);
 			if (pmState == PM_RECOVERY)
@@ -2225,6 +2231,8 @@ pmdie(SIGNAL_ARGS)
 				signal_child(WalWriterPID, SIGQUIT);
 			if (WalReceiverPID != 0)
 				signal_child(WalReceiverPID, SIGQUIT);
+			if (WalRestorePID != 0)
+				signal_child(WalRestorePID, SIGQUIT);
 			if (AutoVacPID != 0)
 				signal_child(AutoVacPID, SIGQUIT);
 			if (PgArchPID != 0)
@@ -2331,6 +2339,12 @@ reaper(SIGNAL_ARGS)
 			pmState = PM_RUN;
 
 			/*
+			 * Shut down the WALRestore process
+			 */
+			if (WalRestorePID != 0)
+				signal_child(WalRestorePID, SIGTERM);
+
+			/*
 			 * Kill any walsenders to force the downstream standby(s) to
 			 * reread the timeline history file, adjust their timelines and
 			 * establish replication connections again. This is required
@@ -2477,6 +2491,30 @@ reaper(SIGNAL_ARGS)
 		}
 
 		/*
+		 * Was it the wal restore?  Once we have reached PM_RUN or later its
+		 * exit is expected, so we just forget its PID whatever the exit
+		 * status was.
+		 */
+		if (pid == WalRestorePID)
+		{
+			if (pmState >= PM_RUN)
+			{
+				WalRestorePID = 0;
+				continue;
+			}
+
+			/*
+			 * Any unexpected exit (including FATAL exit) of the WALRestore
+			 * process is treated as a crash, except that we don't want to
+			 * reinitialize because availability is important.
+			 */
+			RecoveryError = true;
+			HandleChildCrash(pid, exitstatus,
+							 _("walrestore process"));
+			continue;
+		}
+
+		/*
 		 * Was it the autovacuum launcher?	Normal exit can be ignored; we'll
 		 * start a new one at the next iteration of the postmaster's main
 		 * loop, if necessary.	Any other exit condition is treated as a
@@ -2756,6 +2794,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 		signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
 	}
 
+	/* Take care of the walrestore too */
+	if (pid == WalRestorePID)
+		WalRestorePID = 0;
+	else if (WalRestorePID != 0 && !FatalError)
+	{
+		ereport(DEBUG2,
+				(errmsg_internal("sending %s to process %d",
+								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
+								 (int) WalRestorePID)));
+		signal_child(WalRestorePID, (SendStop ? SIGSTOP : SIGQUIT));
+	}
+
 	/* Take care of the autovacuum launcher too */
 	if (pid == AutoVacPID)
 		AutoVacPID = 0;
@@ -2916,6 +2966,8 @@ PostmasterStateMachine(void)
 				signal_child(StartupPID, SIGTERM);
 			if (WalReceiverPID != 0)
 				signal_child(WalReceiverPID, SIGTERM);
+			if (WalRestorePID != 0)
+				signal_child(WalRestorePID, SIGTERM);
 			pmState = PM_WAIT_BACKENDS;
 		}
 	}
@@ -2940,6 +2992,7 @@ PostmasterStateMachine(void)
 		if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 &&
 			StartupPID == 0 &&
 			WalReceiverPID == 0 &&
+			WalRestorePID == 0 &&
 			BgWriterPID == 0 &&
 			(CheckpointerPID == 0 || !FatalError) &&
 			WalWriterPID == 0 &&
@@ -3005,11 +3058,11 @@ PostmasterStateMachine(void)
 		 * left by now anyway; what we're really waiting for is walsenders and
 		 * archiver.
 		 *
-		 * Walreceiver should normally be dead by now, but not when a fast
-		 * shutdown is performed during recovery.
+		 * Walreceiver and Walrestore should normally be dead by now, but not
+		 * when a fast shutdown is performed during recovery.
 		 */
 		if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
-			WalReceiverPID == 0)
+			WalReceiverPID == 0 && WalRestorePID == 0)
 		{
 			pmState = PM_WAIT_DEAD_END;
 		}
@@ -3036,6 +3089,7 @@ PostmasterStateMachine(void)
 			/* These other guys should be dead already */
 			Assert(StartupPID == 0);
 			Assert(WalReceiverPID == 0);
+			Assert(WalRestorePID == 0);
 			Assert(BgWriterPID == 0);
 			Assert(CheckpointerPID == 0);
 			Assert(WalWriterPID == 0);
@@ -4219,6 +4273,8 @@ sigusr1_handler(SIGNAL_ARGS)
 		BgWriterPID = StartBackgroundWriter();
 		Assert(CheckpointerPID == 0);
 		CheckpointerPID = StartCheckpointer();
+		Assert(WalRestorePID == 0);
+		WalRestorePID = StartWalRestore();
 
 		pmState = PM_RECOVERY;
 	}
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index ed75d09..1791feb 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -35,14 +35,14 @@
  * Flags set by interrupt handlers for later service in the redo loop.
  */
 static volatile sig_atomic_t got_SIGHUP = false;
-static volatile sig_atomic_t shutdown_requested = false;
 static volatile sig_atomic_t promote_triggered = false;
+volatile sig_atomic_t startup_shutdown_requested = false;
 
 /*
  * Flag set when executing a restore command, to tell SIGTERM signal handler
  * that it's safe to just proc_exit.
  */
-static volatile sig_atomic_t in_restore_command = false;
+volatile sig_atomic_t in_restore_command = false;
 
 /* Signal handlers */
 static void startupproc_quickdie(SIGNAL_ARGS);
@@ -131,9 +131,16 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
 	int			save_errno = errno;
 
 	if (in_restore_command)
+	{
+		/*
+		 * See RestoreArchivedFile() for explanation of why this
+		 * lock is always held when in_restore_command is true.
+		 */
+		LWLockRelease(WALRestoreCommandLock);
 		proc_exit(1);
+	}
 	else
-		shutdown_requested = true;
+		startup_shutdown_requested = true;
 	WakeupRecovery();
 
 	errno = save_errno;
@@ -155,7 +162,7 @@ HandleStartupProcInterrupts(void)
 	/*
 	 * Check if we were requested to exit without finishing recovery.
 	 */
-	if (shutdown_requested)
+	if (startup_shutdown_requested)
 		proc_exit(1);
 
 	/*
@@ -226,26 +233,6 @@ StartupProcessMain(void)
 	proc_exit(0);
 }
 
-void
-PreRestoreCommand(void)
-{
-	/*
-	 * Set in_restore_command to tell the signal handler that we should exit
-	 * right away on SIGTERM. We know that we're at a safe point to do that.
-	 * Check if we had already received the signal, so that we don't miss a
-	 * shutdown request received just before this.
-	 */
-	in_restore_command = true;
-	if (shutdown_requested)
-		proc_exit(1);
-}
-
-void
-PostRestoreCommand(void)
-{
-	in_restore_command = false;
-}
-
 bool
 IsPromoteTriggered(void)
 {
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index ef1dc91..8f4443a 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -26,6 +26,7 @@
 #include "postmaster/autovacuum.h"
 #include "postmaster/bgwriter.h"
 #include "postmaster/postmaster.h"
+#include "postmaster/walrestore.h"
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
@@ -123,6 +124,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 		size = add_size(size, AutoVacuumShmemSize());
 		size = add_size(size, WalSndShmemSize());
 		size = add_size(size, WalRcvShmemSize());
+		size = add_size(size, WalRestoreShmemSize());
 		size = add_size(size, BTreeShmemSize());
 		size = add_size(size, SyncScanShmemSize());
 		size = add_size(size, AsyncShmemSize());
@@ -228,6 +230,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 	AutoVacuumShmemInit();
 	WalSndShmemInit();
 	WalRcvShmemInit();
+	WalRestoreShmemInit();
 
 	/*
 	 * Set up other modules that need some shared memory space
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1ddf4bf..e9e5325 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -270,7 +270,8 @@ extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 extern int XLogFileInit(uint32 log, uint32 seg,
 			 bool *use_existent, bool use_lock);
 extern int	XLogFileOpen(uint32 log, uint32 seg);
-
+extern bool RestoreArchivedFile(char *path, const char *xlogfname,
+					const char *recovername, off_t expectedSize);
 
 extern void XLogGetLastRemoved(uint32 *log, uint32 *seg);
 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
@@ -316,6 +317,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
 extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 extern Latch *WALWriterLatch(void);
+extern Latch *WALRestoreLatch(void);
 
 /*
  * Starting/stopping a base backup
diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h
index e966a73..b90ce33 100644
--- a/src/include/bootstrap/bootstrap.h
+++ b/src/include/bootstrap/bootstrap.h
@@ -23,6 +23,7 @@ typedef enum
 	StartupProcess,
 	BgWriterProcess,
 	CheckpointerProcess,
+	WalRestoreProcess,
 	WalWriterProcess,
 	WalReceiverProcess,
 
diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h
index 3ec6950..35d9665 100644
--- a/src/include/postmaster/startup.h
+++ b/src/include/postmaster/startup.h
@@ -12,10 +12,11 @@
 #ifndef _STARTUP_H
 #define _STARTUP_H
 
+extern volatile sig_atomic_t startup_shutdown_requested;
+extern volatile sig_atomic_t in_restore_command;
+
 extern void HandleStartupProcInterrupts(void);
 extern void StartupProcessMain(void);
-extern void PreRestoreCommand(void);
-extern void PostRestoreCommand(void);
 extern bool IsPromoteTriggered(void);
 extern void ResetPromoteTriggered(void);
 
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index df3df29..c316dcc 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -79,6 +79,7 @@ typedef enum LWLockId
 	SerializablePredicateLockListLock,
 	OldSerXidLock,
 	SyncRepLock,
+	WALRestoreCommandLock,
 	/* Individual lock IDs end here */
 	FirstBufMappingLock,
 	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 358d1a4..50d4f35 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -204,12 +204,20 @@ extern PGPROC *PreparedXactProcs;
 /*
  * We set aside some extra PGPROC structures for auxiliary processes,
  * ie things that aren't full-fledged backends but need shmem access.
+ * Logger, archiver and stats processes don't count towards this total.
  *
+ * This needs to be set to whichever of normal running or recovery has the
+ * highest number of auxiliary processes that might run at the same time.
+ *
+ * During normal running we need slots for:
  * Background writer, checkpointer and WAL writer run during normal operation.
- * Startup process and WAL receiver also consume 2 slots, but WAL writer is
- * launched only after startup has exited, so we only need 4 slots.
+ * 3 slots
+ *
+ * During recovery we need slots for:
+ * Background writer, checkpointer, Startup process, WAL receiver, WAL restore.
+ * 5 slots
  */
-#define NUM_AUXILIARY_PROCS		4
+#define NUM_AUXILIARY_PROCS		5
 
 
 /* configurable options */

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] WAL Restore process during recovery

Reply via email to