On Tue, Jan 17, 2012 at 6:52 AM, Fujii Masao <[email protected]> wrote:
> On Mon, Jan 16, 2012 at 2:06 AM, Simon Riggs <[email protected]> wrote:
>> WALRestore process asynchronously executes restore_command while
>> recovery continues working.
>>
>> Overlaps downloading of next WAL file to reduce time delays in file
>> based archive recovery.
>>
>> Handles cases of file-only and streaming/file correctly.
>
> Though I've not reviewed the patch deeply yet, I observed the following
> two problems when I tested the patch.
>
> When I set up streaming replication + archive (i.e., restore_command is set)
> and started the standby, I got the following error:
>
> FATAL: all AuxiliaryProcs are in use
> LOG: walrestore process (PID 18839) exited with exit code 1
Fixed and better documented.
> When I started an archive recovery without setting restore_command,
> it successfully finished.
Not sure exactly what you mean, but I fixed a bug that might be
something you're seeing.
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ce659ec..469e6d6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -40,6 +40,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "postmaster/walrestore.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
@@ -187,7 +188,6 @@ static bool InArchiveRecovery = false;
static bool restoredFromArchive = false;
/* options taken from recovery.conf for archive recovery */
-static char *recoveryRestoreCommand = NULL;
static char *recoveryEndCommand = NULL;
static char *archiveCleanupCommand = NULL;
static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
@@ -575,8 +575,8 @@ bool reachedConsistency = false;
static bool InRedo = false;
-/* Have we launched bgwriter during recovery? */
-static bool bgwriterLaunched = false;
+/* Have we launched background procs during archive recovery yet? */
+static bool ArchRecoveryBgProcsActive = false;
/*
* Information logged when we detect a change in one of the parameters
@@ -632,8 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
bool randAccess);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void);
-static bool RestoreArchivedFile(char *path, const char *xlogfname,
- const char *recovername, off_t expectedSize);
static void ExecuteRecoveryCommand(char *command, char *commandName,
bool failOnerror);
static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -2706,19 +2704,47 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
XLogFileName(xlogfname, tli, log, seg);
+#define TMPRECOVERYXLOG "RECOVERYXLOG"
+
switch (source)
{
case XLOG_FROM_ARCHIVE:
+ /*
+ * Check to see if the WALRestore process has already put the
+ * next file in place while we were working. If so, use that.
+ * If not, get it ourselves. This makes it easier to handle
+ * initial state before the WALRestore is active, and also
+ * handles the stop/start logic correctly when we have both
+ * streaming and file based replication active.
+ *
+ * We queue up the next task for WALRestore after we've begun to
+ * use this file later in XLogFileRead().
+ *
+ * If the WALRestore process is still active, we block on the lock
+ * until it finishes, just as if we were executing the command
+ * ourselves, so this doesn't alter the logic elsewhere.
+ */
+ if (XLogFileIsNowFullyRestored(tli, log, seg))
+ {
+ snprintf(path, MAXPGPATH, XLOGDIR "/%s", TMPRECOVERYXLOG);
+ restoredFromArchive = true;
+ break;
+ }
+
/* Report recovery progress in PS display */
snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
xlogfname);
set_ps_display(activitymsg, false);
restoredFromArchive = RestoreArchivedFile(path, xlogfname,
- "RECOVERYXLOG",
+ TMPRECOVERYXLOG,
XLogSegSize);
+
if (!restoredFromArchive)
+ {
+ LWLockRelease(WALRestoreCommandLock);
return -1;
+ }
break;
case XLOG_FROM_PG_XLOG:
@@ -2748,18 +2774,42 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
if (stat(xlogfpath, &statbuf) == 0)
{
if (unlink(xlogfpath) != 0)
+ {
+ LWLockRelease(WALRestoreCommandLock);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
xlogfpath)));
+ }
reload = true;
}
if (rename(path, xlogfpath) < 0)
+ {
+ LWLockRelease(WALRestoreCommandLock);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
path, xlogfpath)));
+ }
+
+ /*
+ * Make sure we recover from the new filename, so we can reuse the
+ * temporary filename for asynchronous restore actions.
+ */
+ strcpy(path, xlogfpath);
+
+ /*
+ * Tell the WALRestore process to get the next file now.
+ * Hopefully it will be ready for use in time for the next call the
+ * Startup process makes to XLogFileRead().
+ *
+ * It might seem like we should do that earlier but then there is a
+ * race condition that might lead to replacing RECOVERYXLOG with
+ * another file before we've copied it.
+ */
+ SetNextWALRestoreLogSeg(tli, log, seg);
+ LWLockRelease(WALRestoreCommandLock);
/*
* If the existing segment was replaced, since walsenders might have
@@ -2911,8 +2961,11 @@ XLogFileClose(void)
* For fixed-size files, the caller may pass the expected size as an
* additional crosscheck on successful recovery. If the file size is not
* known, set expectedSize = 0.
+ *
+ * Must be called with WALRestoreCommandLock held; if the function
+ * returns, the lock is still held on exit.
*/
-static bool
+bool
RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername, off_t expectedSize)
{
@@ -2929,7 +2982,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
uint32 restartSeg;
/* In standby mode, restore_command might not be supplied */
- if (recoveryRestoreCommand == NULL)
+ if (strlen(GetRecoveryRestoreCommand()) == 0)
goto not_available;
/*
@@ -2963,18 +3016,24 @@ RestoreArchivedFile(char *path, const char *xlogfname,
if (stat(xlogpath, &stat_buf) != 0)
{
if (errno != ENOENT)
+ {
+ LWLockRelease(WALRestoreCommandLock);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
xlogpath)));
+ }
}
else
{
if (unlink(xlogpath) != 0)
+ {
+ LWLockRelease(WALRestoreCommandLock);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
xlogpath)));
+ }
}
/*
@@ -3013,7 +3072,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
endp = xlogRestoreCmd + MAXPGPATH - 1;
*endp = '\0';
- for (sp = recoveryRestoreCommand; *sp; sp++)
+ for (sp = GetRecoveryRestoreCommand(); *sp; sp++)
{
if (*sp == '%')
{
@@ -3059,21 +3118,29 @@ RestoreArchivedFile(char *path, const char *xlogfname,
}
*dp = '\0';
- ereport(DEBUG3,
+ ereport(DEBUG2,
(errmsg_internal("executing restore command \"%s\"",
xlogRestoreCmd)));
/*
- * Check signals before restore command and reset afterwards.
+ * Set in_restore_command to tell the signal handler that we should exit
+ * right away on SIGTERM. We know that we're at a safe point to do that.
+ * Check if we had already received the signal, so that we don't miss a
+ * shutdown request received just before this.
*/
- PreRestoreCommand();
+ in_restore_command = true;
+ if (startup_shutdown_requested || walrestore_shutdown_requested)
+ {
+ LWLockRelease(WALRestoreCommandLock);
+ proc_exit(1);
+ }
/*
* Copy xlog from archival storage to XLOGDIR
*/
rc = system(xlogRestoreCmd);
- PostRestoreCommand();
+ in_restore_command = false;
if (rc == 0)
{
@@ -3102,7 +3169,10 @@ RestoreArchivedFile(char *path, const char *xlogfname,
if (StandbyMode && stat_buf.st_size < expectedSize)
elevel = DEBUG1;
else
+ {
+ LWLockRelease(WALRestoreCommandLock);
elevel = FATAL;
+ }
ereport(elevel,
(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
xlogfname,
@@ -3123,10 +3193,13 @@ RestoreArchivedFile(char *path, const char *xlogfname,
{
/* stat failed */
if (errno != ENOENT)
+ {
+ LWLockRelease(WALRestoreCommandLock);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
xlogpath)));
+ }
}
}
@@ -3158,10 +3231,18 @@ RestoreArchivedFile(char *path, const char *xlogfname,
* too.
*/
if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+ {
+ LWLockRelease(WALRestoreCommandLock);
proc_exit(1);
+ }
signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+ /*
+ * If signaled we will immediately issue a FATAL error so drop the lock
+ */
+ if (signaled)
+ LWLockRelease(WALRestoreCommandLock);
ereport(signaled ? FATAL : DEBUG2,
(errmsg("could not restore file \"%s\" from archive: return code %d",
xlogfname, rc)));
@@ -4203,7 +4284,9 @@ readTimeLineHistory(TimeLineID targetTLI)
if (InArchiveRecovery)
{
TLHistoryFileName(histfname, targetTLI);
+ LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+ LWLockRelease(WALRestoreCommandLock);
}
else
TLHistoryFilePath(path, targetTLI);
@@ -4292,7 +4375,9 @@ existsTimeLineHistory(TimeLineID probeTLI)
if (InArchiveRecovery)
{
TLHistoryFileName(histfname, probeTLI);
+ LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+ LWLockRelease(WALRestoreCommandLock);
}
else
TLHistoryFilePath(path, probeTLI);
@@ -4453,7 +4538,9 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
if (InArchiveRecovery)
{
TLHistoryFileName(histfname, parentTLI);
+ LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+ LWLockRelease(WALRestoreCommandLock);
}
else
TLHistoryFilePath(path, parentTLI);
@@ -5299,10 +5386,10 @@ readRecoveryCommandFile(void)
{
if (strcmp(item->name, "restore_command") == 0)
{
- recoveryRestoreCommand = pstrdup(item->value);
+ SetRecoveryRestoreCommand(pstrdup(item->value));
ereport(DEBUG2,
(errmsg_internal("restore_command = '%s'",
- recoveryRestoreCommand)));
+ GetRecoveryRestoreCommand())));
}
else if (strcmp(item->name, "recovery_end_command") == 0)
{
@@ -5455,7 +5542,7 @@ readRecoveryCommandFile(void)
*/
if (StandbyMode)
{
- if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL)
+ if (PrimaryConnInfo == NULL && strlen(GetRecoveryRestoreCommand()) == 0)
ereport(WARNING,
(errmsg("recovery command file \"%s\" specified neither primary_conninfo nor restore_command",
RECOVERY_COMMAND_FILE),
@@ -5463,7 +5550,7 @@ readRecoveryCommandFile(void)
}
else
{
- if (recoveryRestoreCommand == NULL)
+ if (strlen(GetRecoveryRestoreCommand()) == 0)
ereport(FATAL,
(errmsg("recovery command file \"%s\" must specify restore_command when standby mode is not enabled",
RECOVERY_COMMAND_FILE)));
@@ -6432,7 +6519,7 @@ StartupXLOG(void)
PublishStartupProcessInformation();
SetForwardFsyncRequests();
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
- bgwriterLaunched = true;
+ ArchRecoveryBgProcsActive = true;
}
/*
@@ -6795,7 +6882,7 @@ StartupXLOG(void)
* the rule that TLI only changes in shutdown checkpoints, which
* allows some extra error checking in xlog_redo.
*/
- if (bgwriterLaunched)
+ if (ArchRecoveryBgProcsActive)
RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
CHECKPOINT_IMMEDIATE |
CHECKPOINT_WAIT);
@@ -9640,7 +9727,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
* Request a restartpoint if we've replayed too much
* xlog since the last one.
*/
- if (StandbyMode && bgwriterLaunched)
+ if (StandbyMode && ArchRecoveryBgProcsActive)
{
if (XLogCheckpointNeeded(readId, readSeg))
{
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index e3ae92d..81a8cb3 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -30,6 +30,7 @@
#include "nodes/makefuncs.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "postmaster/walrestore.h"
#include "postmaster/walwriter.h"
#include "replication/walreceiver.h"
#include "storage/bufmgr.h"
@@ -319,6 +320,9 @@ AuxiliaryProcessMain(int argc, char *argv[])
case CheckpointerProcess:
statmsg = "checkpointer process";
break;
+ case WalRestoreProcess:
+ statmsg = "wal restore process";
+ break;
case WalWriterProcess:
statmsg = "wal writer process";
break;
@@ -424,6 +428,11 @@ AuxiliaryProcessMain(int argc, char *argv[])
CheckpointerMain();
proc_exit(1); /* should never return */
+ case WalRestoreProcess:
+ /* don't set signals, wal restore has its own agenda */
+ WalRestoreMain();
+ proc_exit(1); /* should never return */
+
case WalWriterProcess:
/* don't set signals, walwriter has its own agenda */
InitXLOGAccess();
diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile
index 3056b09..349e722 100644
--- a/src/backend/postmaster/Makefile
+++ b/src/backend/postmaster/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \
- startup.o syslogger.o walwriter.o checkpointer.o
+ startup.o syslogger.o walrestore.o walwriter.o checkpointer.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index ad0c17a..15684c0 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -210,6 +210,7 @@ static pid_t StartupPID = 0,
BgWriterPID = 0,
CheckpointerPID = 0,
WalWriterPID = 0,
+ WalRestorePID = 0,
WalReceiverPID = 0,
AutoVacPID = 0,
PgArchPID = 0,
@@ -470,6 +471,7 @@ static void ShmemBackendArrayRemove(Backend *bn);
#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
#define StartWalWriter() StartChildProcess(WalWriterProcess)
#define StartWalReceiver() StartChildProcess(WalReceiverProcess)
+#define StartWalRestore() StartChildProcess(WalRestoreProcess)
/* Macros to check exit status of a child process */
#define EXIT_STATUS_0(st) ((st) == 0)
@@ -2060,6 +2062,8 @@ SIGHUP_handler(SIGNAL_ARGS)
signal_child(WalWriterPID, SIGHUP);
if (WalReceiverPID != 0)
signal_child(WalReceiverPID, SIGHUP);
+ if (WalRestorePID != 0)
+ signal_child(WalRestorePID, SIGHUP);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGHUP);
if (PgArchPID != 0)
@@ -2170,6 +2174,8 @@ pmdie(SIGNAL_ARGS)
signal_child(StartupPID, SIGTERM);
if (WalReceiverPID != 0)
signal_child(WalReceiverPID, SIGTERM);
+ if (WalRestorePID != 0)
+ signal_child(WalRestorePID, SIGTERM);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGTERM);
if (pmState == PM_RECOVERY)
@@ -2225,6 +2231,8 @@ pmdie(SIGNAL_ARGS)
signal_child(WalWriterPID, SIGQUIT);
if (WalReceiverPID != 0)
signal_child(WalReceiverPID, SIGQUIT);
+ if (WalRestorePID != 0)
+ signal_child(WalRestorePID, SIGQUIT);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGQUIT);
if (PgArchPID != 0)
@@ -2331,6 +2339,12 @@ reaper(SIGNAL_ARGS)
pmState = PM_RUN;
/*
+ * Shutdown the WALRestore process
+ */
+ if (WalRestorePID != 0)
+ signal_child(WalRestorePID, SIGTERM);
+
+ /*
* Kill any walsenders to force the downstream standby(s) to
* reread the timeline history file, adjust their timelines and
* establish replication connections again. This is required
@@ -2477,6 +2491,30 @@ reaper(SIGNAL_ARGS)
}
/*
+ * Was it the wal restore? Once we have reached PM_RUN (or later) its
+ * exit is expected, because we asked it to shut down, so we just
+ * forget its PID and carry on.
+ */
+ if (pid == WalRestorePID)
+ {
+ if (pmState >= PM_RUN)
+ {
+ WalRestorePID = 0;
+ continue;
+ }
+
+ /*
+ * Any unexpected exit (including FATAL exit) of the WALRestore
+ * process is treated as a crash, except that we don't want to
+ * reinitialize because availability is important.
+ */
+ RecoveryError = true;
+ HandleChildCrash(pid, exitstatus,
+ _("walrestore process"));
+ continue;
+ }
+
+ /*
* Was it the autovacuum launcher? Normal exit can be ignored; we'll
* start a new one at the next iteration of the postmaster's main
* loop, if necessary. Any other exit condition is treated as a
@@ -2756,6 +2794,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
}
+ /* Take care of the walrestore too */
+ if (pid == WalRestorePID)
+ WalRestorePID = 0;
+ else if (WalRestorePID != 0 && !FatalError)
+ {
+ ereport(DEBUG2,
+ (errmsg_internal("sending %s to process %d",
+ (SendStop ? "SIGSTOP" : "SIGQUIT"),
+ (int) WalRestorePID)));
+ signal_child(WalRestorePID, (SendStop ? SIGSTOP : SIGQUIT));
+ }
+
/* Take care of the autovacuum launcher too */
if (pid == AutoVacPID)
AutoVacPID = 0;
@@ -2916,6 +2966,8 @@ PostmasterStateMachine(void)
signal_child(StartupPID, SIGTERM);
if (WalReceiverPID != 0)
signal_child(WalReceiverPID, SIGTERM);
+ if (WalRestorePID != 0)
+ signal_child(WalRestorePID, SIGTERM);
pmState = PM_WAIT_BACKENDS;
}
}
@@ -2940,6 +2992,7 @@ PostmasterStateMachine(void)
if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 &&
StartupPID == 0 &&
WalReceiverPID == 0 &&
+ WalRestorePID == 0 &&
BgWriterPID == 0 &&
(CheckpointerPID == 0 || !FatalError) &&
WalWriterPID == 0 &&
@@ -3005,11 +3058,11 @@ PostmasterStateMachine(void)
* left by now anyway; what we're really waiting for is walsenders and
* archiver.
*
- * Walreceiver should normally be dead by now, but not when a fast
- * shutdown is performed during recovery.
+ * Walreceiver and Walrestore should normally be dead by now, but not
+ * when a fast shutdown is performed during recovery.
*/
if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
- WalReceiverPID == 0)
+ WalReceiverPID == 0 && WalRestorePID == 0)
{
pmState = PM_WAIT_DEAD_END;
}
@@ -3036,6 +3089,7 @@ PostmasterStateMachine(void)
/* These other guys should be dead already */
Assert(StartupPID == 0);
Assert(WalReceiverPID == 0);
+ Assert(WalRestorePID == 0);
Assert(BgWriterPID == 0);
Assert(CheckpointerPID == 0);
Assert(WalWriterPID == 0);
@@ -4219,6 +4273,8 @@ sigusr1_handler(SIGNAL_ARGS)
BgWriterPID = StartBackgroundWriter();
Assert(CheckpointerPID == 0);
CheckpointerPID = StartCheckpointer();
+ Assert(WalRestorePID == 0);
+ WalRestorePID = StartWalRestore();
pmState = PM_RECOVERY;
}
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index ed75d09..1791feb 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -35,14 +35,14 @@
* Flags set by interrupt handlers for later service in the redo loop.
*/
static volatile sig_atomic_t got_SIGHUP = false;
-static volatile sig_atomic_t shutdown_requested = false;
static volatile sig_atomic_t promote_triggered = false;
+volatile sig_atomic_t startup_shutdown_requested = false;
/*
* Flag set when executing a restore command, to tell SIGTERM signal handler
* that it's safe to just proc_exit.
*/
-static volatile sig_atomic_t in_restore_command = false;
+volatile sig_atomic_t in_restore_command = false;
/* Signal handlers */
static void startupproc_quickdie(SIGNAL_ARGS);
@@ -131,9 +131,16 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
int save_errno = errno;
if (in_restore_command)
+ {
+ /*
+ * See RestoreArchivedFile() for explanation of why this
+ * lock is always held when in_restore_command is true.
+ */
+ LWLockRelease(WALRestoreCommandLock);
proc_exit(1);
+ }
else
- shutdown_requested = true;
+ startup_shutdown_requested = true;
WakeupRecovery();
errno = save_errno;
@@ -155,7 +162,7 @@ HandleStartupProcInterrupts(void)
/*
* Check if we were requested to exit without finishing recovery.
*/
- if (shutdown_requested)
+ if (startup_shutdown_requested)
proc_exit(1);
/*
@@ -226,26 +233,6 @@ StartupProcessMain(void)
proc_exit(0);
}
-void
-PreRestoreCommand(void)
-{
- /*
- * Set in_restore_command to tell the signal handler that we should exit
- * right away on SIGTERM. We know that we're at a safe point to do that.
- * Check if we had already received the signal, so that we don't miss a
- * shutdown request received just before this.
- */
- in_restore_command = true;
- if (shutdown_requested)
- proc_exit(1);
-}
-
-void
-PostRestoreCommand(void)
-{
- in_restore_command = false;
-}
-
bool
IsPromoteTriggered(void)
{
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index ef1dc91..8f4443a 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -26,6 +26,7 @@
#include "postmaster/autovacuum.h"
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
+#include "postmaster/walrestore.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
@@ -123,6 +124,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
size = add_size(size, AutoVacuumShmemSize());
size = add_size(size, WalSndShmemSize());
size = add_size(size, WalRcvShmemSize());
+ size = add_size(size, WalRestoreShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
@@ -228,6 +230,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
AutoVacuumShmemInit();
WalSndShmemInit();
WalRcvShmemInit();
+ WalRestoreShmemInit();
/*
* Set up other modules that need some shared memory space
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1ddf4bf..e9e5325 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -270,7 +270,8 @@ extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern int XLogFileInit(uint32 log, uint32 seg,
bool *use_existent, bool use_lock);
extern int XLogFileOpen(uint32 log, uint32 seg);
-
+extern bool RestoreArchivedFile(char *path, const char *xlogfname,
+ const char *recovername, off_t expectedSize);
extern void XLogGetLastRemoved(uint32 *log, uint32 *seg);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
@@ -316,6 +317,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void);
extern Latch *WALWriterLatch(void);
+extern Latch *WALRestoreLatch(void);
/*
* Starting/stopping a base backup
diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h
index e966a73..b90ce33 100644
--- a/src/include/bootstrap/bootstrap.h
+++ b/src/include/bootstrap/bootstrap.h
@@ -23,6 +23,7 @@ typedef enum
StartupProcess,
BgWriterProcess,
CheckpointerProcess,
+ WalRestoreProcess,
WalWriterProcess,
WalReceiverProcess,
diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h
index 3ec6950..35d9665 100644
--- a/src/include/postmaster/startup.h
+++ b/src/include/postmaster/startup.h
@@ -12,10 +12,11 @@
#ifndef _STARTUP_H
#define _STARTUP_H
+extern volatile sig_atomic_t startup_shutdown_requested;
+extern volatile sig_atomic_t in_restore_command;
+
extern void HandleStartupProcInterrupts(void);
extern void StartupProcessMain(void);
-extern void PreRestoreCommand(void);
-extern void PostRestoreCommand(void);
extern bool IsPromoteTriggered(void);
extern void ResetPromoteTriggered(void);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index df3df29..c316dcc 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -79,6 +79,7 @@ typedef enum LWLockId
SerializablePredicateLockListLock,
OldSerXidLock,
SyncRepLock,
+ WALRestoreCommandLock,
/* Individual lock IDs end here */
FirstBufMappingLock,
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 358d1a4..50d4f35 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -204,12 +204,20 @@ extern PGPROC *PreparedXactProcs;
/*
* We set aside some extra PGPROC structures for auxiliary processes,
* ie things that aren't full-fledged backends but need shmem access.
+ * Logger, archiver and stats processes don't count towards this total.
*
+ * This needs to be set to whichever of normal running or recovery has the
+ * highest number of auxiliary processes that might run at the same time.
+ *
+ * During normal running we need slots for:
* Background writer, checkpointer and WAL writer run during normal operation.
- * Startup process and WAL receiver also consume 2 slots, but WAL writer is
- * launched only after startup has exited, so we only need 4 slots.
+ * 3 slots
+ *
+ * During recovery we need slots for:
+ * Background writer, checkpointer, Startup process, WAL receiver, WAL restore.
+ * 5 slots
*/
-#define NUM_AUXILIARY_PROCS 4
+#define NUM_AUXILIARY_PROCS 5
/* configurable options */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers