On Tue, Jan 17, 2012 at 6:52 AM, Fujii Masao <masao.fu...@gmail.com> wrote:
> On Mon, Jan 16, 2012 at 2:06 AM, Simon Riggs <si...@2ndquadrant.com> wrote:
>> WALRestore process asynchronously executes restore_command while
>> recovery continues working.
>>
>> Overlaps downloading of next WAL file to reduce time delays in file
>> based archive recovery.
>>
>> Handles cases of file-only and streaming/file correctly.
>
> Though I've not reviewed the patch deeply yet, I observed the following
> two problems when I tested the patch.
>
> When I set up streaming replication + archive (i.e., restore_command is set)
> and started the standby, I got the following error:
>
> FATAL: all AuxiliaryProcs are in use
> LOG: walrestore process (PID 18839) exited with exit code 1
Fixed and better documented.

> When I started an archive recovery without setting restore_command,
> it successfully finished.

Not sure exactly what you mean, but I fixed a bug that might be something you're seeing.

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ce659ec..469e6d6 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -40,6 +40,7 @@ #include "pgstat.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" +#include "postmaster/walrestore.h" #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" @@ -187,7 +188,6 @@ static bool InArchiveRecovery = false; static bool restoredFromArchive = false; /* options taken from recovery.conf for archive recovery */ -static char *recoveryRestoreCommand = NULL; static char *recoveryEndCommand = NULL; static char *archiveCleanupCommand = NULL; static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; @@ -575,8 +575,8 @@ bool reachedConsistency = false; static bool InRedo = false; -/* Have we launched bgwriter during recovery? */ -static bool bgwriterLaunched = false; +/* Have we launched background procs during archive recovery yet? */ +static bool ArchRecoveryBgProcsActive = false; /* * Information logged when we detect a change in one of the parameters @@ -632,8 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, bool randAccess); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); -static bool RestoreArchivedFile(char *path, const char *xlogfname, - const char *recovername, off_t expectedSize); static void ExecuteRecoveryCommand(char *command, char *commandName, bool failOnerror); static void PreallocXlogFiles(XLogRecPtr endptr); @@ -2706,19 +2704,47 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, XLogFileName(xlogfname, tli, log, seg); +#define TMPRECOVERYXLOG "RECOVERYXLOG" + switch (source) { case XLOG_FROM_ARCHIVE: + /* + * Check to see if the WALRestore process has already put the + * next file in place while we were working. If so, use that. + * If not, get it ourselves. 
This makes it easier to handle + * initial state before the WALRestore is active, and also + * handles the stop/start logic correctly when we have both + * streaming and file based replication active. + * + * We queue up the next task for WALRestore after we've begun to + * use this file later in XLogFileRead(). + * + * If the WALRestore process is still active, the lock wait makes + * us wait, which is just like we were executing the command + * ourselves and so doesn't alter the logic elsewhere. + */ + if (XLogFileIsNowFullyRestored(tli, log, seg)) + { + snprintf(path, MAXPGPATH, XLOGDIR "/%s", TMPRECOVERYXLOG); + restoredFromArchive = true; + break; + } + /* Report recovery progress in PS display */ snprintf(activitymsg, sizeof(activitymsg), "waiting for %s", xlogfname); set_ps_display(activitymsg, false); restoredFromArchive = RestoreArchivedFile(path, xlogfname, - "RECOVERYXLOG", + TMPRECOVERYXLOG, XLogSegSize); + if (!restoredFromArchive) + { + LWLockRelease(WALRestoreCommandLock); return -1; + } break; case XLOG_FROM_PG_XLOG: @@ -2748,18 +2774,42 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, if (stat(xlogfpath, &statbuf) == 0) { if (unlink(xlogfpath) != 0) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", xlogfpath))); + } reload = true; } if (rename(path, xlogfpath) < 0) + { + LWLockRelease(WALRestoreCommandLock); ereport(ERROR, (errcode_for_file_access(), errmsg("could not rename file \"%s\" to \"%s\": %m", path, xlogfpath))); + } + + /* + * Make sure we recover from the new filename, so we can reuse the + * temporary filename for asynchronous restore actions. + */ + strcpy(path, xlogfpath); + + /* + * Tell the WALRestore process to get the next file now. + * Hopefully it will be ready for use in time for the next call the + * Startup process makes to XLogFileRead(). 
+ * + * It might seem like we should do that earlier but then there is a + * race condition that might lead to replacing RECOVERYXLOG with + * another file before we've copied it. + */ + SetNextWALRestoreLogSeg(tli, log, seg); + LWLockRelease(WALRestoreCommandLock); /* * If the existing segment was replaced, since walsenders might have @@ -2911,8 +2961,11 @@ XLogFileClose(void) * For fixed-size files, the caller may pass the expected size as an * additional crosscheck on successful recovery. If the file size is not * known, set expectedSize = 0. + * + * Must be called with WALRestoreCommandLock held and must be held at exit, + * if the function returns. */ -static bool +bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize) { @@ -2929,7 +2982,7 @@ RestoreArchivedFile(char *path, const char *xlogfname, uint32 restartSeg; /* In standby mode, restore_command might not be supplied */ - if (recoveryRestoreCommand == NULL) + if (strlen(GetRecoveryRestoreCommand()) == 0) goto not_available; /* @@ -2963,18 +3016,24 @@ RestoreArchivedFile(char *path, const char *xlogfname, if (stat(xlogpath, &stat_buf) != 0) { if (errno != ENOENT) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", xlogpath))); + } } else { if (unlink(xlogpath) != 0) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", xlogpath))); + } } /* @@ -3013,7 +3072,7 @@ RestoreArchivedFile(char *path, const char *xlogfname, endp = xlogRestoreCmd + MAXPGPATH - 1; *endp = '\0'; - for (sp = recoveryRestoreCommand; *sp; sp++) + for (sp = GetRecoveryRestoreCommand(); *sp; sp++) { if (*sp == '%') { @@ -3059,21 +3118,29 @@ RestoreArchivedFile(char *path, const char *xlogfname, } *dp = '\0'; - ereport(DEBUG3, + ereport(DEBUG2, (errmsg_internal("executing restore command \"%s\"", xlogRestoreCmd))); /* - * Check signals 
before restore command and reset afterwards. + * Set in_restore_command to tell the signal handler that we should exit + * right away on SIGTERM. We know that we're at a safe point to do that. + * Check if we had already received the signal, so that we don't miss a + * shutdown request received just before this. */ - PreRestoreCommand(); + in_restore_command = true; + if (startup_shutdown_requested || walrestore_shutdown_requested) + { + LWLockRelease(WALRestoreCommandLock); + proc_exit(1); + } /* * Copy xlog from archival storage to XLOGDIR */ rc = system(xlogRestoreCmd); - PostRestoreCommand(); + in_restore_command = false; if (rc == 0) { @@ -3102,7 +3169,10 @@ RestoreArchivedFile(char *path, const char *xlogfname, if (StandbyMode && stat_buf.st_size < expectedSize) elevel = DEBUG1; else + { + LWLockRelease(WALRestoreCommandLock); elevel = FATAL; + } ereport(elevel, (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", xlogfname, @@ -3123,10 +3193,13 @@ RestoreArchivedFile(char *path, const char *xlogfname, { /* stat failed */ if (errno != ENOENT) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", xlogpath))); + } } } @@ -3158,10 +3231,18 @@ RestoreArchivedFile(char *path, const char *xlogfname, * too. */ if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) + { + LWLockRelease(WALRestoreCommandLock); proc_exit(1); + } signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + /* + * If signaled we will immediately issue a FATAL error so drop the lock + */ + if (signaled) + LWLockRelease(WALRestoreCommandLock); ereport(signaled ? 
FATAL : DEBUG2, (errmsg("could not restore file \"%s\" from archive: return code %d", xlogfname, rc))); @@ -4203,7 +4284,9 @@ readTimeLineHistory(TimeLineID targetTLI) if (InArchiveRecovery) { TLHistoryFileName(histfname, targetTLI); + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + LWLockRelease(WALRestoreCommandLock); } else TLHistoryFilePath(path, targetTLI); @@ -4292,7 +4375,9 @@ existsTimeLineHistory(TimeLineID probeTLI) if (InArchiveRecovery) { TLHistoryFileName(histfname, probeTLI); + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + LWLockRelease(WALRestoreCommandLock); } else TLHistoryFilePath(path, probeTLI); @@ -4453,7 +4538,9 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, if (InArchiveRecovery) { TLHistoryFileName(histfname, parentTLI); + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + LWLockRelease(WALRestoreCommandLock); } else TLHistoryFilePath(path, parentTLI); @@ -5299,10 +5386,10 @@ readRecoveryCommandFile(void) { if (strcmp(item->name, "restore_command") == 0) { - recoveryRestoreCommand = pstrdup(item->value); + SetRecoveryRestoreCommand(pstrdup(item->value)); ereport(DEBUG2, (errmsg_internal("restore_command = '%s'", - recoveryRestoreCommand))); + GetRecoveryRestoreCommand()))); } else if (strcmp(item->name, "recovery_end_command") == 0) { @@ -5455,7 +5542,7 @@ readRecoveryCommandFile(void) */ if (StandbyMode) { - if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL) + if (PrimaryConnInfo == NULL && strlen(GetRecoveryRestoreCommand()) == 0) ereport(WARNING, (errmsg("recovery command file \"%s\" specified neither primary_conninfo nor restore_command", RECOVERY_COMMAND_FILE), @@ -5463,7 +5550,7 @@ readRecoveryCommandFile(void) } else { - if (recoveryRestoreCommand == NULL) + if (strlen(GetRecoveryRestoreCommand()) == 0) 
ereport(FATAL, (errmsg("recovery command file \"%s\" must specify restore_command when standby mode is not enabled", RECOVERY_COMMAND_FILE))); @@ -6432,7 +6519,7 @@ StartupXLOG(void) PublishStartupProcessInformation(); SetForwardFsyncRequests(); SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED); - bgwriterLaunched = true; + ArchRecoveryBgProcsActive = true; } /* @@ -6795,7 +6882,7 @@ StartupXLOG(void) * the rule that TLI only changes in shutdown checkpoints, which * allows some extra error checking in xlog_redo. */ - if (bgwriterLaunched) + if (ArchRecoveryBgProcsActive) RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT); @@ -9640,7 +9727,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, * Request a restartpoint if we've replayed too much * xlog since the last one. */ - if (StandbyMode && bgwriterLaunched) + if (StandbyMode && ArchRecoveryBgProcsActive) { if (XLogCheckpointNeeded(readId, readSeg)) { diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index e3ae92d..81a8cb3 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -30,6 +30,7 @@ #include "nodes/makefuncs.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" +#include "postmaster/walrestore.h" #include "postmaster/walwriter.h" #include "replication/walreceiver.h" #include "storage/bufmgr.h" @@ -319,6 +320,9 @@ AuxiliaryProcessMain(int argc, char *argv[]) case CheckpointerProcess: statmsg = "checkpointer process"; break; + case WalRestoreProcess: + statmsg = "wal restore process"; + break; case WalWriterProcess: statmsg = "wal writer process"; break; @@ -424,6 +428,11 @@ AuxiliaryProcessMain(int argc, char *argv[]) CheckpointerMain(); proc_exit(1); /* should never return */ + case WalRestoreProcess: + /* don't set signals, wal restore has its own agenda */ + WalRestoreMain(); + proc_exit(1); /* should never return */ + case WalWriterProcess: /* don't set signals, walwriter 
has its own agenda */ InitXLOGAccess(); diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile index 3056b09..349e722 100644 --- a/src/backend/postmaster/Makefile +++ b/src/backend/postmaster/Makefile @@ -13,6 +13,6 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \ - startup.o syslogger.o walwriter.o checkpointer.o + startup.o syslogger.o walrestore.o walwriter.o checkpointer.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index ad0c17a..15684c0 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -210,6 +210,7 @@ static pid_t StartupPID = 0, BgWriterPID = 0, CheckpointerPID = 0, WalWriterPID = 0, + WalRestorePID = 0, WalReceiverPID = 0, AutoVacPID = 0, PgArchPID = 0, @@ -470,6 +471,7 @@ static void ShmemBackendArrayRemove(Backend *bn); #define StartCheckpointer() StartChildProcess(CheckpointerProcess) #define StartWalWriter() StartChildProcess(WalWriterProcess) #define StartWalReceiver() StartChildProcess(WalReceiverProcess) +#define StartWalRestore() StartChildProcess(WalRestoreProcess) /* Macros to check exit status of a child process */ #define EXIT_STATUS_0(st) ((st) == 0) @@ -2060,6 +2062,8 @@ SIGHUP_handler(SIGNAL_ARGS) signal_child(WalWriterPID, SIGHUP); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGHUP); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGHUP); if (AutoVacPID != 0) signal_child(AutoVacPID, SIGHUP); if (PgArchPID != 0) @@ -2170,6 +2174,8 @@ pmdie(SIGNAL_ARGS) signal_child(StartupPID, SIGTERM); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGTERM); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGTERM); if (BgWriterPID != 0) signal_child(BgWriterPID, SIGTERM); if (pmState == PM_RECOVERY) @@ -2225,6 +2231,8 @@ pmdie(SIGNAL_ARGS) signal_child(WalWriterPID, 
SIGQUIT); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGQUIT); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGQUIT); if (AutoVacPID != 0) signal_child(AutoVacPID, SIGQUIT); if (PgArchPID != 0) @@ -2331,6 +2339,12 @@ reaper(SIGNAL_ARGS) pmState = PM_RUN; /* + * Shutdown the WALRestore process + */ + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGTERM); + + /* * Kill any walsenders to force the downstream standby(s) to * reread the timeline history file, adjust their timelines and * establish replication connections again. This is required @@ -2477,6 +2491,30 @@ reaper(SIGNAL_ARGS) } /* + * Was it the wal restore? If exit status is zero (normal) or one + * (FATAL exit), we assume everything is all right just like normal + * backends. + */ + if (pid == WalRestorePID) + { + if (pmState >= PM_RUN) + { + WalRestorePID = 0; + continue; + } + + /* + * Any unexpected exit (including FATAL exit) of the WALRestore + * process is treated as a crash, except that we don't want to + * reinitialize because availability is important. + */ + RecoveryError = true; + HandleChildCrash(pid, exitstatus, + _("walrestore process")); + continue; + } + + /* * Was it the autovacuum launcher? Normal exit can be ignored; we'll * start a new one at the next iteration of the postmaster's main * loop, if necessary. Any other exit condition is treated as a @@ -2756,6 +2794,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT)); } + /* Take care of the walrestore too */ + if (pid == WalRestorePID) + WalRestorePID = 0; + else if (WalRestorePID != 0 && !FatalError) + { + ereport(DEBUG2, + (errmsg_internal("sending %s to process %d", + (SendStop ? "SIGSTOP" : "SIGQUIT"), + (int) WalRestorePID))); + signal_child(WalRestorePID, (SendStop ? 
SIGSTOP : SIGQUIT)); + } + /* Take care of the autovacuum launcher too */ if (pid == AutoVacPID) AutoVacPID = 0; @@ -2916,6 +2966,8 @@ PostmasterStateMachine(void) signal_child(StartupPID, SIGTERM); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGTERM); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGTERM); pmState = PM_WAIT_BACKENDS; } } @@ -2940,6 +2992,7 @@ PostmasterStateMachine(void) if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && StartupPID == 0 && WalReceiverPID == 0 && + WalRestorePID == 0 && BgWriterPID == 0 && (CheckpointerPID == 0 || !FatalError) && WalWriterPID == 0 && @@ -3005,11 +3058,11 @@ PostmasterStateMachine(void) * left by now anyway; what we're really waiting for is walsenders and * archiver. * - * Walreceiver should normally be dead by now, but not when a fast - * shutdown is performed during recovery. + * Walreceiver and Walrestore should normally be dead by now, but not + * when a fast shutdown is performed during recovery. */ if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 && - WalReceiverPID == 0) + WalReceiverPID == 0 && WalRestorePID == 0) { pmState = PM_WAIT_DEAD_END; } @@ -3036,6 +3089,7 @@ PostmasterStateMachine(void) /* These other guys should be dead already */ Assert(StartupPID == 0); Assert(WalReceiverPID == 0); + Assert(WalRestorePID == 0); Assert(BgWriterPID == 0); Assert(CheckpointerPID == 0); Assert(WalWriterPID == 0); @@ -4219,6 +4273,8 @@ sigusr1_handler(SIGNAL_ARGS) BgWriterPID = StartBackgroundWriter(); Assert(CheckpointerPID == 0); CheckpointerPID = StartCheckpointer(); + Assert(WalRestorePID == 0); + WalRestorePID = StartWalRestore(); pmState = PM_RECOVERY; } diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index ed75d09..1791feb 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -35,14 +35,14 @@ * Flags set by interrupt handlers for later service in the redo loop. 
*/ static volatile sig_atomic_t got_SIGHUP = false; -static volatile sig_atomic_t shutdown_requested = false; static volatile sig_atomic_t promote_triggered = false; +volatile sig_atomic_t startup_shutdown_requested = false; /* * Flag set when executing a restore command, to tell SIGTERM signal handler * that it's safe to just proc_exit. */ -static volatile sig_atomic_t in_restore_command = false; +volatile sig_atomic_t in_restore_command = false; /* Signal handlers */ static void startupproc_quickdie(SIGNAL_ARGS); @@ -131,9 +131,16 @@ StartupProcShutdownHandler(SIGNAL_ARGS) int save_errno = errno; if (in_restore_command) + { + /* + * See RestoreArchivedFile() for explanation of why this + * lock is always held when in_restore_command is true. + */ + LWLockRelease(WALRestoreCommandLock); proc_exit(1); + } else - shutdown_requested = true; + startup_shutdown_requested = true; WakeupRecovery(); errno = save_errno; @@ -155,7 +162,7 @@ HandleStartupProcInterrupts(void) /* * Check if we were requested to exit without finishing recovery. */ - if (shutdown_requested) + if (startup_shutdown_requested) proc_exit(1); /* @@ -226,26 +233,6 @@ StartupProcessMain(void) proc_exit(0); } -void -PreRestoreCommand(void) -{ - /* - * Set in_restore_command to tell the signal handler that we should exit - * right away on SIGTERM. We know that we're at a safe point to do that. - * Check if we had already received the signal, so that we don't miss a - * shutdown request received just before this. 
- */ - in_restore_command = true; - if (shutdown_requested) - proc_exit(1); -} - -void -PostRestoreCommand(void) -{ - in_restore_command = false; -} - bool IsPromoteTriggered(void) { diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index ef1dc91..8f4443a 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -26,6 +26,7 @@ #include "postmaster/autovacuum.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" +#include "postmaster/walrestore.h" #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" @@ -123,6 +124,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, AutoVacuumShmemSize()); size = add_size(size, WalSndShmemSize()); size = add_size(size, WalRcvShmemSize()); + size = add_size(size, WalRestoreShmemSize()); size = add_size(size, BTreeShmemSize()); size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); @@ -228,6 +230,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) AutoVacuumShmemInit(); WalSndShmemInit(); WalRcvShmemInit(); + WalRestoreShmemInit(); /* * Set up other modules that need some shared memory space diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 1ddf4bf..e9e5325 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -270,7 +270,8 @@ extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern int XLogFileInit(uint32 log, uint32 seg, bool *use_existent, bool use_lock); extern int XLogFileOpen(uint32 log, uint32 seg); - +extern bool RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername, off_t expectedSize); extern void XLogGetLastRemoved(uint32 *log, uint32 *seg); extern void XLogSetAsyncXactLSN(XLogRecPtr record); @@ -316,6 +317,7 @@ extern TimeLineID GetRecoveryTargetTLI(void); extern bool CheckPromoteSignal(void); extern void WakeupRecovery(void); extern Latch *WALWriterLatch(void); 
+extern Latch *WALRestoreLatch(void); /* * Starting/stopping a base backup diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index e966a73..b90ce33 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -23,6 +23,7 @@ typedef enum StartupProcess, BgWriterProcess, CheckpointerProcess, + WalRestoreProcess, WalWriterProcess, WalReceiverProcess, diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h index 3ec6950..35d9665 100644 --- a/src/include/postmaster/startup.h +++ b/src/include/postmaster/startup.h @@ -12,10 +12,11 @@ #ifndef _STARTUP_H #define _STARTUP_H +extern volatile sig_atomic_t startup_shutdown_requested; +extern volatile sig_atomic_t in_restore_command; + extern void HandleStartupProcInterrupts(void); extern void StartupProcessMain(void); -extern void PreRestoreCommand(void); -extern void PostRestoreCommand(void); extern bool IsPromoteTriggered(void); extern void ResetPromoteTriggered(void); diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index df3df29..c316dcc 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -79,6 +79,7 @@ typedef enum LWLockId SerializablePredicateLockListLock, OldSerXidLock, SyncRepLock, + WALRestoreCommandLock, /* Individual lock IDs end here */ FirstBufMappingLock, FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 358d1a4..50d4f35 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -204,12 +204,20 @@ extern PGPROC *PreparedXactProcs; /* * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access. + * Logger, archiver and stats processes don't count towards this total. 
* + * This needs to be set to whichever of normal running or recovery has the + * highest number of backends that might occur together. + * + * During normal running we need slots for: * Background writer, checkpointer and WAL writer run during normal operation. - * Startup process and WAL receiver also consume 2 slots, but WAL writer is - * launched only after startup has exited, so we only need 4 slots. + * 3 slots + * + * During recovery we need slots for: + * Background writer, checkpointer, Startup process, WAL receiver, WAL restore. + * 5 slots */ -#define NUM_AUXILIARY_PROCS 4 +#define NUM_AUXILIARY_PROCS 5 /* configurable options */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers