Where are we on this? ---------------------------------------------------------------------------
On Mon, Jan 16, 2012 at 01:52:35AM +0000, Simon Riggs wrote: > On Fri, Dec 16, 2011 at 3:01 PM, Simon Riggs <si...@2ndquadrant.com> wrote: > > archive_command and restore_command describe how to ship WAL files > > to/from an archive. > > > > When there is nothing to ship, we delay sending WAL files. When there are no WAL > > files, the standby has no information at all. > > > > To provide some form of keepalive on quiet systems the > > archive_keepalive_command provides a generic hook to implement > > keepalives. This is implemented as a separate command to avoid storing > > keepalive messages in the archive, or at least allow overwrites using > > a single filename like "keepalive". > > > > Examples > > archive_keepalive_command = 'arch_cmd keepalive' # sends a file > > called "keepalive" to archive, overwrites allowed > > archive_keepalive_command = 'arch_cmd %f.%t.keepalive' # sends a file > > like 000000010000000AB00000000FE.20111216143517.keepalive > > > > If there is no WAL file to send, then we send a keepalive file > > instead. Keepalive is a small file that contains the same contents as a > > streaming keepalive message (re: other patch on that). > > > > If no WAL file is available and we are attempting to restore in > > standby_mode, then we execute restore_keepalive_command to see if a > > keepalive file is available. Checks for a file in the specific > > keepalive format and then uses that to update last received info from > > master. > > > > e.g. > > restore_keepalive_command = 'restore_cmd keepalive' # gets a file > > called "keepalive" from archive, overwrites allowed > > Patch. 
> > -- > Simon Riggs http://www.2ndQuadrant.com/ > PostgreSQL Development, 24x7 Support, Training & Services > diff --git a/src/backend/access/transam/recovery.conf.sample > b/src/backend/access/transam/recovery.conf.sample > index 5acfa57..fab288c 100644 > --- a/src/backend/access/transam/recovery.conf.sample > +++ b/src/backend/access/transam/recovery.conf.sample > @@ -43,6 +43,13 @@ > # > #restore_command = '' # e.g. 'cp /mnt/server/archivedir/%f %p' > # > +# restore_keepalive_command > +# > +# specifies an optional shell command to download keepalive files > +# e.g. archive_keepalive_command = 'cp -f %p $ARCHIVE/keepalive </dev/null' > +# e.g. restore_keepalive_command = 'cp $ARCHIVE/keepalive %p' > +# > +#restore_keepalive_command = '' > # > # archive_cleanup_command > # > diff --git a/src/backend/access/transam/xlog.c > b/src/backend/access/transam/xlog.c > index ce659ec..2729141 100644 > --- a/src/backend/access/transam/xlog.c > +++ b/src/backend/access/transam/xlog.c > @@ -73,8 +73,10 @@ int CheckPointSegments = 3; > int wal_keep_segments = 0; > int XLOGbuffers = -1; > int XLogArchiveTimeout = 0; > +int XLogArchiveKeepaliveTimeout = 10; /* XXX set to > 60 before commit */ > bool XLogArchiveMode = false; > char *XLogArchiveCommand = NULL; > +char *XLogArchiveKeepaliveCommand = NULL; > bool EnableHotStandby = false; > bool fullPageWrites = true; > bool log_checkpoints = false; > @@ -188,6 +190,7 @@ static bool restoredFromArchive = false; > > /* options taken from recovery.conf for archive recovery */ > static char *recoveryRestoreCommand = NULL; > +static char *recoveryRestoreKeepaliveCommand = NULL; > static char *recoveryEndCommand = NULL; > static char *archiveCleanupCommand = NULL; > static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; > @@ -634,6 +637,7 @@ static int emode_for_corrupt_record(int emode, > XLogRecPtr RecPtr); > static void XLogFileClose(void); > static bool RestoreArchivedFile(char *path, const char *xlogfname, > const char 
*recovername, off_t > expectedSize); > +static void RestoreKeepaliveFile(void); > static void ExecuteRecoveryCommand(char *command, char *commandName, > bool failOnerror); > static void PreallocXlogFiles(XLogRecPtr endptr); > @@ -2718,7 +2722,10 @@ XLogFileRead(uint32 log, uint32 seg, int emode, > TimeLineID tli, > > "RECOVERYXLOG", > > XLogSegSize); > if (!restoredFromArchive) > + { > + RestoreKeepaliveFile(); > return -1; > + } > break; > > case XLOG_FROM_PG_XLOG: > @@ -3179,6 +3186,192 @@ not_available: > return false; > } > > +static void > +RestoreKeepaliveFile(void) > +{ > + char keepalivepath[MAXPGPATH]; > + char keepaliveRestoreCmd[MAXPGPATH]; > + char *dp; > + char *endp; > + const char *sp; > + int rc; > + bool signaled; > + struct stat stat_buf; > + > + /* In standby mode, restore_command might not be supplied */ > + if (recoveryRestoreKeepaliveCommand == NULL) > + return; > + > + snprintf(keepalivepath, MAXPGPATH, XLOGDIR "/archive_status/KEEPALIVE"); > + > + /* > + * Make sure there is no existing file in keepalivepath > + */ > + if (stat(keepalivepath, &stat_buf) == 0) > + { > + if (unlink(keepalivepath) != 0) > + ereport(FATAL, > + (errcode_for_file_access(), > + errmsg("could not remove file \"%s\": > %m", > + keepalivepath))); > + } > + > + /* > + * construct the command to be executed > + */ > + dp = keepaliveRestoreCmd; > + endp = keepaliveRestoreCmd + MAXPGPATH - 1; > + *endp = '\0'; > + > + for (sp = recoveryRestoreKeepaliveCommand; *sp; sp++) > + { > + if (*sp == '%') > + { > + switch (sp[1]) > + { > + case 'p': > + /* %p: relative path of target file */ > + sp++; > + StrNCpy(dp, keepalivepath, endp - dp); > + make_native_path(dp); > + dp += strlen(dp); > + break; > + case '%': > + /* convert %% to a single % */ > + sp++; > + if (dp < endp) > + *dp++ = *sp; > + break; > + default: > + /* otherwise treat the % as not special > */ > + if (dp < endp) > + *dp++ = *sp; > + break; > + } > + } > + else > + { > + if (dp < endp) > + *dp++ = *sp; > + } 
> + } > + *dp = '\0'; > + > + ereport(DEBUG2, > + (errmsg_internal("executing restore keepalive command > \"%s\"", > + keepaliveRestoreCmd))); > + > + /* > + * Check signals before restore command and reset afterwards. > + */ > + PreRestoreCommand(); > + > + /* > + * Copy keepalive from archival storage to archive_status dir > + */ > + rc = system(keepaliveRestoreCmd); > + > + PostRestoreCommand(); > + > + if (rc == 0) > + { > + /* > + * command apparently succeeded, but let's check the file is > there > + */ > + if (stat(keepalivepath, &stat_buf) == 0) > + { > + char kptime[15]; > + char kptimezone[4]; > + char *kdata; > + char ch; > + int r; > + FILE *fd; > + > + fd = AllocateFile(keepalivepath, "r"); > + if (!fd) > + { > + ereport(ERROR, > + (errcode_for_file_access(), > + errmsg("could not read file > \"%s\": %m", > + > keepalivepath))); > + } > + kdata = palloc(stat_buf.st_size + 1); > + r = fread(kdata, stat_buf.st_size, 1, fd); > + kdata[stat_buf.st_size] = '\0'; > + > + /* > + * Close and remove the keepalive file > + */ > + if (r != 1 || ferror(fd) || FreeFile(fd)) > + ereport(ERROR, > + > (errcode_for_file_access(), > + errmsg("could not read file > \"%s\": %m", > + > keepalivepath))); > + > + /* > + * Parse the keepalive file > + */ > + if (sscanf(kdata, "KEEPALIVE TIME: %14s%3s%c", > + kptime, kptimezone, &ch) != 3 > || ch != '\n') > + ereport(ERROR, > + > (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), > + errmsg("invalid data in file > \"%s\"", keepalivepath))); > + kptime[14] = '\0'; > + kptimezone[3] = '\0'; > + > + ereport(DEBUG2, > + (errmsg("restored keepalive from > archive %s%s", kptime, kptimezone))); > + > + XLogReceiptSource = XLOG_FROM_ARCHIVE; > + XLogReceiptTime = GetCurrentTimestamp(); > + SetCurrentChunkStartTime(XLogReceiptTime); > + > + if (unlink(keepalivepath) != 0) > + ereport(ERROR, > + (errcode_for_file_access(), > + errmsg("could not remove file > \"%s\": %m", > + > keepalivepath))); > + return; > + } > + } > + > + /* > + * 
Remember, we rollforward UNTIL the restore fails so failure here is > + * just part of the process... that makes it difficult to determine > + * whether the restore failed because there isn't an archive to restore, > + * or because the administrator has specified the restore program > + * incorrectly. We have to assume the former. > + * > + * However, if the failure was due to any sort of signal, it's best to > + * punt and abort recovery. (If we "return false" here, upper levels > will > + * assume that recovery is complete and start up the database!) It's > + * essential to abort on child SIGINT and SIGQUIT, because per spec > + * system() ignores SIGINT and SIGQUIT while waiting; if we see one of > + * those it's a good bet we should have gotten it too. > + * > + * On SIGTERM, assume we have received a fast shutdown request, and exit > + * cleanly. It's pure chance whether we receive the SIGTERM first, or > the > + * child process. If we receive it first, the signal handler will call > + * proc_exit, otherwise we do it here. If we or the child process > received > + * SIGTERM for any other reason than a fast shutdown request, postmaster > + * will perform an immediate shutdown when it sees us exiting > + * unexpectedly. > + * > + * Per the Single Unix Spec, shells report exit status > 128 when a > called > + * command died on a signal. Also, 126 and 127 are used to report > + * problems such as an unfindable command; treat those as fatal errors > + * too. > + */ > + if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) > + proc_exit(1); > + > + signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; > + > + ereport(signaled ? FATAL : DEBUG2, > + (errmsg("could not restore keepalive file from archive: return > code %d", > + rc))); > +} > + > /* > * Attempt to execute an external shell command during recovery. 
> * > @@ -5304,6 +5497,13 @@ readRecoveryCommandFile(void) > (errmsg_internal("restore_command = > '%s'", > > recoveryRestoreCommand))); > } > + else if (strcmp(item->name, "restore_keepalive_command") == 0) > + { > + recoveryRestoreKeepaliveCommand = pstrdup(item->value); > + ereport(DEBUG2, > + > (errmsg_internal("restore_keepalive_command = '%s'", > + > recoveryRestoreKeepaliveCommand))); > + } > else if (strcmp(item->name, "recovery_end_command") == 0) > { > recoveryEndCommand = pstrdup(item->value); > @@ -10102,3 +10302,52 @@ WALWriterLatch(void) > { > return &XLogCtl->WALWriterLatch; > } > + > +/* > + * Write a keepalive and return the values of path and filename > + */ > +void > +XLogWriteKeepaliveFile(void) > +{ > + char keepalivepath[MAXPGPATH]; > + char xlogfname[MAXFNAMELEN]; > + XLogRecPtr lastFlushRecPtr = GetFlushRecPtr(); > + pg_time_t stamp_time; > + char strfbuf[128]; > + uint32 log; > + uint32 seg; > + FILE *fd; > + > + XLByteToSeg(lastFlushRecPtr, log, seg); > + XLogFileName(xlogfname, ThisTimeLineID, log, seg); > + > + /* Use the log timezone here, not the session timezone */ > + stamp_time = (pg_time_t) time(NULL); > + pg_strftime(strfbuf, sizeof(strfbuf), > + "%Y%m%d%H%M%S%Z", > + pg_localtime(&stamp_time, log_timezone)); > + > + KeepaliveFilePath(keepalivepath, xlogfname, strfbuf); > + > + elog(DEBUG4, "keepalive %s", keepalivepath); > + > + fd = AllocateFile(keepalivepath, "w"); > + if (fd == NULL) > + { > + ereport(LOG, > + (errcode_for_file_access(), > + errmsg("could not create archive keepalive > file \"%s\": %m", > + keepalivepath))); > + return; > + } > + fprintf(fd, "KEEPALIVE TIME: %s\n", strfbuf); > + if (fflush(fd) || ferror(fd) || FreeFile(fd)) > + ereport(ERROR, > + (errcode_for_file_access(), > + errmsg("could not write file \"%s\": %m", > + keepalivepath))); > + > + /* Notify archiver that it's got something to do */ > + if (IsUnderPostmaster) > + SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); > +} > diff --git 
a/src/backend/postmaster/checkpointer.c > b/src/backend/postmaster/checkpointer.c > index 0b792d2..29882b1 100644 > --- a/src/backend/postmaster/checkpointer.c > +++ b/src/backend/postmaster/checkpointer.c > @@ -164,6 +164,7 @@ static double ckpt_cached_elapsed; > > static pg_time_t last_checkpoint_time; > static pg_time_t last_xlog_switch_time; > +static pg_time_t last_xlog_keepalive_time; > > /* Prototypes for private functions */ > > @@ -241,7 +242,7 @@ CheckpointerMain(void) > /* > * Initialize so that first time-driven event happens at the correct > time. > */ > - last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL); > + last_xlog_keepalive_time = last_checkpoint_time = last_xlog_switch_time > = (pg_time_t) time(NULL); > > /* > * Create a resource owner to keep track of our resources (currently > only > @@ -546,6 +547,7 @@ CheckpointerMain(void) > > /* > * CheckArchiveTimeout -- check for archive_timeout and switch xlog files > + * or write keepalive files > * > * This will switch to a new WAL file and force an archive file write > * if any activity is recorded in the current WAL file, including just > @@ -556,47 +558,83 @@ CheckArchiveTimeout(void) > { > pg_time_t now; > pg_time_t last_time; > + bool switched = false; > > - if (XLogArchiveTimeout <= 0 || RecoveryInProgress()) > + if (RecoveryInProgress()) > return; > > now = (pg_time_t) time(NULL); > > + if (XLogArchiveTimeout > 0) > + { > + /* First we do a quick check using possibly-stale local state. > */ > + if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout) > + { > + /* > + * Update local state ... note that > last_xlog_switch_time is the last time > + * a switch was performed *or requested*. 
> + */ > + last_time = GetLastSegSwitchTime(); > + > + last_xlog_switch_time = Max(last_xlog_switch_time, > last_time); > + > + /* Now we can do the real check */ > + if ((int) (now - last_xlog_switch_time) >= > XLogArchiveTimeout) > + { > + XLogRecPtr switchpoint; > + > + /* OK, it's time to switch */ > + switchpoint = RequestXLogSwitch(); > + > + /* > + * If the returned pointer points exactly to a > segment boundary, > + * assume nothing happened. > + */ > + if ((switchpoint.xrecoff % XLogSegSize) != 0) > + ereport(DEBUG1, > + (errmsg("transaction log switch > forced (archive_timeout=%d)", > + > XLogArchiveTimeout))); > + > + /* > + * Update state in any case, so we don't retry > constantly when the > + * system is idle. > + */ > + last_xlog_switch_time = now; > + switched = true; > + } > + } > + } > + > + if (switched || !XLogArchiveKeepaliveCommandSet()) > + return; > + > /* First we do a quick check using possibly-stale local state. */ > - if ((int) (now - last_xlog_switch_time) < XLogArchiveTimeout) > + if ((int) (now - last_xlog_switch_time) < XLogArchiveKeepaliveTimeout) > return; > > /* > - * Update local state ... note that last_xlog_switch_time is the last > time > - * a switch was performed *or requested*. > + * Update local state if we didn't do it already. > */ > - last_time = GetLastSegSwitchTime(); > - > - last_xlog_switch_time = Max(last_xlog_switch_time, last_time); > + if (XLogArchiveTimeout <= 0) > + last_time = GetLastSegSwitchTime(); > > /* Now we can do the real check */ > - if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout) > - { > - XLogRecPtr switchpoint; > + if ((int) (now - last_xlog_switch_time) < XLogArchiveKeepaliveTimeout) > + return; > > - /* OK, it's time to switch */ > - switchpoint = RequestXLogSwitch(); > + if ((int) (now - last_xlog_keepalive_time) < > XLogArchiveKeepaliveTimeout) > + return; > > - /* > - * If the returned pointer points exactly to a segment boundary, > - * assume nothing happened. 
> - */ > - if ((switchpoint.xrecoff % XLogSegSize) != 0) > - ereport(DEBUG1, > - (errmsg("transaction log switch forced > (archive_timeout=%d)", > - XLogArchiveTimeout))); > + /* > + * Write a keepalive file for archive_keepalive_command > + */ > + XLogWriteKeepaliveFile(); > > - /* > - * Update state in any case, so we don't retry constantly when > the > - * system is idle. > - */ > - last_xlog_switch_time = now; > - } > + /* > + * We don't log a message to say keepalive sent > + */ > + > + last_xlog_keepalive_time = now; > } > > /* > diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c > index 37fc735..e8c19bb 100644 > --- a/src/backend/postmaster/pgarch.c > +++ b/src/backend/postmaster/pgarch.c > @@ -51,7 +51,8 @@ > * Timer definitions. > * ---------- > */ > -#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of > the > +/* XXX change only for testing */ > +#define PGARCH_AUTOWAKE_INTERVAL 10 /* How often to force a poll of > the > > * archive status directory; in > > * seconds. 
*/ > #define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to > restart a > @@ -108,10 +109,14 @@ static void ArchSigTermHandler(SIGNAL_ARGS); > static void pgarch_waken(SIGNAL_ARGS); > static void pgarch_waken_stop(SIGNAL_ARGS); > static void pgarch_MainLoop(void); > -static void pgarch_ArchiverCopyLoop(void); > +static void pgarch_ArchiverCopyLoop(bool timedout); > static bool pgarch_archiveXlog(char *xlog); > +static void pgarch_archiveKeepalive(void); > static bool pgarch_readyXlog(char *xlog); > static void pgarch_archiveDone(char *xlog); > +static void constructArchiveCommand(char *archcmd, const char > *archcmdtemplate, > + const char *filepath, const > char *filename); > +static bool executeArchiveCommand(const char *archcmd, const char > *description); > > > /* ------------------------------------------------------------ > @@ -351,6 +356,7 @@ pgarch_MainLoop(void) > { > pg_time_t last_copy_time = 0; > bool time_to_stop; > + bool timedout = false; > > /* > * We run the copy loop immediately upon entry, in case there are > @@ -401,7 +407,8 @@ pgarch_MainLoop(void) > if (wakened || time_to_stop) > { > wakened = false; > - pgarch_ArchiverCopyLoop(); > + pgarch_ArchiverCopyLoop(timedout); > + timedout = false; > last_copy_time = time(NULL); > } > > @@ -424,7 +431,10 @@ pgarch_MainLoop(void) > WL_LATCH_SET | > WL_TIMEOUT | WL_POSTMASTER_DEATH, > timeout * 1000L); > if (rc & WL_TIMEOUT) > + { > + timedout = true; > wakened = true; > + } > } > else > wakened = true; > @@ -444,9 +454,10 @@ pgarch_MainLoop(void) > * Archives all outstanding xlogs then returns > */ > static void > -pgarch_ArchiverCopyLoop(void) > +pgarch_ArchiverCopyLoop(bool timedout) > { > char xlog[MAX_XFN_CHARS + 1]; > + bool sentfile = false; > > /* > * loop through all xlogs with archive_status of .ready and archive > @@ -486,6 +497,8 @@ pgarch_ArchiverCopyLoop(void) > { > ereport(WARNING, > (errmsg("archive_mode enabled, > yet archive_command is not set"))); > + if (!sentfile && timedout) 
> + pgarch_archiveKeepalive(); > return; > } > > @@ -493,6 +506,7 @@ pgarch_ArchiverCopyLoop(void) > { > /* successful */ > pgarch_archiveDone(xlog); > + sentfile = true; > break; /* out of inner retry > loop */ > } > else > @@ -508,151 +522,117 @@ pgarch_ArchiverCopyLoop(void) > } > } > } > + > + if (!sentfile && timedout) > + pgarch_archiveKeepalive(); > } > > /* > - * pgarch_archiveXlog > - * > - * Invokes system(3) to copy one archive file to wherever it should go > - * > - * Returns true if successful > + * pgarch_archiveXlog - executes archive_command for latest WAL file > */ > static bool > pgarch_archiveXlog(char *xlog) > { > char xlogarchcmd[MAXPGPATH]; > - char pathname[MAXPGPATH]; > char activitymsg[MAXFNAMELEN + 16]; > - char *dp; > - char *endp; > - const char *sp; > - int rc; > + char xlogfilepath[MAXPGPATH]; > + > + snprintf(xlogfilepath, MAXPGPATH, XLOGDIR "/%s", xlog); > + > + constructArchiveCommand(xlogarchcmd, XLogArchiveCommand, > + xlogfilepath, xlog); > + > + /* Report archive activity in PS display */ > + snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog); > + set_ps_display(activitymsg, false); > + > + if (!executeArchiveCommand(xlogarchcmd, "archive command")) > + return false; > + > + ereport(DEBUG1, > + (errmsg("archived transaction log file \"%s\"", xlog))); > + > + snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog); > + set_ps_display(activitymsg, false); > + > + return true; > +} > + > +/* > + * pgarch_archiveKeepalive - executes archive_keepalive_command > + */ > +static void > +pgarch_archiveKeepalive(void) > +{ > +#define LENGTH_DOT_KEEPALIVE 10 > + char keepalivearchcmd[MAXPGPATH]; > + char keepalivepath[MAXPGPATH]; > + char XLogArchiveStatusDir[MAXPGPATH]; > + char keepalive[MAX_XFN_CHARS + LENGTH_DOT_KEEPALIVE + 1]; > + DIR *rldir; > + struct dirent *rlde; > + bool found = false; > > - snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog); > + if (!XLogArchiveKeepaliveCommandSet()) > + return; > > /* > - 
* construct the command to be executed > + * open xlog status directory and read through list of keepalives, > + * looking for latest file. It is possible to optimise this code > + * though only a single file is expected on the vast majority > + * of calls, so.... > */ > - dp = xlogarchcmd; > - endp = xlogarchcmd + MAXPGPATH - 1; > - *endp = '\0'; > > - for (sp = XLogArchiveCommand; *sp; sp++) > + snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); > + rldir = AllocateDir(XLogArchiveStatusDir); > + if (rldir == NULL) > + ereport(ERROR, > + (errcode_for_file_access(), > + errmsg("could not open archive status > directory \"%s\": %m", > + XLogArchiveStatusDir))); > + > + while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL) > { > - if (*sp == '%') > + int basenamelen = (int) > strlen(rlde->d_name) - LENGTH_DOT_KEEPALIVE; > + > + if (strcmp(rlde->d_name + basenamelen, ".keepalive") == 0) > { > - switch (sp[1]) > + if (!found) > { > - case 'p': > - /* %p: relative path of source file */ > - sp++; > - strlcpy(dp, pathname, endp - dp); > - make_native_path(dp); > - dp += strlen(dp); > - break; > - case 'f': > - /* %f: filename of source file */ > - sp++; > - strlcpy(dp, xlog, endp - dp); > - dp += strlen(dp); > - break; > - case '%': > - /* convert %% to a single % */ > - sp++; > - if (dp < endp) > - *dp++ = *sp; > - break; > - default: > - /* otherwise treat the % as not special > */ > - if (dp < endp) > - *dp++ = *sp; > - break; > + strcpy(keepalive, rlde->d_name); > + found = true; > + } > + else > + { > + if (strcmp(rlde->d_name, keepalive) > 0) > + { > + sprintf(keepalivepath, "%s/%s", > XLogArchiveStatusDir, keepalive); > + unlink(keepalivepath); > + strcpy(keepalive, rlde->d_name); > + } > + else > + { > + sprintf(keepalivepath, "%s/%s", > XLogArchiveStatusDir, rlde->d_name); > + unlink(keepalivepath); > + } > } > - } > - else > - { > - if (dp < endp) > - *dp++ = *sp; > } > } > - *dp = '\0'; > - > - ereport(DEBUG3, > - 
(errmsg_internal("executing archive command \"%s\"", > - xlogarchcmd))); > - > - /* Report archive activity in PS display */ > - snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog); > - set_ps_display(activitymsg, false); > + FreeDir(rldir); > > - rc = system(xlogarchcmd); > - if (rc != 0) > - { > - /* > - * If either the shell itself, or a called command, died on a > signal, > - * abort the archiver. We do this because system() ignores > SIGINT and > - * SIGQUIT while waiting; so a signal is very likely something > that > - * should have interrupted us too. If we overreact it's no > big deal, > - * the postmaster will just start the archiver again. > - * > - * Per the Single Unix Spec, shells report exit status > 128 > when a > - * called command died on a signal. > - */ > - int lev = (WIFSIGNALED(rc) || > WEXITSTATUS(rc) > 128) ? FATAL : LOG; > + if (!found) > + return; > > - if (WIFEXITED(rc)) > - { > - ereport(lev, > - (errmsg("archive command failed with > exit code %d", > - WEXITSTATUS(rc)), > - errdetail("The failed archive command > was: %s", > - xlogarchcmd))); > - } > - else if (WIFSIGNALED(rc)) > - { > -#if defined(WIN32) > - ereport(lev, > - (errmsg("archive command was terminated by > exception 0x%X", > - WTERMSIG(rc)), > - errhint("See C include file \"ntstatus.h\" > for a description of the hexadecimal value."), > - errdetail("The failed archive command was: > %s", > - xlogarchcmd))); > -#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST > - ereport(lev, > - (errmsg("archive command was terminated > by signal %d: %s", > - WTERMSIG(rc), > - WTERMSIG(rc) < NSIG ? 
sys_siglist[WTERMSIG(rc)] : > "(unknown)"), > - errdetail("The failed archive command > was: %s", > - xlogarchcmd))); > -#else > - ereport(lev, > - (errmsg("archive command was terminated > by signal %d", > - WTERMSIG(rc)), > - errdetail("The failed archive command > was: %s", > - xlogarchcmd))); > -#endif > - } > - else > - { > - ereport(lev, > - (errmsg("archive command exited with > unrecognized status %d", > - rc), > - errdetail("The failed archive command was: %s", > - xlogarchcmd))); > - } > + sprintf(keepalivepath, "%s/%s", XLogArchiveStatusDir, keepalive); > + constructArchiveCommand(keepalivearchcmd, XLogArchiveKeepaliveCommand, > + keepalivepath, > keepalive); > + if (!executeArchiveCommand(keepalivearchcmd, "archive keepalive > command")) > + return; > > - snprintf(activitymsg, sizeof(activitymsg), "failed on %s", > xlog); > - set_ps_display(activitymsg, false); > + unlink(keepalivepath); > > - return false; > - } > ereport(DEBUG1, > - (errmsg("archived transaction log file \"%s\"", xlog))); > - > - snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog); > - set_ps_display(activitymsg, false); > - > - return true; > + (errmsg("archived keepalive file \"%s\"", keepalive))); > } > > /* > @@ -753,3 +733,138 @@ pgarch_archiveDone(char *xlog) > errmsg("could not rename file \"%s\" to > \"%s\": %m", > rlogready, rlogdone))); > } > + > +/* > + * Constructs the executable archive command from a template for a given file > + */ > +static void > +constructArchiveCommand(char *archcmd, const char *archcmdtemplate, > + const char *filepath, const > char *filename) > +{ > + char *dp; > + char *endp; > + const char *sp; > + > + /* > + * construct the command to be executed > + */ > + dp = archcmd; > + endp = archcmd + MAXPGPATH - 1; > + *endp = '\0'; > + > + for (sp = archcmdtemplate; *sp; sp++) > + { > + if (*sp == '%') > + { > + switch (sp[1]) > + { > + case 'p': > + /* %p: relative path of source file */ > + sp++; > + strlcpy(dp, filepath, endp - dp); > + 
make_native_path(dp); > + dp += strlen(dp); > + break; > + case 'f': > + /* %f: filename of source file */ > + sp++; > + strlcpy(dp, filename, endp - dp); > + dp += strlen(dp); > + break; > + case '%': > + /* convert %% to a single % */ > + sp++; > + if (dp < endp) > + *dp++ = *sp; > + break; > + default: > + /* otherwise treat the % as not special > */ > + if (dp < endp) > + *dp++ = *sp; > + break; > + } > + } > + else > + { > + if (dp < endp) > + *dp++ = *sp; > + } > + } > + *dp = '\0'; > +} > + > +/* > + * Invokes system(3) to execute the supplied archive command > + * > + * Returns true if successful > + */ > +static bool > +executeArchiveCommand(const char *archcmd, const char *description) > +{ > + int rc; > + > + ereport(DEBUG3, > + (errmsg_internal("executing %s \"%s\"", > + description, > archcmd))); > + > + rc = system(archcmd); > + if (rc != 0) > + { > + /* > + * If either the shell itself, or a called command, died on a > signal, > + * abort the archiver. We do this because system() ignores > SIGINT and > + * SIGQUIT while waiting; so a signal is very likely something > that > + * should have interrupted us too. If we overreact it's no > big deal, > + * the postmaster will just start the archiver again. > + * > + * Per the Single Unix Spec, shells report exit status > 128 > when a > + * called command died on a signal. > + */ > + int lev = (WIFSIGNALED(rc) || > WEXITSTATUS(rc) > 128) ? 
FATAL : LOG; > + > + if (WIFEXITED(rc)) > + { > + ereport(lev, > + (errmsg("%s failed with exit code %d", > + description, > WEXITSTATUS(rc)), > + errdetail("The failed archive command > was: %s", > + archcmd))); > + } > + else if (WIFSIGNALED(rc)) > + { > +#if defined(WIN32) > + ereport(lev, > + (errmsg("%s was terminated by exception 0x%X", > + description, WTERMSIG(rc)), > + errhint("See C include file \"ntstatus.h\" > for a description of the hexadecimal value."), > + errdetail("The failed archive command was: > %s", > + archcmd))); > +#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST > + ereport(lev, > + (errmsg("%s was terminated by signal > %d: %s", > + description, > WTERMSIG(rc), > + WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : > "(unknown)"), > + errdetail("The failed archive command > was: %s", > + archcmd))); > +#else > + ereport(lev, > + (errmsg("%s was terminated by signal > %d", > + description, > WTERMSIG(rc)), > + errdetail("The failed archive command > was: %s", > + archcmd))); > +#endif > + } > + else > + { > + ereport(lev, > + (errmsg("%s exited with unrecognized status %d", > + description, rc), > + errdetail("The failed archive command was: %s", > + archcmd))); > + } > + > + return false; > + } > + > + return true; > +} > diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c > index 5c910dd..16bd77f 100644 > --- a/src/backend/utils/misc/guc.c > +++ b/src/backend/utils/misc/guc.c > @@ -189,6 +189,7 @@ static bool check_timezone_abbreviations(char **newval, > void **extra, GucSource > static void assign_timezone_abbreviations(const char *newval, void *extra); > static void pg_timezone_abbrev_initialize(void); > static const char *show_archive_command(void); > +static const char *show_archive_keepalive_command(void); > static void assign_tcp_keepalives_idle(int newval, void *extra); > static void assign_tcp_keepalives_interval(int newval, void *extra); > static void assign_tcp_keepalives_count(int newval, void 
*extra); > @@ -2531,6 +2532,16 @@ static struct config_string ConfigureNamesString[] = > }, > > { > + {"archive_keepalive_command", PGC_SIGHUP, WAL_ARCHIVING, > + gettext_noop("Sets the shell command that will be > called to send a keepalive file."), > + NULL > + }, > + &XLogArchiveKeepaliveCommand, > + "", > + NULL, NULL, show_archive_keepalive_command > + }, > + > + { > {"client_encoding", PGC_USERSET, CLIENT_CONN_LOCALE, > gettext_noop("Sets the client's character set > encoding."), > NULL, > @@ -8490,6 +8501,15 @@ show_archive_command(void) > return "(disabled)"; > } > > +static const char * > +show_archive_keepalive_command(void) > +{ > + if (XLogArchivingActive()) > + return XLogArchiveKeepaliveCommand; > + else > + return "(disabled)"; > +} > + > static void > assign_tcp_keepalives_idle(int newval, void *extra) > { > diff --git a/src/backend/utils/misc/postgresql.conf.sample > b/src/backend/utils/misc/postgresql.conf.sample > index 315db46..085d5bb 100644 > --- a/src/backend/utils/misc/postgresql.conf.sample > +++ b/src/backend/utils/misc/postgresql.conf.sample > @@ -189,6 +189,10 @@ > # placeholders: %p = path of file to archive > # %f = file name only > # e.g. 'test ! -f /mnt/server/archivedir/%f && > cp %p /mnt/server/archivedir/%f' > +#archive_keepalive_command = '' # command to use to archive keepalive > message files > + # placeholders: %p = path of keepalive file > + # %f = keepalive file name only > + # e.g. 
'cp %p /mnt/server/archivedir/%f' > #archive_timeout = 0 # force a logfile segment switch after this > # number of seconds; 0 disables > > diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h > index 1ddf4bf..63174c5 100644 > --- a/src/include/access/xlog.h > +++ b/src/include/access/xlog.h > @@ -191,6 +191,8 @@ extern int XLOGbuffers; > extern int XLogArchiveTimeout; > extern bool XLogArchiveMode; > extern char *XLogArchiveCommand; > +extern char *XLogArchiveKeepaliveCommand; > +extern int XLogArchiveKeepaliveTimeout; > extern bool EnableHotStandby; > extern bool log_checkpoints; > > @@ -205,6 +207,7 @@ extern int wal_level; > > #define XLogArchivingActive() (XLogArchiveMode && wal_level >= > WAL_LEVEL_ARCHIVE) > #define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0') > +#define XLogArchiveKeepaliveCommandSet() (XLogArchiveKeepaliveCommand[0] != > '\0') > > /* > * Is WAL-logging necessary for archival or log-shipping, or can we skip > diff --git a/src/include/access/xlog_internal.h > b/src/include/access/xlog_internal.h > index db6380f..51e6558 100644 > --- a/src/include/access/xlog_internal.h > +++ b/src/include/access/xlog_internal.h > @@ -233,6 +233,9 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; > #define StatusFilePath(path, xlog, suffix) \ > snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix) > > +#define KeepaliveFilePath(path, kfname, timestr) \ > + snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s.%s.keepalive", > kfname, timestr) > + > #define BackupHistoryFileName(fname, tli, log, seg, offset) \ > snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, log, seg, > offset) > > @@ -258,6 +261,11 @@ typedef struct RmgrData > extern const RmgrData RmgrTable[]; > > /* > + * Exported to support writing keepalives from archiver > + */ > +extern void XLogWriteKeepaliveFile(void); > + > +/* > * Exported to support xlog switching from checkpointer > */ > extern pg_time_t GetLastSegSwitchTime(void); > > 
-- > Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) > To make changes to your subscription: > http://www.postgresql.org/mailpref/pgsql-hackers -- Bruce Momjian <br...@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. + -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers