On Fri, Dec 16, 2011 at 3:01 PM, Simon Riggs <[email protected]> wrote:
> archive_command and restore_command describe how to ship WAL files
> to/from an archive.
>
> When there is nothing to ship, we delay sending WAL files. When no WAL
> files arrive, the standby has no information at all.
>
> To provide some form of keepalive on quiet systems the
> archive_keepalive_command provides a generic hook to implement
> keepalives. This is implemented as a separate command to avoid storing
> keepalive messages in the archive, or at least allow overwrites using
> a single filename like "keepalive".
>
> Examples
> archive_keepalive_command = 'arch_cmd keepalive' # sends a file
> called "keepalive" to archive, overwrites allowed
> archive_keepalive_command = 'arch_cmd %f.%t.keepalive' # sends a file
> like 000000010000000AB00000000FE.20111216143517.keepalive
>
> If there is no WAL file to send, then we send a keepalive file
> instead. A keepalive is a small file that contains the same contents as a
> streaming keepalive message (re: other patch on that).
>
> If no WAL file is available and we are attempting to restore in
> standby_mode, then we execute restore_keepalive_command to see if a
> keepalive file is available. It checks for a file in the specific
> keepalive format and then uses that to update the last-received info from
> the master.
>
> e.g.
> restore_keepalive_command = 'restore_cmd keepalive' # gets a file
> called "keepalive" from archive, overwrites allowed
Patch.
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample
index 5acfa57..fab288c 100644
--- a/src/backend/access/transam/recovery.conf.sample
+++ b/src/backend/access/transam/recovery.conf.sample
@@ -43,6 +43,13 @@
#
#restore_command = '' # e.g. 'cp /mnt/server/archivedir/%f %p'
#
+# restore_keepalive_command
+#
+# specifies an optional shell command to download keepalive files
+# e.g. archive_keepalive_command = 'cp -f %p $ARCHIVE/keepalive </dev/null'
+# e.g. restore_keepalive_command = 'cp $ARCHIVE/keepalive %p'
+#
+#restore_keepalive_command = ''
#
# archive_cleanup_command
#
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ce659ec..2729141 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -73,8 +73,10 @@ int CheckPointSegments = 3;
int wal_keep_segments = 0;
int XLOGbuffers = -1;
int XLogArchiveTimeout = 0;
+int XLogArchiveKeepaliveTimeout = 10; /* XXX set to 60 before commit */
bool XLogArchiveMode = false;
char *XLogArchiveCommand = NULL;
+char *XLogArchiveKeepaliveCommand = NULL;
bool EnableHotStandby = false;
bool fullPageWrites = true;
bool log_checkpoints = false;
@@ -188,6 +190,7 @@ static bool restoredFromArchive = false;
/* options taken from recovery.conf for archive recovery */
static char *recoveryRestoreCommand = NULL;
+static char *recoveryRestoreKeepaliveCommand = NULL;
static char *recoveryEndCommand = NULL;
static char *archiveCleanupCommand = NULL;
static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
@@ -634,6 +637,7 @@ static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void);
static bool RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername, off_t expectedSize);
+static void RestoreKeepaliveFile(void);
static void ExecuteRecoveryCommand(char *command, char *commandName,
bool failOnerror);
static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -2718,7 +2722,10 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
"RECOVERYXLOG",
XLogSegSize);
if (!restoredFromArchive)
+ {
+ RestoreKeepaliveFile();
return -1;
+ }
break;
case XLOG_FROM_PG_XLOG:
@@ -3179,6 +3186,192 @@ not_available:
return false;
}
+/*
+ * Run restore_keepalive_command and, if it delivers a well-formed keepalive
+ * file, set XLogReceiptTime to the current timestamp.
+ */
+static void
+RestoreKeepaliveFile(void)
+{
+ char keepalivepath[MAXPGPATH];
+ char keepaliveRestoreCmd[MAXPGPATH];
+ char *dp;
+ char *endp;
+ const char *sp;
+ int rc;
+ bool signaled;
+ struct stat stat_buf;
+
+ /* Nothing to do unless restore_keepalive_command was supplied */
+ if (recoveryRestoreKeepaliveCommand == NULL)
+ return;
+
+ snprintf(keepalivepath, MAXPGPATH, XLOGDIR "/archive_status/KEEPALIVE");
+
+ /* Make sure there is no stale file left over at keepalivepath */
+ if (stat(keepalivepath, &stat_buf) == 0)
+ {
+ if (unlink(keepalivepath) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not remove file \"%s\": %m",
+ keepalivepath)));
+ }
+
+ /* Construct the command to be executed, expanding %p and %% */
+ dp = keepaliveRestoreCmd;
+ endp = keepaliveRestoreCmd + MAXPGPATH - 1;
+ *endp = '\0';
+
+ for (sp = recoveryRestoreKeepaliveCommand; *sp; sp++)
+ {
+ if (*sp == '%')
+ {
+ switch (sp[1])
+ {
+ case 'p':
+ /* %p: relative path of target file */
+ sp++;
+ StrNCpy(dp, keepalivepath, endp - dp);
+ make_native_path(dp);
+ dp += strlen(dp);
+ break;
+ case '%':
+ /* convert %% to a single % */
+ sp++;
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ default:
+ /* otherwise treat the % as not special */
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ }
+ }
+ else
+ {
+ if (dp < endp)
+ *dp++ = *sp;
+ }
+ }
+ *dp = '\0';
+
+ ereport(DEBUG2,
+ (errmsg_internal("executing restore keepalive command \"%s\"",
+ keepaliveRestoreCmd)));
+
+ /*
+ * Check signals before restore command and reset afterwards.
+ */
+ PreRestoreCommand();
+
+ /*
+ * Copy keepalive from archival storage to archive_status dir
+ */
+ rc = system(keepaliveRestoreCmd);
+
+ PostRestoreCommand();
+
+ if (rc == 0)
+ {
+ /*
+ * command apparently succeeded, but let's check the file is there
+ */
+ if (stat(keepalivepath, &stat_buf) == 0)
+ {
+ char kptime[15];
+ char kptimezone[32]; /* zone abbreviations can exceed 3 chars */
+ char *kdata;
+ char ch;
+ int r;
+ FILE *fd;
+
+ fd = AllocateFile(keepalivepath, "r");
+ if (!fd)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ keepalivepath)));
+ }
+ kdata = palloc(stat_buf.st_size + 1);
+ r = fread(kdata, stat_buf.st_size, 1, fd);
+ kdata[stat_buf.st_size] = '\0';
+
+ /*
+ * Close the keepalive file; fail if the read or the close went wrong
+ */
+ if (r != 1 || ferror(fd) || FreeFile(fd))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ keepalivepath)));
+
+ /*
+ * Parse the keepalive file: 14-char time, zone abbreviation, newline
+ */
+ if (sscanf(kdata, "KEEPALIVE TIME: %14s%31s%c",
+ kptime, kptimezone, &ch) != 3 || ch != '\n')
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", keepalivepath)));
+ kptime[14] = '\0';
+ kptimezone[sizeof(kptimezone) - 1] = '\0';
+
+ ereport(DEBUG2,
+ (errmsg("restored keepalive from archive %s%s", kptime, kptimezone)));
+ pfree(kdata); /* fields were copied out above; don't leak per call */
+ XLogReceiptSource = XLOG_FROM_ARCHIVE;
+ XLogReceiptTime = GetCurrentTimestamp();
+ SetCurrentChunkStartTime(XLogReceiptTime);
+
+ if (unlink(keepalivepath) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove file \"%s\": %m",
+ keepalivepath)));
+ return;
+ }
+ }
+
+ /*
+ * The command failed, or claimed success without producing the file. An
+ * ordinary failure is expected whenever no keepalive has been archived,
+ * and we cannot tell that apart from the administrator having specified
+ * the restore program incorrectly, so we assume the former and merely
+ * note it at DEBUG2 below.
+ *
+ * However, if the failure was due to any sort of signal, it's best to
+ * punt and abort recovery; this function returns nothing, so a FATAL
+ * report is the only way to stop the caller from carrying on. It's
+ * essential to abort on child SIGINT and SIGQUIT, because per spec
+ * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
+ * those it's a good bet we should have gotten it too.
+ *
+ * On SIGTERM, assume we have received a fast shutdown request, and exit
+ * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+ * child process. If we receive it first, the signal handler will call
+ * proc_exit, otherwise we do it here. If we or the child process received
+ * SIGTERM for any other reason than a fast shutdown request, postmaster
+ * will perform an immediate shutdown when it sees us exiting
+ * unexpectedly.
+ *
+ * Per the Single Unix Spec, shells report exit status > 128 when a called
+ * command died on a signal. Also, 126 and 127 are used to report
+ * problems such as an unfindable command; treat those as fatal errors
+ * too.
+ */
+ if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+ proc_exit(1);
+
+ signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+ ereport(signaled ? FATAL : DEBUG2,
+ (errmsg("could not restore keepalive file from archive: return code %d",
+ rc)));
+}
+
/*
* Attempt to execute an external shell command during recovery.
*
@@ -5304,6 +5497,13 @@ readRecoveryCommandFile(void)
(errmsg_internal("restore_command = '%s'",
recoveryRestoreCommand)));
}
+ else if (strcmp(item->name, "restore_keepalive_command") == 0)
+ {
+ recoveryRestoreKeepaliveCommand = pstrdup(item->value);
+ ereport(DEBUG2,
+ (errmsg_internal("restore_keepalive_command = '%s'",
+ recoveryRestoreKeepaliveCommand)));
+ }
else if (strcmp(item->name, "recovery_end_command") == 0)
{
recoveryEndCommand = pstrdup(item->value);
@@ -10102,3 +10302,52 @@ WALWriterLatch(void)
{
return &XLogCtl->WALWriterLatch;
}
+
+/*
+ * Write a timestamped keepalive file into archive_status and wake the archiver
+ */
+void
+XLogWriteKeepaliveFile(void)
+{
+ char keepalivepath[MAXPGPATH];
+ char xlogfname[MAXFNAMELEN];
+ XLogRecPtr lastFlushRecPtr = GetFlushRecPtr();
+ pg_time_t stamp_time;
+ char strfbuf[128];
+ uint32 log;
+ uint32 seg;
+ FILE *fd;
+
+ XLByteToSeg(lastFlushRecPtr, log, seg); /* segment holding the flush pointer */
+ XLogFileName(xlogfname, ThisTimeLineID, log, seg);
+
+ /* Use the log timezone here, not the session timezone */
+ stamp_time = (pg_time_t) time(NULL);
+ pg_strftime(strfbuf, sizeof(strfbuf),
+ "%Y%m%d%H%M%S%Z",
+ pg_localtime(&stamp_time, log_timezone));
+
+ KeepaliveFilePath(keepalivepath, xlogfname, strfbuf); /* <segment>.<stamp>.keepalive */
+
+ elog(DEBUG4, "keepalive %s", keepalivepath);
+
+ fd = AllocateFile(keepalivepath, "w");
+ if (fd == NULL)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not create archive keepalive file \"%s\": %m",
+ keepalivepath)));
+ return; /* failure to create the file is logged, not raised */
+ }
+ fprintf(fd, "KEEPALIVE TIME: %s\n", strfbuf); /* the file body repeats the timestamp */
+ if (fflush(fd) || ferror(fd) || FreeFile(fd))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ keepalivepath)));
+
+ /* Notify archiver that it's got something to do */
+ if (IsUnderPostmaster)
+ SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
+}
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 0b792d2..29882b1 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -164,6 +164,7 @@ static double ckpt_cached_elapsed;
static pg_time_t last_checkpoint_time;
static pg_time_t last_xlog_switch_time;
+static pg_time_t last_xlog_keepalive_time;
/* Prototypes for private functions */
@@ -241,7 +242,7 @@ CheckpointerMain(void)
/*
* Initialize so that first time-driven event happens at the correct time.
*/
- last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);
+ last_xlog_keepalive_time = last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);
/*
* Create a resource owner to keep track of our resources (currently only
@@ -546,6 +547,7 @@ CheckpointerMain(void)
/*
* CheckArchiveTimeout -- check for archive_timeout and switch xlog files
+ * or write keepalive files
*
* This will switch to a new WAL file and force an archive file write
* if any activity is recorded in the current WAL file, including just
@@ -556,47 +558,83 @@ CheckArchiveTimeout(void)
{
pg_time_t now;
pg_time_t last_time;
+ bool switched = false;
- if (XLogArchiveTimeout <= 0 || RecoveryInProgress())
+ if (RecoveryInProgress())
return;
now = (pg_time_t) time(NULL);
+ if (XLogArchiveTimeout > 0)
+ {
+ /* First we do a quick check using possibly-stale local state. */
+ if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
+ {
+ /*
+ * Update local state ... note that last_xlog_switch_time is the last time
+ * a switch was performed *or requested*.
+ */
+ last_time = GetLastSegSwitchTime();
+
+ last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
+
+ /* Now we can do the real check */
+ if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
+ {
+ XLogRecPtr switchpoint;
+
+ /* OK, it's time to switch */
+ switchpoint = RequestXLogSwitch();
+
+ /*
+ * If the returned pointer points exactly to a segment boundary,
+ * assume nothing happened.
+ */
+ if ((switchpoint.xrecoff % XLogSegSize) != 0)
+ ereport(DEBUG1,
+ (errmsg("transaction log switch forced (archive_timeout=%d)",
+ XLogArchiveTimeout)));
+
+ /*
+ * Update state in any case, so we don't retry constantly when the
+ * system is idle.
+ */
+ last_xlog_switch_time = now;
+ switched = true;
+ }
+ }
+ }
+
+ if (switched || !XLogArchiveKeepaliveCommandSet())
+ return;
+
/* First we do a quick check using possibly-stale local state. */
- if ((int) (now - last_xlog_switch_time) < XLogArchiveTimeout)
+ if ((int) (now - last_xlog_switch_time) < XLogArchiveKeepaliveTimeout)
return;
/*
- * Update local state ... note that last_xlog_switch_time is the last time
- * a switch was performed *or requested*.
+ * Refresh local state; the archive_timeout path above may have skipped it.
*/
- last_time = GetLastSegSwitchTime();
-
- last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
+ last_time = GetLastSegSwitchTime();
+ last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
/* Now we can do the real check */
- if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
- {
- XLogRecPtr switchpoint;
+ if ((int) (now - last_xlog_switch_time) < XLogArchiveKeepaliveTimeout)
+ return;
- /* OK, it's time to switch */
- switchpoint = RequestXLogSwitch();
+ if ((int) (now - last_xlog_keepalive_time) < XLogArchiveKeepaliveTimeout)
+ return;
- /*
- * If the returned pointer points exactly to a segment boundary,
- * assume nothing happened.
- */
- if ((switchpoint.xrecoff % XLogSegSize) != 0)
- ereport(DEBUG1,
- (errmsg("transaction log switch forced (archive_timeout=%d)",
- XLogArchiveTimeout)));
+ /*
+ * Write a keepalive file for archive_keepalive_command
+ */
+ XLogWriteKeepaliveFile();
- /*
- * Update state in any case, so we don't retry constantly when the
- * system is idle.
- */
- last_xlog_switch_time = now;
- }
+ /*
+ * We don't log a message to say keepalive sent
+ */
+
+ last_xlog_keepalive_time = now;
}
/*
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 37fc735..e8c19bb 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -51,7 +51,8 @@
* Timer definitions.
* ----------
*/
-#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the
+/* XXX change only for testing */
+#define PGARCH_AUTOWAKE_INTERVAL 10 /* How often to force a poll of the
* archive status directory; in
* seconds. */
#define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
@@ -108,10 +109,14 @@ static void ArchSigTermHandler(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
static void pgarch_waken_stop(SIGNAL_ARGS);
static void pgarch_MainLoop(void);
-static void pgarch_ArchiverCopyLoop(void);
+static void pgarch_ArchiverCopyLoop(bool timedout);
static bool pgarch_archiveXlog(char *xlog);
+static void pgarch_archiveKeepalive(void);
static bool pgarch_readyXlog(char *xlog);
static void pgarch_archiveDone(char *xlog);
+static void constructArchiveCommand(char *archcmd, const char *archcmdtemplate,
+ const char *filepath, const char *filename);
+static bool executeArchiveCommand(const char *archcmd, const char *description);
/* ------------------------------------------------------------
@@ -351,6 +356,7 @@ pgarch_MainLoop(void)
{
pg_time_t last_copy_time = 0;
bool time_to_stop;
+ bool timedout = false;
/*
* We run the copy loop immediately upon entry, in case there are
@@ -401,7 +407,8 @@ pgarch_MainLoop(void)
if (wakened || time_to_stop)
{
wakened = false;
- pgarch_ArchiverCopyLoop();
+ pgarch_ArchiverCopyLoop(timedout);
+ timedout = false;
last_copy_time = time(NULL);
}
@@ -424,7 +431,10 @@ pgarch_MainLoop(void)
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
timeout * 1000L);
if (rc & WL_TIMEOUT)
+ {
+ timedout = true;
wakened = true;
+ }
}
else
wakened = true;
@@ -444,9 +454,10 @@ pgarch_MainLoop(void)
* Archives all outstanding xlogs then returns
*/
static void
-pgarch_ArchiverCopyLoop(void)
+pgarch_ArchiverCopyLoop(bool timedout)
{
char xlog[MAX_XFN_CHARS + 1];
+ bool sentfile = false;
/*
* loop through all xlogs with archive_status of .ready and archive
@@ -486,6 +497,8 @@ pgarch_ArchiverCopyLoop(void)
{
ereport(WARNING,
(errmsg("archive_mode enabled, yet archive_command is not set")));
+ if (!sentfile && timedout)
+ pgarch_archiveKeepalive();
return;
}
@@ -493,6 +506,7 @@ pgarch_ArchiverCopyLoop(void)
{
/* successful */
pgarch_archiveDone(xlog);
+ sentfile = true;
break; /* out of inner retry loop */
}
else
@@ -508,151 +522,117 @@ pgarch_ArchiverCopyLoop(void)
}
}
}
+
+ if (!sentfile && timedout)
+ pgarch_archiveKeepalive();
}
/*
- * pgarch_archiveXlog
- *
- * Invokes system(3) to copy one archive file to wherever it should go
- *
- * Returns true if successful
+ * pgarch_archiveXlog - runs archive_command for WAL file "xlog"; true on success
*/
static bool
pgarch_archiveXlog(char *xlog)
{
char xlogarchcmd[MAXPGPATH];
- char pathname[MAXPGPATH];
char activitymsg[MAXFNAMELEN + 16];
- char *dp;
- char *endp;
- const char *sp;
- int rc;
+ char xlogfilepath[MAXPGPATH];
+
+ snprintf(xlogfilepath, MAXPGPATH, XLOGDIR "/%s", xlog);
+
+ constructArchiveCommand(xlogarchcmd, XLogArchiveCommand,
+ xlogfilepath, xlog); /* %p -> xlogfilepath, %f -> xlog */
+
+ /* Report archive activity in PS display */
+ snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
+ set_ps_display(activitymsg, false);
+
+ if (!executeArchiveCommand(xlogarchcmd, "archive command"))
+ return false; /* the failure was already reported by the callee */
+
+ ereport(DEBUG1,
+ (errmsg("archived transaction log file \"%s\"", xlog)));
+
+ snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
+ set_ps_display(activitymsg, false);
+
+ return true;
+}
+
+/*
+ * pgarch_archiveKeepalive - archive the newest keepalive file, drop older ones
+ */
+static void
+pgarch_archiveKeepalive(void)
+{
+#define LENGTH_DOT_KEEPALIVE 10
+ char keepalivearchcmd[MAXPGPATH];
+ char keepalivepath[MAXPGPATH];
+ char XLogArchiveStatusDir[MAXPGPATH];
+ char keepalive[MAXPGPATH]; /* holds <segment>.<stamp>.keepalive in full */
+ DIR *rldir;
+ struct dirent *rlde;
+ bool found = false;
- snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
+ if (!XLogArchiveKeepaliveCommandSet())
+ return;
/*
- * construct the command to be executed
+ * Scan the archive status directory for *.keepalive files, keeping the
+ * name that sorts last and unlinking every other one. Only a single
+ * file is expected on the vast majority of calls, so no attempt is
+ * made to optimise the scan.
*/
- dp = xlogarchcmd;
- endp = xlogarchcmd + MAXPGPATH - 1;
- *endp = '\0';
- for (sp = XLogArchiveCommand; *sp; sp++)
+ snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
+ rldir = AllocateDir(XLogArchiveStatusDir);
+ if (rldir == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open archive status directory \"%s\": %m",
+ XLogArchiveStatusDir)));
+
+ while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
{
- if (*sp == '%')
+ int basenamelen = (int) strlen(rlde->d_name) - LENGTH_DOT_KEEPALIVE; /* < 0 for short names like "." */
+
+ if (basenamelen >= 0 && strcmp(rlde->d_name + basenamelen, ".keepalive") == 0)
{
- switch (sp[1])
+ if (!found)
{
- case 'p':
- /* %p: relative path of source file */
- sp++;
- strlcpy(dp, pathname, endp - dp);
- make_native_path(dp);
- dp += strlen(dp);
- break;
- case 'f':
- /* %f: filename of source file */
- sp++;
- strlcpy(dp, xlog, endp - dp);
- dp += strlen(dp);
- break;
- case '%':
- /* convert %% to a single % */
- sp++;
- if (dp < endp)
- *dp++ = *sp;
- break;
- default:
- /* otherwise treat the % as not special */
- if (dp < endp)
- *dp++ = *sp;
- break;
+ strlcpy(keepalive, rlde->d_name, sizeof(keepalive));
+ found = true;
+ }
+ else
+ {
+ if (strcmp(rlde->d_name, keepalive) > 0)
+ {
+ snprintf(keepalivepath, MAXPGPATH, "%s/%s", XLogArchiveStatusDir, keepalive);
+ unlink(keepalivepath); /* superseded by the entry just read */
+ strlcpy(keepalive, rlde->d_name, sizeof(keepalive));
+ }
+ else
+ {
+ snprintf(keepalivepath, MAXPGPATH, "%s/%s", XLogArchiveStatusDir, rlde->d_name);
+ unlink(keepalivepath); /* older than the one already held */
+ }
}
- }
- else
- {
- if (dp < endp)
- *dp++ = *sp;
}
}
- *dp = '\0';
-
- ereport(DEBUG3,
- (errmsg_internal("executing archive command \"%s\"",
- xlogarchcmd)));
-
- /* Report archive activity in PS display */
- snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
- set_ps_display(activitymsg, false);
+ FreeDir(rldir);
- rc = system(xlogarchcmd);
- if (rc != 0)
- {
- /*
- * If either the shell itself, or a called command, died on a signal,
- * abort the archiver. We do this because system() ignores SIGINT and
- * SIGQUIT while waiting; so a signal is very likely something that
- * should have interrupted us too. If we overreact it's no big deal,
- * the postmaster will just start the archiver again.
- *
- * Per the Single Unix Spec, shells report exit status > 128 when a
- * called command died on a signal.
- */
- int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
+ if (!found)
+ return;
- if (WIFEXITED(rc))
- {
- ereport(lev,
- (errmsg("archive command failed with exit code %d",
- WEXITSTATUS(rc)),
- errdetail("The failed archive command was: %s",
- xlogarchcmd)));
- }
- else if (WIFSIGNALED(rc))
- {
-#if defined(WIN32)
- ereport(lev,
- (errmsg("archive command was terminated by exception 0x%X",
- WTERMSIG(rc)),
- errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
- errdetail("The failed archive command was: %s",
- xlogarchcmd)));
-#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
- ereport(lev,
- (errmsg("archive command was terminated by signal %d: %s",
- WTERMSIG(rc),
- WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
- errdetail("The failed archive command was: %s",
- xlogarchcmd)));
-#else
- ereport(lev,
- (errmsg("archive command was terminated by signal %d",
- WTERMSIG(rc)),
- errdetail("The failed archive command was: %s",
- xlogarchcmd)));
-#endif
- }
- else
- {
- ereport(lev,
- (errmsg("archive command exited with unrecognized status %d",
- rc),
- errdetail("The failed archive command was: %s",
- xlogarchcmd)));
- }
+ snprintf(keepalivepath, MAXPGPATH, "%s/%s", XLogArchiveStatusDir, keepalive);
+ constructArchiveCommand(keepalivearchcmd, XLogArchiveKeepaliveCommand,
+ keepalivepath, keepalive);
+ if (!executeArchiveCommand(keepalivearchcmd, "archive keepalive command"))
+ return;
- snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
- set_ps_display(activitymsg, false);
+ unlink(keepalivepath);
- return false;
- }
ereport(DEBUG1,
- (errmsg("archived transaction log file \"%s\"", xlog)));
-
- snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
- set_ps_display(activitymsg, false);
-
- return true;
+ (errmsg("archived keepalive file \"%s\"", keepalive)));
}
/*
@@ -753,3 +733,138 @@ pgarch_archiveDone(char *xlog)
errmsg("could not rename file \"%s\" to \"%s\": %m",
rlogready, rlogdone)));
}
+
+/*
+ * Expands %p, %f and %% in archcmdtemplate into archcmd for the given file
+ */
+static void
+constructArchiveCommand(char *archcmd, const char *archcmdtemplate,
+ const char *filepath, const char *filename)
+{
+ char *dp;
+ char *endp;
+ const char *sp;
+
+ /*
+ * archcmd must have room for MAXPGPATH bytes; longer output is truncated
+ */
+ dp = archcmd;
+ endp = archcmd + MAXPGPATH - 1;
+ *endp = '\0';
+
+ for (sp = archcmdtemplate; *sp; sp++)
+ {
+ if (*sp == '%')
+ {
+ switch (sp[1])
+ {
+ case 'p':
+ /* %p: relative path of source file */
+ sp++;
+ strlcpy(dp, filepath, endp - dp);
+ make_native_path(dp);
+ dp += strlen(dp);
+ break;
+ case 'f':
+ /* %f: filename of source file */
+ sp++;
+ strlcpy(dp, filename, endp - dp);
+ dp += strlen(dp);
+ break;
+ case '%':
+ /* convert %% to a single % */
+ sp++;
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ default:
+ /* otherwise treat the % as not special */
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ }
+ }
+ else
+ {
+ if (dp < endp)
+ *dp++ = *sp; /* ordinary character: copy while room remains */
+ }
+ }
+ *dp = '\0';
+}
+
+/*
+ * Invokes system(3) to execute archcmd; "description" names it in messages
+ * Failures are reported at LOG, or FATAL if a signal appears to be involved
+ * Returns true if the command returned 0, false after a LOG-level failure
+ */
+static bool
+executeArchiveCommand(const char *archcmd, const char *description)
+{
+ int rc;
+
+ ereport(DEBUG3,
+ (errmsg_internal("executing %s \"%s\"",
+ description, archcmd)));
+
+ rc = system(archcmd);
+ if (rc != 0)
+ {
+ /*
+ * If either the shell itself, or a called command, died on a signal,
+ * abort the archiver. We do this because system() ignores SIGINT and
+ * SIGQUIT while waiting; so a signal is very likely something that
+ * should have interrupted us too. If we overreact it's no big deal,
+ * the postmaster will just start the archiver again.
+ *
+ * Per the Single Unix Spec, shells report exit status > 128 when a
+ * called command died on a signal.
+ */
+ int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
+
+ if (WIFEXITED(rc))
+ {
+ ereport(lev,
+ (errmsg("%s failed with exit code %d",
+ description, WEXITSTATUS(rc)),
+ errdetail("The failed archive command was: %s",
+ archcmd)));
+ }
+ else if (WIFSIGNALED(rc))
+ {
+#if defined(WIN32)
+ ereport(lev,
+ (errmsg("%s was terminated by exception 0x%X",
+ description, WTERMSIG(rc)),
+ errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
+ errdetail("The failed archive command was: %s",
+ archcmd)));
+#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
+ ereport(lev,
+ (errmsg("%s was terminated by signal %d: %s",
+ description, WTERMSIG(rc),
+ WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
+ errdetail("The failed archive command was: %s",
+ archcmd)));
+#else
+ ereport(lev,
+ (errmsg("%s was terminated by signal %d",
+ description, WTERMSIG(rc)),
+ errdetail("The failed archive command was: %s",
+ archcmd)));
+#endif
+ }
+ else
+ {
+ ereport(lev,
+ (errmsg("%s exited with unrecognized status %d",
+ description, rc),
+ errdetail("The failed archive command was: %s",
+ archcmd)));
+ }
+
+ return false; /* reached only when the report above was at LOG level */
+ }
+
+ return true;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5c910dd..16bd77f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -189,6 +189,7 @@ static bool check_timezone_abbreviations(char **newval, void **extra, GucSource
static void assign_timezone_abbreviations(const char *newval, void *extra);
static void pg_timezone_abbrev_initialize(void);
static const char *show_archive_command(void);
+static const char *show_archive_keepalive_command(void);
static void assign_tcp_keepalives_idle(int newval, void *extra);
static void assign_tcp_keepalives_interval(int newval, void *extra);
static void assign_tcp_keepalives_count(int newval, void *extra);
@@ -2531,6 +2532,16 @@ static struct config_string ConfigureNamesString[] =
},
{
+ {"archive_keepalive_command", PGC_SIGHUP, WAL_ARCHIVING,
+ gettext_noop("Sets the shell command that will be called to send a keepalive file."),
+ NULL
+ },
+ &XLogArchiveKeepaliveCommand,
+ "",
+ NULL, NULL, show_archive_keepalive_command
+ },
+
+ {
{"client_encoding", PGC_USERSET, CLIENT_CONN_LOCALE,
gettext_noop("Sets the client's character set encoding."),
NULL,
@@ -8490,6 +8501,15 @@ show_archive_command(void)
return "(disabled)";
}
+static const char *
+show_archive_keepalive_command(void)
+{
+ /* GUC show hook: hide the command while archiving is inactive */
+ if (!XLogArchivingActive())
+ return "(disabled)";
+ return XLogArchiveKeepaliveCommand;
+}
+
static void
assign_tcp_keepalives_idle(int newval, void *extra)
{
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 315db46..085d5bb 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -189,6 +189,10 @@
# placeholders: %p = path of file to archive
# %f = file name only
# e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'
+#archive_keepalive_command = '' # command to use to archive keepalive message files
+ # placeholders: %p = path of keepalive file
+ # %f = keepalive file name only
+ # e.g. 'cp %p /mnt/server/archivedir/%f'
#archive_timeout = 0 # force a logfile segment switch after this
# number of seconds; 0 disables
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1ddf4bf..63174c5 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -191,6 +191,8 @@ extern int XLOGbuffers;
extern int XLogArchiveTimeout;
extern bool XLogArchiveMode;
extern char *XLogArchiveCommand;
+extern char *XLogArchiveKeepaliveCommand;
+extern int XLogArchiveKeepaliveTimeout;
extern bool EnableHotStandby;
extern bool log_checkpoints;
@@ -205,6 +207,7 @@ extern int wal_level;
#define XLogArchivingActive() (XLogArchiveMode && wal_level >= WAL_LEVEL_ARCHIVE)
#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
+#define XLogArchiveKeepaliveCommandSet() (XLogArchiveKeepaliveCommand[0] != '\0')
/*
* Is WAL-logging necessary for archival or log-shipping, or can we skip
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index db6380f..51e6558 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -233,6 +233,9 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
#define StatusFilePath(path, xlog, suffix) \
snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix)
+#define KeepaliveFilePath(path, kfname, timestr) \
+ snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s.%s.keepalive", kfname, timestr)
+
#define BackupHistoryFileName(fname, tli, log, seg, offset) \
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, log, seg, offset)
@@ -258,6 +261,11 @@ typedef struct RmgrData
extern const RmgrData RmgrTable[];
/*
+ * Exported to support writing keepalive files from checkpointer
+ */
+extern void XLogWriteKeepaliveFile(void);
+
+/*
* Exported to support xlog switching from checkpointer
*/
extern pg_time_t GetLastSegSwitchTime(void);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers