On Mon, 2004-06-28 at 21:58, Simon Riggs wrote:
> PITR Archive Recovery, 28 June 2004
>
> What's in this patch?
>
> - All of what was in previous PITR Archival patch, including reworking
> of all the archiver startup/shutdown code to match that of pgstat
> - New code to perform Archive Recovery mode, which streams xlogs
> straight from archive to allow "infinite" recovery
>
> [This is a full, working patch for discussion and testing, with a few
> days left before 7.5dev freeze for changes and corrections]
>
err...and these additional files are REQUIRED also:
src/backend/postmaster/pgarch.c
src/include/pgarch.h
...implemented to match the locations of pgstat code
Best regards, Simon Riggs
/* ----------
* pgarch.c
*
* PostgreSQL transaction log archiver
*
* All functions relating to archiver are included here
*
* - All functions executed by archiver process
*
* - Archiver is forked from postmaster, and the two
* processes then communicate using signals. All functions
* executed by postmaster are included in this file.
*
* Simon Riggs [EMAIL PROTECTED]
*
* ----------
*/
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <errno.h>
#include <signal.h>
#include <time.h>
#include "pgarch.h"
#include "storage/fd.h"
#include "miscadmin.h"
#include "access/xlog.h"
#include "libpq/pqsignal.h"
#include "storage/ipc.h"
#include "postmaster/postmaster.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "utils/ps_status.h"
/* ----------
* GUC parameters
* ----------
*/
/* When false, pgarch_start() returns at once and no archiver is launched */
bool XLogArchiveMode;
/* When true, extra "arch: ..." progress messages are emitted at LOG level */
bool XLogArchiveDEBUG;
/* Archive destination; substituted for the second %s of XLogArchiveProgram */
char *XLogArchiveDest;
/*
* Command template with two %s placeholders (xlog path, then
* XLogArchiveDest); pgarch_archiveXlog() expands it and runs it via system()
*/
char *XLogArchiveProgram;
/* ----------
* Other global variables
* ----------
*/
/*
* Starts out false and is not modified by the code visible in this file;
* presumably maintained by the postmaster -- TODO confirm
*/
bool pgarch_is_running = false;
/* ----------
* Local data
* ----------
*/
/* "<DataDir>/pg_xlog"; filled in by pgarch_Main() */
static char XLogDir[MAXPGPATH];
/* "<XLogDir>/archive_status"; filled in by pgarch_Main() */
static char XLogArchiveStatusDir[MAXPGPATH];
/* Time of the last launch attempt; used by pgarch_start() to throttle respawns */
static time_t last_pgarch_start_time;
/* Timestamp recorded at the end of each pgarch_MainWaitLoop() pass */
static time_t last_pgarch_archivercopy_time;
/* Scratch "current time" used by pgarch_MainWaitLoop() */
static time_t curtime;
/* True while pgarch_ArchiverCopyLoop() is executing */
static bool archiving_in_progress = false;
/* ----------
* Local function forward declarations
* ----------
*/
/* Archiver process entry point */
NON_EXEC_STATIC void pgarch_Main(int argc, char *argv[]);
/* Signal handlers installed by pgarch_Main() (SIGQUIT and SIGUSR1) */
static void pgarch_exit(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
/* Archiver work loops */
static void pgarch_MainWaitLoop(void);
static void pgarch_ArchiverCopyLoop(void);
/* Per-file helpers: run the archive command, find the next file, mark it done */
static bool pgarch_archiveXlog(char *xlog, bool testmode);
static bool pgarch_readyXlog(char *xlog);
static bool pgarch_archiveDone(char *xlog);
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
*/
/* ----------
* pgarch_start() -
*
* Called from postmaster at startup
* or after the previous archiver died
*
* Note: if fail, we will be called again from the postmaster main loop.
* ----------
*/
int
pgarch_start(void)
{
time_t curtime;
pid_t pgArchPid;
/*
* Do nothing if no archiver needed
*/
if (!XLogArchiveMode)
return 0;
/*
* Do nothing if too soon since last archiver start. This is a
* safety valve to protect against continuous respawn attempts if the
* archiver is dying immediately at launch. Note that since we will
* be re-called from the postmaster main loop, we will get another
* chance later.
*/
curtime = time(NULL);
if ((unsigned int) (curtime - last_pgarch_start_time) <
(unsigned int) PGARCH_RESTART_INTERVAL) {
return 0;
}
last_pgarch_start_time = curtime;
fflush(stdout);
fflush(stderr);
#ifdef __BEOS__
/* Specific beos actions before backend startup */
beos_before_backend_startup();
#endif
#ifdef EXEC_BACKEND
switch ((pgArchPid = (int) pgarch_forkexec(STAT_PROC_BUFFER)))
#else
switch ((pgArchPid = (int) fork()))
#endif
{
case -1:
#ifdef __BEOS__
/* Specific beos actions */
beos_backend_startup_failed();
#endif
ereport(PANIC,
(errmsg("could not fork archiver")));
return 0;
#ifndef EXEC_BACKEND
case 0:
/* in postmaster child ... */
#ifdef __BEOS__
/* Specific beos actions after backend startup */
beos_backend_startup();
#endif
/* Close the postmaster's sockets */
ClosePostmasterPorts();
/* Drop our connection to postmaster's shared memory, as well */
PGSharedMemoryDetach();
pgarch_Main(0, NULL);
break;
#endif
default:
return (int) pgArchPid;
}
/* shouldn't get here */
return 0;
}
/* ------------------------------------------------------------
* Local functions called by archiver follow
* ------------------------------------------------------------
*/
/* ----------
 * pgarch_Main() -
 *
 *	Entry point of the archiver process.  Sets up the xlog paths, runs a
 *	one-off test of the configured archive command against a scratch
 *	file, installs the archiver's signal handlers, archives anything
 *	already waiting, and then settles into the main wait loop.  Never
 *	returns to the caller: it finishes with exit(0).
 *
 *	The argc/argv parameters are valid only in EXEC_BACKEND case.
 * ----------
 */
NON_EXEC_STATIC void
pgarch_Main(int argc, char *argv[])
{
	char		probe_name[32] = "0000TEST0000TEST";
	char		probe_path[MAXPGPATH];
	FILE	   *probe_fp;

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	/* reset MyProcPid */
	MyProcPid = getpid();

	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

	/* Init XLOG file paths */
	snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
	snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);

	/*
	 * Check that archive_program and archive_dest are usable by archiving
	 * an empty scratch file created in pg_xlog.  archive_status is not
	 * involved: this exercises the archive command directly.
	 */
	snprintf(probe_path, MAXPGPATH, "%s/%s", XLogDir, probe_name);
	unlink(probe_path);			/* discard any leftover from an earlier run */

	probe_fp = AllocateFile(probe_path, "w");
	if (!probe_fp)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write test file to pg_xlog \"%s\" ",
						probe_path)));
	FreeFile(probe_fp);

	if (!pgarch_archiveXlog(probe_name, true))
		ereport(PANIC,
				(errmsg("archive_program test failed...please correct this")));

	unlink(probe_path);

	/*
	 * Signal setup: everything the postmaster normally acts on is ignored,
	 * except SIGQUIT (exit) and SIGUSR1 (wake up and archive).
	 */
	pqsignal(SIGHUP, SIG_IGN);
	pqsignal(SIGTERM, SIG_IGN);
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGQUIT, pgarch_exit);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, pgarch_waken);
	pqsignal(SIGUSR2, SIG_IGN);
	pqsignal(SIGCHLD, SIG_IGN);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);
	PG_SETMASK(&UnBlockSig);

#ifdef EXEC_BACKEND
	/* NOTE(review): a pgstat routine is called here -- confirm it is intended */
	pgstat_parseArgs(argc,argv);
#endif

	/*
	 * Identify via ps
	 */
	init_ps_display("archiver process", "", "");
	set_ps_display("");

	elog(LOG, "archiver started");

	/*
	 * On startup there may already be files waiting to be archived, e.g.
	 * after a (normal) fast shutdown or after the archiver died with some
	 * error, so make one pass before starting to wait.
	 */
	pgarch_ArchiverCopyLoop();

	pgarch_MainWaitLoop();

	exit(0);
}
/*
 * pgarch_exit() -
 *
 *	Signal handler installed for SIGQUIT by pgarch_Main(): ends the
 *	archiver process with exit status 0.
 */
static void
pgarch_exit(SIGNAL_ARGS)
{
	exit(0);
}
#ifdef NOT_USED
/* Set once pgarch_shutdown() has been invoked */
static bool shutdowncalled = false;

/*
 * pgarch_shutdown() -
 *
 *	Experimental (compiled out): lets the archiver complete its work on a
 *	smart shutdown.
 *
 *	Signals are deliberately not blocked here, so that a later, upgraded
 *	(fast/immediate) shutdown request can override this behaviour.
 */
static void
pgarch_shutdown(SIGNAL_ARGS)
{
	shutdowncalled = true;

	/*
	 * If a copy loop is already running, just let it complete.  Otherwise
	 * this is a Smart or Fast Shutdown, so try archiving one last time.
	 * That matches what a fast shutdown does elsewhere: a shutdown
	 * checkpoint is still written and log files are recycled, so the last
	 * few .ready xlogs should be archived away before that happens.
	 *
	 * XXX archive partial xlog file, even though not full; we would have
	 * to remember which one was next to handle that correctly on
	 * restore/recover.
	 */
	if (!archiving_in_progress)
		pgarch_ArchiverCopyLoop();
}
#endif
/*
 * pgarch_waken() -
 *
 *	Signal handler installed for SIGUSR1: runs one archive pass.
 *
 *	NOTE(review): the entire copy loop, including elog() and system(),
 *	executes in signal-handler context.  Those calls are not
 *	async-signal-safe; consider having the handler set a flag that the
 *	main wait loop acts on instead.
 */
static void
pgarch_waken(SIGNAL_ARGS)
{
	if (XLogArchiveDEBUG)
	{
		elog(LOG, "arch: archiver woken by SIGUSR1");
	}

	pgarch_ArchiverCopyLoop();
}
/* ----------
 * pgarch_MainWaitLoop() -
 *
 *	Main wait loop for archiver.  Returns once PostmasterIsAlive()
 *	reports that the postmaster has gone away.
 *
 *	In principle the archiver only needs to wait for a signal, so
 *	pause(3) would do.  But the archiver exists to protect data, so it
 *	also wakes up occasionally and checks for work on its own
 *	initiative.  That should never be necessary, yet being cautious
 *	costs little.
 * ----------
 */
static void
pgarch_MainWaitLoop(void)
{
	for (;;)
	{
		unsigned int since_last;

		/*
		 * Sleep for a while.  A signal may cut the sleep short; if the full
		 * interval has passed since the last recorded time, poll for work
		 * anyway, just to be sure.
		 */
		sleep(PGARCH_AUTOWAKE_INTERVAL);

		curtime = time(NULL);
		since_last = (unsigned int) (curtime - last_pgarch_archivercopy_time);
		if (since_last >= (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
			pgarch_ArchiverCopyLoop();

		/* recorded on every pass, whether or not we polled */
		last_pgarch_archivercopy_time = curtime;

		if (!PostmasterIsAlive(true))
			break;
	}
}
/* ----------
* pgarch_ArchiverCopyLoop() -
*
* Archives all outstanding xlogs then exits
* ----------
*/
static void
pgarch_ArchiverCopyLoop(void)
{
char xlog[32];
int try = 1;
/*
* We continue to trap for all signals, except for the
* one that brought us here in the first place. We
* loop through all transaction log files that require
* archiving, so no need to be interrupted to continue
* that task
*/
pqsignal(SIGUSR1, SIG_IGN);
archiving_in_progress = true;
/*
* loop through all xlogs with archive_status of .ready
* then archive them...mostly we expect this to be a single
* file, though not just a simple loop because we may add new
* files onto the list of those that need archiving while we
* are still copying earlier archives
*/
if (XLogArchiveDEBUG) {
elog(LOG, "arch: starting archive loop...");
}
while (pgarch_readyXlog(xlog) && try <= NUM_ARCHIVE_RETRIES)
{
if (pgarch_archiveXlog(xlog, false)) {
/*
* then update archive_status to show completion
*/
if (!pgarch_archiveDone(xlog)) {
ereport(LOG,(errmsg("arch: archive_status completion error")));
}
} else {
ereport(LOG,(errmsg("arch: archive copy error")));
}
/* if we have copied one file, we do not wait:
immediately loop back round and check to see if another is there.
Hopefully, we're quick enough....so we fall out and sleep again
*/
try++;
}
archiving_in_progress = false;
pqsignal(SIGUSR1, pgarch_waken);
return;
}
/*
 * pgarch_archiveXlog
 *
 * Invokes system(3) to copy one archive file to XLogArchiveDest.
 *
 * xlog is the bare xlog file name, assumed to be a correct filename; it is
 * expanded to a full path under XLogDir.  XLogArchiveProgram is a command
 * template in which the first "%s" is replaced by that path and the second
 * "%s" by XLogArchiveDest.  "%%" yields a literal '%'.  Any other text,
 * including further '%' sequences, is copied unchanged.  The template is
 * deliberately not handed to printf-style formatting, so a mistyped
 * setting cannot cause undefined behaviour.
 *
 * When testmode is true the success message is suppressed.
 *
 * Returns true if the command ran and returned 0; false if the settings
 * are missing, the expanded command does not fit in MAXPGPATH, or the
 * command reported failure.
 */
static bool
pgarch_archiveXlog(char *xlog, bool testmode)
{
	char		xlogarchcmd[MAXPGPATH];
	char		xlogpath[MAXPGPATH];
	const char *subst[2];
	const char *src;
	char	   *dst = xlogarchcmd;
	char	   *dstend = xlogarchcmd + sizeof(xlogarchcmd) - 1;
	int			nsubst = 0;
	int			rc;

	/* An unset or empty command must not be reported as a successful copy */
	if (XLogArchiveProgram == NULL || XLogArchiveProgram[0] == '\0' ||
		XLogArchiveDest == NULL)
	{
		elog(LOG, "arch: archive_program or archive_dest is not set");
		return false;
	}

	snprintf(xlogpath, MAXPGPATH, "%s/%s", XLogDir, xlog);
	subst[0] = xlogpath;
	subst[1] = XLogArchiveDest;

	/*
	 * Build the command line, refusing (rather than truncating) anything
	 * that does not fit: a truncated command could do the wrong thing.
	 */
	for (src = XLogArchiveProgram; *src != '\0'; src++)
	{
		if (src[0] == '%' && src[1] == 's' && nsubst < 2)
		{
			size_t		len = strlen(subst[nsubst]);

			if (len > (size_t) (dstend - dst))
				goto too_long;
			memcpy(dst, subst[nsubst], len);
			dst += len;
			nsubst++;
			src++;				/* skip the 's' */
		}
		else
		{
			if (src[0] == '%' && src[1] == '%')
				src++;			/* "%%" collapses to a single '%' */
			if (dst >= dstend)
				goto too_long;
			*dst++ = *src;
		}
	}
	*dst = '\0';

	if (XLogArchiveDEBUG)
		elog(LOG, "arch: system (%s)", xlogarchcmd);

	rc = system(xlogarchcmd);
	if (rc != 0)
	{
		elog(LOG, "arch: system (%s), return code=%i", xlogarchcmd, rc);
		return false;
	}

	if (!testmode)
		elog(LOG, "archived transaction log file \"%s\"", xlog);

	return true;

too_long:
	elog(LOG, "arch: archive command is too long (limit %d bytes)",
		 (int) sizeof(xlogarchcmd) - 1);
	return false;
}
/*
 * pgarch_readyXlog
 *
 * Find the oldest xlog file that has not yet been archived and copy its
 * 16-character name into xlog.  Returns true if one was found, false if no
 * .ready status file exists.
 *
 * Nothing records that archiving of the returned file is now in progress
 * [so this would need extending if multiple concurrent archival tasks were
 * created].  If a failure occurs, the file is simply copied again in full
 * at the next opportunity.
 *
 * Returning the oldest file matters, because xlogs must be archived in the
 * order they were written:
 *	1) to maintain the sequential chain of xlogs required for recovery
 *	2) because the oldest ones will sooner become candidates for
 *	   recycling at time of checkpoint
 */
static bool
pgarch_readyXlog(char *xlog)
{
	char		oldest[32];
	bool		found = false;
	DIR		   *statusdir;
	struct dirent *entry;

	/*
	 * Scan the archive_status directory for entries with the .ready
	 * suffix, remembering the one that sorts lowest.  This could be
	 * optimised, but a single file is expected on the vast majority of
	 * calls.
	 */
	statusdir = AllocateDir(XLogArchiveStatusDir);
	if (statusdir == NULL)
		elog(PANIC, "cannot access archive_status");

	while ((entry = readdir(statusdir)) != NULL)
	{
		const char *fname = entry->d_name;

		/* want exactly 16 upper-case hex digits followed by ".ready" */
		if (strlen(fname) != 22)
			continue;
		if (strspn(fname, "0123456789ABCDEF") != 16)
			continue;
		if (strcmp(fname + 16, ".ready") != 0)
			continue;

		if (!found || strcmp(fname, oldest) <= 0)
		{
			strcpy(oldest, fname);
			found = true;
		}
	}
	FreeDir(statusdir);

	if (!found)
		return false;

	if (XLogArchiveDEBUG)
		elog(LOG, "arch: found archive_status file...%s", oldest);

	/* hand back the name without its ".ready" suffix */
	xlog[0] = '\0';
	strncat(xlog, oldest, 16);
	return true;
}
/*
 * pgarch_archiveDone
 *
 * Record that an xlog has been successfully archived by renaming its
 * "<xlog>.ready" status file to "<xlog>.done" in the archive_status
 * directory.
 *
 * Returns true on success.  If the rename fails, a WARNING is reported and
 * false is returned.
 */
static bool
pgarch_archiveDone(char *xlog)
{
	char		ready_path[MAXPGPATH];
	char		done_path[MAXPGPATH];

	snprintf(ready_path, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog);
	snprintf(done_path, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog);

	if (rename(ready_path, done_path) < 0)
	{
		ereport(WARNING,(errcode_for_file_access(),
						 errmsg("could not update archive_status for %s",
								ready_path)));
		return false;
	}

	return true;
}
/* ----------
* pgarch.h
*
* Definitions for the PostgreSQL archiver daemon.
*
* ----------
*/
/* ----------
* Timer definitions.
* ----------
*/
#define PGARCH_AUTOWAKE_INTERVAL 600 /* How often the archiver wakes to poll */
/* for work without being signalled; in seconds. */
#define PGARCH_RESTART_INTERVAL 60 /* How often to attempt to restart */
/* a failed archiver; in seconds. */
/* Bounds the archive attempts pgarch_ArchiverCopyLoop() makes in one pass */
#define NUM_ARCHIVE_RETRIES 3
/* ----------
* Functions called from postmaster
* ----------
*/
/* Forks the archiver; returns the child's PID, or 0 if none was started */
extern int pgarch_start(void);
---------------------------(end of broadcast)---------------------------
TIP 6: Have you searched our list archives?
http://archives.postgresql.org