On Mon, 2004-06-28 at 21:58, Simon Riggs wrote: > PITR Archive Recovery, 28 June 2004 > > What's in this patch? > > - All of what was in previous PITR Archival patch, including reworking > of all the archiver startup/shutdown code to match that of pgstat > - New code to perform Archive Recovery mode, which streams xlogs > straight from archive to allow "infinite" recovery > > [This is a full, working patch for discussion and testing, with a few > days left before 7.5dev freeze for changes and corrections] >
Correction: the following additional files are also REQUIRED by the patch: src/backend/postmaster/pgarch.c and src/include/pgarch.h. They are placed to match the locations of the pgstat code. Best regards, Simon Riggs
/* ----------
 * pgarch.c
 *
 *	PostgreSQL transaction log archiver
 *
 *	All functions relating to archiver are included here
 *
 *	- All functions executed by archiver process
 *
 *	- Archiver is forked from postmaster, and the two
 *	  processes then communicate using signals. All functions
 *	  executed by postmaster are included in this file.
 *
 *	Simon Riggs [EMAIL PROTECTED]
 *
 * ----------
 */
#include "postgres.h"

#include <unistd.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <errno.h>
#include <signal.h>
#include <time.h>

#include "pgarch.h"
#include "storage/fd.h"
#include "miscadmin.h"
#include "access/xlog.h"
#include "libpq/pqsignal.h"
#include "storage/ipc.h"
#include "postmaster/postmaster.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "utils/ps_status.h"


/* ----------
 * GUC parameters
 * ----------
 */
bool		XLogArchiveMode;
bool		XLogArchiveDEBUG;
char	   *XLogArchiveDest;
char	   *XLogArchiveProgram;

/* ----------
 * Other global variables
 * ----------
 */
bool		pgarch_is_running = false;

/* ----------
 * Local data
 * ----------
 */
static char XLogDir[MAXPGPATH];
static char XLogArchiveStatusDir[MAXPGPATH];
static time_t last_pgarch_start_time;
static time_t last_pgarch_archivercopy_time;
static time_t curtime;
static bool archiving_in_progress = false;

/* ----------
 * Local function forward declarations
 * ----------
 */
NON_EXEC_STATIC void pgarch_Main(int argc, char *argv[]);
static void pgarch_exit(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
static void pgarch_MainWaitLoop(void);
static void pgarch_ArchiverCopyLoop(void);
static bool pgarch_archiveXlog(char *xlog, bool testmode);
static bool pgarch_readyXlog(char *xlog);
static bool pgarch_archiveDone(char *xlog);


/* ------------------------------------------------------------
 * Public functions called from postmaster follow
 * ------------------------------------------------------------
 */

/* ----------
 * pgarch_start() -
 *
 *	Called from postmaster at startup
 *	or after the previous archiver died
 *
 *	Returns the archiver's PID in the parent when a child was launched,
 *	or 0 when nothing was started (archiving disabled, restart attempted
 *	too soon, or fork failure).
 *
 *	Note: if fail, we will be called again from the postmaster main loop.
 * ----------
 */
int
pgarch_start(void)
{
	time_t		now;
	pid_t		pgArchPid;
	int			save_errno;

	/*
	 * Do nothing if no archiver needed
	 */
	if (!XLogArchiveMode)
		return 0;

	/*
	 * Do nothing if too soon since last archiver start.  This is a
	 * safety valve to protect against continuous respawn attempts if the
	 * archiver is dying immediately at launch. Note that since we will
	 * be re-called from the postmaster main loop, we will get another
	 * chance later.
	 */
	now = time(NULL);
	if ((unsigned int) (now - last_pgarch_start_time) <
		(unsigned int) PGARCH_RESTART_INTERVAL)
		return 0;
	last_pgarch_start_time = now;

	/* Flush stdio so buffered output is not duplicated in the child */
	fflush(stdout);
	fflush(stderr);

#ifdef __BEOS__
	/* Specific beos actions before backend startup */
	beos_before_backend_startup();
#endif

	/*
	 * NOTE(review): pgarch_forkexec() and STAT_PROC_BUFFER are not declared
	 * anywhere in this file; they look carried over from the pgstat code.
	 * Confirm the EXEC_BACKEND path before relying on it.
	 */
#ifdef EXEC_BACKEND
	switch ((pgArchPid = (int) pgarch_forkexec(STAT_PROC_BUFFER)))
#else
	switch ((pgArchPid = (int) fork()))
#endif
	{
		case -1:
			/* keep the fork failure reason intact for the %m below */
			save_errno = errno;
#ifdef __BEOS__
			/* Specific beos actions */
			beos_backend_startup_failed();
#endif
			errno = save_errno;

			/*
			 * Report and carry on: this runs in the postmaster, and the
			 * caller retries later, so a failed fork must not be raised
			 * as PANIC.
			 */
			ereport(LOG,
					(errmsg("could not fork archiver: %m")));
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
#ifdef __BEOS__
			/* Specific beos actions after backend startup */
			beos_backend_startup();
#endif
			/* Close the postmaster's sockets */
			ClosePostmasterPorts();

			/* Drop our connection to postmaster's shared memory, as well */
			PGSharedMemoryDetach();

			pgarch_Main(0, NULL);
			break;
#endif

		default:
			return (int) pgArchPid;
	}

	/* shouldn't get here */
	return 0;
}


/* ------------------------------------------------------------
 * Local functions called by archiver follow
 * ------------------------------------------------------------
 */

/* ----------
 * pgarch_Main() -
 *
 *	Entry point of the archiver process.  Sets up the xlog paths, checks
 *	the archive configuration by archiving a scratch file, installs the
 *	signal handlers, archives anything already outstanding and then
 *	enters the wait loop.  Ends the process with exit(0).
 *
 *	The argc/argv parameters are valid only in EXEC_BACKEND case.
 * ----------
 */
NON_EXEC_STATIC void
pgarch_Main(int argc, char *argv[])
{
	char		testxlog[32] = "0000TEST0000TEST";
	char		testpath[MAXPGPATH];
	FILE	   *rlogFD;

	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */

	MyProcPid = getpid();		/* reset MyProcPid */

	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

	/* Init XLOG file paths */
	snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
	snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);

	/*
	 * Test whether archive_program and archive_dest have been set
	 * correctly in postgresql.conf. This test does *not* look at
	 * archive_status, since it is a direct test of archival
	 */
	snprintf(testpath, MAXPGPATH, "%s/%s", XLogDir, testxlog);

	/* remove any scratch file left behind by an earlier attempt */
	unlink(testpath);

	rlogFD = AllocateFile(testpath, "w");
	if (rlogFD == NULL)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write test file to pg_xlog \"%s\" ",
						testpath)));
	FreeFile(rlogFD);

	if (!pgarch_archiveXlog(testxlog, true))
		ereport(PANIC,
				(errmsg("archive_program test failed...please correct this")));

	unlink(testpath);

	/*
	 * Ignore all signals usually bound to some action in the postmaster,
	 * except SIGQUIT (exit) and SIGUSR1 (wake up and archive).
	 */
	pqsignal(SIGHUP, SIG_IGN);
	pqsignal(SIGTERM, SIG_IGN);
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGQUIT, pgarch_exit);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, pgarch_waken);
	pqsignal(SIGUSR2, SIG_IGN);
	pqsignal(SIGCHLD, SIG_IGN);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);
	PG_SETMASK(&UnBlockSig);

#ifdef EXEC_BACKEND
	/*
	 * NOTE(review): this calls the pgstat argument parser, presumably a
	 * leftover from the pgstat code this file was modelled on -- confirm.
	 */
	pgstat_parseArgs(argc,argv);
#endif

	/*
	 * Identify via ps
	 */
	init_ps_display("archiver process", "", "");
	set_ps_display("");

	elog(LOG, "archiver started");

	/*
	 * When first started, check for outstanding archive files
	 * which may be present if we did a (normal) fast shutdown
	 * or if archiver died with some form of error
	 */
	pgarch_ArchiverCopyLoop();

	pgarch_MainWaitLoop();

	exit(0);
}

/* SIGQUIT handler: terminate the archiver process immediately */
static void
pgarch_exit(SIGNAL_ARGS)
{
	exit(0);
}
#ifdef NOT_USED static bool shutdowncalled = false; /* * Experiment later with having archiver complete its work on smart shutdown... * */ static void pgarch_shutdown(SIGNAL_ARGS) { shutdowncalled = true; /* * we do not block signals here, to allow a later, * upgraded (fast/immediate) request for shutdown to * override this behaviour */ if (archiving_in_progress) { /* complete archiving, then exit */ return; } else { /* This is a Smart or Fast Shutdown, * so try archiving one last time. This is consistent * with behaviour of a fast shutdown, in that we still * write a shutdown checkpoint and try to recycle the * log files - so before we do this, we try to archive * away the last few .ready xlogs * * XXX archive partial xlog file, even though not full * though we'd have to remember which one was next to * handle that correctly on restore/recover */ pgarch_ArchiverCopyLoop(); return; } } #endif static void pgarch_waken(SIGNAL_ARGS) { if (XLogArchiveDEBUG) elog(LOG, "arch: archiver woken by SIGUSR1"); pgarch_ArchiverCopyLoop(); return; } /* ---------- * pgarch_MainWaitLoop() - * * Main wait loop for archiver * ---------- */ static void pgarch_MainWaitLoop(void) { /* * There shouldn't be anything for the archiver to do except * to wait, so we could use pause(3) here... * ...however, the archiver exists to protect our data, so * she wakes up occaisionally to allow herself to be proactive. 
* This shouldn't be required, but our data is important * and this won't hurt to be cautious */ do { /* * Sleep for a while, hoping to be interrupted by signal * if no signal, then check anyway....just to be sure */ sleep(PGARCH_AUTOWAKE_INTERVAL); curtime = time(NULL); if ((unsigned int) (curtime - last_pgarch_archivercopy_time) >= (unsigned int) PGARCH_AUTOWAKE_INTERVAL) { pgarch_ArchiverCopyLoop(); } last_pgarch_archivercopy_time = curtime; } while (PostmasterIsAlive(true)); return; } /* ---------- * pgarch_ArchiverCopyLoop() - * * Archives all outstanding xlogs then exits * ---------- */ static void pgarch_ArchiverCopyLoop(void) { char xlog[32]; int try = 1; /* * We continue to trap for all signals, except for the * one that brought us here in the first place. We * loop through all transaction log files that require * archiving, so no need to be interrupted to continue * that task */ pqsignal(SIGUSR1, SIG_IGN); archiving_in_progress = true; /* * loop through all xlogs with archive_status of .ready * then archive them...mostly we expect this to be a single * file, though not just a simple loop because we may add new * files onto the list of those that need archiving while we * are still copying earlier archives */ if (XLogArchiveDEBUG) { elog(LOG, "arch: starting archive loop..."); } while (pgarch_readyXlog(xlog) && try <= NUM_ARCHIVE_RETRIES) { if (pgarch_archiveXlog(xlog, false)) { /* * then update archive_status to show completion */ if (!pgarch_archiveDone(xlog)) { ereport(LOG,(errmsg("arch: archive_status completion error"))); } } else { ereport(LOG,(errmsg("arch: archive copy error"))); } /* if we have copied one file, we do not wait: immediately loop back round and check to see if another is there. 
Hopefully, we're quick enough....so we fall out and sleep again */ try++; } archiving_in_progress = false; pqsignal(SIGUSR1, pgarch_waken); return; } /* * pgarch_archiveXlog * * Invokes system(3) to copy one archive file to XLogArchiveDest * We assume xlog is a correct filename and that both * XLogArchiveProgram and XLogArchiveDest are set correctly */ static bool pgarch_archiveXlog(char *xlog, bool testmode) { char xlogarchcmd[MAXPGPATH]; char xlogpath[MAXPGPATH]; int rc; snprintf(xlogpath, MAXPGPATH, "%s/%s", XLogDir,xlog); /* * set the string for the program and its parameters * XLogArchiveProgram should contain 2 positional parameters * xlog must be a full path to xlog */ snprintf(xlogarchcmd, MAXPGPATH, XLogArchiveProgram, xlogpath, XLogArchiveDest); if (XLogArchiveDEBUG) elog(LOG, "arch: system (%s)", xlogarchcmd); rc = system(xlogarchcmd); if (rc != 0 ) { elog(LOG, "arch: system (%s), return code=%i", xlogarchcmd, rc); return false; } if (!testmode) elog(LOG, "archived transaction log file \"%s\"", xlog); return true; } /* * XLogArchiveXlogs * * Return name of the oldest xlog file that has not yet been archived. * No notification is set that file archiving is now in progress, [so * this would need to be extended if multiple concurrent archival * tasks were created]. If a failure occurs, we would completely * re-copy the file at the next available opportunity. * * It is important that we return the oldest, so that we archive xlogs * in order that they were written, for two reasons: * 1) to maintain the sequential chain of xlogs required for recovery * 2) because the oldest ones will sooner become candidates for * recycling at time of checkpoint * */ static bool pgarch_readyXlog(char *xlog) { /* * open XLogArchive directory and read through list of * rlogs that have the .ready suffix, looking for earliest file. * It is possible to optimise this code, though only a single * file is expected on the vast majority of calls, so.... 
*/ char newxlog[32]; char emptystr[32] = "\0"; DIR *rldir; struct dirent *rlde; bool firstfile; rldir = AllocateDir(XLogArchiveStatusDir); if (rldir == NULL) elog(PANIC, "cannot access archive_status"); firstfile = true; while ((rlde = readdir(rldir)) != NULL) { if (strlen(rlde->d_name) == 22 && strspn(rlde->d_name, "0123456789ABCDEF") == 16 && strcmp(rlde->d_name + 16, ".ready") == 0) { if (firstfile) { strcpy(newxlog, rlde->d_name); firstfile = false; } else { if (strcmp(rlde->d_name, newxlog) <= 0) strcpy(newxlog, rlde->d_name); } } } FreeDir(rldir); if (firstfile) { return false; } else { if (XLogArchiveDEBUG) elog(LOG, "arch: found archive_status file...%s", newxlog); strcpy(xlog, emptystr); strncat(xlog, newxlog, 16); return true; } } /* * pgarch_archiveDone * * Write notification that an xlog has now been successfully archived */ static bool pgarch_archiveDone(char *xlog) { char rlogready[MAXPGPATH]; char rlogdone[MAXPGPATH]; int rc; snprintf(rlogready, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog); snprintf(rlogdone, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog); rc = rename(rlogready, rlogdone); if (rc < 0) { ereport(WARNING,(errcode_for_file_access(), errmsg("could not update archive_status for %s", rlogready))); return false; } return true; }
/* ----------
 * pgarch.h
 *
 *	Definitions for the PostgreSQL archiver daemon.
 *
 * ----------
 */
#ifndef PGARCH_H
#define PGARCH_H

/* ----------
 * Timer definitions.
 * ----------
 */
#define PGARCH_AUTOWAKE_INTERVAL	600		/* How often to wake and poll
											 * for work; in seconds. */
#define PGARCH_RESTART_INTERVAL		60		/* How often to attempt to
											 * restart a failed archiver;
											 * in seconds. */

/* ----------
 * Retry limit used by the archiver's copy loop.
 * ----------
 */
#define NUM_ARCHIVE_RETRIES			3

/* ----------
 * Functions called from postmaster
 * ----------
 */
extern int	pgarch_start(void);

#endif   /* PGARCH_H */
---------------------------(end of broadcast)--------------------------- TIP 6: Have you searched our list archives? http://archives.postgresql.org