On Mon, 2004-06-28 at 21:58, Simon Riggs wrote:
> PITR Archive Recovery, 28 June 2004
> 
> What's in this patch?
> 
> - All of what was in previous PITR Archival patch, including reworking
> of all the archiver startup/shutdown code to match that of pgstat
> - New code to perform Archive Recovery mode, which streams xlogs
> straight from archive to allow "infinite" recovery
> 
> [This is a full, working patch for discussion and testing, with a few
> days left before 7.5dev freeze for changes and corrections]
> 

err...and these additional files are REQUIRED also:

src/backend/postmaster/pgarch.c

src/include/pgarch.h

...implemented to match the locations of pgstat code

Best regards, Simon Riggs
/* ----------
 * pgarch.c
 *
 *	PostgreSQL transaction log archiver
 * 
 *  All functions relating to archiver are included here
 * 
 *  - All functions executed by archiver process 
 *
 *  - The archiver is forked from the postmaster, and the two
 *  processes then communicate using signals. All functions
 *  executed by postmaster are included in this file.
 *
 *  Simon Riggs     [EMAIL PROTECTED]
 *
 * ----------
 */
#include "postgres.h"

#include <unistd.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <errno.h>
#include <signal.h>
#include <time.h>

#include "pgarch.h"

#include "storage/fd.h"
#include "miscadmin.h"
#include "access/xlog.h"
#include "libpq/pqsignal.h"
#include "storage/ipc.h"
#include "postmaster/postmaster.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "utils/ps_status.h"

/* ----------
 * GUC parameters
 * ----------
 */
/* archiving on/off switch; pgarch_start() does nothing when this is false */
bool 			XLogArchiveMode;
/* when true, the archiver emits extra "arch: ..." LOG messages */
bool 			XLogArchiveDEBUG;
/* passed as the second argument when the archive command is formatted */
char 			*XLogArchiveDest;
/* printf-style template for the command run via system(3) */
char 			*XLogArchiveProgram;

/* ----------
 * Other global variables
 * ----------
 */
/* not referenced anywhere else in this file */
bool		pgarch_is_running = false;

/* ----------
 * Local data
 * ----------
 */
/* "<DataDir>/pg_xlog"; filled in by pgarch_Main() */
static char XLogDir[MAXPGPATH];
/* "<XLogDir>/archive_status"; filled in by pgarch_Main() */
static char XLogArchiveStatusDir[MAXPGPATH];
/* time of the last start attempt; used by pgarch_start() to rate-limit */
static time_t last_pgarch_start_time;
/* timestamp recorded at the end of each pgarch_MainWaitLoop() iteration */
static time_t last_pgarch_archivercopy_time;
/* scratch "now" for pgarch_MainWaitLoop(); pgarch_start() has its own local */
static time_t curtime;
/* true while pgarch_ArchiverCopyLoop() is running */
static bool archiving_in_progress = false;

/* ----------
 * Local function forward declarations
 * ----------
 */
/* archiver process entry point */
NON_EXEC_STATIC void pgarch_Main(int argc, char *argv[]);
/* signal handlers */
static void pgarch_exit(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
/* main loops */
static void pgarch_MainWaitLoop(void);
static void pgarch_ArchiverCopyLoop(void);
/* per-file helpers */
static bool pgarch_archiveXlog(char *xlog, bool testmode);
static bool pgarch_readyXlog(char *xlog);
static bool pgarch_archiveDone(char *xlog);

/* ------------------------------------------------------------
 * Public functions called from postmaster follow
 * ------------------------------------------------------------
 */

/* ----------
 * pgarch_start() -
 *
 *	Called from postmaster at startup 
 *  or after the previous archiver died
 *
 *	Note: if fail, we will be called again from the postmaster main loop.
 * ----------
 */
int
pgarch_start(void)
{
	time_t		curtime;
	pid_t		pgArchPid;
	/*
	 * Do nothing if no archiver needed
	 */
	if (!XLogArchiveMode)
		return 0;

	/*
	 * Do nothing if too soon since last archiver start.  This is a
	 * safety valve to protect against continuous respawn attempts if the
	 * archiver is dying immediately at launch. Note that since we will
	 * be re-called from the postmaster main loop, we will get another
	 * chance later.
	 */
	curtime = time(NULL);
	if ((unsigned int) (curtime - last_pgarch_start_time) <
		(unsigned int) PGARCH_RESTART_INTERVAL) {
 		return 0;
    }
	last_pgarch_start_time = curtime;

	fflush(stdout);
	fflush(stderr);

#ifdef __BEOS__
	/* Specific beos actions before backend startup */
	beos_before_backend_startup();
#endif

#ifdef EXEC_BACKEND
	switch ((pgArchPid = (int) pgarch_forkexec(STAT_PROC_BUFFER)))
#else
	switch ((pgArchPid = (int) fork()))
#endif
	{
		case -1:
#ifdef __BEOS__
			/* Specific beos actions */
			beos_backend_startup_failed();
#endif
			ereport(PANIC,
					(errmsg("could not fork archiver")));
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
#ifdef __BEOS__
			/* Specific beos actions after backend startup */
			beos_backend_startup();
#endif
			/* Close the postmaster's sockets */
			ClosePostmasterPorts();

			/* Drop our connection to postmaster's shared memory, as well */
			PGSharedMemoryDetach();

			pgarch_Main(0, NULL);

			break;
#endif

		default:
			return (int) pgArchPid;
	}

	/* shouldn't get here */
	return 0;
}

/* ------------------------------------------------------------
 * Local functions called by archiver follow
 * ------------------------------------------------------------
 */

/* ----------
 * pgarch_Main() -
 *
 *	Entry point of the archiver child process.  Resets per-process
 *	state, checks the archive settings by archiving a dummy file,
 *	installs the archiver's signal handlers, archives anything that is
 *	already waiting, and then stays in pgarch_MainWaitLoop() until that
 *	loop returns.  Finishes by calling exit(0).
 *
 *	The argc/argv parameters are valid only in EXEC_BACKEND case.
 * ----------
 */
NON_EXEC_STATIC void
pgarch_Main(int argc, char *argv[])
{
    /* dummy file name used only for the startup self-test below */
    char    testxlog[32] = "0000TEST0000TEST";
    char    testpath[MAXPGPATH];
    FILE	   	*rlogFD;

    IsUnderPostmaster = true;	/* we are a postmaster subprocess now */

    MyProcPid = getpid();		/* reset MyProcPid */

	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

    /*
     * Init XLOG file paths
     *
     * NOTE(review): under EXEC_BACKEND, DataDir is read here before the
     * argument parsing further down has run -- confirm DataDir is
     * already valid at this point.
     */
    snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
    snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);

    /*
     * Test whether archive_program and archive_dest have been set
     * correctly in postgresql.conf. This test does *not* look at
     * archive_status, since it is a direct test of archival
     *
     * The test creates an empty file in pg_xlog, runs the archive
     * command on it in test mode, and removes the file afterwards.
     */
    snprintf(testpath, MAXPGPATH, "%s/%s", XLogDir, testxlog);
    unlink(testpath);			/* remove any leftover from an earlier run */
    rlogFD = AllocateFile(testpath, "w");
    /*
     * NOTE(review): this is reported at ERROR while the failed archive
     * test below is reported at PANIC -- confirm the intended severity.
     */
    if (rlogFD == NULL)
	ereport(ERROR,
	    (errcode_for_file_access(),
            errmsg("could not write test file to pg_xlog \"%s\" ",
				   testpath)));
    FreeFile(rlogFD);
    if (!pgarch_archiveXlog(testxlog, true))
			ereport(PANIC,
					(errmsg("archive_program test failed...please correct this")));
    unlink(testpath);
        
    /*
     * Ignore all signals usually bound to some action in the postmaster,
     * except SIGQUIT (exit at once) and SIGUSR1 (run the archive loop).
     */
    pqsignal(SIGHUP, SIG_IGN);
    pqsignal(SIGTERM, SIG_IGN);
    pqsignal(SIGINT, SIG_IGN); 
    pqsignal(SIGQUIT, pgarch_exit);
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, pgarch_waken);
    pqsignal(SIGUSR2, SIG_IGN);
    pqsignal(SIGCHLD, SIG_IGN);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);
    PG_SETMASK(&UnBlockSig);

    /*
     * NOTE(review): this calls the statistics collector's argument
     * parser, which is not defined in this file -- confirm it is the
     * intended routine and that it is visible from here.
     */
    #ifdef EXEC_BACKEND
    pgstat_parseArgs(argc,argv);
    #endif

    /*
     * Identify via ps
     */
    init_ps_display("archiver process", "", "");
    set_ps_display("");

    elog(LOG, "archiver started");

    /* 
     * When first started, check for outstanding archive files
     * which may be present if we did a (normal) fast shutdown
     * or if archiver died with some form of error
     */
    pgarch_ArchiverCopyLoop();

    /* wait for wake-up signals / periodic polls */
    pgarch_MainWaitLoop();

 	exit(0);
}

/*
 * pgarch_exit() -
 *
 *	Signal handler that ends the archiver process at once with exit
 *	status 0.  pgarch_Main() installs it for SIGQUIT.
 */
static void
pgarch_exit(SIGNAL_ARGS)
{
	exit(0);
}

#ifdef NOT_USED
/* set once a shutdown request has been seen */
static bool shutdowncalled = false;

/*
 * pgarch_shutdown() -
 *
 *	Experimental (compiled out): let the archiver finish its work on a
 *	smart or fast shutdown request.
 *
 *	Signals are deliberately not blocked here, so that a later, upgraded
 *	(fast/immediate) shutdown request can still override this behaviour.
 *
 *	If an archive loop is already running, nothing more is done: that
 *	loop is left to complete.  Otherwise one last archive pass is made,
 *	so that the remaining .ready xlogs are archived away before the
 *	shutdown checkpoint tries to recycle the log files.
 *
 *	XXX archive the partial xlog file as well, even though it is not
 *	full; we would have to remember which one was next to handle that
 *	correctly on restore/recover.
 */
static void
pgarch_shutdown(SIGNAL_ARGS)
{
	shutdowncalled = true;

	if (!archiving_in_progress)
		pgarch_ArchiverCopyLoop();
}
#endif

/*
 * pgarch_waken() -
 *
 *	SIGUSR1 handler.  Optionally logs the wake-up, then runs one full
 *	archive pass directly from the handler.
 *
 *	NOTE(review): the archive pass logs and spawns a shell command, which
 *	is a lot of work for a signal handler -- confirm this is acceptable.
 */
static void
pgarch_waken(SIGNAL_ARGS)
{
	if (XLogArchiveDEBUG)
	{
		elog(LOG, "arch: archiver woken by SIGUSR1");
	}

	pgarch_ArchiverCopyLoop();
}

/* ----------
 * pgarch_MainWaitLoop() -
 *
 *	Idle loop of the archiver.  Returns once PostmasterIsAlive(true)
 *	reports false after an iteration.
 *
 *	Strictly speaking there is nothing to do here but wait for a signal,
 *	so pause(3) would suffice.  The archiver exists to protect our data,
 *	though, so it also wakes up occasionally and checks for work on its
 *	own.  That should never be required, but being cautious costs little.
 * ----------
 */
static void
pgarch_MainWaitLoop(void)
{
	for (;;)
	{
		unsigned int elapsed;

		/*
		 * Sleep for a while, hoping to be interrupted by a signal.
		 * If no signal arrives, check anyway, just to be sure.
		 */
		sleep(PGARCH_AUTOWAKE_INTERVAL);

		curtime = time(NULL);
		elapsed = (unsigned int) (curtime - last_pgarch_archivercopy_time);

		/* poll only if a whole interval has passed since the last mark */
		if (elapsed >= (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
			pgarch_ArchiverCopyLoop();

		/* the mark is advanced on every iteration, poll or no poll */
		last_pgarch_archivercopy_time = curtime;

		if (!PostmasterIsAlive(true))
			break;
	}
}

/* ----------
 * pgarch_ArchiverCopyLoop() -
 *
 * Archives all outstanding xlogs then exits
 *  ----------
 */
static void
pgarch_ArchiverCopyLoop(void)
{
 	char	xlog[32];
    int     try = 1;

    /*
     * We continue to trap for all signals, except for the 
     * one that brought us here in the first place. We
     * loop through all transaction log files that require 
     * archiving, so no need to be interrupted to continue
     * that task 
     */
	pqsignal(SIGUSR1, SIG_IGN);
    archiving_in_progress = true;

    /*
     * loop through all xlogs with archive_status of .ready 
     * then archive them...mostly we expect this to be a single
     * file, though not just a simple loop because we may add new
     * files onto the list of those that need archiving while we
     * are still copying earlier archives
     */
    if (XLogArchiveDEBUG) {
		elog(LOG, "arch: starting archive loop...");
    }

 	while (pgarch_readyXlog(xlog) && try <= NUM_ARCHIVE_RETRIES) 
 	{
 		if (pgarch_archiveXlog(xlog, false)) {
            /*
             * then update archive_status to show completion
             */
			if (!pgarch_archiveDone(xlog)) {
 				  ereport(LOG,(errmsg("arch: archive_status completion error")));
 			}
 		} else {
 			  ereport(LOG,(errmsg("arch: archive copy error")));
 		}
 		/* if we have copied one file, we do not wait:
 		   immediately loop back round and check to see if another is there.
 		   Hopefully, we're quick enough....so we fall out and sleep again
 		*/		
        try++;
 	}

    archiving_in_progress = false;
    pqsignal(SIGUSR1, pgarch_waken);
    
    return;
}

/*
 * pgarch_archiveXlog
 *
 * Invokes system(3) to copy one archive file to XLogArchiveDest.
 *
 * XLogArchiveProgram is used as a printf-style template and should
 * contain two %s conversions: the first receives the full path of the
 * xlog file, the second receives XLogArchiveDest.
 *
 *	xlog		file name within XLogDir (not a path); assumed correct
 *	testmode	if true, the "archived ..." success message is suppressed
 *
 * Returns true only if a command was built in full, executed, and
 * returned 0.  Missing settings or a command that does not fit into
 * MAXPGPATH are logged and reported as failure; an empty or truncated
 * command is never passed to the shell.
 */
static bool
pgarch_archiveXlog(char *xlog, bool testmode)
{
	char		xlogarchcmd[MAXPGPATH];
	char		xlogpath[MAXPGPATH];
	int			len;
	int			rc;

	/*
	 * An empty command would be "run" successfully by the shell and so be
	 * mistaken for a completed archive; a NULL format or %s argument is
	 * undefined behaviour.  Refuse both up front.
	 */
	if (XLogArchiveProgram == NULL || XLogArchiveProgram[0] == '\0' ||
		XLogArchiveDest == NULL)
	{
		elog(LOG, "arch: archive program or archive destination is not set");
		return false;
	}

	len = snprintf(xlogpath, MAXPGPATH, "%s/%s", XLogDir, xlog);
	if (len < 0 || len >= MAXPGPATH)
	{
		elog(LOG, "arch: path of transaction log file \"%s\" is too long", xlog);
		return false;
	}

	/*
	 * set the string for the program and its parameters
	 * XLogArchiveProgram should contain 2 positional parameters
	 * xlog must be a full path to xlog
	 */
	len = snprintf(xlogarchcmd, MAXPGPATH, XLogArchiveProgram,
				   xlogpath, XLogArchiveDest);
	if (len < 0 || len >= MAXPGPATH)
	{
		elog(LOG, "arch: archive command for \"%s\" is too long", xlog);
		return false;
	}

	if (XLogArchiveDEBUG)
		elog(LOG, "arch: system (%s)", xlogarchcmd);

	rc = system(xlogarchcmd);
	if (rc != 0)
	{
		elog(LOG, "arch: system (%s), return code=%i", xlogarchcmd, rc);
		return false;
	}

	if (!testmode)
		elog(LOG, "archived transaction log file \"%s\"", xlog);

	return true;
}

/*
 * pgarch_readyXlog
 *
 * Find the oldest xlog file that has not yet been archived.
 *
 * Scans XLogArchiveStatusDir for entries named with 16 upper-case hex
 * digits followed by ".ready" and selects the one whose name sorts
 * lowest.  On success the 16-character xlog name (without the suffix)
 * is stored in *xlog and true is returned; if no such entry exists,
 * false is returned and *xlog is left untouched.
 *
 * No notification is set that file archiving is now in progress, [so
 * this would need to be extended if multiple concurrent archival
 * tasks were created]. If a failure occurs, we would completely
 * re-copy the file at the next available opportunity.
 *
 * It is important that we return the oldest, so that we archive xlogs
 * in order that they were written, for two reasons:
 * 1) to maintain the sequential chain of xlogs required for recovery
 * 2) because the oldest ones will sooner become candidates for
 * recycling at time of checkpoint
 *
 * The scan could be optimised, but only a single file is expected on
 * the vast majority of calls.
 */
static bool
pgarch_readyXlog(char *xlog)
{
	char		oldest[32];
	bool		found = false;
	DIR		   *statusdir;
	struct dirent *entry;

	statusdir = AllocateDir(XLogArchiveStatusDir);
	if (statusdir == NULL)
		elog(PANIC, "cannot access archive_status");

	while ((entry = readdir(statusdir)) != NULL)
	{
		const char *name = entry->d_name;

		/* skip everything that is not "<16 hex digits>.ready" */
		if (strlen(name) != 22 ||
			strspn(name, "0123456789ABCDEF") != 16 ||
			strcmp(name + 16, ".ready") != 0)
			continue;

		/* keep the first match, or any later one that does not sort higher */
		if (!found || strcmp(name, oldest) <= 0)
		{
			strcpy(oldest, name);
			found = true;
		}
	}
	FreeDir(statusdir);

	if (!found)
		return false;

	if (XLogArchiveDEBUG)
		elog(LOG, "arch: found archive_status file...%s", oldest);

	/* hand back only the 16-character xlog name, dropping ".ready" */
	xlog[0] = '\0';
	strncat(xlog, oldest, 16);
	return true;
}

/*
 * pgarch_archiveDone
 *
 * Record that an xlog has been archived successfully by renaming its
 * status file in XLogArchiveStatusDir from "<xlog>.ready" to
 * "<xlog>.done".
 *
 * Returns true if the rename succeeded; otherwise emits a WARNING and
 * returns false.
 */
static bool
pgarch_archiveDone(char *xlog)
{
	char		readypath[MAXPGPATH];
	char		donepath[MAXPGPATH];

	snprintf(readypath, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog);
	snprintf(donepath, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog);

	if (rename(readypath, donepath) >= 0)
		return true;

	ereport(WARNING,
			(errcode_for_file_access(),
			 errmsg("could not update archive_status for %s",
					readypath)));
	return false;
}
/* ----------
 *	pgarch.h
 *
 *	Definitions for the PostgreSQL archiver daemon.
 *
 * ----------
 */
#ifndef PGARCH_H
#define PGARCH_H

/* ----------
 * Timer definitions, in seconds.
 * ----------
 */
#define PGARCH_AUTOWAKE_INTERVAL 600	/* How often the archiver wakes and
										 * polls for work on its own */
#define PGARCH_RESTART_INTERVAL 60		/* How often to attempt to restart
										 * a failed archiver */

/*
 * Bounds the work pgarch_ArchiverCopyLoop() does in one invocation
 * before it gives up until the next wake-up.
 */
#define NUM_ARCHIVE_RETRIES 3

/* ----------
 * Functions called from postmaster
 * ----------
 */
extern int pgarch_start(void);

#endif   /* PGARCH_H */
---------------------------(end of broadcast)---------------------------
TIP 6: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to