On Tue, 2004-06-29 at 20:59, Simon Riggs wrote:
> On Mon, 2004-06-28 at 21:58, Simon Riggs wrote:
> > PITR Archive Recovery, 28 June 2004
> > 
> > What's in this patch?

This is my LAST PLANNED patch before Freeze. Any questions?

This is a patch-on-patch, rather than a full patch. To use this, apply
earlier patches for pitr_v4_4*, then apply this. (Full patch available
upon request...just saving the good people of this list some annoyance
time from a 50k download).

This now provides:

- parsing of restore program from recovery.conf
- minor cosmetic changes to some error messages

...there's more to do, but I'm working on the principle of
"if it ain't broke, don't fix it"...

Best Regards, Simon Riggs
Index: xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/access/transam/xlog.c,v
retrieving revision 1.146
diff -c -r1.146 xlog.c
*** xlog.c	3 Jun 2004 02:08:00 -0000	1.146
--- xlog.c	30 Jun 2004 01:38:52 -0000
***************
*** 35,46 ****
  #include "storage/proc.h"
  #include "storage/sinval.h"
  #include "storage/spin.h"
  #include "utils/builtins.h"
  #include "utils/guc.h"
  #include "utils/relcache.h"
  #include "miscadmin.h"
  
- 
  /*
   * This chunk of hackery attempts to determine which file sync methods
   * are available on the current platform, and to choose an appropriate
--- 35,46 ----
  #include "storage/proc.h"
  #include "storage/sinval.h"
  #include "storage/spin.h"
+ #include "storage/pmsignal.h"
  #include "utils/builtins.h"
  #include "utils/guc.h"
  #include "utils/relcache.h"
  #include "miscadmin.h"
  
  /*
   * This chunk of hackery attempts to determine which file sync methods
   * are available on the current platform, and to choose an appropriate
***************
*** 84,95 ****
  
  
  /* User-settable parameters */
  int			CheckPointSegments = 3;
  int			XLOGbuffers = 8;
  char	   *XLOG_sync_method = NULL;
  const char	XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
- char		XLOG_archive_dir[MAXPGPATH];		/* null string means
- 												 * delete 'em */
  
  #ifdef WAL_DEBUG
  bool		XLOG_DEBUG = false;
--- 84,97 ----
  
  
  /* User-settable parameters */
+ bool 			XLogArchiveMode = false;
+ bool 			XLogArchiveDEBUG = false;
+ char 			*XLogArchiveDest;
+ char 			*XLogArchiveProgram;
  int			CheckPointSegments = 3;
  int			XLOGbuffers = 8;
  char	   *XLOG_sync_method = NULL;
  const char	XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
  
  #ifdef WAL_DEBUG
  bool		XLOG_DEBUG = false;
***************
*** 126,131 ****
--- 128,141 ----
  
  /* Are we doing recovery by reading XLOG? */
  bool		InRecovery = false;
+ bool        InArchiveRecovery = false;
+ bool        UseArchiveFirst = false;
+ bool        InRecoveryCleanup = false;
+ 
+ static  char XLogArchRestoreProgram[MAXPGPATH];
+ static 	char recoveryCommandFile[MAXPGPATH];
+ 
+ static void readRecoveryCommandFile(void);
  
  /*
   * MyLastRecPtr points to the start of the last XLOG record inserted by the
***************
*** 392,397 ****
--- 402,408 ----
  
  /* File path names */
  static char XLogDir[MAXPGPATH];
+ static char RLogDir[MAXPGPATH];
  static char ControlFilePath[MAXPGPATH];
  
  /*
***************
*** 433,438 ****
--- 444,452 ----
  
  static bool InRedo = false;
  
+ static bool XLogArchiveNotify(uint32 log, uint32 seg);
+ static bool XLogArchiveDone(char xlog[32]);	/* xlog: 16-hex-digit segment file name */
+ static void XLogArchiveCleanup(char xlog[32]);
  
  static bool AdvanceXLInsertBuffer(void);
  static bool WasteXLInsertBuffer(void);
***************
*** 443,448 ****
--- 457,463 ----
  					   bool find_free, int max_advance,
  					   bool use_lock);
  static int	XLogFileOpen(uint32 log, uint32 seg, bool econt);
+ static void RestoreRecoveryXlog(char *path, uint32 log, uint32 seg);
  static void PreallocXlogFiles(XLogRecPtr endptr);
  static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
  static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
***************
*** 454,463 ****
  static void ReadControlFile(void);
  static char *str_time(time_t tnow);
  static void issue_xlog_fsync(void);
- #ifdef WAL_DEBUG
  static void xlog_outrec(char *buf, XLogRecord *record);
- #endif
- 
  
  /*
   * Insert an XLOG record having the specified RMID and info bytes,
--- 469,475 ----
***************
*** 911,916 ****
--- 923,1059 ----
  }
  
  /*
+  * XLogArchiveNotify
+  *
+  * Create the archive notification file RLogDir/<XLOG>.ready for a segment.
+  *
+  * The name of the notification file is the message that will be picked up
+  * by the archiver, e.g. we write RLogDir/00000001000000C6.ready
+  * and the archiver then knows to archive XLogDir/00000001000000C6,
+  * then when complete, rename it to RLogDir/00000001000000C6.done
+  *
+  * Returns true if the file was written; logs and returns false if not.
+  *
+  * Called only when in XLogArchiveMode by one backend process
+  */
+ static bool
+ XLogArchiveNotify(uint32 log, uint32 seg)
+ {
+ 	char		rlog[32];
+ 	char		rlogpath[MAXPGPATH];
+ 	FILE	   *rlogFD;
+ 
+ 	/* insert an otherwise empty file called <XLOG>.ready */
+ 	snprintf(rlog, sizeof(rlog), "%08X%08X.ready", log, seg);
+ 	snprintf(rlogpath, MAXPGPATH, "%s/%s", RLogDir, rlog);
+ 	rlogFD = AllocateFile(rlogpath, "w");
+ 	if (rlogFD == NULL)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not write archive_status file \"%s\": %m",
+ 						rlogpath)));
+ 		return false;	/* let the caller issue its own warning */
+ 	}
+ 	FreeFile(rlogFD);
+ 
+ 	/* the existence of this file tells the archiver which xlog to archive */
+ 	if (XLogArchiveDEBUG)
+ 		elog(LOG, "backend: written \"%s\"", rlogpath );
+ 
+ 	/*
+ 	 * don't send the signal if we know that the archiver isn't there (yet)
+ 	 * - the archiver will see the archive_status file as soon as it starts
+ 	 */
+ 	if (!InArchiveRecovery)
+ 		SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
+ 
+ 	return true;
+ }
+ 
+ /*
+  * XLogArchiveDone
+  *
+  * Report whether the archiver has finished with the given xlog segment.
+  *
+  * We probe RLogDir for the segment's notification file:
+  *	<XLOG>.done		archived; the segment in XLogDir is safe to recycle
+  *	<XLOG>.ready	archiving still pending; warn and leave the segment
+  *	neither			unexpected, except just after a restore; warn only
+  * The probes are made in that order and the first match wins.
+  *
+  * Returns true only in the .done case.
+  *
+  * Called only when in XLogArchiveMode by bgwriter (when performing checkpoint)
+  */
+ static bool
+ XLogArchiveDone(char xlog[32])
+ {
+ 	char		statuspath[MAXPGPATH];
+ 	FILE	   *fp;
+ 
+ 	if (XLogArchiveDEBUG)
+ 		elog(LOG, "chkpt: checking for log file \"%s\"",
+ 			 xlog);
+ 
+ 	/* Case 1: <XLOG>.done exists, so archiving has completed */
+ 	snprintf(statuspath, MAXPGPATH, "%s/%s.done", RLogDir, xlog);
+ 	fp = AllocateFile(statuspath, "r");
+ 	if (fp != NULL)
+ 	{
+ 		FreeFile(fp);
+ 		if (XLogArchiveDEBUG)
+ 			elog(LOG, "chkpt: archiving done for log file \"%s\"", xlog);
+ 		return true;
+ 	}
+ 
+ 	/*
+ 	 * Case 2: <XLOG>.ready exists, so the archiver has not got to it yet.
+ 	 * It is either not running at all, too slow, or incorrectly
+ 	 * configured; warn and tell the caller not to recycle.
+ 	 */
+ 	snprintf(statuspath, MAXPGPATH, "%s/%s.ready", RLogDir, xlog);
+ 	fp = AllocateFile(statuspath, "r");
+ 	if (fp != NULL)
+ 	{
+ 		FreeFile(fp);
+ 		elog(WARNING,
+ 			 "chkpt: archiving not yet started for log file \"%s\"",
+ 			 xlog);
+ 		return false;
+ 	}
+ 
+ 	/*
+ 	 * Case 3: no notification file at all.  One SHOULD exist, unless the
+ 	 * database has just been restored, in which case it may be absent;
+ 	 * so this is only a WARNING, not an error.
+ 	 */
+ 	ereport(WARNING,
+ 			(errcode_for_file_access(),
+ 			 errmsg("chkpt: cannot find archive_status file: %s ",
+ 					statuspath)));
+ 	return false;
+ }
+ 
+ /*
+  * XLogArchiveCleanup
+  *
+  * Remove the <XLOG>.done notification file belonging to an xlog segment
+  * in XLogDir.
+  * The unlink() result is not checked, so a missing file is silently ignored.
+  *
+  * Called only when in XLogArchiveMode by bgwriter (when performing checkpoint)
+  */
+ static void
+ XLogArchiveCleanup(char xlog[32])
+ {
+ 	char		donepath[MAXPGPATH];
+ 
+ 	snprintf(donepath, sizeof(donepath), "%s/%s.done", RLogDir, xlog);
+ 	unlink(donepath);
+ }
+ 
+ /*
   * Advance the Insert state to the next buffer page, writing out the next
   * buffer if it still contains unwritten data.
   *
***************
*** 1259,1264 ****
--- 1402,1415 ----
  		{
  			issue_xlog_fsync();
  			LogwrtResult.Flush = LogwrtResult.Write;	/* end of current page */
+ 
+             /* 
+              * Notify xlog ready to archive?
+              */
+             if (XLogArchiveMode && !XLogArchiveNotify(openLogId, openLogSeg))
+ 				elog(WARNING, "could not write archive_status file for log %u, segment %u",
+ 					   openLogId, openLogSeg);
+ 
  		}
  
  		if (ispartialpage)
***************
*** 1685,1691 ****
  	char		path[MAXPGPATH];
  	int			fd;
  
! 	XLogFileName(path, log, seg);
  
  	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
  					   S_IRUSR | S_IWUSR);
--- 1836,1844 ----
  	char		path[MAXPGPATH];
  	int			fd;
  
!     XLogFileName(path, log, seg);
!  	if (UseArchiveFirst)
!         RestoreRecoveryXlog(path, log, seg);
  
  	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
  					   S_IRUSR | S_IWUSR);
***************
*** 1704,1714 ****
  			errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
  				   path, log, seg)));
  	}
! 
  	return (fd);
  }
  
  /*
   * Preallocate log files beyond the specified log endpoint, according to
   * the XLOGfile user parameter.
   */
--- 1857,2047 ----
  			errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
  				   path, log, seg)));
  	}
!  
  	return (fd);
  }
  
  /*
+  * Restore xlog segment (log, seg) from the archive for use in recovery.
+  * On return, path holds the name of the file the caller should open.
+  */
+ static void
+ RestoreRecoveryXlog(char *path, uint32 log, uint32 seg)
+ {
+     char tmpXlog[32];
+     char restoreXlog[32];
+     char tmppath[MAXPGPATH];
+     char xlogRestoreCmd[MAXPGPATH];
+     char recoveryXlog[MAXPGPATH];
+     char lastrecoXlog[MAXPGPATH];
+     int         rc;
+     struct stat stat_buf;
+     uint32 prevlog, prevseg;
+     FILE       *rlogFD;
+ 
+     /*
+      * If a RecoveryFile exists, then we know we are in media recovery
+      * in which case we choose to recover files from archive, even
+      * though a file of that name may already exist in XLogDir
+      *
+      * By doing this, we do not affect the crash recovery code path
+      * when we are not in archive_mode
+      *
+      * We take the archived file because, at the point we took backup,
+      * the current xlog will most probably be only partially full,
+      * so we MUST refer to the full version of this file and
+      * NOT the version of the file that exists with the backup.
+      *
+      * We could try to optimize this slightly by checking the local
+      * copy lastchange timestamp against the archived copy,
+      * but we have no API to do this, nor can we guarantee that the
+      * lastchange timestamp was preserved correctly when we copied
+      * to archive. Our aim is robustness, so we elect not to do this.
+      *
+      * Try to copy full xlog from archive to pg_xlog, if it is available
+      * If that succeeds, we pass the RecoveryXlog filepath back for opening
+      * If that fails, then we try to read a local file if one exists.
+      * This allows us to cater for situations where the current xlog
+      * is still available locally and hasn't yet made it to archive.
+      * This could happen if:
+      * - we decide to recover database to undo user data changes
+      * - we have XLogDir on a different disk and the main DataDir drive
+      *   fails, leaving us with just the XLogDir
+      *
+      * Notice that we don't actually overwrite any files when we copy back
+      * from archive because the XLogArchRestoreProgram may inadvertently
+      * restore inappropriate xlogs, or they may be corrupt, so we may
+      * have to fallback to the segments remaining in current XLogDir later.
+      * The copy-from-archive xlog is always the same, ensuring that we
+      * don't run out of disk space on long recoveries.
+      *
+      * [EMAIL PROTECTED]
+      */
+ 
+     snprintf(recoveryXlog, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
+     snprintf(lastrecoXlog, MAXPGPATH, "%s/LASTRECOXLOG", XLogDir);
+ 
+     if (stat(recoveryXlog, &stat_buf) == 0) {
+         /*
+          * save a copy of the last xlog, before we try to restore
+          * if the restore fails, we will need it to become current xlog
+          */
+         rc = rename(recoveryXlog, lastrecoXlog);
+         if (rc != 0)
+             elog(LOG, "rename failed \"%s\" \"%s\"",recoveryXlog, lastrecoXlog);
+         /*
+          * if it fails, ignore it - we'll create one soon...
+          */
+     }
+ 
+     /*
+      * Copy xlog from archive_dest to XLogDir
+      */
+     snprintf(restoreXlog, sizeof(restoreXlog), "%08X%08X", log, seg);
+     snprintf(xlogRestoreCmd, MAXPGPATH, XLogArchRestoreProgram,
+              XLogArchiveDest, restoreXlog, recoveryXlog);
+     if (XLogArchiveDEBUG)
+         elog(LOG, "redo: system(%s)", xlogRestoreCmd);
+ 
+     rc = system(xlogRestoreCmd);
+     if (rc != 0) {
+         /*
+          * remember, we rollforward UNTIL the restore fails
+          * so failure here is just part of the process...
+          * that makes it difficult to determine whether the restore
+          * failed because there isn't an archive to restore, or
+          * because the administrator has specified the restore
+          * program incorrectly...
+          * we could try to restore the testfile that the archiver writes
+          * when it starts up, but the absence of that file isn't
+          * very reliable evidence that the restore itself is broken,
+          * so just trust that the administrator has it correctly,
+          * XXX enhance that later
+          */
+         elog(LOG, "redo: cannot restore \"%s\" from archive", restoreXlog);
+         /*
+          * if an archived file is not available, there might just be
+          * a partially full version of this file still in XLogDir
+          * so return this as the filename to open.
+          * In many recovery scenarios we expect this to fail also...
+          */
+         snprintf(recoveryXlog, MAXPGPATH, "%s/%s", XLogDir, restoreXlog);
+         UseArchiveFirst = false;
+         if (stat(recoveryXlog, &stat_buf) == 0) {
+             elog(LOG, "redo: archive chain ends; using local copy of \"%s\"", restoreXlog);
+         }
+         /*
+          * if this file isn't available, then we need to setup the previous
+          * restored xlog to be the last and current xlog, if it exists
+          * (it was restored as RECOVERYXLOG, not under its normal xlog name);
+          * also remember to output a corresponding archive_status of .done
+          * NOTE(review): the log==0 test limits this to log file 0 - confirm
+          */
+         else if ((stat(lastrecoXlog, &stat_buf) == 0) && log==0 && seg > 0) {
+             prevlog = log;
+             prevseg = seg;
+             PrevLogSeg(prevlog, prevseg);
+             XLogFileName(tmppath, prevlog, prevseg);
+             elog(LOG, "redo: moving last restored xlog to \"%s\"", tmppath);
+             rc = rename(lastrecoXlog, tmppath);
+             if (rc != 0) {
+                 /* report at once: an intervening elog could clobber errno */
+                 ereport(PANIC,
+                         (errcode_for_file_access(),
+                          errmsg("could not rename file \"%s\" to \"%s\" (log file %u, segment %u): %m",
+                                 lastrecoXlog, tmppath, prevlog, prevseg)));
+             }
+ 
+             /*
+              * write out an archive_status file for previous xlog
+              * to allow xlog to be recycled when recovered database
+              * is all up and working again
+              * ...looks wrong, but checkpointer is smart enough
+              * not to archive the current xlog!
+              */
+             snprintf(tmpXlog, sizeof(tmpXlog), "%08X%08X", prevlog, prevseg);
+             snprintf(tmppath, MAXPGPATH, "%s/%s.done", RLogDir, tmpXlog);
+             rlogFD = AllocateFile(tmppath, "w");
+             if (rlogFD == NULL)
+                 ereport(ERROR,
+                         (errcode_for_file_access(),
+                          errmsg("could not write archive_status file \"%s\": %m",
+                                 tmppath)));
+             FreeFile(rlogFD);
+         }
+         /*
+          * there is NO else here...we just return the filename
+          * knowing that it isn't there...which then throws the usual error,
+          * will end with a clear message as to why...but not a problem
+          */
+     }
+     else {
+         /* restore success */
+         /*
+          * if backup restored an xlog, yet we didnt use the local copy
+          * because we used the xlog version of that name from the
+          * archive instead, we need to write out an archive_status for
+          * it to show it can be recycled later
+          */
+         XLogFileName(tmppath, log, seg);
+         if (stat(tmppath, &stat_buf) == 0) {
+             snprintf(tmpXlog, sizeof(tmpXlog), "%08X%08X", log, seg);
+             snprintf(tmppath, MAXPGPATH, "%s/%s.done", RLogDir, tmpXlog);
+             rlogFD = AllocateFile(tmppath, "w");
+             if (rlogFD == NULL)
+                 ereport(ERROR,
+                         (errcode_for_file_access(),
+                          errmsg("could not write archive_status file \"%s\": %m",
+                                 tmppath)));
+             FreeFile(rlogFD);
+         }
+         elog(LOG, "redo: restored log file \"%s\" from archive", restoreXlog);
+     }
+     strcpy(path, recoveryXlog);
+     return;
+ }
+ 
+ /*
   * Preallocate log files beyond the specified log endpoint, according to
   * the XLOGfile user parameter.
   */
***************
*** 1746,1751 ****
--- 2079,2085 ----
  	struct dirent *xlde;
  	char		lastoff[32];
  	char		path[MAXPGPATH];
+     bool        recycle=false;
  
  	XLByteToPrevSeg(endptr, endlogId, endlogSeg);
  
***************
*** 1761,1785 ****
  	errno = 0;
  	while ((xlde = readdir(xldir)) != NULL)
  	{
  		if (strlen(xlde->d_name) == 16 &&
  			strspn(xlde->d_name, "0123456789ABCDEF") == 16 &&
  			strcmp(xlde->d_name, lastoff) <= 0)
  		{
  			snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
! 			if (XLOG_archive_dir[0])
! 			{
! 				ereport(LOG,
! 						(errmsg("archiving transaction log file \"%s\"",
! 								xlde->d_name)));
! 				elog(WARNING, "archiving log files is not implemented");
! 			}
! 			else
  			{
  				/*
  				 * Before deleting the file, see if it can be recycled as
  				 * a future log segment.  We allow recycling segments up
! 				 * to XLOGfileslop segments beyond the current XLOG
! 				 * location.
  				 */
  				if (InstallXLogFileSegment(endlogId, endlogSeg, path,
  										   true, XLOGfileslop,
--- 2095,2137 ----
  	errno = 0;
  	while ((xlde = readdir(xldir)) != NULL)
  	{
+ 		/* if correct length and alphanumeric makeup of file looks correct
+ 		 * use the alphanumeric sorting property of the filenames to decide
+ 		 * which ones are earlier than the lastoff transaction log
+ 		 * ...maybe should read lastwrite datetime of lastoff, then check that
+ 		 * only files last written earlier than this are removed/recycled
+ 		 */
  		if (strlen(xlde->d_name) == 16 &&
  			strspn(xlde->d_name, "0123456789ABCDEF") == 16 &&
  			strcmp(xlde->d_name, lastoff) <= 0)
  		{
  			snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
! 			if (XLogArchiveMode) {
!                 if (InRecoveryCleanup)
!                     /*
!                      * this allows recycling of transaction logs
!                      * during the shutdown checkpoint at end of recovery
!                      * - we may have restored logs that were not used
!                      * in the recovery sequence, and so will not have
!                      * had an archive_status file written for them.
!                      * - end-of-recovery doesn't clean up ALL xlogs,
!                      * which is why we also write archive_status files
!                      * as well as doing this
!                      */
!                     recycle=true;
!                 else
!                     recycle=XLogArchiveDone(xlde->d_name);
!             }
!             else
!                 recycle=true;	/* not archiving: nothing to wait for, recycle as before */
! 
! 			if ( recycle )
  			{
  				/*
  				 * Before deleting the file, see if it can be recycled as
  				 * a future log segment.  We allow recycling segments up
! 				 * until there are XLOGfileslop segments beyond the
! 				 * current XLOG location, otherwise they are removed.
  				 */
  				if (InstallXLogFileSegment(endlogId, endlogSeg, path,
  										   true, XLOGfileslop,
***************
*** 1793,1802 ****
  				{
  					/* No need for any more future segments... */
  					ereport(LOG,
! 						  (errmsg("removing transaction log file \"%s\"",
  								  xlde->d_name)));
  					unlink(path);
  				}
  			}
  		}
  		errno = 0;
--- 2145,2155 ----
  				{
  					/* No need for any more future segments... */
  					ereport(LOG,
! 						  (errmsg("too many transaction log files, removing \"%s\"",
  								  xlde->d_name)));
  					unlink(path);
  				}
+                 XLogArchiveCleanup(xlde->d_name);
  			}
  		}
  		errno = 0;
***************
*** 2254,2259 ****
--- 2607,2613 ----
  {
  	/* Init XLOG file paths */
  	snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
+ 	snprintf(RLogDir, MAXPGPATH, "%s/archive_status", XLogDir);
  	snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
  }
  
***************
*** 2770,2775 ****
--- 3124,3184 ----
  }
  
  /*
+  * read in restore command from recovery.conf
+  *
+  * Only the first line of the file is examined.  It must look like
+  *		restore_program = 'cp %s/%s %s'
+  * and the text between the single quotes is saved in
+  * XLogArchRestoreProgram.  Anything else is reported as a FATAL error.
+  *
+  * XXX longer term intention is to expand this to
+  * cater for additional parameters and controls
+  * possibly using a bison grammar to control it
+  */
+ static void
+ readRecoveryCommandFile(void)
+ {
+ 	FILE	   *fd;
+ 	char	   *name;
+ 	char	   *value;
+ 	char		cmdline[MAXPGPATH];
+ 	bool		syntax_error = true;
+ 
+ 	fd = AllocateFile(recoveryCommandFile, "r");
+ 	if (fd == NULL)				/* ereport(FATAL) does not return */
+ 		ereport(FATAL,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not open recovery command file \"%s\": %m",
+ 						recoveryCommandFile)));
+ 
+ 	/* an empty or unreadable file would leave cmdline unset, so make it "" */
+ 	if (fgets(cmdline, sizeof(cmdline), fd) == NULL)
+ 		cmdline[0] = '\0';
+ 	FreeFile(fd);
+ 
+ 	/* split at the quotes: |restore_program = | then |command string| */
+ 	name = strtok(cmdline, "'");
+ 	value = strtok(NULL, "'");
+ 
+ 	if (name != NULL && value != NULL)
+ 	{
+ 		/* isolate the parameter name; NULL here means there was no name */
+ 		name = strtok(cmdline, " =");
+ 		if (name != NULL && strcmp(name, "restore_program") == 0)
+ 		{
+ 			snprintf(XLogArchRestoreProgram, sizeof(XLogArchRestoreProgram),
+ 					 "%s", value);
+ 			syntax_error = false;
+ 		}
+ 	}
+ 
+ 	if (syntax_error)
+ 		ereport(FATAL,
+ 				(errmsg("syntax error in \"%s\"", recoveryCommandFile),
+ 				 errhint("Syntax needs to be like \"restore_program = 'cp %%s/%%s %%s'\"")));
+ }
+ 
+ /*
   * This must be called ONCE during postmaster or standalone-backend startup
   */
  void
***************
*** 2785,2790 ****
--- 3194,3200 ----
  	XLogRecord *record;
  	char	   *buffer;
  	uint32		freespace;
+    	struct stat stat_buf;
  
  	/* Use malloc() to ensure record buffer is MAXALIGNED */
  	buffer = (char *) malloc(_INTL_MAXLOGRECSZ);
***************
*** 2831,2836 ****
--- 3241,3278 ----
  		pg_usleep(60000000L);
  #endif
  
+     /*
+      * Check now for recovery.conf
+      *
+      * if this file exists, it demonstrates the intention of the administrator
+      * to recover this database using archived xlogs
+      *
+      * we do this now because the first xlog is about to be opened for the
+      * first time. We've read the checkpoint pointer from the control file
+      * and we are about to use that to open the xlog it points to, and
+      * will begin rollforward recovery from that point
+      */
+   	snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
+     if (stat(recoveryCommandFile, &stat_buf) == 0) {
+ 
+      	readRecoveryCommandFile();
+         /*
+          * clearly indicate our state
+          */
+         InArchiveRecovery = true;
+         /*
+          * set initial state for checking transaction logs
+          * this may change if the archive runs dry while still InArchiveRecovery
+          */
+         UseArchiveFirst = true;
+ 
+     	ereport(LOG,
+     		(errmsg("recovery command file found...starting archive recovery")));
+ 
+         if (XLogArchiveDEBUG)
+             elog(LOG,"restore_program = \"%s\"", XLogArchRestoreProgram);
+     }
+ 
  	/*
  	 * Get the last valid checkpoint record.  If the latest one according
  	 * to pg_control is broken, try the next-to-last one.
***************
*** 2861,2872 ****
  	LastRec = RecPtr = checkPointLoc;
  	memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
  	wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
! 
  	ereport(LOG,
  			(errmsg("redo record is at %X/%X; undo record is at %X/%X; shutdown %s",
  					checkPoint.redo.xlogid, checkPoint.redo.xrecoff,
  					checkPoint.undo.xlogid, checkPoint.undo.xrecoff,
! 					wasShutdown ? "TRUE" : "FALSE")));
  	ereport(LOG,
  			(errmsg("next transaction ID: %u; next OID: %u",
  					checkPoint.nextXid, checkPoint.nextOid)));
--- 3303,3322 ----
  	LastRec = RecPtr = checkPointLoc;
  	memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
  	wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
!     /*
!      * we report the state of the control_file, not the checkpoint, why?
!      * wasShutdown refers to whether the last checkpoint was a 
!      * shutdown checkpoint, NOT whether the database was shutdown
!      * correctly according to control file. This distinction is only
!      * important InArchiveRecovery, since otherwise we could
!      * report that the database was shutdown, when the control file disagrees
!      */
  	ereport(LOG,
  			(errmsg("redo record is at %X/%X; undo record is at %X/%X; shutdown %s",
  					checkPoint.redo.xlogid, checkPoint.redo.xrecoff,
  					checkPoint.undo.xlogid, checkPoint.undo.xrecoff,
!                     (ControlFile->state == DB_SHUTDOWNED) ? "TRUE" : "FALSE")));
! 
  	ereport(LOG,
  			(errmsg("next transaction ID: %u; next OID: %u",
  					checkPoint.nextXid, checkPoint.nextOid)));
***************
*** 2914,2919 ****
--- 3364,3373 ----
  	if (InRecovery)
  	{
  		int			rmid;
+     	char		reclogpath[MAXPGPATH];
+         bool        recovery_debug_log = false;
+         int         reclogFD = -1;
+         char        *recbuf = NULL;
  
  		ereport(LOG,
  				(errmsg("database system was not properly shut down; "
***************
*** 2922,2927 ****
--- 3376,3382 ----
  		ControlFile->time = time(NULL);
  		UpdateControlFile();
  
+ 
  		/* Start up the recovery environment */
  		XLogInitRelationCache();
  
***************
*** 2933,2939 ****
  
  		/* Is REDO required ? */
  		if (XLByteLT(checkPoint.redo, RecPtr))
! 			record = ReadRecord(&(checkPoint.redo), PANIC, buffer);
  		else
  		{
  			/* read past CheckPoint record */
--- 3388,3394 ----
  
  		/* Is REDO required ? */
  		if (XLByteLT(checkPoint.redo, RecPtr))
!     		record = ReadRecord(&(checkPoint.redo), PANIC, buffer);
  		else
  		{
  			/* read past CheckPoint record */
***************
*** 2946,2951 ****
--- 3401,3423 ----
  			ereport(LOG,
  					(errmsg("redo starts at %X/%X",
  							ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
+ #ifdef WAL_DEBUG
+             if (XLOG_DEBUG)
+                recovery_debug_log = true;
+ #endif
+             if (XLogArchiveDEBUG)            
+                recovery_debug_log = true;
+ 
+             if (recovery_debug_log) {
+            		recbuf = (char *) malloc(BLCKSZ);
+                 snprintf(reclogpath, MAXPGPATH, "%s/recovery.log", DataDir);
+                 unlink(reclogpath);
+                 reclogFD = BasicOpenFile(reclogpath, O_RDWR | O_CREAT | O_EXCL,
+ 					S_IRUSR | S_IWUSR);
+                 if (reclogFD < 0)
+                     recovery_debug_log = false;
+             }
+ 
  			do
  			{
  				/* nextXid must be beyond record's xid */
***************
*** 2956,2976 ****
  					TransactionIdAdvance(ShmemVariableCache->nextXid);
  				}
  
! #ifdef WAL_DEBUG
! 				if (XLOG_DEBUG)
  				{
! 					char		buf[8192];
! 
! 					sprintf(buf, "REDO @ %X/%X; LSN %X/%X: ",
  							ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
  							EndRecPtr.xlogid, EndRecPtr.xrecoff);
! 					xlog_outrec(buf, record);
! 					strcat(buf, " - ");
! 					RmgrTable[record->xl_rmid].rm_desc(buf,
  								record->xl_info, XLogRecGetData(record));
! 					elog(LOG, "%s", buf);
  				}
- #endif
  
  				if (record->xl_info & XLR_BKP_BLOCK_MASK)
  					RestoreBkpBlocks(record, EndRecPtr);
--- 3428,3445 ----
  					TransactionIdAdvance(ShmemVariableCache->nextXid);
  				}
  
! 				if (recovery_debug_log)
  				{
! 					sprintf(recbuf, "\nREDO @ %X/%X; LSN %X/%X: ",
  							ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
  							EndRecPtr.xlogid, EndRecPtr.xrecoff);
! 					xlog_outrec(recbuf, record);
! 					strcat(recbuf, " - ");
! 					RmgrTable[record->xl_rmid].rm_desc(recbuf,
  								record->xl_info, XLogRecGetData(record));
!                     
!                     write(reclogFD, recbuf, strlen(recbuf));
  				}
  
  				if (record->xl_info & XLR_BKP_BLOCK_MASK)
  					RestoreBkpBlocks(record, EndRecPtr);
***************
*** 2978,2988 ****
--- 3447,3467 ----
  				RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
  				record = ReadRecord(NULL, LOG, buffer);
  			} while (record != NULL);
+ 
+             if (reclogFD >= 0)
+                 close(reclogFD);
+             /* recbuf is allocated even if the open failed; free(NULL) is a no-op */
+             free(recbuf);
+ 
  			ereport(LOG,
  					(errmsg("redo done at %X/%X",
  							ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
  			LastRec = ReadRecPtr;
  			InRedo = false;
+             if (InArchiveRecovery)
+                 InRecoveryCleanup = true;	/* only archive recovery needs the cleanup pass */
+             UseArchiveFirst = false;
+             InArchiveRecovery = false;
  		}
  		else
  			ereport(LOG,
***************
*** 3147,3152 ****
--- 3626,3637 ----
  	 * Okay, we're officially UP.
  	 */
  	InRecovery = false;
+     if (InRecoveryCleanup) {
+         unlink(recoveryCommandFile);
+         InRecoveryCleanup = false;
+ 		ereport(LOG,
+ 			(errmsg("archive recovery complete")));
+     }
  
  	ControlFile->state = DB_IN_PRODUCTION;
  	ControlFile->time = time(NULL);
***************
*** 3701,3707 ****
  		strcat(buf, "UNKNOWN");
  }
  
- #ifdef WAL_DEBUG
  static void
  xlog_outrec(char *buf, XLogRecord *record)
  {
--- 4186,4191 ----
***************
*** 3726,3733 ****
  	sprintf(buf + strlen(buf), ": %s",
  			RmgrTable[record->xl_rmid].rm_name);
  }
- #endif /* WAL_DEBUG */
- 
  
  /*
   * GUC support
--- 4210,4215 ----
---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

Reply via email to