On 13.01.2011 23:32, Heikki Linnakangas wrote:
Anyway, here's an updated patch with all the known issues fixed.

Another updated patch. Fixed bitrot, and addressed the privilege issue Fujii-san raised here: http://archives.postgresql.org/message-id/4d380560.3040...@enterprisedb.com. I changed the privilege checks so that pg_start/stop_backup() functions require superuser privileges again, but not for a base backup via the replication protocol (replication privilege is needed to establish a replication connection to begin with).

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 60,67 ****
  
  
  /* File path names (all relative to $PGDATA) */
- #define BACKUP_LABEL_FILE		"backup_label"
- #define BACKUP_LABEL_OLD		"backup_label.old"
  #define RECOVERY_COMMAND_FILE	"recovery.conf"
  #define RECOVERY_COMMAND_DONE	"recovery.done"
  
--- 60,65 ----
***************
*** 338,344 **** typedef struct XLogCtlInsert
  	XLogPageHeader currpage;	/* points to header of block in cache */
  	char	   *currpos;		/* current insertion point in cache */
  	XLogRecPtr	RedoRecPtr;		/* current redo point for insertions */
! 	bool		forcePageWrites;	/* forcing full-page writes for PITR? */
  } XLogCtlInsert;
  
  /*
--- 336,343 ----
  	XLogPageHeader currpage;	/* points to header of block in cache */
  	char	   *currpos;		/* current insertion point in cache */
  	XLogRecPtr	RedoRecPtr;		/* current redo point for insertions */
! 	int			forcePageWrites;	/* forcing full-page writes for PITR? */
! 	bool		exclusiveBackup;	/* a backup was started with pg_start_backup() */
  } XLogCtlInsert;
  
  /*
***************
*** 8352,8367 **** pg_start_backup(PG_FUNCTION_ARGS)
  
  	backupidstr = text_to_cstring(backupid);
  
! 	startpoint = do_pg_start_backup(backupidstr, fast);
  
  	snprintf(startxlogstr, sizeof(startxlogstr), "%X/%X",
  			 startpoint.xlogid, startpoint.xrecoff);
  	PG_RETURN_TEXT_P(cstring_to_text(startxlogstr));
  }
  
  XLogRecPtr
! do_pg_start_backup(const char *backupidstr, bool fast)
  {
  	XLogRecPtr	checkpointloc;
  	XLogRecPtr	startpoint;
  	pg_time_t	stamp_time;
--- 8351,8388 ----
  
  	backupidstr = text_to_cstring(backupid);
  
! 	startpoint = do_pg_start_backup(backupidstr, fast, NULL);
  
  	snprintf(startxlogstr, sizeof(startxlogstr), "%X/%X",
  			 startpoint.xlogid, startpoint.xrecoff);
  	PG_RETURN_TEXT_P(cstring_to_text(startxlogstr));
  }
  
+ /*
+  * do_pg_start_backup is the workhorse of the user-visible pg_start_backup()
+  * function. It creates the necessary starting checkpoint and constructs the
+  * backup label file.
+  * 
+  * There are two kind of backups: exclusive and non-exclusive. An exclusive
+  * backup is started with pg_start_backup(), and there can be only one active
+  * at a time. The backup label file of an exclusive backup is written to
+  * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+  *
+  * A non-exclusive backup is used for the streaming base backups (see
+  * src/backend/replication/basebackup.c). The difference to exclusive backups
+  * is that the backup label file is not written to disk. Instead, its would-be
+  * contents are returned in *labelfile, and the caller is responsible for
+  * including it in the backup archive as 'backup_label'. There can be many
+  * non-exclusive backups active at the same time, and they don't conflict
+  * with an exclusive backup either.
+  *
+  * Every successfully started non-exclusive backup must be stopped by calling
+  * do_pg_stop_backup() or do_pg_abort_backup().
+  */
  XLogRecPtr
! do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
  {
+ 	bool		exclusive = (labelfile == NULL);
  	XLogRecPtr	checkpointloc;
  	XLogRecPtr	startpoint;
  	pg_time_t	stamp_time;
***************
*** 8371,8381 **** do_pg_start_backup(const char *backupidstr, bool fast)
  	uint32		_logSeg;
  	struct stat stat_buf;
  	FILE	   *fp;
  
! 	if (!superuser() && !is_authenticated_user_replication_role())
  		ereport(ERROR,
  				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
! 				 errmsg("must be superuser or replication role to run a backup")));
  
  	if (RecoveryInProgress())
  		ereport(ERROR,
--- 8392,8403 ----
  	uint32		_logSeg;
  	struct stat stat_buf;
  	FILE	   *fp;
+ 	StringInfoData labelfbuf;
  
! 	if (exclusive && !superuser())
  		ereport(ERROR,
  				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
! 				 errmsg("must be superuser to run a backup")));
  
  	if (RecoveryInProgress())
  		ereport(ERROR,
***************
*** 8389,8394 **** do_pg_start_backup(const char *backupidstr, bool fast)
--- 8411,8422 ----
  			  errmsg("WAL level not sufficient for making an online backup"),
  				 errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
  
+ 	if (strlen(backupidstr) > MAXPGPATH)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("backup label too long (max %d bytes)",
+ 						MAXPGPATH)));
+ 
  	/*
  	 * Mark backup active in shared memory.  We must do full-page WAL writes
  	 * during an on-line backup even if not doing so at other times, because
***************
*** 8407,8421 **** do_pg_start_backup(const char *backupidstr, bool fast)
  	 * ensure adequate interlocking against XLogInsert().
  	 */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	if (XLogCtl->Insert.forcePageWrites)
  	{
! 		LWLockRelease(WALInsertLock);
! 		ereport(ERROR,
! 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
! 				 errmsg("a backup is already in progress"),
! 				 errhint("Run pg_stop_backup() and try again.")));
  	}
! 	XLogCtl->Insert.forcePageWrites = true;
  	LWLockRelease(WALInsertLock);
  
  	/*
--- 8435,8453 ----
  	 * ensure adequate interlocking against XLogInsert().
  	 */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	if (exclusive)
  	{
! 		if (XLogCtl->Insert.exclusiveBackup)
! 		{
! 			LWLockRelease(WALInsertLock);
! 			ereport(ERROR,
! 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
! 					 errmsg("a backup is already in progress"),
! 					 errhint("Run pg_stop_backup() and try again.")));
! 		}
! 		XLogCtl->Insert.exclusiveBackup = true;
  	}
! 	XLogCtl->Insert.forcePageWrites++;
  	LWLockRelease(WALInsertLock);
  
  	/*
***************
*** 8432,8438 **** do_pg_start_backup(const char *backupidstr, bool fast)
  	RequestXLogSwitch();
  
  	/* Ensure we release forcePageWrites if fail below */
! 	PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) 0);
  	{
  		/*
  		 * Force a CHECKPOINT.	Aside from being necessary to prevent torn
--- 8464,8470 ----
  	RequestXLogSwitch();
  
  	/* Ensure we release forcePageWrites if fail below */
! 	PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
  	{
  		/*
  		 * Force a CHECKPOINT.	Aside from being necessary to prevent torn
***************
*** 8459,8512 **** do_pg_start_backup(const char *backupidstr, bool fast)
  		XLByteToSeg(startpoint, _logId, _logSeg);
  		XLogFileName(xlogfilename, ThisTimeLineID, _logId, _logSeg);
  
  		/* Use the log timezone here, not the session timezone */
  		stamp_time = (pg_time_t) time(NULL);
  		pg_strftime(strfbuf, sizeof(strfbuf),
  					"%Y-%m-%d %H:%M:%S %Z",
  					pg_localtime(&stamp_time, log_timezone));
  
  		/*
! 		 * Check for existing backup label --- implies a backup is already
! 		 * running.  (XXX given that we checked forcePageWrites above, maybe
! 		 * it would be OK to just unlink any such label file?)
  		 */
! 		if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
  		{
! 			if (errno != ENOENT)
  				ereport(ERROR,
  						(errcode_for_file_access(),
! 						 errmsg("could not stat file \"%s\": %m",
  								BACKUP_LABEL_FILE)));
  		}
  		else
! 			ereport(ERROR,
! 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
! 					 errmsg("a backup is already in progress"),
! 					 errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
! 							 BACKUP_LABEL_FILE)));
! 
! 		/*
! 		 * Okay, write the file
! 		 */
! 		fp = AllocateFile(BACKUP_LABEL_FILE, "w");
! 		if (!fp)
! 			ereport(ERROR,
! 					(errcode_for_file_access(),
! 					 errmsg("could not create file \"%s\": %m",
! 							BACKUP_LABEL_FILE)));
! 		fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
! 				startpoint.xlogid, startpoint.xrecoff, xlogfilename);
! 		fprintf(fp, "CHECKPOINT LOCATION: %X/%X\n",
! 				checkpointloc.xlogid, checkpointloc.xrecoff);
! 		fprintf(fp, "START TIME: %s\n", strfbuf);
! 		fprintf(fp, "LABEL: %s\n", backupidstr);
! 		if (fflush(fp) || ferror(fp) || FreeFile(fp))
! 			ereport(ERROR,
! 					(errcode_for_file_access(),
! 					 errmsg("could not write file \"%s\": %m",
! 							BACKUP_LABEL_FILE)));
  	}
! 	PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) 0);
  
  	/*
  	 * We're done.  As a convenience, return the starting WAL location.
--- 8491,8557 ----
  		XLByteToSeg(startpoint, _logId, _logSeg);
  		XLogFileName(xlogfilename, ThisTimeLineID, _logId, _logSeg);
  
+ 		/*
+ 		 * Construct backup label file 
+ 		 */
+ 		initStringInfo(&labelfbuf);
+ 
  		/* Use the log timezone here, not the session timezone */
  		stamp_time = (pg_time_t) time(NULL);
  		pg_strftime(strfbuf, sizeof(strfbuf),
  					"%Y-%m-%d %H:%M:%S %Z",
  					pg_localtime(&stamp_time, log_timezone));
+ 		appendStringInfo(&labelfbuf, "START WAL LOCATION: %X/%X (file %s)\n",
+ 						 startpoint.xlogid, startpoint.xrecoff, xlogfilename);
+ 		appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n",
+ 						 checkpointloc.xlogid, checkpointloc.xrecoff);
+ 		appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
+ 		appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
  
  		/*
! 		 * Okay, write the file, or return its contents to caller.
  		 */
! 		if (exclusive)
  		{
! 			/*
! 			 * Check for existing backup label --- implies a backup is already
! 			 * running.  (XXX given that we checked exclusiveBackup above, maybe
! 			 * it would be OK to just unlink any such label file?)
! 			 */
! 			if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
! 			{
! 				if (errno != ENOENT)
! 					ereport(ERROR,
! 							(errcode_for_file_access(),
! 							 errmsg("could not stat file \"%s\": %m",
! 									BACKUP_LABEL_FILE)));
! 			}
! 			else
! 				ereport(ERROR,
! 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
! 						 errmsg("a backup is already in progress"),
! 						 errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
! 								 BACKUP_LABEL_FILE)));
! 
! 			fp = AllocateFile(BACKUP_LABEL_FILE, "w");
! 
! 			if (!fp)
  				ereport(ERROR,
  						(errcode_for_file_access(),
! 						 errmsg("could not create file \"%s\": %m",
! 								BACKUP_LABEL_FILE)));
! 			fwrite(labelfbuf.data, labelfbuf.len, 1, fp);
! 			if (fflush(fp) || ferror(fp) || FreeFile(fp))
! 				ereport(ERROR,
! 						(errcode_for_file_access(),
! 						 errmsg("could not write file \"%s\": %m",
  								BACKUP_LABEL_FILE)));
+ 			pfree(labelfbuf.data);
  		}
  		else
! 			*labelfile = labelfbuf.data;
  	}
! 	PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
  
  	/*
  	 * We're done.  As a convenience, return the starting WAL location.
***************
*** 8518,8526 **** do_pg_start_backup(const char *backupidstr, bool fast)
  static void
  pg_start_backup_callback(int code, Datum arg)
  {
! 	/* Turn off forcePageWrites on failure */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	XLogCtl->Insert.forcePageWrites = false;
  	LWLockRelease(WALInsertLock);
  }
  
--- 8563,8578 ----
  static void
  pg_start_backup_callback(int code, Datum arg)
  {
! 	bool exclusive = DatumGetBool(arg);
! 
! 	/* Decrement forcePageWrites on failure */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	if (exclusive)
! 	{
! 		Assert(XLogCtl->Insert.exclusiveBackup);
! 		XLogCtl->Insert.exclusiveBackup = false;
! 	}
! 	XLogCtl->Insert.forcePageWrites--;
  	LWLockRelease(WALInsertLock);
  }
  
***************
*** 8543,8558 **** pg_stop_backup(PG_FUNCTION_ARGS)
  	XLogRecPtr	stoppoint;
  	char		stopxlogstr[MAXFNAMELEN];
  
! 	stoppoint = do_pg_stop_backup();
  
  	snprintf(stopxlogstr, sizeof(stopxlogstr), "%X/%X",
  			 stoppoint.xlogid, stoppoint.xrecoff);
  	PG_RETURN_TEXT_P(cstring_to_text(stopxlogstr));
  }
  
  XLogRecPtr
! do_pg_stop_backup(void)
  {
  	XLogRecPtr	startpoint;
  	XLogRecPtr	stoppoint;
  	XLogRecData rdata;
--- 8595,8618 ----
  	XLogRecPtr	stoppoint;
  	char		stopxlogstr[MAXFNAMELEN];
  
! 	stoppoint = do_pg_stop_backup(NULL);
  
  	snprintf(stopxlogstr, sizeof(stopxlogstr), "%X/%X",
  			 stoppoint.xlogid, stoppoint.xrecoff);
  	PG_RETURN_TEXT_P(cstring_to_text(stopxlogstr));
  }
  
+ /*
+  * do_pg_start_backup is the workhorse of the user-visible pg_stop_backup()
+  * function.
+ 
+  * If labelfile is NULL, this stops an exclusive backup. Otherwise this stops
+  * the non-exclusive backup specified by 'labelfile'.
+  */
  XLogRecPtr
! do_pg_stop_backup(char *labelfile)
  {
+ 	bool		exclusive = (labelfile == NULL);
  	XLogRecPtr	startpoint;
  	XLogRecPtr	stoppoint;
  	XLogRecData rdata;
***************
*** 8568,8582 **** do_pg_stop_backup(void)
  	FILE	   *lfp;
  	FILE	   *fp;
  	char		ch;
- 	int			ich;
  	int			seconds_before_warning;
  	int			waits = 0;
  	bool		reported_waiting = false;
  
! 	if (!superuser() && !is_authenticated_user_replication_role())
  		ereport(ERROR,
  				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
! 				 (errmsg("must be superuser or replication role to run a backup"))));
  
  	if (RecoveryInProgress())
  		ereport(ERROR,
--- 8628,8642 ----
  	FILE	   *lfp;
  	FILE	   *fp;
  	char		ch;
  	int			seconds_before_warning;
  	int			waits = 0;
  	bool		reported_waiting = false;
+ 	char	   *remaining;
  
! 	if (exclusive && !superuser())
  		ereport(ERROR,
  				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
! 				 errmsg("must be superuser to run a backup")));
  
  	if (RecoveryInProgress())
  		ereport(ERROR,
***************
*** 8591,8628 **** do_pg_stop_backup(void)
  				 errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
  
  	/*
! 	 * OK to clear forcePageWrites
  	 */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	XLogCtl->Insert.forcePageWrites = false;
  	LWLockRelease(WALInsertLock);
  
! 	/*
! 	 * Open the existing label file
! 	 */
! 	lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
! 	if (!lfp)
  	{
! 		if (errno != ENOENT)
  			ereport(ERROR,
  					(errcode_for_file_access(),
  					 errmsg("could not read file \"%s\": %m",
  							BACKUP_LABEL_FILE)));
! 		ereport(ERROR,
! 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
! 				 errmsg("a backup is not in progress")));
  	}
  
  	/*
  	 * Read and parse the START WAL LOCATION line (this code is pretty crude,
  	 * but we are not expecting any variability in the file format).
  	 */
! 	if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %24s)%c",
  			   &startpoint.xlogid, &startpoint.xrecoff, startxlogfilename,
  			   &ch) != 4 || ch != '\n')
  		ereport(ERROR,
  				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  				 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
  
  	/*
  	 * Write the backup-end xlog record
--- 8651,8738 ----
  				 errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
  
  	/*
! 	 * OK to decrement forcePageWrites
  	 */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	if (exclusive)
! 	{
! 		if (XLogCtl->Insert.exclusiveBackup)
! 		{
! 			XLogCtl->Insert.forcePageWrites--;
! 			XLogCtl->Insert.exclusiveBackup = false;
! 		}
! 	}
! 	else
! 	{
! 		/*
! 		 * The user-visible pg_start/stop_backup() functions that operate on
! 		 * exclusive backups can be called at any time, but for non-exclusive
! 		 * backups, it is expected that each do_pg_start_backup() call is
! 		 * matched by exactly one do_pg_stop_backup() call.
! 		 */
! 		Assert(XLogCtl->Insert.forcePageWrites > (XLogCtl->Insert.exclusiveBackup ? 1 : 0));
! 		XLogCtl->Insert.forcePageWrites--;
! 	}
  	LWLockRelease(WALInsertLock);
  
! 	if (exclusive)
  	{
! 		/*
! 		 * Read the existing label file into memory.
! 		 */
! 		struct	stat statbuf;
! 		int		r;
! 
! 		if (stat(BACKUP_LABEL_FILE, &statbuf))
! 		{
! 			if (errno != ENOENT)
! 				ereport(ERROR,
! 						(errcode_for_file_access(),
! 						 errmsg("could not stat file \"%s\": %m",
! 								BACKUP_LABEL_FILE)));
! 			ereport(ERROR,
! 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
! 					 errmsg("a backup is not in progress")));
! 		}
! 
! 		lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
! 		if (!lfp)
! 		{
  			ereport(ERROR,
  					(errcode_for_file_access(),
  					 errmsg("could not read file \"%s\": %m",
  							BACKUP_LABEL_FILE)));
! 		}
! 		labelfile = palloc(statbuf.st_size + 1);
! 		r = fread(labelfile, statbuf.st_size, 1, lfp);
! 		labelfile[statbuf.st_size] = '\0';
! 
! 		/*
! 		 * Close and remove the backup label file
! 		 */
! 		if (r != 1 || ferror(lfp) || FreeFile(lfp))
! 			ereport(ERROR,
! 					(errcode_for_file_access(),
! 					 errmsg("could not read file \"%s\": %m",
! 							BACKUP_LABEL_FILE)));
! 		if (unlink(BACKUP_LABEL_FILE) != 0)
! 			ereport(ERROR,
! 					(errcode_for_file_access(),
! 					 errmsg("could not remove file \"%s\": %m",
! 							BACKUP_LABEL_FILE)));
  	}
  
  	/*
  	 * Read and parse the START WAL LOCATION line (this code is pretty crude,
  	 * but we are not expecting any variability in the file format).
  	 */
! 	if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
  			   &startpoint.xlogid, &startpoint.xrecoff, startxlogfilename,
  			   &ch) != 4 || ch != '\n')
  		ereport(ERROR,
  				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  				 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
+ 	remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
  
  	/*
  	 * Write the backup-end xlog record
***************
*** 8665,8672 **** do_pg_stop_backup(void)
  	fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
  			stoppoint.xlogid, stoppoint.xrecoff, stopxlogfilename);
  	/* transfer remaining lines from label to history file */
! 	while ((ich = fgetc(lfp)) != EOF)
! 		fputc(ich, fp);
  	fprintf(fp, "STOP TIME: %s\n", strfbuf);
  	if (fflush(fp) || ferror(fp) || FreeFile(fp))
  		ereport(ERROR,
--- 8775,8781 ----
  	fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
  			stoppoint.xlogid, stoppoint.xrecoff, stopxlogfilename);
  	/* transfer remaining lines from label to history file */
! 	fprintf(fp, "%s", remaining);
  	fprintf(fp, "STOP TIME: %s\n", strfbuf);
  	if (fflush(fp) || ferror(fp) || FreeFile(fp))
  		ereport(ERROR,
***************
*** 8675,8694 **** do_pg_stop_backup(void)
  						histfilepath)));
  
  	/*
- 	 * Close and remove the backup label file
- 	 */
- 	if (ferror(lfp) || FreeFile(lfp))
- 		ereport(ERROR,
- 				(errcode_for_file_access(),
- 				 errmsg("could not read file \"%s\": %m",
- 						BACKUP_LABEL_FILE)));
- 	if (unlink(BACKUP_LABEL_FILE) != 0)
- 		ereport(ERROR,
- 				(errcode_for_file_access(),
- 				 errmsg("could not remove file \"%s\": %m",
- 						BACKUP_LABEL_FILE)));
- 
- 	/*
  	 * Clean out any no-longer-needed history files.  As a side effect, this
  	 * will post a .ready file for the newly created history file, notifying
  	 * the archiver that history file may be archived immediately.
--- 8784,8789 ----
***************
*** 8769,8796 **** do_pg_stop_backup(void)
  /*
   * do_pg_abort_backup: abort a running backup
   *
!  * This does just the most basic steps of pg_stop_backup(), by taking the
   * system out of backup mode, thus making it a lot more safe to call from
   * an error handler.
   */
  void
  do_pg_abort_backup(void)
  {
- 	/*
- 	 * OK to clear forcePageWrites
- 	 */
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	XLogCtl->Insert.forcePageWrites = false;
  	LWLockRelease(WALInsertLock);
- 
- 	/*
- 	 * Remove backup label file
- 	 */
- 	if (unlink(BACKUP_LABEL_FILE) != 0)
- 		ereport(ERROR,
- 				(errcode_for_file_access(),
- 				 errmsg("could not remove file \"%s\": %m",
- 						BACKUP_LABEL_FILE)));
  }
  
  /*
--- 8864,8884 ----
  /*
   * do_pg_abort_backup: abort a running backup
   *
!  * This does just the most basic steps of do_pg_stop_backup(), by taking the
   * system out of backup mode, thus making it a lot more safe to call from
   * an error handler.
+  *
+  * NB: This is only for aborting a non-exclusive backup that doesn't write
+  * backup_label. A backup started with pg_stop_backup() needs to be finished
+  * with pg_stop_backup().
   */
  void
  do_pg_abort_backup(void)
  {
  	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
! 	Assert(XLogCtl->Insert.forcePageWrites > (XLogCtl->Insert.exclusiveBackup ? 1 : 0));
! 	XLogCtl->Insert.forcePageWrites--;
  	LWLockRelease(WALInsertLock);
  }
  
  /*
*** a/src/backend/replication/basebackup.c
--- b/src/backend/replication/basebackup.c
***************
*** 40,52 **** typedef struct
  }	basebackup_options;
  
  
  static int64 sendDir(char *path, int basepathlen, bool sizeonly);
! static void sendFile(char *path, int basepathlen, struct stat * statbuf);
  static void _tarWriteHeader(char *filename, char *linktarget,
  				struct stat * statbuf);
  static void send_int8_string(StringInfoData *buf, int64 intval);
  static void SendBackupHeader(List *tablespaces);
- static void SendBackupDirectory(char *location, char *spcoid);
  static void base_backup_cleanup(int code, Datum arg);
  static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
  static void parse_basebackup_options(List *options, basebackup_options *opt);
--- 40,53 ----
  }	basebackup_options;
  
  
+ static void sendLabelFile(const char *content);
  static int64 sendDir(char *path, int basepathlen, bool sizeonly);
! static void sendFile(char *readfilename, char *tarfilename,
! 		 struct stat * statbuf);
  static void _tarWriteHeader(char *filename, char *linktarget,
  				struct stat * statbuf);
  static void send_int8_string(StringInfoData *buf, int64 intval);
  static void SendBackupHeader(List *tablespaces);
  static void base_backup_cleanup(int code, Datum arg);
  static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
  static void parse_basebackup_options(List *options, basebackup_options *opt);
***************
*** 78,84 **** base_backup_cleanup(int code, Datum arg)
  static void
  perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
  {
! 	do_pg_start_backup(opt->label, opt->fastcheckpoint);
  
  	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
  	{
--- 79,87 ----
  static void
  perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
  {
! 	char *labelfile;
! 
! 	do_pg_start_backup(opt->label, opt->fastcheckpoint, &labelfile);
  
  	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
  	{
***************
*** 128,140 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
  		foreach(lc, tablespaces)
  		{
  			tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
  
! 			SendBackupDirectory(ti->path, ti->oid);
  		}
  	}
  	PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
  
! 	do_pg_stop_backup();
  }
  
  /*
--- 131,162 ----
  		foreach(lc, tablespaces)
  		{
  			tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
+ 			StringInfoData buf;
+ 
+ 			/* Send CopyOutResponse message */
+ 			pq_beginmessage(&buf, 'H');
+ 			pq_sendbyte(&buf, 0);		/* overall format */
+ 			pq_sendint(&buf, 0, 2);		/* natts */
+ 			pq_endmessage(&buf);
+ 
+ 			/*
+ 			 * In the main tar, include the backup_label first.
+ 			 */
+ 			if (ti->path == NULL)
+ 			{
+ 				sendLabelFile(labelfile);
+ 				sendDir(".", 1, false);
+ 			}
+ 			else
+ 				sendDir(ti->path, strlen(ti->path), false);
  
! 			/* Send CopyDone message */
! 			pq_putemptymessage('c');
  		}
  	}
  	PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
  
! 	do_pg_stop_backup(labelfile);
  }
  
  /*
***************
*** 192,199 **** parse_basebackup_options(List *options, basebackup_options *opt)
  /*
   * SendBaseBackup() - send a complete base backup.
   *
!  * The function will take care of running pg_start_backup() and
!  * pg_stop_backup() for the user.
   */
  void
  SendBaseBackup(BaseBackupCmd *cmd)
--- 214,222 ----
  /*
   * SendBaseBackup() - send a complete base backup.
   *
!  * The function will put the system into backup mode like pg_start_backup()
!  * does, so that the backup is consistent even though we read directly from
!  * the filesystem, bypassing the buffer cache.
   */
  void
  SendBaseBackup(BaseBackupCmd *cmd)
***************
*** 316,342 **** SendBackupHeader(List *tablespaces)
  	pq_puttextmessage('C', "SELECT");
  }
  
  static void
! SendBackupDirectory(char *location, char *spcoid)
  {
! 	StringInfoData buf;
  
! 	/* Send CopyOutResponse message */
! 	pq_beginmessage(&buf, 'H');
! 	pq_sendbyte(&buf, 0);		/* overall format */
! 	pq_sendint(&buf, 0, 2);		/* natts */
! 	pq_endmessage(&buf);
  
! 	/* tar up the data directory if NULL, otherwise the tablespace */
! 	sendDir(location == NULL ? "." : location,
! 			location == NULL ? 1 : strlen(location),
! 			false);
  
! 	/* Send CopyDone message */
! 	pq_putemptymessage('c');
! }
  
  
  static int64
  sendDir(char *path, int basepathlen, bool sizeonly)
  {
--- 339,390 ----
  	pq_puttextmessage('C', "SELECT");
  }
  
+ /*
+  * Include a label file with the given content in the output tar stream.
+  */
  static void
! sendLabelFile(const char *content)
  {
! 	struct stat statbuf;
! 	int pad, len;
  
! 	len = strlen(content);
  
! 	/*
! 	 * Construct a stat struct for the backup_label file we're injecting
! 	 * in the tar.
! 	 */
! 	/* Windows doesn't have the concept of uid and gid */
! #ifdef WIN32
! 	statbuf.st_uid = 0;
! 	statbuf.st_gid = 0;
! #else
! 	statbuf.st_uid = geteuid();
! 	statbuf.st_gid = getegid();
! #endif
! 	statbuf.st_mtime = time(NULL);
! 	statbuf.st_mode = S_IRUSR | S_IWUSR;
! 	statbuf.st_size = len;
  
! 	_tarWriteHeader(BACKUP_LABEL_FILE, NULL, &statbuf);
! 	/* Send the contents as a CopyData message */
! 	pq_putmessage('d', content, len);
  
+ 	/* Pad to 512 byte boundary, per tar format requirements */
+ 	pad = ((len + 511) & ~511) - len;
+ 	if (pad > 0)
+ 	{
+ 		char buf[512];
+ 		MemSet(buf, 0, pad);
+ 		pq_putmessage('d', buf, pad);
+ 	}
+ }
  
+ /*
+  * Include all files from the given directory in the output tar stream. If
+  * 'sizeonly' is true, we just calculate a total length and return ig, without
+  * actually sending anything.
+  */
  static int64
  sendDir(char *path, int basepathlen, bool sizeonly)
  {
***************
*** 360,365 **** sendDir(char *path, int basepathlen, bool sizeonly)
--- 408,421 ----
  			continue;
  
  		/*
+ 		 * If there's a backup_label file, it belongs to a backup started by
+ 		 * the user with pg_start_backup(). It is *not* correct for this
+ 		 * backup, our backup_label is injected into the tar separately.
+ 		 */
+ 		if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
+ 			continue;
+ 
+ 		/*
  		 * Check if the postmaster has signaled us to exit, and abort
  		 * with an error in that case. The error handler further up
  		 * will call do_pg_abort_backup() for us.
***************
*** 445,451 **** sendDir(char *path, int basepathlen, bool sizeonly)
  			/* Add size, rounded up to 512byte block */
  			size += ((statbuf.st_size + 511) & ~511);
  			if (!sizeonly)
! 				sendFile(pathbuf, basepathlen, &statbuf);
  			size += 512;		/* Size of the header of the file */
  		}
  		else
--- 501,507 ----
  			/* Add size, rounded up to 512byte block */
  			size += ((statbuf.st_size + 511) & ~511);
  			if (!sizeonly)
! 				sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf);
  			size += 512;		/* Size of the header of the file */
  		}
  		else
***************
*** 503,509 **** _tarChecksum(char *header)
  
  /* Given the member, write the TAR header & send the file */
  static void
! sendFile(char *filename, int basepathlen, struct stat * statbuf)
  {
  	FILE	   *fp;
  	char		buf[32768];
--- 559,565 ----
  
  /* Given the member, write the TAR header & send the file */
  static void
! sendFile(char *readfilename, char *tarfilename, struct stat *statbuf)
  {
  	FILE	   *fp;
  	char		buf[32768];
***************
*** 511,521 **** sendFile(char *filename, int basepathlen, struct stat * statbuf)
  	pgoff_t		len = 0;
  	size_t		pad;
  
! 	fp = AllocateFile(filename, "rb");
  	if (fp == NULL)
  		ereport(ERROR,
  				(errcode(errcode_for_file_access()),
! 				 errmsg("could not open file \"%s\": %m", filename)));
  
  	/*
  	 * Some compilers will throw a warning knowing this test can never be true
--- 567,577 ----
  	pgoff_t		len = 0;
  	size_t		pad;
  
! 	fp = AllocateFile(readfilename, "rb");
  	if (fp == NULL)
  		ereport(ERROR,
  				(errcode(errcode_for_file_access()),
! 				 errmsg("could not open file \"%s\": %m", readfilename)));
  
  	/*
  	 * Some compilers will throw a warning knowing this test can never be true
***************
*** 524,532 **** sendFile(char *filename, int basepathlen, struct stat * statbuf)
  	if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN)
  		ereport(ERROR,
  				(errmsg("archive member \"%s\" too large for tar format",
! 						filename)));
  
! 	_tarWriteHeader(filename + basepathlen + 1, NULL, statbuf);
  
  	while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
  	{
--- 580,588 ----
  	if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN)
  		ereport(ERROR,
  				(errmsg("archive member \"%s\" too large for tar format",
! 						tarfilename)));
  
! 	_tarWriteHeader(tarfilename, NULL, statbuf);
  
  	while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
  	{
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 312,319 **** extern void HandleStartupProcInterrupts(void);
  extern void StartupProcessMain(void);
  extern void WakeupRecovery(void);
  
! extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast);
! extern XLogRecPtr do_pg_stop_backup(void);
  extern void do_pg_abort_backup(void);
  
  #endif   /* XLOG_H */
--- 312,326 ----
  extern void StartupProcessMain(void);
  extern void WakeupRecovery(void);
  
! /*
!  * Starting/stopping a base backup
!  */
! extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile);
! extern XLogRecPtr do_pg_stop_backup(char *labelfile);
  extern void do_pg_abort_backup(void);
  
+ /* File path names (all relative to $PGDATA) */
+ #define BACKUP_LABEL_FILE		"backup_label"
+ #define BACKUP_LABEL_OLD		"backup_label.old"
+ 
  #endif   /* XLOG_H */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to