From 1a07ced0c5dac96c7bbe7bf6451fb6a897b9058c Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Tue, 17 Dec 2019 08:44:11 -0500
Subject: [PATCH v2] Fix minor problems with non-exclusive backup cleanup.

The previous coding imagined that it could call before_shmem_exit()
when a non-exclusive backup began and then remove the previously-added
handler by calling cancel_before_shmem_exit() when that backup
ended. However, this only works provided that nothing else in the
system has registered a before_shmem_exit() hook in the interim,
because cancel_before_shmem_exit() is documented to remove a callback
only if it is the latest callback registered.

To fix, leave the handler installed for the lifetime of the session,
arrange to install it just once, and teach it to quietly do nothing if
there isn't a non-exclusive backup in process.

This is a bug, but for now I'm not going to back-patch, because the
consequences are minor. It's possible to cause a spurious warning
to be generated, but that doesn't really matter. It's also possible
to trigger an assertion failure, but production builds shouldn't
have assertions enabled.

Patch by me, reviewed by Kyotaro Horiguchi, Michael Paquier, and
Fujii Masao, and with comments by various others.

Discussion: http://postgr.es/m/CA+TgmobMjnyBfNhGTKQEDbqXYE3_rXWpc4CM63fhyerNCes3mA@mail.gmail.com
---
 src/backend/access/transam/xlog.c      | 29 +++++++++++++++++++++++---
 src/backend/access/transam/xlogfuncs.c | 17 ++-------------
 src/backend/replication/basebackup.c   | 16 ++------------
 src/include/access/xlog.h              |  3 ++-
 4 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3baf1b009a..89a9cf0c78 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11133,23 +11133,27 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
  * system out of backup mode, thus making it a lot more safe to call from
  * an error handler.
  *
+ * The caller can pass 'arg' as 'true' or 'false' to control whether a warning
+ * is emitted.
+ *
  * NB: This is only for aborting a non-exclusive backup that doesn't write
  * backup_label. A backup started with pg_start_backup() needs to be finished
  * with pg_stop_backup().
  */
 void
-do_pg_abort_backup(void)
+do_pg_abort_backup(int code, Datum arg)
 {
+	bool	emit_warning = DatumGetBool(arg);
+
 	/*
 	 * Quick exit if session is not keeping around a non-exclusive backup
 	 * already started.
 	 */
-	if (sessionBackupState == SESSION_BACKUP_NONE)
+	if (sessionBackupState != SESSION_BACKUP_NON_EXCLUSIVE)
 		return;
 
 	WALInsertLockAcquireExclusive();
 	Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
-	Assert(sessionBackupState == SESSION_BACKUP_NON_EXCLUSIVE);
 	XLogCtl->Insert.nonExclusiveBackups--;
 
 	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
@@ -11158,6 +11162,25 @@ do_pg_abort_backup(void)
 		XLogCtl->Insert.forcePageWrites = false;
 	}
 	WALInsertLockRelease();
+
+	if (emit_warning)
+		ereport(WARNING,
+				(errmsg("aborting backup due to backend exiting before pg_stop_back up was called")));
+}
+
+/*
+ * Register a handler that will warn about unterminated backups at end of
+ * session, unless this has already been done.
+ */
+void
+register_persistent_abort_backup_handler(void)
+{
+	static bool already_done = false;
+
+	if (already_done)
+		return;
+	before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
+	already_done = true;
 }
 
 /*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 1fccf29a36..5fd12152b2 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -44,18 +44,6 @@
 static StringInfo label_file;
 static StringInfo tblspc_map_file;
 
-/*
- * Called when the backend exits with a running non-exclusive base backup,
- * to clean up state.
- */
-static void
-nonexclusive_base_backup_cleanup(int code, Datum arg)
-{
-	do_pg_abort_backup();
-	ereport(WARNING,
-			(errmsg("aborting backup due to backend exiting before pg_stop_backup was called")));
-}
-
 /*
  * pg_start_backup: set up for taking an on-line backup dump
  *
@@ -103,10 +91,10 @@ pg_start_backup(PG_FUNCTION_ARGS)
 		tblspc_map_file = makeStringInfo();
 		MemoryContextSwitchTo(oldcontext);
 
+		register_persistent_abort_backup_handler();
+
 		startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file,
 										NULL, tblspc_map_file, false, true);
-
-		before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0);
 	}
 
 	PG_RETURN_LSN(startpoint);
@@ -248,7 +236,6 @@ pg_stop_backup_v2(PG_FUNCTION_ARGS)
 		 * and tablespace map so they can be written to disk by the caller.
 		 */
 		stoppoint = do_pg_stop_backup(label_file->data, waitforarchive, NULL);
-		cancel_before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0);
 
 		values[1] = CStringGetTextDatum(label_file->data);
 		values[2] = CStringGetTextDatum(tblspc_map_file->data);
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 1fa4551eff..1bb72a0a57 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -65,7 +65,6 @@ static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *sta
 						  bool sizeonly);
 static void send_int8_string(StringInfoData *buf, int64 intval);
 static void SendBackupHeader(List *tablespaces);
-static void base_backup_cleanup(int code, Datum arg);
 static void perform_base_backup(basebackup_options *opt);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
@@ -216,17 +215,6 @@ static const char *const noChecksumFiles[] = {
 	NULL,
 };
 
-
-/*
- * Called when ERROR or FATAL happens in perform_base_backup() after
- * we have started the backup - make sure we end it!
- */
-static void
-base_backup_cleanup(int code, Datum arg)
-{
-	do_pg_abort_backup();
-}
-
 /*
  * Actually do a base backup for the specified tablespaces.
  *
@@ -265,7 +253,7 @@ perform_base_backup(basebackup_options *opt)
 	 * do_pg_stop_backup() should be inside the error cleanup block!
 	 */
 
-	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
+	PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
 	{
 		ListCell   *lc;
 		tablespaceinfo *ti;
@@ -374,7 +362,7 @@ perform_base_backup(basebackup_options *opt)
 
 		endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
 	}
-	PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
+	PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
 
 
 	if (opt->includewal)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 9b588c87a5..3fea1993bc 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -349,7 +349,8 @@ extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
 									 bool needtblspcmapfile);
 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
 									TimeLineID *stoptli_p);
-extern void do_pg_abort_backup(void);
+extern void do_pg_abort_backup(int code, Datum arg);
+extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
 
 /* File path names (all relative to $PGDATA) */
-- 
2.17.2 (Apple Git-113)

