Here is a new version of the patch with feedback addressed.
--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 3cda5bb87c82738ad6f8a082ef5dfeb49cd51392 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Tue, 28 Nov 2023 11:17:13 -0600
Subject: [PATCH v3 1/1] archiver exception handling
---
contrib/basic_archive/basic_archive.c | 134 +-------------------------
doc/src/sgml/archive-modules.sgml | 11 ++-
src/backend/archive/shell_archive.c | 5 -
src/backend/postmaster/pgarch.c | 93 +++++++++++++++++-
4 files changed, 107 insertions(+), 136 deletions(-)
diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
index 4d78c31859..d575faab93 100644
--- a/contrib/basic_archive/basic_archive.c
+++ b/contrib/basic_archive/basic_archive.c
@@ -40,26 +40,18 @@
PG_MODULE_MAGIC;
-typedef struct BasicArchiveData
-{
- MemoryContext context;
-} BasicArchiveData;
-
static char *archive_directory = NULL;
-static void basic_archive_startup(ArchiveModuleState *state);
static bool basic_archive_configured(ArchiveModuleState *state);
static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
-static void basic_archive_file_internal(const char *file, const char *path);
static bool check_archive_directory(char **newval, void **extra, GucSource source);
static bool compare_files(const char *file1, const char *file2);
-static void basic_archive_shutdown(ArchiveModuleState *state);
static const ArchiveModuleCallbacks basic_archive_callbacks = {
- .startup_cb = basic_archive_startup,
+ .startup_cb = NULL,
.check_configured_cb = basic_archive_configured,
.archive_file_cb = basic_archive_file,
- .shutdown_cb = basic_archive_shutdown
+ .shutdown_cb = NULL
};
/*
@@ -93,24 +85,6 @@ _PG_archive_module_init(void)
return &basic_archive_callbacks;
}
-/*
- * basic_archive_startup
- *
- * Creates the module's memory context.
- */
-void
-basic_archive_startup(ArchiveModuleState *state)
-{
- BasicArchiveData *data;
-
- data = (BasicArchiveData *) MemoryContextAllocZero(TopMemoryContext,
- sizeof(BasicArchiveData));
- data->context = AllocSetContextCreate(TopMemoryContext,
- "basic_archive",
- ALLOCSET_DEFAULT_SIZES);
- state->private_data = (void *) data;
-}
-
/*
* check_archive_directory
*
@@ -171,74 +145,6 @@ basic_archive_configured(ArchiveModuleState *state)
*/
static bool
basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
-{
- sigjmp_buf local_sigjmp_buf;
- MemoryContext oldcontext;
- BasicArchiveData *data = (BasicArchiveData *) state->private_data;
- MemoryContext basic_archive_context = data->context;
-
- /*
- * We run basic_archive_file_internal() in our own memory context so that
- * we can easily reset it during error recovery (thus avoiding memory
- * leaks).
- */
- oldcontext = MemoryContextSwitchTo(basic_archive_context);
-
- /*
- * Since the archiver operates at the bottom of the exception stack,
- * ERRORs turn into FATALs and cause the archiver process to restart.
- * However, using ereport(ERROR, ...) when there are problems is easy to
- * code and maintain. Therefore, we create our own exception handler to
- * catch ERRORs and return false instead of restarting the archiver
- * whenever there is a failure.
- */
- if (sigsetjmp(local_sigjmp_buf, 1) != 0)
- {
- /* Since not using PG_TRY, must reset error stack by hand */
- error_context_stack = NULL;
-
- /* Prevent interrupts while cleaning up */
- HOLD_INTERRUPTS();
-
- /* Report the error and clear ErrorContext for next time */
- EmitErrorReport();
- FlushErrorState();
-
- /* Close any files left open by copy_file() or compare_files() */
- AtEOSubXact_Files(false, InvalidSubTransactionId, InvalidSubTransactionId);
-
- /* Reset our memory context and switch back to the original one */
- MemoryContextSwitchTo(oldcontext);
- MemoryContextReset(basic_archive_context);
-
- /* Remove our exception handler */
- PG_exception_stack = NULL;
-
- /* Now we can allow interrupts again */
- RESUME_INTERRUPTS();
-
- /* Report failure so that the archiver retries this file */
- return false;
- }
-
- /* Enable our exception handler */
- PG_exception_stack = &local_sigjmp_buf;
-
- /* Archive the file! */
- basic_archive_file_internal(file, path);
-
- /* Remove our exception handler */
- PG_exception_stack = NULL;
-
- /* Reset our memory context and switch back to the original one */
- MemoryContextSwitchTo(oldcontext);
- MemoryContextReset(basic_archive_context);
-
- return true;
-}
-
-static void
-basic_archive_file_internal(const char *file, const char *path)
{
char destination[MAXPGPATH];
char temp[MAXPGPATH + 256];
@@ -272,7 +178,7 @@ basic_archive_file_internal(const char *file, const char *path)
fsync_fname(destination, false);
fsync_fname(archive_directory, true);
- return;
+ return true;
}
ereport(ERROR,
@@ -312,6 +218,8 @@ basic_archive_file_internal(const char *file, const char *path)
ereport(DEBUG1,
(errmsg("archived \"%s\" via basic_archive", file)));
+
+ return true;
}
/*
@@ -394,35 +302,3 @@ compare_files(const char *file1, const char *file2)
return ret;
}
-
-/*
- * basic_archive_shutdown
- *
- * Frees our allocated state.
- */
-static void
-basic_archive_shutdown(ArchiveModuleState *state)
-{
- BasicArchiveData *data = (BasicArchiveData *) state->private_data;
- MemoryContext basic_archive_context;
-
- /*
- * If we didn't get to storing the pointer to our allocated state, we
- * don't have anything to clean up.
- */
- if (data == NULL)
- return;
-
- basic_archive_context = data->context;
- Assert(CurrentMemoryContext != basic_archive_context);
-
- if (MemoryContextIsValid(basic_archive_context))
- MemoryContextDelete(basic_archive_context);
- data->context = NULL;
-
- /*
- * Finally, free the state.
- */
- pfree(data);
- state->private_data = NULL;
-}
diff --git a/doc/src/sgml/archive-modules.sgml b/doc/src/sgml/archive-modules.sgml
index 7064307d9e..f9b8d6c740 100644
--- a/doc/src/sgml/archive-modules.sgml
+++ b/doc/src/sgml/archive-modules.sgml
@@ -128,12 +128,21 @@ typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, cons
If <literal>true</literal> is returned, the server proceeds as if the file
was successfully archived, which may include recycling or removing the
- original WAL file. If <literal>false</literal> is returned, the server will
+ original WAL file. If <literal>false</literal> is returned or an error is thrown, the server will
keep the original WAL file and retry archiving later.
<replaceable>file</replaceable> will contain just the file name of the WAL
file to archive, while <replaceable>path</replaceable> contains the full
path of the WAL file (including the file name).
</para>
+
+ <note>
+ <para>
+ The <function>archive_file_cb</function> callback is called in a
+ short-lived memory context that will be reset between invocations. If you
+ need longer-lived storage, create a memory context in the module's
+ <function>startup_cb</function> callback.
+ </para>
+ </note>
</sect2>
<sect2 id="archive-module-shutdown">
diff --git a/src/backend/archive/shell_archive.c b/src/backend/archive/shell_archive.c
index 157ca4c751..793f607db8 100644
--- a/src/backend/archive/shell_archive.c
+++ b/src/backend/archive/shell_archive.c
@@ -66,9 +66,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file,
"archive_command", "fp",
file, nativePath);
- if (nativePath)
- pfree(nativePath);
-
ereport(DEBUG3,
(errmsg_internal("executing archive command \"%s\"",
xlogarchcmd)));
@@ -127,8 +124,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file,
return false;
}
- pfree(xlogarchcmd);
-
elog(DEBUG1, "archived write-ahead log file \"%s\"", file);
return true;
}
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 46af349564..8bb15ac960 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -38,6 +38,7 @@
#include "pgstat.h"
#include "postmaster/interrupt.h"
#include "postmaster/pgarch.h"
+#include "storage/condition_variable.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
@@ -49,6 +50,8 @@
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
+#include "utils/resowner.h"
+#include "utils/timeout.h"
/* ----------
@@ -101,6 +104,7 @@ static time_t last_sigterm_time = 0;
static PgArchData *PgArch = NULL;
static const ArchiveModuleCallbacks *ArchiveCallbacks;
static ArchiveModuleState *archive_module_state;
+static MemoryContext archive_context;
/*
@@ -252,6 +256,11 @@ PgArchiverMain(void)
arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
ready_file_comparator, NULL);
+ /* Initialize our memory context. */
+ archive_context = AllocSetContextCreate(TopMemoryContext,
+ "archiver",
+ ALLOCSET_DEFAULT_SIZES);
+
/* Load the archive_library. */
LoadArchiveLibrary();
@@ -501,6 +510,8 @@ pgarch_ArchiverCopyLoop(void)
static bool
pgarch_archiveXlog(char *xlog)
{
+ sigjmp_buf local_sigjmp_buf;
+ MemoryContext oldcontext;
char pathname[MAXPGPATH];
char activitymsg[MAXFNAMELEN + 16];
bool ret;
@@ -511,7 +522,87 @@ pgarch_archiveXlog(char *xlog)
snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
set_ps_display(activitymsg);
- ret = ArchiveCallbacks->archive_file_cb(archive_module_state, xlog, pathname);
+ oldcontext = MemoryContextSwitchTo(archive_context);
+
+ /*
+ * Since the archiver operates at the bottom of the exception stack,
+ * ERRORs turn into FATALs and cause the archiver process to restart.
+ * However, using ereport(ERROR, ...) when there are problems is easy to
+ * code and maintain. Therefore, we create our own exception handler to
+ * catch ERRORs and return false instead of restarting the archiver
+ * whenever there is a failure.
+ *
+ * We assume ERRORs from the archiving callback are the most common
+ * exceptions experienced by the archiver, so we opt to handle exceptions
+ * here instead of PgArchiverMain() to avoid reinitializing the archiver
+ * too frequently. We could instead add a sigsetjmp() block to
+ * PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to
+ * avoid the odd archiver restart doesn't seem worth it.
+ */
+ if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+ {
+ /* Since not using PG_TRY, must reset error stack by hand */
+ error_context_stack = NULL;
+
+ /* Prevent interrupts while cleaning up */
+ HOLD_INTERRUPTS();
+
+ /* Report the error to the server log. */
+ EmitErrorReport();
+
+ /*
+ * Try to clean up anything the archive module left behind. We try to
+ * cover anything that an archive module could conceivably have left
+ * behind, but it is of course possible that modules could be doing
+ * unexpected things that require additional cleanup. Module authors
+ * should be sure to do any extra required cleanup in a PG_CATCH block
+ * within the archiving callback, and they are encouraged to notify
+ * the pgsql-hackers mailing list so that we can add it here.
+ */
+ disable_all_timeouts(false);
+ LWLockReleaseAll();
+ ConditionVariableCancelSleep();
+ pgstat_report_wait_end();
+ ReleaseAuxProcessResources(false);
+ AtEOXact_Files(false);
+ AtEOXact_HashTables(false);
+
+ /*
+ * Return to the original memory context and clear ErrorContext for
+ * next time.
+ */
+ MemoryContextSwitchTo(oldcontext);
+ FlushErrorState();
+
+ /* Flush any leaked data */
+ MemoryContextReset(archive_context);
+
+ /* Remove our exception handler */
+ PG_exception_stack = NULL;
+
+ /* Now we can allow interrupts again */
+ RESUME_INTERRUPTS();
+
+ /* Report failure so that the archiver retries this file */
+ ret = false;
+ }
+ else
+ {
+ /* Enable our exception handler */
+ PG_exception_stack = &local_sigjmp_buf;
+
+ /* Archive the file! */
+ ret = ArchiveCallbacks->archive_file_cb(archive_module_state,
+ xlog, pathname);
+
+ /* Remove our exception handler */
+ PG_exception_stack = NULL;
+
+ /* Reset our memory context and switch back to the original one */
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextReset(archive_context);
+ }
+
if (ret)
snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
else
--
2.25.1