On Fri, Apr 19, 2024 at 1:52 PM shveta malik <[email protected]> wrote:
>
> Please find v9 with the above comments addressed.
>
I have made minor modifications in the comments and a function name.
Please see the attached top-up patch. Apart from this, the patch looks
good to me.
--
With Regards,
Amit Kapila.
diff --git a/src/backend/replication/logical/slotsync.c
b/src/backend/replication/logical/slotsync.c
index 66fb46ac27..72a775b09c 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -79,10 +79,11 @@
* and also sets stopSignaled=true to handle the race condition when the
* postmaster has not noticed the promotion yet and thus may end up restarting
* the slot sync worker. If stopSignaled is set, the worker will exit in such a
- * case. Note that we don't need to reset this variable as after promotion the
- * slot sync worker won't be restarted because the pmState changes to PM_RUN
from
- * PM_HOT_STANDBY and we don't support demoting primary without restarting the
- * server. See MaybeStartSlotSyncWorker.
+ * case. The SQL function pg_sync_replication_slots() will also error out if
+ * this flag is set. Note that we don't need to reset this variable as after
+ * promotion the slot sync worker won't be restarted because the pmState
+ * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting
+ * primary without restarting the server. See MaybeStartSlotSyncWorker.
*
* The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
* overwrites.
@@ -1246,12 +1247,11 @@ wait_for_slot_activity(bool some_slot_updated)
}
/*
- * Check stopSignaled and syncing flags. Emit error if promotion has
- * already set stopSignaled or if it is concurrent sync call. Otherwise,
- * set 'syncing' flag and pid info.
+ * Emit an error if a promotion or a concurrent sync call is in progress.
+ * Otherwise, advertise that a sync is in progress.
*/
static void
-check_flags_and_set_sync_info(pid_t worker_pid)
+check_and_set_sync_info(pid_t worker_pid)
{
SpinLockAcquire(&SlotSyncCtx->mutex);
@@ -1259,9 +1259,8 @@ check_flags_and_set_sync_info(pid_t worker_pid)
Assert(worker_pid == InvalidPid || SlotSyncCtx->pid == InvalidPid);
/*
- * Startup process signaled the slot sync machinery to stop, so if
- * meanwhile postmaster ended up starting the worker again or user has
- * invoked pg_sync_replication_slots(), error out.
+ * Emit an error if startup process signaled the slot sync machinery to
+ * stop. See comments atop SlotSyncCtxStruct.
*/
if (SlotSyncCtx->stopSignaled)
{
@@ -1281,7 +1280,10 @@ check_flags_and_set_sync_info(pid_t worker_pid)
SlotSyncCtx->syncing = true;
- /* Advertise our PID so that the startup process can kill us on
promotion */
+ /*
+ * Advertise the required PID so that the startup process can kill the
slot
+ * sync worker on promotion.
+ */
SlotSyncCtx->pid = worker_pid;
SpinLockRelease(&SlotSyncCtx->mutex);
@@ -1333,13 +1335,13 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t
startup_data_len)
/*
* Register slotsync_worker_onexit() before we register
- * ReplicationSlotShmemExit() in BaseInit(), to ensure that during exit
of
- * slot sync worker, ReplicationSlotShmemExit() is called first,
followed
- * by slotsync_worker_onexit(). Startup process during promotion calls
- * ShutDownSlotSync() which waits for slot sync to finish and it does
that
- * by checking the 'syncing' flag. Thus it is important that worker
should
- * be done with slots' release and cleanup before it actually marks
itself
- * as finished syncing.
+ * ReplicationSlotShmemExit() in BaseInit(), to ensure that during the
exit
+ * of the slot sync worker, ReplicationSlotShmemExit() is called first,
+ * followed by slotsync_worker_onexit(). The startup process during
+ * promotion invokes ShutDownSlotSync() which waits for slot sync to
finish
+ * and it does that by checking the 'syncing' flag. Thus worker must be
+ * done with the slots' release and cleanup before it marks itself as
+ * finished syncing.
*/
before_shmem_exit(slotsync_worker_onexit, (Datum) 0);
@@ -1391,7 +1393,7 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t
startup_data_len)
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGCHLD, SIG_DFL);
- check_flags_and_set_sync_info(MyProcPid);
+ check_and_set_sync_info(MyProcPid);
ereport(LOG, errmsg("slot sync worker started"));
@@ -1544,9 +1546,9 @@ update_synced_slots_inactive_since(void)
/*
* Shut down the slot sync worker.
*
- * It sends signal to shutdown slot sync worker. It also waits till
- * the slot sync worker has exited and pg_sync_replication_slots()
- * has finished.
+ * This function sends signal to shutdown slot sync worker, if required. It
+ * also waits till the slot sync worker has exited or
+ * pg_sync_replication_slots() has finished.
*/
void
ShutDownSlotSync(void)
@@ -1593,10 +1595,7 @@ ShutDownSlotSync(void)
SpinLockAcquire(&SlotSyncCtx->mutex);
- /*
- * Confirm that both the worker and the function
- * pg_sync_replication_slots() are done.
- */
+ /* Ensure that no process is syncing the slots. */
if (!SlotSyncCtx->syncing)
break;
@@ -1685,12 +1684,13 @@ slotsync_failure_callback(int code, Datum arg)
/*
* We need to do slots cleanup here just like WalSndErrorCleanup() does.
*
- * Startup process during promotion calls ShutDownSlotSync() which waits
- * for slot sync to finish and it does that by checking the 'syncing'
- * flag. Thus it is important that SQL function should be done with
slots'
- * release and cleanup before it actually marks itself as finished
- * syncing.
+ * The startup process during promotion invokes ShutDownSlotSync() which
+ * waits for slot sync to finish and it does that by checking the
'syncing'
+ * flag. Thus the SQL function must be done with slots' release and
cleanup
+ * to avoid any dangling temporary slots or active slots before it marks
+ * itself as finished syncing.
*/
+
/* Make sure active replication slots are released */
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
@@ -1699,9 +1699,9 @@ slotsync_failure_callback(int code, Datum arg)
ReplicationSlotCleanup();
/*
- * If syncing_slots is true, it indicates that the process errored out
- * without resetting the flag. So, we need to clean up shared memory and
- * reset the flag here.
+ * The set syncing_slots indicates that the process errored out without
+ * resetting the flag. So, we need to clean up shared memory and reset
the
+ * flag here.
*/
if (syncing_slots)
reset_syncing_flag();
@@ -1718,7 +1718,7 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
{
PG_ENSURE_ERROR_CLEANUP(slotsync_failure_callback,
PointerGetDatum(wrconn));
{
- check_flags_and_set_sync_info(InvalidPid);
+ check_and_set_sync_info(InvalidPid);
validate_remote_info(wrconn);