Hi Nisha, Some review comments for v72-0001.
====== GENERAL My preference was to just keep the enum as per v70 for the *actual* cause, and introduce a separate set of bit flags for *possible* causes to be checked. This creates a clear code separation between the actual and possible. It also eliminates the need to jump through hoops just to map a cause to its name. You wrote: > OTOH, keeping the enums as they are in v70, and defining new macros for the very similar purpose could add unnecessary complexity to code management. Since both the enum and the bit flags would be defined in slot.h adjacent to each other I don't foresee much complexity. I concede, a dev might write code and accidentally muddle the enum instead of the flag or vice versa but that's an example of sloppiness, not complexity. Certainly there would be fewer necessary changes than what are in the v72 patch due to all the cause/causename mappings. For example, slot.h - Now, introduces a NEW typedef SlotInvalidationCauseMap slot.h - Now, need extern for NEW function GetSlotInvalidationCauseName slot.c - Now, needed minor rewrite of GetSlotInvalidationCause instead of leaving it as-is slot.c - Now, needs a whole NEW looping function GetSlotInvalidationCauseName instead of direct array index. Several place now must call to the GetSlotInvalidationCauseName where previously a simple direct array lookup was done slot.c - NEW call in ReplicationSlotAcquire slotfuncs.c - NEW call in pg_get_replication_slots ~ FWIW, I've attached a topup patch using my idea just to see what it might look like. The result was 20 lines less code. Anyway, YMMV. ====== Other review comments follow ... src/backend/replication/slot.c InvalidatePossiblyObsoleteSlot: 1. Having all those 'gotos' seems like something best avoided. Can you try removing them to see if it improves this function? IIUC you maybe can try rid all of them using logic like: - assign invalidation_cause = NONE outside the loop - loop until invalidation_cause != NONE - include 'invalidation_cause == NONE' condition with all the bit flag checks - reassign invalidation_cause = NONE in the racy place where you want to continue the loop. and instead just keep looping and checking while the 'invalidation_cause' remains NONE. ====== Kind Regards, Peter Smith. Fujitsu Australia
From c51d7b8eddfc991c5978052b8f99aad0f7330c5c Mon Sep 17 00:00:00 2001 From: Peter Smith <peter.b.sm...@fujitsu.com> Date: Mon, 10 Feb 2025 16:39:00 +1100 Subject: [PATCH v210] ps-tmp-topup-nisha-v720001 --- src/backend/access/transam/xlog.c | 6 +-- src/backend/replication/slot.c | 73 ++++++++++++------------------------- src/backend/replication/slotfuncs.c | 2 +- src/backend/storage/ipc/standby.c | 2 +- src/include/replication/slot.h | 32 ++++++++-------- 5 files changed, 46 insertions(+), 69 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d583313..e59df20 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7316,7 +7316,7 @@ CreateCheckPoint(int flags) */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); KeepLogSeg(recptr, &_logSegNo); - if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED | RS_INVAL_IDLE_TIMEOUT, + if (InvalidateObsoleteReplicationSlots(RS_CHECK_IF_INVAL_WAL_REMOVED | RS_CHECK_IF_INVAL_IDLE_TIMEOUT, _logSegNo, InvalidOid, InvalidTransactionId)) { @@ -7771,7 +7771,7 @@ CreateRestartPoint(int flags) replayPtr = GetXLogReplayRecPtr(&replayTLI); endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr; KeepLogSeg(endptr, &_logSegNo); - if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED | RS_INVAL_IDLE_TIMEOUT, + if (InvalidateObsoleteReplicationSlots(RS_CHECK_IF_INVAL_WAL_REMOVED | RS_CHECK_IF_INVAL_IDLE_TIMEOUT, _logSegNo, InvalidOid, InvalidTransactionId)) { @@ -8512,7 +8512,7 @@ xlog_redo(XLogReaderState *record) if (InRecovery && InHotStandby && xlrec.wal_level < WAL_LEVEL_LOGICAL && wal_level >= WAL_LEVEL_LOGICAL) - InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_LEVEL, + InvalidateObsoleteReplicationSlots(RS_CHECK_IF_INVAL_WAL_LEVEL, 0, InvalidOid, InvalidTransactionId); diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index f433bf1..3c7c70d 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -102,16 +102,16 @@ typedef struct /* * Lookup table for slot invalidation causes. */ -const SlotInvalidationCauseMap InvalidationCauses[] = { - {RS_INVAL_NONE, "none"}, - {RS_INVAL_WAL_REMOVED, "wal_removed"}, - {RS_INVAL_HORIZON, "rows_removed"}, - {RS_INVAL_WAL_LEVEL, "wal_level_insufficient"}, - {RS_INVAL_IDLE_TIMEOUT, "idle_timeout"}, +const char *const SlotInvalidationCauses[] = { + [RS_INVAL_NONE] = "none", + [RS_INVAL_WAL_REMOVED] = "wal_removed", + [RS_INVAL_HORIZON] = "rows_removed", + [RS_INVAL_WAL_LEVEL] = "wal_level_insufficient", + [RS_INVAL_IDLE_TIMEOUT] = "idle_timeout", }; -/* Maximum number of invalidation causes */ -#define RS_INVAL_MAX_CAUSES (sizeof(InvalidationCauses) / sizeof(InvalidationCauses[0])) +StaticAssertDecl(lengthof(SlotInvalidationCauses) == (RS_INVAL_MAX_CAUSES + 1), + "array length mismatch"); /* size of version independent data */ #define ReplicationSlotOnDiskConstantSize \ @@ -579,7 +579,7 @@ retry: errmsg("can no longer access replication slot \"%s\"", NameStr(s->data.name)), errdetail("This replication slot has been invalidated due to \"%s\".", - GetSlotInvalidationCauseName(s->data.invalidated))); + SlotInvalidationCauses[s->data.invalidated])); } /* @@ -1630,7 +1630,7 @@ CanInvalidateIdleSlot(ReplicationSlot *s) * for syscalls, so caller must restart if we return true. */ static bool -InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, +InvalidatePossiblyObsoleteSlot(int possible_causes, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, @@ -1662,7 +1662,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, break; } - if (cause & RS_INVAL_IDLE_TIMEOUT) + if (possible_causes & RS_CHECK_IF_INVAL_IDLE_TIMEOUT) { /* * Assign the current time here to avoid system call overhead @@ -1698,7 +1698,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, initial_catalog_effective_xmin = s->effective_catalog_xmin; } - if (cause & RS_INVAL_WAL_REMOVED) + if (possible_causes & RS_CHECK_IF_INVAL_WAL_REMOVED) { if (initial_restart_lsn != InvalidXLogRecPtr && initial_restart_lsn < oldestLSN) @@ -1707,7 +1707,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, goto invalidation_marked; } } - if (cause & RS_INVAL_HORIZON) + if (possible_causes & RS_CHECK_IF_INVAL_HORIZON) { if (!SlotIsLogical(s)) goto invalidation_marked; @@ -1729,7 +1729,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, goto invalidation_marked; } } - if (cause & RS_INVAL_WAL_LEVEL) + if (possible_causes & RS_CHECK_IF_INVAL_WAL_LEVEL) { if (SlotIsLogical(s)) { @@ -1737,7 +1737,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, goto invalidation_marked; } } - if (cause & RS_INVAL_IDLE_TIMEOUT) + if (possible_causes & RS_CHECK_IF_INVAL_IDLE_TIMEOUT) { Assert(now > 0); @@ -1919,16 +1919,16 @@ invalidation_marked: * NB - this runs as part of checkpoint, so avoid raising errors if possible. */ bool -InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, +InvalidateObsoleteReplicationSlots(int possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon) { XLogRecPtr oldestLSN; bool invalidated = false; - Assert(!(cause & RS_INVAL_HORIZON) || TransactionIdIsValid(snapshotConflictHorizon)); - Assert(!(cause & RS_INVAL_WAL_REMOVED) || oldestSegno > 0); - Assert(!(cause & RS_INVAL_NONE)); + Assert(!(possible_causes & RS_CHECK_IF_INVAL_HORIZON) || TransactionIdIsValid(snapshotConflictHorizon)); + Assert(!(possible_causes & RS_CHECK_IF_INVAL_WAL_REMOVED) || oldestSegno > 0); + Assert(possible_causes); if (max_replication_slots == 0) return invalidated; @@ -1944,7 +1944,7 @@ restart: if (!s->in_use) continue; - if (InvalidatePossiblyObsoleteSlot(cause, s, oldestLSN, dboid, + if (InvalidatePossiblyObsoleteSlot(possible_causes, s, oldestLSN, dboid, snapshotConflictHorizon, &invalidated)) { @@ -2530,43 +2530,18 @@ RestoreSlotFromDisk(const char *name) ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason) { + ReplicationSlotInvalidationCause cause; ReplicationSlotInvalidationCause result = RS_INVAL_NONE; bool found PG_USED_FOR_ASSERTS_ONLY = false; - int cause_idx; Assert(invalidation_reason); - for (cause_idx = 0; cause_idx <= RS_INVAL_MAX_CAUSES; cause_idx++) + for (cause = RS_INVAL_NONE; cause <= RS_INVAL_MAX_CAUSES; cause++) { - if (strcmp(InvalidationCauses[cause_idx].cause_name, invalidation_reason) == 0) + if (strcmp(SlotInvalidationCauses[cause], invalidation_reason) == 0) { found = true; - result = InvalidationCauses[cause_idx].cause; - break; - } - } - - Assert(found); - return result; -} - -/* - * Maps an ReplicationSlotInvalidationCause to the invalidation - * reason for a replication slot. - */ -const char * -GetSlotInvalidationCauseName(ReplicationSlotInvalidationCause cause) -{ - const char *result = "none"; - bool found PG_USED_FOR_ASSERTS_ONLY = false; - int cause_idx; - - for (cause_idx = 0; cause_idx <= RS_INVAL_MAX_CAUSES; cause_idx++) - { - if (InvalidationCauses[cause_idx].cause == cause) - { - found = true; - result = InvalidationCauses[cause_idx].cause_name; + result = cause; break; } } diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index f652ec8..8be4b8c 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -431,7 +431,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) if (cause == RS_INVAL_NONE) nulls[i++] = true; else - values[i++] = CStringGetTextDatum(GetSlotInvalidationCauseName(cause)); + values[i++] = CStringGetTextDatum(SlotInvalidationCauses[cause]); values[i++] = BoolGetDatum(slot_contents.data.failover); diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 2039062..d68a5a4a0 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -499,7 +499,7 @@ ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon, * reached, e.g. due to using a physical replication slot. */ if (wal_level >= WAL_LEVEL_LOGICAL && isCatalogRel) - InvalidateObsoleteReplicationSlots(RS_INVAL_HORIZON, 0, locator.dbOid, + InvalidateObsoleteReplicationSlots(RS_CHECK_IF_INVAL_HORIZON, 0, locator.dbOid, snapshotConflictHorizon); } diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index c4a62bc..d8da3dd 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -44,28 +44,31 @@ typedef enum ReplicationSlotPersistency * Slots can be invalidated, e.g. due to max_slot_wal_keep_size. If so, the * 'invalidated' field is set to a value other than _NONE. * - * When adding a new invalidation cause here, the value must be powers of 2 - * (e.g., 1, 2, 4...) for proper bitwise operations. Also, remember to update - * SlotInvalidationCauseMap in slot.c. + * When adding a new invalidation cause here, remember to update + * SlotInvalidationCauses and RS_INVAL_MAX_CAUSES. */ typedef enum ReplicationSlotInvalidationCause { - RS_INVAL_NONE = 0x00, + RS_INVAL_NONE, /* required WAL has been removed */ - RS_INVAL_WAL_REMOVED = 0x01, + RS_INVAL_WAL_REMOVED, /* required rows have been removed */ - RS_INVAL_HORIZON = 0x02, + RS_INVAL_HORIZON, /* wal_level insufficient for slot */ - RS_INVAL_WAL_LEVEL = 0x04, + RS_INVAL_WAL_LEVEL, /* idle slot timeout has occurred */ - RS_INVAL_IDLE_TIMEOUT = 0x08, + RS_INVAL_IDLE_TIMEOUT, } ReplicationSlotInvalidationCause; -typedef struct SlotInvalidationCauseMap -{ - int cause; - const char *cause_name; -} SlotInvalidationCauseMap; +#define RS_INVAL_MAX_CAUSES RS_INVAL_IDLE_TIMEOUT + +extern PGDLLIMPORT const char *const SlotInvalidationCauses[]; + +/* Bit flags for checking possible invalidation causes. */ +#define RS_CHECK_IF_INVAL_WAL_REMOVED (1U << RS_INVAL_WAL_REMOVED) +#define RS_CHECK_IF_INVAL_HORIZON (1U << RS_INVAL_HORIZON) +#define RS_CHECK_IF_INVAL_WAL_LEVEL (1U << RS_INVAL_WAL_LEVEL) +#define RS_CHECK_IF_INVAL_IDLE_TIMEOUT (1U << RS_INVAL_IDLE_TIMEOUT) /* * On-Disk data of a replication slot, preserved across restarts. @@ -277,7 +280,7 @@ extern void ReplicationSlotsComputeRequiredLSN(void); extern XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void); extern bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive); extern void ReplicationSlotsDropDBSlots(Oid dboid); -extern bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, +extern bool InvalidateObsoleteReplicationSlots(int possible_cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon); @@ -294,7 +297,6 @@ extern void CheckSlotRequirements(void); extern void CheckSlotPermissions(void); extern ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason); -extern const char *GetSlotInvalidationCauseName(ReplicationSlotInvalidationCause cause); extern bool SlotExistsInSyncStandbySlots(const char *slot_name); extern bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel); -- 1.8.3.1