On Fri, 31 Jan 2025 at 17:50, Nisha Moond <[email protected]> wrote:
>
> Thanks for the review! I have incorporated the above comments. The
> test in patch-002 has been optimized as suggested and now completes in
> less than a second.
> Please find the attached v66 patch set. The base patch(v65-001) is
> committed now, so I have rebased the patches.
Few comments:
1)We should set inactive_since only if the slot can be invalidated:
+ /* For testing timeout slot
invalidation */
+ if
(IS_INJECTION_POINT_ATTACHED("slot-time-out-inval"))
+ s->inactive_since = 1;
+
2) Instead of "alter system set" and reload, let's do this in
$node->append_conf itself:
+# Set timeout GUC so that the next checkpoint will invalidate inactive slots
+$node->safe_psql(
+ 'postgres', qq[
+ ALTER SYSTEM SET idle_replication_slot_timeout TO '1min';
+]);
+$node->reload;
3) No need to trigger checkpoint twice, we can move it outside so that
just a single checkpoint will invalidate both the slots:
+sub trigger_slot_invalidation
+{
+ my ($node, $slot, $offset) = @_;
+ my $node_name = $node->name;
+ my $invalidated = 0;
+
+ # Run a checkpoint
+ $node->safe_psql('postgres', "CHECKPOINT");
4) I fel this trigger_slot_invalidation is not required after removing
the checkpoint from the function, let's move the waiting for
"invalidating obsolete replication slot" also to
wait_for_slot_invalidation function:
+ # The slot's invalidation should be logged
+ $node->wait_for_log(qr/invalidating obsolete replication slot
\"$slot\"/,
+ $offset);
+
+ # Check that the invalidation reason is 'idle_timeout'
+ $node->poll_query_until(
+ 'postgres', qq[
+ SELECT COUNT(slot_name) = 1 FROM pg_replication_slots
+ WHERE slot_name = '$slot' AND
+ invalidation_reason = 'idle_timeout';
+ ])
5) Can we move the subroutine to the beginning, I noticed in other
places we have kept it before the tests like in 027_nosuperuser and
040_createsubscriber:
+# Wait for slot to first become idle and then get invalidated
+sub wait_for_slot_invalidation
+{
+ my ($node, $slot, $offset) = @_;
+ my $node_name = $node->name;
+
+ trigger_slot_invalidation($node, $slot, $offset);
+
+ # Check that an invalidated slot cannot be acquired
+ my ($result, $stdout, $stderr);
+ ($result, $stdout, $stderr) = $node->psql(
+ 'postgres', qq[
+ SELECT pg_replication_slot_advance('$slot', '0/1');
+ ]);
6) Since idle_replication_slot_timeout is related more closely with
max_slot_wal_keep_size, let's keep it along with it.
diff --git a/src/backend/utils/misc/postgresql.conf.sample
b/src/backend/utils/misc/postgresql.conf.sample
index 079efa1baa..0ed9eb057e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -329,6 +329,7 @@
#wal_sender_timeout = 60s # in milliseconds; 0 disables
#track_commit_timestamp = off # collect timestamp of transaction commit
# (change requires restart)
+#idle_replication_slot_timeout = 1d # in minutes; 0 disables
If you accept the comments, you can merge the changes from the attached patch.
Regards,
Vignesh
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index c09b90eeac..e81bce348c 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1724,20 +1724,22 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
case RS_INVAL_IDLE_TIMEOUT:
Assert(now > 0);
- /* For testing timeout slot invalidation */
- if (IS_INJECTION_POINT_ATTACHED("slot-time-out-inval"))
- s->inactive_since = 1;
-
- /*
- * Check if the slot needs to be invalidated due to
- * idle_replication_slot_timeout GUC.
- */
- if (CanInvalidateIdleSlot(s) &&
- TimestampDifferenceExceedsSeconds(s->inactive_since, now,
- idle_replication_slot_timeout_mins * SECS_PER_MINUTE))
+ if (CanInvalidateIdleSlot(s))
{
- invalidation_cause = cause;
- inactive_since = s->inactive_since;
+ /* For testing timeout slot invalidation */
+ if (IS_INJECTION_POINT_ATTACHED("slot-time-out-inval"))
+ s->inactive_since = 1;
+
+ /*
+ * Check if the slot needs to be invalidated due to
+ * idle_replication_slot_timeout GUC.
+ */
+ if (TimestampDifferenceExceedsSeconds(s->inactive_since, now,
+ idle_replication_slot_timeout_mins * SECS_PER_MINUTE))
+ {
+ invalidation_cause = cause;
+ inactive_since = s->inactive_since;
+ }
}
break;
case RS_INVAL_NONE:
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0ed9eb057e..70be3a2ce5 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -326,10 +326,10 @@
# (change requires restart)
#wal_keep_size = 0 # in megabytes; 0 disables
#max_slot_wal_keep_size = -1 # in megabytes; -1 disables
+#idle_replication_slot_timeout = 1d # in minutes; 0 disables
#wal_sender_timeout = 60s # in milliseconds; 0 disables
#track_commit_timestamp = off # collect timestamp of transaction commit
# (change requires restart)
-#idle_replication_slot_timeout = 1d # in minutes; 0 disables
# - Primary Server -
diff --git a/src/test/recovery/t/044_invalidate_inactive_slots.pl b/src/test/recovery/t/044_invalidate_inactive_slots.pl
index fd907e7f82..7f18881cba 100644
--- a/src/test/recovery/t/044_invalidate_inactive_slots.pl
+++ b/src/test/recovery/t/044_invalidate_inactive_slots.pl
@@ -13,6 +13,39 @@ if ($ENV{enable_injection_points} ne 'yes')
plan skip_all => 'Injection points not supported by this build';
}
+# Wait for slot to first become idle and then get invalidated
+sub wait_for_slot_invalidation
+{
+ my ($node, $slot, $offset) = @_;
+ my $node_name = $node->name;
+
+ # The slot's invalidation should be logged
+ $node->wait_for_log(qr/invalidating obsolete replication slot \"$slot\"/,
+ $offset);
+
+ # Check that the invalidation reason is 'idle_timeout'
+ $node->poll_query_until(
+ 'postgres', qq[
+ SELECT COUNT(slot_name) = 1 FROM pg_replication_slots
+ WHERE slot_name = '$slot' AND
+ invalidation_reason = 'idle_timeout';
+ ])
+ or die
+ "Timed out while waiting for invalidation reason of slot $slot to be set on node $node_name";
+
+ # Check that an invalidated slot cannot be acquired
+ my ($result, $stdout, $stderr);
+ ($result, $stdout, $stderr) = $node->psql(
+ 'postgres', qq[
+ SELECT pg_replication_slot_advance('$slot', '0/1');
+ ]);
+ ok( $stderr =~ /can no longer access replication slot "$slot"/,
+ "detected error upon trying to acquire invalidated slot $slot on node $node_name"
+ )
+ or die
+ "could not detect error upon trying to acquire invalidated slot $slot on node $node_name";
+}
+
# ========================================================================
# Testcase start
#
@@ -27,6 +60,7 @@ $node->init(allows_streaming => 'logical');
$node->append_conf(
'postgresql.conf', qq{
checkpoint_timeout = 1h
+idle_replication_slot_timeout = 1min
});
$node->start;
@@ -41,13 +75,6 @@ $node->psql('postgres',
my $logstart = -s $node->logfile;
-# Set timeout GUC so that the next checkpoint will invalidate inactive slots
-$node->safe_psql(
- 'postgres', qq[
- ALTER SYSTEM SET idle_replication_slot_timeout TO '1min';
-]);
-$node->reload;
-
# Register an injection point on the primary to forcibly cause a slot timeout
$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
@@ -62,6 +89,9 @@ if (!$node->check_extension('injection_points'))
$node->safe_psql('postgres',
"SELECT injection_points_attach('slot-time-out-inval', 'error');");
+# Run a checkpoint which will invalidate the slots
+$node->safe_psql('postgres', "CHECKPOINT");
+
# Wait for slots to become inactive. Note that nobody has acquired the slot
# yet, so it must get invalidated due to idle timeout.
wait_for_slot_invalidation($node, 'physical_slot', $logstart);
@@ -70,50 +100,4 @@ wait_for_slot_invalidation($node, 'logical_slot', $logstart);
# Testcase end
# =============================================================================
-# Wait for slot to first become idle and then get invalidated
-sub wait_for_slot_invalidation
-{
- my ($node, $slot, $offset) = @_;
- my $node_name = $node->name;
-
- trigger_slot_invalidation($node, $slot, $offset);
-
- # Check that an invalidated slot cannot be acquired
- my ($result, $stdout, $stderr);
- ($result, $stdout, $stderr) = $node->psql(
- 'postgres', qq[
- SELECT pg_replication_slot_advance('$slot', '0/1');
- ]);
- ok( $stderr =~ /can no longer access replication slot "$slot"/,
- "detected error upon trying to acquire invalidated slot $slot on node $node_name"
- )
- or die
- "could not detect error upon trying to acquire invalidated slot $slot on node $node_name";
-}
-
-# Trigger slot invalidation and confirm it in the server log
-sub trigger_slot_invalidation
-{
- my ($node, $slot, $offset) = @_;
- my $node_name = $node->name;
- my $invalidated = 0;
-
- # Run a checkpoint
- $node->safe_psql('postgres', "CHECKPOINT");
-
- # The slot's invalidation should be logged
- $node->wait_for_log(qr/invalidating obsolete replication slot \"$slot\"/,
- $offset);
-
- # Check that the invalidation reason is 'idle_timeout'
- $node->poll_query_until(
- 'postgres', qq[
- SELECT COUNT(slot_name) = 1 FROM pg_replication_slots
- WHERE slot_name = '$slot' AND
- invalidation_reason = 'idle_timeout';
- ])
- or die
- "Timed out while waiting for invalidation reason of slot $slot to be set on node $node_name";
-}
-
done_testing();