Hi, > This is a tricky bug. Do you also have a proposal of a particular fix?
If my understanding is correct, we should make a WAL record with the XLH_LOCK_ALL_FROZEN_CLEARED flag *before* we modify the VM but within the same critical section (in order to avoid race conditions within the same backend). A draft patch is attached. It makes the test pass and doesn't seem to break any other tests. Thoughts?
From 2909bfd3483b4796cb671c722520853d3b6ce61b Mon Sep 17 00:00:00 2001 From: Andrey Borodin <amboro...@acm.org> Date: Sun, 27 Jul 2025 11:37:55 +0500 Subject: [PATCH v2 1/2] Corrupt VM on standby --- src/backend/access/heap/heapam.c | 3 + src/backend/access/heap/heapam_xlog.c | 1 + src/test/modules/test_slru/Makefile | 2 +- src/test/modules/test_slru/t/001_multixact.pl | 172 +++++++++--------- src/test/perl/PostgreSQL/Test/Cluster.pm | 40 ++++ 5 files changed, 128 insertions(+), 90 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0dcd6ee817e..e5da051aaf0 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2123,6 +2123,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber); + INJECTION_POINT_LOAD("heap-insert:visibilitymap-clear"); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2136,6 +2138,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, visibilitymap_clear(relation, ItemPointerGetBlockNumber(&(heaptup->t_self)), vmbuffer, VISIBILITYMAP_VALID_BITS); + INJECTION_POINT_CACHED("heap-insert:visibilitymap-clear", NULL); } /* diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index eb4bd3d6ae3..978030048b6 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -21,6 +21,7 @@ #include "access/xlogutils.h" #include "storage/freespace.h" #include "storage/standby.h" +#include "utils/injection_point.h" /* diff --git a/src/test/modules/test_slru/Makefile b/src/test/modules/test_slru/Makefile index fe4e7390437..b6ad41ec804 100644 --- a/src/test/modules/test_slru/Makefile +++ b/src/test/modules/test_slru/Makefile @@ -7,7 +7,7 @@ OBJS = \ test_multixact.o PGFILEDESC = "test_slru - test module for SLRUs" -EXTRA_INSTALL=src/test/modules/injection_points +EXTRA_INSTALL=src/test/modules/injection_points contrib/pg_visibility export enable_injection_points TAP_TESTS = 1 diff --git a/src/test/modules/test_slru/t/001_multixact.pl b/src/test/modules/test_slru/t/001_multixact.pl index e2b567a603d..0c347a5157d 100644 --- a/src/test/modules/test_slru/t/001_multixact.pl +++ b/src/test/modules/test_slru/t/001_multixact.pl @@ -1,11 +1,6 @@ # Copyright (c) 2024-2025, PostgreSQL Global Development Group -# This test verifies edge case of reading a multixact: -# when we have multixact that is followed by exactly one another multixact, -# and another multixact have no offset yet, we must wait until this offset -# becomes observable. Previously we used to wait for 1ms in a loop in this -# case, but now we use CV for this. This test is exercising such a sleep. - +# This test verifies edge case of visibilitymap_clear use strict; use warnings FATAL => 'all'; @@ -18,106 +13,105 @@ if ($ENV{enable_injection_points} ne 'yes') { plan skip_all => 'Injection points not supported by this build'; } +if ($windows_os) +{ + plan skip_all => 'Kill9 works unpredicatably on Windows'; +} my ($node, $result); $node = PostgreSQL::Test::Cluster->new('mike'); -$node->init; +$node->init(allows_streaming => 1); $node->append_conf('postgresql.conf', "shared_preload_libraries = 'test_slru,injection_points'"); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION injection_points)); +$node->safe_psql('postgres', q(CREATE EXTENSION pg_visibility)); $node->safe_psql('postgres', q(CREATE EXTENSION test_slru)); -# Test for Multixact generation edge case -$node->safe_psql('postgres', - q{select injection_points_attach('test-multixact-read','wait')}); -$node->safe_psql('postgres', - q{select injection_points_attach('multixact-get-members-cv-sleep','wait')} -); - -# This session must observe sleep on the condition variable while generating a -# multixact. To achieve this it first will create a multixact, then pause -# before reading it. -my $observer = $node->background_psql('postgres'); - -# This query will create a multixact, and hang just before reading it. -$observer->query_until( - qr/start/, - q{ - \echo start - SELECT test_read_multixact(test_create_multixact()); -}); -$node->wait_for_event('client backend', 'test-multixact-read'); - -# This session will create the next Multixact. This is necessary to avoid -# multixact.c's non-sleeping edge case 1. -my $creator = $node->background_psql('postgres'); +my $backup_name = 'my_backup'; +$node->backup($backup_name); +my $standby = PostgreSQL::Test::Cluster->new('standby'); +$standby->init_from_backup($node, $backup_name, has_streaming => 1); +$standby->start; + +my $bg_psql = $node->background_psql('postgres'); + +# prepare the table, index and freeze some rows. +# index is here only to demonstrate selecting deleted data on standby +my $multi = $bg_psql->query_safe( + q( + CREATE TABLE x(i int); + CREATE INDEX on x(i); + INSERT INTO x VALUES (1); + VACUUM FREEZE x; + )); + +# next backand clearing VM in heap_insert() will hang after releasing buffer lock $node->safe_psql('postgres', - q{SELECT injection_points_attach('multixact-create-from-members','wait');} + q{SELECT injection_points_attach('heap-insert:visibilitymap-clear','wait');} ); -# We expect this query to hang in the critical section after generating new -# multixact, but before filling its offset into SLRU. -# Running an injection point inside a critical section requires it to be -# loaded beforehand. -$creator->query_until( - qr/start/, q{ - \echo start - SELECT test_create_multixact(); -}); - -$node->wait_for_event('client backend', 'multixact-create-from-members'); - -# Ensure we have the backends waiting that we expect -is( $node->safe_psql( - 'postgres', - q{SELECT string_agg(wait_event, ', ' ORDER BY wait_event) - FROM pg_stat_activity WHERE wait_event_type = 'InjectionPoint'} - ), - 'multixact-create-from-members, test-multixact-read', - "matching injection point waits"); - -# Now wake observer to get it to read the initial multixact. A subsequent -# multixact already exists, but that one doesn't have an offset assigned, so -# this will hit multixact.c's edge case 2. -$node->safe_psql('postgres', - q{SELECT injection_points_wakeup('test-multixact-read')}); -$node->wait_for_event('client backend', 'multixact-get-members-cv-sleep'); - -# Ensure we have the backends waiting that we expect -is( $node->safe_psql( - 'postgres', - q{SELECT string_agg(wait_event, ', ' ORDER BY wait_event) - FROM pg_stat_activity WHERE wait_event_type = 'InjectionPoint'} - ), - 'multixact-create-from-members, multixact-get-members-cv-sleep', - "matching injection point waits"); - -# Now we have two backends waiting in multixact-create-from-members and -# multixact-get-members-cv-sleep. Also we have 3 injections points set to wait. -# If we wakeup multixact-get-members-cv-sleep it will happen again, so we must -# detach it first. So let's detach all injection points, then wake up all -# backends. - -$node->safe_psql('postgres', - q{SELECT injection_points_detach('test-multixact-read')}); -$node->safe_psql('postgres', - q{SELECT injection_points_detach('multixact-create-from-members')}); -$node->safe_psql('postgres', - q{SELECT injection_points_detach('multixact-get-members-cv-sleep')}); +# we expect this backend to hang forever +$bg_psql->query_until( + qr/deploying lost VM bit/, q( +\echo deploying lost VM bit + INSERT INTO x VALUES (1); +)); +$node->wait_for_event('client backend', 'heap-insert:visibilitymap-clear'); $node->safe_psql('postgres', - q{SELECT injection_points_wakeup('multixact-create-from-members')}); + q{SELECT injection_points_detach('heap-insert:visibilitymap-clear')}); + +# now we want VM on-disk. Checkpoint will hang too, hence another background session. +my $bg_psql1 = $node->background_psql('postgres'); +$bg_psql1->query_until( + qr/flush data to disk/, q( +\echo flush data to disk + checkpoint; +)); + +sleep(1); +# All set and done, it's time for hard restart +$node->kill9; +$node->stop('immediate', fail_ok => 1); +$node->poll_start; +$bg_psql->{run}->finish; +$bg_psql1->{run}->finish; + +# Now VMs are different on primary and standby. Insert and some data to demonstrate problems. $node->safe_psql('postgres', - q{SELECT injection_points_wakeup('multixact-get-members-cv-sleep')}); - -# Background psql will now be able to read the result and disconnect. -$observer->quit; -$creator->quit; + q{ + INSERT INTO x VALUES (1); + INSERT INTO x VALUES (2); + DELETE FROM x WHERE i = 2; + }); + +$node->wait_for_catchup($standby); + +# corruptino tests +my $corrupted_all_frozen = $standby->safe_psql('postgres', + qq{SELECT pg_check_frozen('x')}); +my $corrupted_all_visible = $standby->safe_psql('postgres', + qq{SELECT pg_check_visible('x')}); +my $observed_deleted_data = $standby->safe_psql('postgres', + qq{set enable_seqscan to false; SELECT * FROM x WHERE i = 2;}); + +# debug printing +print("\n"); +print($corrupted_all_frozen); +print("\n\n"); +print($corrupted_all_visible); +print("\n\n"); +print($observed_deleted_data); +print("\n\n"); + +# fail the test if any corruption is reported +is($corrupted_all_frozen, '', 'pg_check_frozen() observes corruption'); +is($corrupted_all_visible, '', 'pg_check_visible() observes corruption'); +is($observed_deleted_data, '', 'deleted data returned by select'); $node->stop; +$standby->stop; -# If we reached this point - everything is OK. -ok(1); done_testing(); diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 35413f14019..e810f123f93 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -1191,6 +1191,46 @@ sub start =pod +=item $node->poll_start() => success_or_failure + +Polls start after kill9 + +We may need retries to start a new postmaster. Causes: + - kernel is slow to deliver SIGKILL + - postmaster parent is slow to waitpid() + - postmaster child is slow to exit in response to SIGQUIT + - postmaster child is slow to exit after postmaster death + +=cut + +sub poll_start +{ + my ($self) = @_; + + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; + my $attempts = 0; + + while ($attempts < $max_attempts) + { + $self->start(fail_ok => 1) && return 1; + + # Wait 0.1 second before retrying. + usleep(100_000); + + # Clean up in case the start attempt just timed out or some such. + $self->stop('fast', fail_ok => 1); + + $attempts++; + } + + # Try one last time without fail_ok, which will BAIL_OUT unless it + # succeeds. + $self->start && return 1; + return 0; +} + +=pod + =item $node->kill9() Send SIGKILL (signal 9) to the postmaster. -- 2.43.0
From 4617d97e2953fcb5b20bb14750bcc971ed504286 Mon Sep 17 00:00:00 2001 From: Aleksander Alekseev <aleksan...@timescale.com> Date: Thu, 7 Aug 2025 17:04:30 +0300 Subject: [PATCH v2 2/2] Bugfix TODO FIXME write a better message --- src/backend/access/heap/heapam.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e5da051aaf0..6e38c9b4d37 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2135,10 +2135,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, { all_visible_cleared = true; PageClearAllVisible(BufferGetPage(buffer)); - visibilitymap_clear(relation, - ItemPointerGetBlockNumber(&(heaptup->t_self)), - vmbuffer, VISIBILITYMAP_VALID_BITS); - INJECTION_POINT_CACHED("heap-insert:visibilitymap-clear", NULL); + + /* + * Visibility map should be updated *after* logging the change + * within the same critical section. + * + * AALEKSEEV TODO clarify the reason + */ } /* @@ -2233,6 +2236,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, PageSetLSN(page, recptr); } + if (all_visible_cleared) + { + visibilitymap_clear(relation, + ItemPointerGetBlockNumber(&(heaptup->t_self)), + vmbuffer, VISIBILITYMAP_VALID_BITS); + INJECTION_POINT_CACHED("heap-insert:visibilitymap-clear", NULL); + } + END_CRIT_SECTION(); UnlockReleaseBuffer(buffer); -- 2.43.0