During a post-commit review of bc22dc0 "Get rid of WALBufMappingLock", I got suspicious of this older AdvanceXLInsertBuffer() code:
/* * If online backup is not in progress, mark the header to indicate * that WAL records beginning in this page have removable backup * blocks. This allows the WAL archiver to know whether it is safe to * compress archived WAL data by transforming full-block records into * the non-full-block format. It is sufficient to record this at the * page level because we force a page switch (in fact a segment * switch) when starting a backup, so the flag will be off before any * records can be written during the backup. At the end of a backup, * the last page will be marked as all unsafe when perhaps only part * is unsafe, but at worst the archiver would miss the opportunity to * compress a few records. */ if (Insert->runningBackups == 0) NewPage->xlp_info |= XLP_BKP_REMOVABLE; No lock stops runningBackups from changing while walwriter runs that code. The attached test case reproduces setting XLP_BKP_REMOVABLE on a page that should not have it: 2025-07-04 16:35:17.322 PDT [1496038] PANIC: initialization of 0/1000000 not reflecting backup started at 0/F00028 The test patch is for v17, but a similar test reaches the same race condition in master (equivalent to v18 for $SUBJECT purposes, I bet). Nothing in core reacts to XLP_BKP_REMOVABLE, and I'm not aware of non-core code that reacts to it and is still maintained. https://codesearch.debian.net/search?q=XLP_BKP_REMOVABLE&literal=1&perpkg=1 finds none, and 2016-2018 discussion postgr.es/m/flat/579297F8.7020107%40anastigmatix.net found none. I wonder if ceasing to set XLP_BKP_REMOVABLE for v19 is the best fix. I don't plan to address this myself, so I'm leaving this here. Thanks, nm
From: Noah Misch <n...@leadboat.com> Reproduce incorrect XLP_BKP_REMOVABLE. Draft quality only. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a00786d..47b8736 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -1995,6 +1995,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) XLogPageHeader NewPage; int npages pg_attribute_unused() = 0; + if (opportunistic) + INJECTION_POINT("xlog-page-init-dsm"); + LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE); /* @@ -2118,7 +2121,15 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) * compress a few records. */ if (Insert->runningBackups == 0) + { + if (opportunistic) + INJECTION_POINT("after-running-backups-zero"); NewPage->xlp_info |= XLP_BKP_REMOVABLE; + if (Insert->runningBackups != 0) + elog(PANIC, "initialization of %X/%X not reflecting backup started at %X/%X", + LSN_FORMAT_ARGS(NewPageBeginPtr), + LSN_FORMAT_ARGS(Insert->lastBackupStart)); + } /* * If first page of an XLOG segment file, make it a long header. diff --git a/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl b/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl new file mode 100644 index 0000000..40b18e8 --- /dev/null +++ b/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl @@ -0,0 +1,97 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group +# +# Test race between backup start and decision about whether to set +# XLP_BKP_REMOVABLE while initializing a new WAL page header. +# +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +my $wal_segsize = 1; +# Set wal_buffers to span multiple segments, so the segment switch done at +# start of backup does not consume all of wal_buffers. +my $wal_buffers_segments = 8; + +my ($node, $result); + +$node = PostgreSQL::Test::Cluster->new('primary'); +$node->init( + has_archiving => 1, + allows_streaming => 1, + extra => [ '--wal-segsize' => $wal_segsize ]); +$node->append_conf('postgresql.conf', + "wal_buffers = '" . ($wal_buffers_segments * $wal_segsize) . "MB'"); +$node->append_conf('postgresql.conf', "wal_writer_delay = 1") + ; # make test faster +$node->start; + +# v17 lacks check_extension +if (0) +{ + # Check if the extension injection_points is available, as it may be + # possible that this script is run with installcheck, where the module + # would not be installed by default. + if (!$node->check_extension('injection_points')) + { + plan skip_all => 'Extension injection_points not installed'; + } +} + +$node->safe_psql('postgres', q(CREATE EXTENSION injection_points)); +$node->safe_psql( + 'postgres', q( + SELECT injection_points_attach('xlog-page-init-dsm', 'wait'); + SELECT injection_points_attach('after-running-backups-zero', 'wait'); +)); + +# Use every page of wal_buffers, so walwriter has maximum work to do. +$node->advance_wal($wal_buffers_segments); + +# Should find a way to init the injection_points DSM in v18+ without first +# hitting a real injection point, which may be in a critical section. v17 +# doesn't have the critical section problem, but we still want to pause +# without holding locks, so advance_wal doesn't deadlock w/ walwriter. +$node->wait_for_event('walwriter', 'xlog-page-init-dsm'); +$node->safe_psql( + 'postgres', q{ + SELECT injection_points_detach('xlog-page-init-dsm'); + SELECT injection_points_wakeup('xlog-page-init-dsm'); +}); +# any WAL insertion may now hang + +# initialize two segments, 128 pages per segment at default XLOG_BLCKSZ +for (1 ... 256) +{ + note('waiting for injection_point'); + $node->wait_for_event('walwriter', 'after-running-backups-zero'); + $node->safe_psql('postgres', + q{SELECT injection_points_wakeup('after-running-backups-zero')}); + note('injection_point is reached'); +} + +# runningBackups += 1 +my $backup = $node->background_psql('postgres'); +$backup->query_safe(q{SELECT pg_backup_start('my_backup', true)}); +note "backup started, releasing injection point"; +$node->safe_psql( + 'postgres', + q{ + SELECT injection_points_detach('after-running-backups-zero'); + SELECT injection_points_wakeup('after-running-backups-zero'); +}); +note "injection point released, sleeping"; +sleep 1; +note "slept, stopping backup"; +$backup->query_safe(q{SELECT pg_backup_stop(false)}); +$backup->quit; + +done_testing();