During a post-commit review of bc22dc0 "Get rid of WALBufMappingLock", I got
suspicious of this older AdvanceXLInsertBuffer() code:
/*
* If online backup is not in progress, mark the header to
indicate
* that WAL records beginning in this page have removable backup
* blocks. This allows the WAL archiver to know whether it is
safe to
* compress archived WAL data by transforming full-block
records into
* the non-full-block format. It is sufficient to record this
at the
* page level because we force a page switch (in fact a segment
* switch) when starting a backup, so the flag will be off
before any
* records can be written during the backup. At the end of a
backup,
* the last page will be marked as all unsafe when perhaps only
part
* is unsafe, but at worst the archiver would miss the
opportunity to
* compress a few records.
*/
if (Insert->runningBackups == 0)
NewPage->xlp_info |= XLP_BKP_REMOVABLE;
No lock stops runningBackups from changing while walwriter runs that code.
The attached test case reproduces setting XLP_BKP_REMOVABLE on a page that
should not have it:
2025-07-04 16:35:17.322 PDT [1496038] PANIC: initialization of 0/1000000 not
reflecting backup started at 0/F00028
The test patch is for v17, but a similar test reaches the same race condition
in master (equivalent to v18 for $SUBJECT purposes, I bet).
Nothing in core reacts to XLP_BKP_REMOVABLE, and I'm not aware of non-core
code that reacts to it and is still maintained.
https://codesearch.debian.net/search?q=XLP_BKP_REMOVABLE&literal=1&perpkg=1
finds none, and 2016-2018 discussion
postgr.es/m/flat/579297F8.7020107%40anastigmatix.net found none. I wonder if
ceasing to set XLP_BKP_REMOVABLE for v19 is the best fix.
I don't plan to address this myself, so I'm leaving this here.
Thanks,
nm
From: Noah Misch <[email protected]>
Reproduce incorrect XLP_BKP_REMOVABLE.
Draft quality only.
diff --git a/src/backend/access/transam/xlog.c
b/src/backend/access/transam/xlog.c
index a00786d..47b8736 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1995,6 +1995,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli,
bool opportunistic)
XLogPageHeader NewPage;
int npages pg_attribute_unused() = 0;
+ if (opportunistic)
+ INJECTION_POINT("xlog-page-init-dsm");
+
LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
/*
@@ -2118,7 +2121,15 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli,
bool opportunistic)
* compress a few records.
*/
if (Insert->runningBackups == 0)
+ {
+ if (opportunistic)
+ INJECTION_POINT("after-running-backups-zero");
NewPage->xlp_info |= XLP_BKP_REMOVABLE;
+ if (Insert->runningBackups != 0)
+ elog(PANIC, "initialization of %X/%X not
reflecting backup started at %X/%X",
+ LSN_FORMAT_ARGS(NewPageBeginPtr),
+
LSN_FORMAT_ARGS(Insert->lastBackupStart));
+ }
/*
* If first page of an XLOG segment file, make it a long header.
diff --git a/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl
b/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl
new file mode 100644
index 0000000..40b18e8
--- /dev/null
+++ b/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl
@@ -0,0 +1,97 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+#
+# Test race between backup start and decision about whether to set
+# XLP_BKP_REMOVABLE while initializing a new WAL page header.
+#
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+ plan skip_all => 'Injection points not supported by this build';
+}
+
+my $wal_segsize = 1;
+# Set wal_buffers to span multiple segments, so the segment switch done at
+# start of backup does not consume all of wal_buffers.
+my $wal_buffers_segments = 8;
+
+my ($node, $result);
+
+$node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(
+ has_archiving => 1,
+ allows_streaming => 1,
+ extra => [ '--wal-segsize' => $wal_segsize ]);
+$node->append_conf('postgresql.conf',
+ "wal_buffers = '" . ($wal_buffers_segments * $wal_segsize) . "MB'");
+$node->append_conf('postgresql.conf', "wal_writer_delay = 1")
+ ; # make test faster
+$node->start;
+
+# v17 lacks check_extension
+if (0)
+{
+ # Check if the extension injection_points is available, as it may be
+ # possible that this script is run with installcheck, where the module
+ # would not be installed by default.
+ if (!$node->check_extension('injection_points'))
+ {
+ plan skip_all => 'Extension injection_points not installed';
+ }
+}
+
+$node->safe_psql('postgres', q(CREATE EXTENSION injection_points));
+$node->safe_psql(
+ 'postgres', q(
+ SELECT injection_points_attach('xlog-page-init-dsm', 'wait');
+ SELECT injection_points_attach('after-running-backups-zero', 'wait');
+));
+
+# Use every page of wal_buffers, so walwriter has maximum work to do.
+$node->advance_wal($wal_buffers_segments);
+
+# Should find a way to init the injection_points DSM in v18+ without first
+# hitting a real injection point, which may be in a critical section. v17
+# doesn't have the critical section problem, but we still want to pause
+# without holding locks, so advance_wal doesn't deadlock w/ walwriter.
+$node->wait_for_event('walwriter', 'xlog-page-init-dsm');
+$node->safe_psql(
+ 'postgres', q{
+ SELECT injection_points_detach('xlog-page-init-dsm');
+ SELECT injection_points_wakeup('xlog-page-init-dsm');
+});
+# any WAL insertion may now hang
+
+# initialize two segments, 128 pages per segment at default XLOG_BLCKSZ
+for (1 ... 256)
+{
+ note('waiting for injection_point');
+ $node->wait_for_event('walwriter', 'after-running-backups-zero');
+ $node->safe_psql('postgres',
+ q{SELECT
injection_points_wakeup('after-running-backups-zero')});
+ note('injection_point is reached');
+}
+
+# runningBackups += 1
+my $backup = $node->background_psql('postgres');
+$backup->query_safe(q{SELECT pg_backup_start('my_backup', true)});
+note "backup started, releasing injection point";
+$node->safe_psql(
+ 'postgres',
+ q{
+ SELECT injection_points_detach('after-running-backups-zero');
+ SELECT injection_points_wakeup('after-running-backups-zero');
+});
+note "injection point released, sleeping";
+sleep 1;
+note "slept, stopping backup";
+$backup->query_safe(q{SELECT pg_backup_stop(false)});
+$backup->quit;
+
+done_testing();