During a post-commit review of bc22dc0 "Get rid of WALBufMappingLock", I got
suspicious of this older AdvanceXLInsertBuffer() code:

                /*
                 * If online backup is not in progress, mark the header to 
indicate
                 * that WAL records beginning in this page have removable backup
                 * blocks.  This allows the WAL archiver to know whether it is 
safe to
                 * compress archived WAL data by transforming full-block 
records into
                 * the non-full-block format.  It is sufficient to record this 
at the
                 * page level because we force a page switch (in fact a segment
                 * switch) when starting a backup, so the flag will be off 
before any
                 * records can be written during the backup.  At the end of a 
backup,
                 * the last page will be marked as all unsafe when perhaps only 
part
                 * is unsafe, but at worst the archiver would miss the 
opportunity to
                 * compress a few records.
                 */
                if (Insert->runningBackups == 0)
                        NewPage->xlp_info |= XLP_BKP_REMOVABLE;

No lock stops runningBackups from changing while walwriter runs that code.
The attached test case reproduces setting XLP_BKP_REMOVABLE on a page that
should not have it:

  2025-07-04 16:35:17.322 PDT [1496038] PANIC:  initialization of 0/1000000 not 
reflecting backup started at 0/F00028

The test patch is for v17, but a similar test reaches the same race condition
in master (equivalent to v18 for $SUBJECT purposes, I bet).

Nothing in core reacts to XLP_BKP_REMOVABLE, and I'm not aware of non-core
code that reacts to it and is still maintained.
https://codesearch.debian.net/search?q=XLP_BKP_REMOVABLE&literal=1&perpkg=1
finds none, and 2016-2018 discussion
postgr.es/m/flat/579297F8.7020107%40anastigmatix.net found none.  I wonder if
ceasing to set XLP_BKP_REMOVABLE for v19 is the best fix.

I don't plan to address this myself, so I'm leaving this here.

Thanks,
nm
From: Noah Misch <n...@leadboat.com>

Reproduce incorrect XLP_BKP_REMOVABLE.

Draft quality only.

diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index a00786d..47b8736 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1995,6 +1995,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, 
bool opportunistic)
        XLogPageHeader NewPage;
        int                     npages pg_attribute_unused() = 0;
 
+       if (opportunistic)
+               INJECTION_POINT("xlog-page-init-dsm");
+
        LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
 
        /*
@@ -2118,7 +2121,15 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, 
bool opportunistic)
                 * compress a few records.
                 */
                if (Insert->runningBackups == 0)
+               {
+                       if (opportunistic)
+                               INJECTION_POINT("after-running-backups-zero");
                        NewPage->xlp_info |= XLP_BKP_REMOVABLE;
+                       if (Insert->runningBackups != 0)
+                               elog(PANIC, "initialization of %X/%X not 
reflecting backup started at %X/%X",
+                                        LSN_FORMAT_ARGS(NewPageBeginPtr),
+                                        
LSN_FORMAT_ARGS(Insert->lastBackupStart));
+               }
 
                /*
                 * If first page of an XLOG segment file, make it a long header.
diff --git a/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl 
b/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl
new file mode 100644
index 0000000..40b18e8
--- /dev/null
+++ b/src/test/recovery/t/200_XLP_BKP_REMOVABLE.pl
@@ -0,0 +1,97 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+#
+# Test race between backup start and decision about whether to set
+# XLP_BKP_REMOVABLE while initializing a new WAL page header.
+#
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+       plan skip_all => 'Injection points not supported by this build';
+}
+
+my $wal_segsize = 1;
+# Set wal_buffers to span multiple segments, so the segment switch done at
+# start of backup does not consume all of wal_buffers.
+my $wal_buffers_segments = 8;
+
+my ($node, $result);
+
+$node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(
+       has_archiving => 1,
+       allows_streaming => 1,
+       extra => [ '--wal-segsize' => $wal_segsize ]);
+$node->append_conf('postgresql.conf',
+       "wal_buffers = '" . ($wal_buffers_segments * $wal_segsize) . "MB'");
+$node->append_conf('postgresql.conf', "wal_writer_delay = 1")
+  ;    # make test faster
+$node->start;
+
+# v17 lacks check_extension
+if (0)
+{
+       # Check if the extension injection_points is available, as it may be
+       # possible that this script is run with installcheck, where the module
+       # would not be installed by default.
+       if (!$node->check_extension('injection_points'))
+       {
+               plan skip_all => 'Extension injection_points not installed';
+       }
+}
+
+$node->safe_psql('postgres', q(CREATE EXTENSION injection_points));
+$node->safe_psql(
+       'postgres', q(
+       SELECT injection_points_attach('xlog-page-init-dsm', 'wait');
+       SELECT injection_points_attach('after-running-backups-zero', 'wait');
+));
+
+# Use every page of wal_buffers, so walwriter has maximum work to do.
+$node->advance_wal($wal_buffers_segments);
+
+# Should find a way to init the injection_points DSM in v18+ without first
+# hitting a real injection point, which may be in a critical section.  v17
+# doesn't have the critical section problem, but we still want to pause
+# without holding locks, so advance_wal doesn't deadlock w/ walwriter.
+$node->wait_for_event('walwriter', 'xlog-page-init-dsm');
+$node->safe_psql(
+       'postgres', q{
+       SELECT injection_points_detach('xlog-page-init-dsm');
+       SELECT injection_points_wakeup('xlog-page-init-dsm');
+});
+# any WAL insertion may now hang
+
+# initialize two segments, 128 pages per segment at default XLOG_BLCKSZ
+for (1 ... 256)
+{
+       note('waiting for injection_point');
+       $node->wait_for_event('walwriter', 'after-running-backups-zero');
+       $node->safe_psql('postgres',
+               q{SELECT 
injection_points_wakeup('after-running-backups-zero')});
+       note('injection_point is reached');
+}
+
+# runningBackups += 1
+my $backup = $node->background_psql('postgres');
+$backup->query_safe(q{SELECT pg_backup_start('my_backup', true)});
+note "backup started, releasing injection point";
+$node->safe_psql(
+       'postgres',
+       q{
+       SELECT injection_points_detach('after-running-backups-zero');
+       SELECT injection_points_wakeup('after-running-backups-zero');
+});
+note "injection point released, sleeping";
+sleep 1;
+note "slept, stopping backup";
+$backup->query_safe(q{SELECT pg_backup_stop(false)});
+$backup->quit;
+
+done_testing();

Reply via email to