Hi, Please find attached v5. What changed: 1. Now we collect which files should be kept in a separate hash table. 2. Decision whether to keep the file is made only when the file is actually missing on the source. That is, remaining WAL files will be copied over as it currently is, although it could be extremely inefficient and unnecessary. 3. Added TAP test that actually at least one file isn't removed.
Regards, -- Alexander Kukushkin
From 3e1e6c9d968e9b829357b6eb0a7dfa366b550668 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin <cyberd...@gmail.com> Date: Tue, 12 Sep 2023 14:09:47 +0200 Subject: [PATCH v5] Be more picky with WAL segment deletion in pg_rewind Make pg_rewind to be a bit wiser in terms of creating filemap: preserve on the target all WAL segments that contain records between the last common checkpoint and the point of divergence. Co-authored-by: Polina Bungina <bung...@gmail.com> --- src/bin/pg_rewind/filemap.c | 62 ++++++++++++++++++- src/bin/pg_rewind/filemap.h | 4 ++ src/bin/pg_rewind/parsexlog.c | 22 +++++++ src/bin/pg_rewind/pg_rewind.c | 3 + src/bin/pg_rewind/t/010_keep_recycled_wals.pl | 62 +++++++++++++++++++ 5 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 src/bin/pg_rewind/t/010_keep_recycled_wals.pl diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 58280d9abc..d44d716d82 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -64,6 +64,27 @@ static file_entry_t *lookup_filehash_entry(const char *path); static int final_filemap_cmp(const void *a, const void *b); static bool check_file_excluded(const char *path, bool is_source); +typedef struct skipwal_t { + const char *path; + uint32 status; +} skipwal_t; + +#define SH_PREFIX keepwalhash +#define SH_ELEMENT_TYPE skipwal_t +#define SH_KEY_TYPE const char * +#define SH_KEY path +#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) +#define SH_SCOPE static inline +#define SH_RAW_ALLOCATOR pg_malloc0 +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +static keepwalhash_hash *keepwalhash = NULL; + +static bool keepwalhash_entry_exists(const char *path); + /* * Definition of one element part of an exclusion list, used to exclude * contents when rewinding. "name" is the name of the file or path to @@ -207,6 +228,35 @@ lookup_filehash_entry(const char *path) return filehash_lookup(filehash, path); } +/* Initialize a hash table to store WAL file names that must be kept */ +void +keepwalhash_init(void) +{ + keepwalhash = keepwalhash_create(FILEHASH_INITIAL_SIZE, NULL); +} + +/* Prevent a given file deletion during rewind */ +void +insert_keepwalhash_entry(const char *path) +{ + skipwal_t *entry; + bool found; + + /* Should only be called with keepwalhash initialized */ + Assert(keepwalhash); + + entry = keepwalhash_insert(keepwalhash, path, &found); + + if (!found) + entry->path = pg_strdup(path); +} + +static bool +keepwalhash_entry_exists(const char *path) +{ + return keepwalhash_lookup(keepwalhash, path) != NULL; +} + /* * Callback for processing source file list. * @@ -682,7 +732,16 @@ decide_file_action(file_entry_t *entry) } else if (entry->target_exists && !entry->source_exists) { - /* File exists in target, but not source. Remove it. */ + /* File exists in target, but not source. */ + + if (keepwalhash_entry_exists(path)) + { + /* This is a WAL file that should be kept. */ + pg_log_debug("Not removing %s because it is required for recovery", path); + return FILE_ACTION_NONE; + } + + /* Otherwise remove an unexpected file. */ return FILE_ACTION_REMOVE; } else if (!entry->target_exists && !entry->source_exists) @@ -818,7 +877,6 @@ decide_file_actions(void) return filemap; } - /* * Helper function for filemap hash table. */ diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h index 48f240dff1..9a0d4bbc54 100644 --- a/src/bin/pg_rewind/filemap.h +++ b/src/bin/pg_rewind/filemap.h @@ -109,5 +109,9 @@ extern void process_target_wal_block_change(ForkNumber forknum, extern filemap_t *decide_file_actions(void); extern void calculate_totals(filemap_t *filemap); extern void print_filemap(filemap_t *filemap); +extern void preserve_file(char *filepath); + +extern void keepwalhash_init(void); +extern void insert_keepwalhash_entry(const char *path); #endif /* FILEMAP_H */ diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 27782237d0..4c7f9bef14 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -176,6 +176,10 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, char *errormsg; XLogPageReadPrivate private; + /* Track WAL segments opened while searching a checkpoint */ + XLogSegNo segno = 0; + TimeLineID tli = 0; + /* * The given fork pointer points to the end of the last common record, * which is not necessarily the beginning of the next record, if the @@ -217,6 +221,24 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, LSN_FORMAT_ARGS(searchptr)); } + /* We are trying to detect if the new WAL file was opened */ + if (xlogreader->seg.ws_tli != tli || xlogreader->seg.ws_segno != segno) + { + char xlogfname[MAXFNAMELEN]; + + tli = xlogreader->seg.ws_tli; + segno = xlogreader->seg.ws_segno; + + snprintf(xlogfname, MAXPGPATH, XLOGDIR "/"); + XLogFileName(xlogfname + strlen(xlogfname), + xlogreader->seg.ws_tli, + xlogreader->seg.ws_segno, WalSegSz); + + /* Make sure pg_rewind doesn't remove this file, because it is + * required for postgres to start after rewind. */ + insert_keepwalhash_entry(xlogfname); + } + /* * Check if it is a checkpoint record. This checkpoint record needs to * be the latest checkpoint before WAL forked and not the checkpoint diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index bfd44a284e..bfbc65c111 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -455,6 +455,9 @@ main(int argc, char **argv) exit(0); } + /* Hash to memorize WAL files that should be kept */ + keepwalhash_init(); + findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex, &chkptrec, &chkpttli, &chkptredo, restore_command); pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u", diff --git a/src/bin/pg_rewind/t/010_keep_recycled_wals.pl b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl new file mode 100644 index 0000000000..e1bee01a6e --- /dev/null +++ b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl @@ -0,0 +1,62 @@ + +# Copyright (c) 2021-2023, PostgreSQL Global Development Group + +# +# Test situation where a target data directory contains +# WAL files that were already recycled by the new primary. + +use strict; +use warnings; +use PostgreSQL::Test::Utils; +use Test::More; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +setup_cluster; +$node_primary->enable_archiving; +start_primary(); + +create_standby; +$node_standby->enable_restoring($node_primary, 0); +$node_standby->reload; + +primary_psql("CHECKPOINT"); # last common checkpoint + +# primary can't archive anymore +$node_primary->append_conf( + 'postgresql.conf', qq( +archive_command = 'false' +)); +$node_primary->reload; + +# advance WAL on the primary; WAL segment will never make it to the archive +primary_psql("create table t(a int)"); +primary_psql("insert into t values(0)"); +primary_psql("select pg_switch_wal()"); + +promote_standby; + +# new primary loses diverging WAL segment +standby_psql("insert into t values(0)"); +standby_psql("select pg_switch_wal()"); + +$node_standby->stop; +$node_primary->stop; + +my ($stdout, $stderr) = run_command( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $node_standby->data_dir, + '--target-pgdata', $node_primary->data_dir, + '--no-sync', + ]); + +like( + $stderr, + qr/Not removing pg_wal.* because it is required for recovery/, + "no WAL files skipped"); + +done_testing(); -- 2.34.1