On Wed, Feb 19, 2020 at 12:37:00PM -0600, David Steele wrote: > On 2/19/20 2:13 AM, Michael Paquier wrote: >> Please note that pg_internal.init is listed within noChecksumFiles in >> basebackup.c, so we would miss any temporary pg_internal.init.PID if >> we don't check after the file prefix and the base backup would issue >> extra WARNING messages, potentially masking messages that could >> matter. So let's fix that as well. > > Agreed. Though, I think pg_internal.init.XX should be excluded from the > backup as well.
Sure. That's the intention. pg_rewind, pg_checksums and basebackup.c are all the things on my list. > As far as I can see, the pg_internal.init.XX will not be cleaned up by > PostgreSQL on startup. I've only tested this in 9.6 so far, but I don't see > any differences in the code since then that would lead to better behavior. > Perhaps that's also something we should fix? Not sure that it is worth spending cycles on that at the beginning of recovery as when a mapping file is written its temporary entry truncates any existing one present matching its name. > I'm really not a fan of a blind prefix match. I think we should stick with > only excluding files that are created by Postgres. Thinking more on that, excluding any backup_label with a custom suffix worries me as it could cause a potential breakage for exiting backup solutions. So attached is an updated patch making things in a smarter way: I have added to the exclusion lists the possibility to match an entry based on its prefix, or not, the choice being optional. This solves the problem with pg_internal.PID and is careful to not exclude unnecessary entries like suffixed backup labels or such. This leads to some extra duplication within pg_rewind, basebackup.c and pg_checksums but I think we can live with that, and that makes back-patching simpler. Refactoring is still tricky though as it relates to the use of paths across the backend and the frontend.. > So backup_label.old and > tablespace_map.old should just be added to the exclude list. That's how we > have it in pgBackRest. That would be a behavior change. We could change that on HEAD, but I don't think that this can be back-patched as this does not cause an actual problem. For now, my proposal is to fix the prefix first, and then let's look at the business with tablespaces where needed. -- Michael
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index dea8aab45e..c03bc0c00b 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -121,6 +121,18 @@ static long long int total_checksum_failures;
/* Do not verify checksums. */
static bool noverify_checksums = false;
+/*
+ * Definition of one item part of an exclusion list, used for checksum
+ * or base backup items. "name" is the name of the file or path to
+ * check for exclusion. If "match_prefix" is true, any items matching the
+ * name as prefix are excluded.
+ */
+struct exclude_list_item
+{
+ const char *name;
+ bool match_prefix;
+};
+
/*
* The contents of these directories are removed or recreated during server
* start so they are not included in backups. The directories themselves are
@@ -170,16 +182,19 @@ static const char *const excludeDirContents[] =
/*
* List of files excluded from backups.
*/
-static const char *const excludeFiles[] =
+static const struct exclude_list_item excludeFiles[] =
{
/* Skip auto conf temporary file. */
- PG_AUTOCONF_FILENAME ".tmp",
+ {PG_AUTOCONF_FILENAME ".tmp", false},
/* Skip current log file temporary file */
- LOG_METAINFO_DATAFILE_TMP,
+ {LOG_METAINFO_DATAFILE_TMP, false},
- /* Skip relation cache because it is rebuilt on startup */
- RELCACHE_INIT_FILENAME,
+ /*
+ * Skip relation cache because it is rebuilt on startup. This includes
+ * temporary files.
+ */
+ {RELCACHE_INIT_FILENAME, true},
/*
* If there's a backup_label or tablespace_map file, it belongs to a
@@ -187,14 +202,14 @@ static const char *const excludeFiles[] =
* for this backup. Our backup_label/tablespace_map is injected into the
* tar separately.
*/
- BACKUP_LABEL_FILE,
- TABLESPACE_MAP,
+ {BACKUP_LABEL_FILE, false},
+ {TABLESPACE_MAP, false},
- "postmaster.pid",
- "postmaster.opts",
+ {"postmaster.pid", false},
+ {"postmaster.opts", false},
/* end of list */
- NULL
+ {NULL, false}
};
/*
@@ -203,16 +218,15 @@ static const char *const excludeFiles[] =
* Note: this list should be kept in sync with what pg_checksums.c
* includes.
*/
-static const char *const noChecksumFiles[] = {
- "pg_control",
- "pg_filenode.map",
- "pg_internal.init",
- "PG_VERSION",
+static const struct exclude_list_item noChecksumFiles[] = {
+ {"pg_control", false},
+ {"pg_filenode.map", false},
+ {"pg_internal.init", true},
+ {"PG_VERSION", false},
#ifdef EXEC_BACKEND
- "config_exec_params",
- "config_exec_params.new",
+ {"config_exec_params", true},
#endif
- NULL,
+ {NULL, false}
};
/*
@@ -1082,13 +1096,27 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
/* Scan for files that should be excluded */
excludeFound = false;
- for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
+ for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
{
- if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
+ if (excludeFiles[excludeIdx].match_prefix)
{
- elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
- excludeFound = true;
- break;
+ if (strncmp(de->d_name, excludeFiles[excludeIdx].name,
+ strlen(excludeFiles[excludeIdx].name)) == 0)
+ {
+ elog(DEBUG1, "file \"%s\" matching prefix \"%s\" excluded from backup",
+ de->d_name, excludeFiles[excludeIdx].name);
+ excludeFound = true;
+ break;
+ }
+ }
+ else
+ {
+ if (strcmp(de->d_name, excludeFiles[excludeIdx].name) == 0)
+ {
+ elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
+ excludeFound = true;
+ break;
+ }
}
}
@@ -1317,17 +1345,28 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
static bool
is_checksummed_file(const char *fullpath, const char *filename)
{
- const char *const *f;
-
/* Check that the file is in a tablespace */
if (strncmp(fullpath, "./global/", 9) == 0 ||
strncmp(fullpath, "./base/", 7) == 0 ||
strncmp(fullpath, "/", 1) == 0)
{
- /* Compare file against noChecksumFiles skiplist */
- for (f = noChecksumFiles; *f; f++)
- if (strcmp(*f, filename) == 0)
- return false;
+ int excludeIdx;
+
+ /* Compare file against noChecksumFiles skip list */
+ for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
+ {
+ if (noChecksumFiles[excludeIdx].match_prefix)
+ {
+ if (strncmp(filename, noChecksumFiles[excludeIdx].name,
+ strlen(noChecksumFiles[excludeIdx].name)) == 0)
+ return false;
+ }
+ else
+ {
+ if (strcmp(noChecksumFiles[excludeIdx].name, filename) == 0)
+ return false;
+ }
+ }
return true;
}
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index b7d36b65dd..3c70499feb 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
use File::Path qw(rmtree);
use PostgresNode;
use TestLib;
-use Test::More tests => 106;
+use Test::More tests => 107;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@@ -65,8 +65,8 @@ $node->restart;
# Write some files to test that they are not copied.
foreach my $filename (
- qw(backup_label tablespace_map postgresql.auto.conf.tmp current_logfiles.tmp)
- )
+ qw(backup_label tablespace_map postgresql.auto.conf.tmp
+ current_logfiles.tmp global/pg_internal.init.123))
{
open my $file, '>>', "$pgdata/$filename";
print $file "DONOTCOPY";
@@ -135,7 +135,7 @@ foreach my $dirname (
# These files should not be copied.
foreach my $filename (
qw(postgresql.auto.conf.tmp postmaster.opts postmaster.pid tablespace_map current_logfiles.tmp
- global/pg_internal.init))
+ global/pg_internal.init global/pg_internal.init.123))
{
ok(!-f "$tempdir/backup/$filename", "$filename not copied");
}
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 46ee1f1dc3..4c8eb33698 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -91,21 +91,32 @@ usage(void)
printf(_("Report bugs to <[email protected]>.\n"));
}
+/*
+ * Definition of one item part of an exclusion list, used for file
+ * entries. "name" is the name of the file or path to check for
+ * exclusion. If "match_prefix" is true, any items matching the name
+ * as prefix are excluded.
+ */
+struct exclude_list_item
+{
+ const char *name;
+ bool match_prefix;
+};
+
/*
* List of files excluded from checksum validation.
*
* Note: this list should be kept in sync with what basebackup.c includes.
*/
-static const char *const skip[] = {
- "pg_control",
- "pg_filenode.map",
- "pg_internal.init",
- "PG_VERSION",
+static const struct exclude_list_item skip[] = {
+ {"pg_control", false},
+ {"pg_filenode.map", false},
+ {"pg_internal.init", true},
+ {"PG_VERSION", false},
#ifdef EXEC_BACKEND
- "config_exec_params",
- "config_exec_params.new",
+ {"config_exec_params", true},
#endif
- NULL,
+ {NULL, false}
};
/*
@@ -157,11 +168,22 @@ progress_report(bool force)
static bool
skipfile(const char *fn)
{
- const char *const *f;
+ int excludeIdx;
- for (f = skip; *f; f++)
- if (strcmp(*f, fn) == 0)
- return true;
+ for (excludeIdx = 0; skip[excludeIdx].name != NULL; excludeIdx++)
+ {
+ if (skip[excludeIdx].match_prefix)
+ {
+ if (strncmp(skip[excludeIdx].name, fn,
+ strlen(skip[excludeIdx].name)) == 0)
+ return true;
+ }
+ else
+ {
+ if (strcmp(skip[excludeIdx].name, fn) == 0)
+ return true;
+ }
+ }
return false;
}
diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl
index 59228b916c..83a730ea94 100644
--- a/src/bin/pg_checksums/t/002_actions.pl
+++ b/src/bin/pg_checksums/t/002_actions.pl
@@ -111,7 +111,9 @@ append_to_file "$pgdata/global/99999_vm.123", "";
# should be ignored by the scan.
append_to_file "$pgdata/global/pgsql_tmp_123", "foo";
mkdir "$pgdata/global/pgsql_tmp";
-append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo";
+append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo";
+append_to_file "$pgdata/global/pg_internal.init", "foo";
+append_to_file "$pgdata/global/pg_internal.init.123", "foo";
# Enable checksums.
command_ok([ 'pg_checksums', '--enable', '--no-sync', '-D', $pgdata ],
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index fd14844eec..67977ada99 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -30,6 +30,18 @@ static int final_filemap_cmp(const void *a, const void *b);
static void filemap_list_to_array(filemap_t *map);
static bool check_file_excluded(const char *path, bool is_source);
+/*
+ * Definition of one item part of an exclusion list, used for file
+ * entries. "name" is the name of the file or path to check for exclusion.
+ * If "match_prefix" is true, any items matching the name as prefix
+ * are excluded.
+ */
+struct exclude_list_item
+{
+ const char *name;
+ bool match_prefix;
+};
+
/*
* The contents of these directories are removed or recreated during server
* start so they are not included in data processed by pg_rewind.
@@ -78,32 +90,34 @@ static const char *excludeDirContents[] =
};
/*
- * List of files excluded from filemap processing.
+ * List of files excluded from filemap processing. Files are excluded
+ * if their prefix match.
*/
-static const char *excludeFiles[] =
+static const struct exclude_list_item excludeFiles[] =
{
/* Skip auto conf temporary file. */
- "postgresql.auto.conf.tmp", /* defined as PG_AUTOCONF_FILENAME */
+ {"postgresql.auto.conf.tmp", false}, /* defined as PG_AUTOCONF_FILENAME */
/* Skip current log file temporary file */
- "current_logfiles.tmp", /* defined as LOG_METAINFO_DATAFILE_TMP */
+ {"current_logfiles.tmp", false}, /* defined as
+ * LOG_METAINFO_DATAFILE_TMP */
/* Skip relation cache because it is rebuilt on startup */
- "pg_internal.init", /* defined as RELCACHE_INIT_FILENAME */
+ {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
/*
* If there's a backup_label or tablespace_map file, it belongs to a
* backup started by the user with pg_start_backup(). It is *not* correct
* for this backup. Our backup_label is written later on separately.
*/
- "backup_label", /* defined as BACKUP_LABEL_FILE */
- "tablespace_map", /* defined as TABLESPACE_MAP */
+ {"backup_label", false}, /* defined as BACKUP_LABEL_FILE */
+ {"tablespace_map", false}, /* defined as TABLESPACE_MAP */
- "postmaster.pid",
- "postmaster.opts",
+ {"postmaster.pid", false},
+ {"postmaster.opts", false},
/* end of list */
- NULL
+ {NULL, false}
};
/*
@@ -496,22 +510,40 @@ check_file_excluded(const char *path, bool is_source)
const char *filename;
/* check individual files... */
- for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
+ for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
{
filename = last_dir_separator(path);
if (filename == NULL)
filename = path;
else
filename++;
- if (strcmp(filename, excludeFiles[excludeIdx]) == 0)
+
+ if (excludeFiles[excludeIdx].match_prefix)
{
- if (is_source)
- pg_log_debug("entry \"%s\" excluded from source file list",
- path);
- else
- pg_log_debug("entry \"%s\" excluded from target file list",
- path);
- return true;
+ if (strncmp(filename, excludeFiles[excludeIdx].name,
+ strlen(excludeFiles[excludeIdx].name)) == 0)
+ {
+ if (is_source)
+ pg_log_debug("entry \"%s\" matching prefix \"%s\" excluded from source file list",
+ path, excludeFiles[excludeIdx].name);
+ else
+ pg_log_debug("entry \"%s\" matching prefix \"%s\" excluded from target file list",
+ path, excludeFiles[excludeIdx].name);
+ return true;
+ }
+ }
+ else
+ {
+ if (strcmp(filename, excludeFiles[excludeIdx].name) == 0)
+ {
+ if (is_source)
+ pg_log_debug("entry \"%s\" excluded from source file list",
+ path);
+ else
+ pg_log_debug("entry \"%s\" excluded from target file list",
+ path);
+ return true;
+ }
}
}
signature.asc
Description: PGP signature
