Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-26 Thread Michael Paquier
On Wed, Feb 26, 2020 at 06:02:22PM +0100, Bernd Helmle wrote:
> My feeling is that in the case we cannot successfully resolve a
> tablespace location from pg_tblspc, we should error out, but i could
> imagine that people would like to have just a warning instead.

Thanks, this patch is much cleaner in its approach, and I don't have
much to say about it except that the error message for lstat() should
be more consistent with the one above in scan_directory().  The
version for v11 has required a bit of rework, but nothing huge
either.

> I've updated the TAP test for pg_checksums by adding a dummy
> subdirectory into the tablespace directory already created for the
> corrupted relfilenode test, containing a file to process in case an
> unpatched pg_checksums is run. With the patch attached, these
> directories simply won't be considered to check.

What you have here is much more simple than the original proposal, so
I kept it.  Applied and back-patched down to 11.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-26 Thread Bernd Helmle
Am Dienstag, den 25.02.2020, 11:33 +0900 schrieb Michael Paquier:
> I really think that
> we should avoid duplicating the same logic around, and that we should
> remain consistent with non-directory entries in those paths,
> complaining with a proper failure if extra, unwanted files are
> present.

Okay, please find an updated patch attached.

My feeling is that in the case we cannot successfully resolve a
tablespace location from pg_tblspc, we should error out, but i could
imagine that people would like to have just a warning instead.

I've updated the TAP test for pg_checksums by adding a dummy
subdirectory into the tablespace directory already created for the
corrupted relfilenode test, containing a file to process in case an
unpatched pg_checksums is run. With the patch attached, these
directories simply won't be considered to check.

Thanks,

Bernd
From 15bd3a6457f59f3ba4bf72c645499132e90127cd Mon Sep 17 00:00:00 2001
From: Bernd Helmle 
Date: Wed, 26 Feb 2020 16:49:03 +0100
Subject: [PATCH 1/2] Formerly pg_checksums recursively dived into pg_tblspc,
 scanning any tablespace location found there without taking into account,
 that tablespace locations might be used by other PostgreSQL instances at the
 same time. This could likely happen for example by using pg_upgrade to
 upgrade clusters with tablespaces.

Fix this by resolving tablespace locations explicitly by looking
up their TABLESPACE_VERSION_DIRECTORY subdirectory if called on
the pg_tblspc directory, and, if found, recursively diving into
them directly. If we can't resolve the tablespace location link, we
throw an error to indicate a probably orphaned or missing tablespace
location.
---
 src/bin/pg_checksums/pg_checksums.c | 43 -
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9bd0bf947f..1367f53bbf 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -385,7 +385,48 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly)
 #else
 		else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
 #endif
-			dirsize += scan_directory(path, de->d_name, sizeonly);
+		{
+			/*
+			 * If we are called with subdir pg_tblspc, we assume to
+			 * operate on tablespace locations. We're just interested
+			 * in TABLESPACE_VERSION_DIRECTORY only, so we're resolving
+			 * the linked locations here explicitely and dive into them
+			 * directly.
+			 */
+			if (strncmp("pg_tblspc", subdir, strlen("pg_tblspc")) == 0)
+			{
+chartblspc_path[MAXPGPATH];
+struct stat tblspc_st;
+
+/*
+ * Resolve tablespace location path and check
+ * whether a TABLESPACE_VERSION_DIRECTORY exists. Not finding
+ * a valid location is an unexpected condition, since there
+ * shouldn't be orphaned links. Tell the user something
+ * is wrong.
+ */
+snprintf(tblspc_path, sizeof(tblspc_path), "%s/%s/%s",
+		 path, de->d_name, TABLESPACE_VERSION_DIRECTORY);
+
+if (lstat(tblspc_path, _st) < 0)
+{
+	pg_log_error("invalid tablespace location \"%s/%s\"",
+ subdir, de->d_name);
+	exit(1);
+}
+
+snprintf(tblspc_path, sizeof(tblspc_path), "%s/%s",
+		 path, de->d_name);
+
+/* Looks like a valid tablespace location */
+dirsize += scan_directory(tblspc_path, TABLESPACE_VERSION_DIRECTORY,
+		  sizeonly);
+			}
+			else
+			{
+dirsize += scan_directory(path, de->d_name, sizeonly);
+			}
+		}
 	}
 	closedir(dir);
 	return dirsize;
-- 
2.24.1

From 77393902b2b9f92c51ad525eacde5ab87e55faac Mon Sep 17 00:00:00 2001
From: Bernd Helmle 
Date: Wed, 26 Feb 2020 17:48:11 +0100
Subject: [PATCH 2/2] Update TAP tests for pg_checksums.

Add an additional test with a dummy foreign tablespace
location to check the new behavior of pg_checksums to properly
ignore any other versioned tablespace subdirectory from e.g.
pg_upgrade'd clusters.
---
 src/bin/pg_checksums/t/002_actions.pl | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl
index 83a730ea94..88e186d873 100644
--- a/src/bin/pg_checksums/t/002_actions.pl
+++ b/src/bin/pg_checksums/t/002_actions.pl
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 62;
+use Test::More tests => 63;
 
 
 # Utility routine to create and check a table with corrupted checksums
@@ -217,6 +217,14 @@ sub fail_corrupt
 # Stop instance for the follow-up checks.
 $node->stop;
 
+# Create fake directories in the tablespace location, claiming
+# to be foreign objects we shouldn't touch.
+mkdir "$tablespace_dir/PG_99_1/";
+append_to_file "$tablespace_dir/PG_99_1/foo", "123";
+# Should not fail
+command_ok([ 'pg_checksums', '--check', '-D', $pgdata ],
+	"succeeds with foreign tablespace");
+
 # Authorized relation files filled with 

Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-24 Thread Michael Paquier
On Mon, Feb 24, 2020 at 01:11:10PM +0100, Bernd Helmle wrote:
> The other makes scan_directories() complicated to read and special
> cases just a single directory in an otherwise more or less generic
> function.  E.g. it makes me uncomfortable if we get a pg_tblspc
> somewhere else than PGDATA (if someone managed to create such a
> directory in a foreign tablespace location for example), so we should
> maintain an additional check if we really operate on the pg_tblspc we
> have to. That was the reason(s) i've moved it into a separate function.

We are just discussing about the code path involving scanning a
directory, so that does not seem that bad to me.  I really think that
we should avoid duplicating the same logic around, and that we should
remain consistent with non-directory entries in those paths,
complaining with a proper failure if extra, unwanted files are
present.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-24 Thread David Steele

Hi Michael,

On 2/24/20 7:26 AM, Michael Paquier wrote:

On Sun, Feb 23, 2020 at 04:08:58PM +0900, Michael Paquier wrote:

Good idea.  Let's do things as you suggest.


Applied and back-patched this one down to 11.


FWIW, we took a slightly narrower approach to this issue in the 
pgBackRest patch (attached).


I don't have an issue with the prefix approach since it works and the 
Postgres project is very likely to catch it if there is a change in 
behavior.


For third-party projects, though, it might pay to be more conservative 
in case the behavior changes in the future, i.e. 
pg_internal.init[something] (but not pg_internal\.init[0-9]+) becomes valid.


Regards,
--
-David
da...@pgmasters.net
diff --git a/doc/xml/release.xml b/doc/xml/release.xml
index a9a61bc0..0089d222 100644
--- a/doc/xml/release.xml
+++ b/doc/xml/release.xml
@@ -67,6 +67,16 @@
 
 
 
+
+
+
+
+
+
+
+Skip pg_internal.init temp file during 
backup.
+
+
 
 
 
@@ -8039,6 +8049,11 @@
 mibiio
 
 
+
+Michael 
Paquier
+michaelpq
+
+
 
 Michael Renner
 terrorobe
diff --git a/src/info/manifest.c b/src/info/manifest.c
index a5662fd6..1eae49af 100644
--- a/src/info/manifest.c
+++ b/src/info/manifest.c
@@ -579,8 +579,13 @@ manifestBuildCallback(void *data, const StorageInfo *info)
 strPtr(manifestName));
 }
 
-// Skip pg_internal.init since it is recreated on startup
-if (strEqZ(info->name, PG_FILE_PGINTERNALINIT))
+// Skip pg_internal.init since it is recreated on startup.  It's 
also possible, (though unlikely) that a temp file with
+// the creating process id as the extension can exist so skip that 
as well.  This seems to be a bug in PostgreSQL since
+// the temp file should be removed on startup.  Use 
regExpMatchOne() here instead of preparing a regexp in advance since
+// the likelihood of needing the regexp should be very small.
+if ((pgVersion <= PG_VERSION_84 || buildData.dbPath) && 
strBeginsWithZ(info->name, PG_FILE_PGINTERNALINIT) &&
+(strSize(info->name) == sizeof(PG_FILE_PGINTERNALINIT) - 1 ||
+regExpMatchOne(STRDEF("\\.[0-9]+"), strSub(info->name, 
sizeof(PG_FILE_PGINTERNALINIT) - 1
 {
 FUNCTION_TEST_RETURN_VOID();
 return;
diff --git a/test/src/module/info/manifestTest.c 
b/test/src/module/info/manifestTest.c
index 684905d1..9d945152 100644
--- a/test/src/module/info/manifestTest.c
+++ b/test/src/module/info/manifestTest.c
@@ -252,6 +252,10 @@ testRun(void)
 // global directory
 storagePathCreateP(storagePgWrite, STRDEF(PG_PATH_GLOBAL), .mode = 
0700, .noParentCreate = true);
 storagePutP(storageNewWriteP(storagePgWrite, STRDEF(PG_PATH_GLOBAL "/" 
PG_FILE_PGINTERNALINIT)), NULL);
+storagePutP(storageNewWriteP(storagePgWrite, STRDEF(PG_PATH_GLOBAL "/" 
PG_FILE_PGINTERNALINIT ".1")), NULL);
+storagePutP(
+storageNewWriteP(storagePgWrite, STRDEF(PG_PATH_GLOBAL "/" 
PG_FILE_PGINTERNALINIT ".allow"), .modeFile = 0400,
+.timeModified = 1565282114), NULL);
 storagePutP(
 storageNewWriteP(storagePgWrite, STRDEF(PG_PATH_GLOBAL "/t1_1"), 
.modeFile = 0400, .timeModified = 1565282114), NULL);
 
@@ -282,6 +286,7 @@ testRun(void)
 "\n"
 "[target:file]\n"
 "pg_data/PG_VERSION={\"size\":4,\"timestamp\":1565282100}\n"
+
"pg_data/global/pg_internal.init.allow={\"master\":false,\"size\":0,\"timestamp\":1565282114}\n"
 
"pg_data/global/t1_1={\"master\":false,\"size\":0,\"timestamp\":1565282114}\n"
 
"pg_data/pg_dynshmem/BOGUS={\"size\":0,\"timestamp\":1565282101}\n"
 
"pg_data/pg_notify/BOGUS={\"size\":0,\"timestamp\":1565282102}\n"
@@ -419,6 +424,7 @@ testRun(void)
 "pg_data/base/1/t1_1.1={\"size\":0,\"timestamp\":1565282113}\n"
 
"pg_data/base/1/t888_888_vm={\"size\":0,\"timestamp\":1565282113}\n"
 
"pg_data/base/1/t888_888_vm.99={\"size\":0,\"timestamp\":1565282113}\n"
+
"pg_data/global/pg_internal.init.allow={\"size\":0,\"timestamp\":1565282114}\n"
 "pg_data/global/t1_1={\"size\":0,\"timestamp\":1565282114}\n"
 
"pg_data/pg_dynshmem/BOGUS={\"master\":true,\"size\":0,\"timestamp\":1565282101}\n"
 
"pg_data/pg_hba.conf={\"master\":true,\"size\":9,\"timestamp\":1565282117}\n"
@@ -537,6 +543,7 @@ testRun(void)
 

Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-24 Thread Michael Paquier
On Sun, Feb 23, 2020 at 04:08:58PM +0900, Michael Paquier wrote:
> Good idea.  Let's do things as you suggest.

Applied and back-patched this one down to 11.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-24 Thread Bernd Helmle
On Fri, 2020-02-21 at 15:36 +0900, Michael Paquier wrote:
> We don't do that for the normal directory scan path, so it does not
> strike me as a good idea on consistency ground.  As a whole, I don't
> see much point in having a separate routine which is just roughly a
> duplicate of scan_directory(), and I think that we had better just
> add
> the check looking for matches with TABLESPACE_VERSION_DIRECTORY
> directly when having a directory, if subdir is "pg_tblspc".  That
> also makes the patch much shorter.

To be honest, i dislike both: The other doubles logic (note: i don't
see it necessarily as 100% code duplication, since the semantic of
scan_tablespaces() is different: it serves as a driver for
scan_directories() and just resolves entries in pg_tblspc directly).

The other makes scan_directories() complicater to read and special
cases just a single directory in an otherwise more or less generic
function. E.g. it makes me uncomfortable if we get a pg_tblspc
somewhere else than PGDATA (if someone managed to create such a
directory in a foreign tablespace location for example), so we should
maintain an additional check if we really operate on the pg_tblspc we
have to. That was the reason(s) i've moved it into a separate function.

That said, i'll provide an updated patch with your ideas.

Bernd






Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-22 Thread Michael Paquier
On Fri, Feb 21, 2020 at 08:13:34AM -0500, David Steele wrote:
> Do you have the thread?  I'd like to see what was proposed and what the
> objections were.

Here you go:
https://www.postgresql.org/message-id/20180205071022.ga17...@paquier.xyz
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-22 Thread Michael Paquier
On Fri, Feb 21, 2020 at 05:37:15PM +0900, Kyotaro Horiguchi wrote:
> The two str[n]cmps are different only in matching length. I don't
> think we don't need to differentiate the two message there, so we
> could reduce the code as:
> 
> | cmplen = strlen(excludeFiles[].name);
> | if (!prefix_patch)
> |   cmplen++;
> | if (strncmp(d_name, excludeFilep.name, cmplen) == 0)
> |   ...

Good idea.  Let's do things as you suggest.
--
Michael
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index dea8aab45e..2a3b365a91 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -121,6 +121,18 @@ static long long int total_checksum_failures;
 /* Do not verify checksums. */
 static bool noverify_checksums = false;
 
+/*
+ * Definition of one element part of an exclusion list, used for paths part
+ * of checksum validation or base backups.  "name" is the name of the file
+ * or path to check for exclusion.  If "match_prefix" is true, any items
+ * matching the name as prefix are excluded.
+ */
+struct exclude_list_item
+{
+	const char *name;
+	bool		match_prefix;
+};
+
 /*
  * The contents of these directories are removed or recreated during server
  * start so they are not included in backups.  The directories themselves are
@@ -170,16 +182,19 @@ static const char *const excludeDirContents[] =
 /*
  * List of files excluded from backups.
  */
-static const char *const excludeFiles[] =
+static const struct exclude_list_item excludeFiles[] =
 {
 	/* Skip auto conf temporary file. */
-	PG_AUTOCONF_FILENAME ".tmp",
+	{PG_AUTOCONF_FILENAME ".tmp", false},
 
 	/* Skip current log file temporary file */
-	LOG_METAINFO_DATAFILE_TMP,
+	{LOG_METAINFO_DATAFILE_TMP, false},
 
-	/* Skip relation cache because it is rebuilt on startup */
-	RELCACHE_INIT_FILENAME,
+	/*
+	 * Skip relation cache because it is rebuilt on startup.  This includes
+	 * temporary files.
+	 */
+	{RELCACHE_INIT_FILENAME, true},
 
 	/*
 	 * If there's a backup_label or tablespace_map file, it belongs to a
@@ -187,14 +202,14 @@ static const char *const excludeFiles[] =
 	 * for this backup.  Our backup_label/tablespace_map is injected into the
 	 * tar separately.
 	 */
-	BACKUP_LABEL_FILE,
-	TABLESPACE_MAP,
+	{BACKUP_LABEL_FILE, false},
+	{TABLESPACE_MAP, false},
 
-	"postmaster.pid",
-	"postmaster.opts",
+	{"postmaster.pid", false},
+	{"postmaster.opts", false},
 
 	/* end of list */
-	NULL
+	{NULL, false}
 };
 
 /*
@@ -203,16 +218,15 @@ static const char *const excludeFiles[] =
  * Note: this list should be kept in sync with what pg_checksums.c
  * includes.
  */
-static const char *const noChecksumFiles[] = {
-	"pg_control",
-	"pg_filenode.map",
-	"pg_internal.init",
-	"PG_VERSION",
+static const struct exclude_list_item noChecksumFiles[] = {
+	{"pg_control", false},
+	{"pg_filenode.map", false},
+	{"pg_internal.init", true},
+	{"PG_VERSION", false},
 #ifdef EXEC_BACKEND
-	"config_exec_params",
-	"config_exec_params.new",
+	{"config_exec_params", true},
 #endif
-	NULL,
+	{NULL, false}
 };
 
 /*
@@ -1082,9 +1096,13 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 
 		/* Scan for files that should be excluded */
 		excludeFound = false;
-		for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
+		for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
 		{
-			if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
+			int		cmplen = strlen(excludeFiles[excludeIdx].name);
+
+			if (!excludeFiles[excludeIdx].match_prefix)
+cmplen++;
+			if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
 			{
 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
 excludeFound = true;
@@ -1317,17 +1335,24 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 static bool
 is_checksummed_file(const char *fullpath, const char *filename)
 {
-	const char *const *f;
-
 	/* Check that the file is in a tablespace */
 	if (strncmp(fullpath, "./global/", 9) == 0 ||
 		strncmp(fullpath, "./base/", 7) == 0 ||
 		strncmp(fullpath, "/", 1) == 0)
 	{
-		/* Compare file against noChecksumFiles skiplist */
-		for (f = noChecksumFiles; *f; f++)
-			if (strcmp(*f, filename) == 0)
+		int			excludeIdx;
+
+		/* Compare file against noChecksumFiles skip list */
+		for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
+		{
+			int		cmplen = strlen(noChecksumFiles[excludeIdx].name);
+
+			if (!noChecksumFiles[excludeIdx].match_prefix)
+cmplen++;
+			if (strncmp(filename, noChecksumFiles[excludeIdx].name,
+		cmplen) == 0)
 return false;
+		}
 
 		return true;
 	}
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index b7d36b65dd..3c70499feb 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
 use 

Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-21 Thread David Steele

On 2/21/20 1:36 AM, Michael Paquier wrote:
> On Thu, Feb 20, 2020 at 05:38:15PM +0100, Bernd Helmle wrote:
>> So i propose a different approach like the attached patch tries to
>> implement: instead of just blindly iterating over directory contents
>> and filter them out, reference the tablespace location and
>> TABLESPACE_VERSION_DIRECTORY directly. This is done by a new function
>> scan_tablespaces() which is specialized in just follow the
>> symlinks/junctions in pg_tblspc and call scan_directory() with just
>> what it has found there. It will also honour directories, just in case
>> an experienced DBA has copied over the tablespace into pg_tblspc
>> directly.
>
> +   if (S_ISREG(st.st_mode))
> +   {
> +   pg_log_debug("ignoring file %s in pg_tblspc", de->d_name);
> +   continue;
> +   }
> We don't do that for the normal directory scan path, so it does not
> strike me as a good idea on consistency ground.  As a whole, I don't
> see much point in having a separate routine which is just roughly a
> duplicate of scan_directory(), and I think that we had better just add
> the check looking for matches with TABLESPACE_VERSION_DIRECTORY
> directly when having a directory, if subdir is "pg_tblspc".  That
> also makes the patch much shorter.

+1.  This is roughly what pg_basebackup does and it seems simpler to me.

Regards,
--
-David
da...@pgmasters.net




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-21 Thread David Steele

Hi Michael,

On 2/20/20 11:07 PM, Michael Paquier wrote:
>   On Thu, Feb 20, 2020 at 07:37:15AM -0600, David Steele wrote:
>> But since the name includes the backend's pid you would need to get 
lucky
>> and have a new backend with the same pid create the file after a 
restart.  I

>> tried it and the old temp file was left behind after restart and first
>> connection to the database.
>>
>> I doubt this is a big issue in the field, but it seems like it would 
be nice

>> to do something about it.
>
> The natural area to do that would be around ResetUnloggedRelations().
> Still that would require combine both operations to not do any
> unnecessary lookups at the data folder paths.

Yeah, that's what I was thinking as well, since there is already a 
directory scan there and doing the check would be very cheap.  It's not 
obvious how to combine these in the right way without moving a lot of 
code around to non-obvious places.


One solution might be to have each subsystem register a function that 
does checks/cleanup as each path/file is found in a common scan 
function.  That's a pretty major rework though, and I doubt there would 
be much appetite for it to solve such a minor problem.


>> I'm not excited about the amount of code duplication between these three
>> tools.  I know this was because of back-patching various issues in 
the past,
>> but I really think we need to unify these data structures/functions 
in HEAD.

>
> The lists are duplicated because we have never really figured out how
> to combine this code in one place.  The idea was to have all the data
> folder path logic and the lists within one header shared between the
> frontend and backend but there was not much support for that on HEAD.

Do you have the thread?  I'd like to see what was proposed and what the 
objections were.


Regards,
--
-David
da...@pgmasters.net




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-21 Thread Kyotaro Horiguchi
Thank you David for decrypting my previous mail.., and your
translation was correct.

At Fri, 21 Feb 2020 15:07:12 +0900, Michael Paquier  wrote 
in 
>  On Thu, Feb 20, 2020 at 07:37:15AM -0600, David Steele wrote:
> > But since the name includes the backend's pid you would need to get lucky
> > and have a new backend with the same pid create the file after a restart.  I
> > tried it and the old temp file was left behind after restart and first
> > connection to the database.
> > 
> > I doubt this is a big issue in the field, but it seems like it would be nice
> > to do something about it.
> 
> The natural area to do that would be around ResetUnloggedRelations().
> Still that would require combine both operations to not do any
> unnecessary lookups at the data folder paths.
> 
> > I'm not excited about the amount of code duplication between these three
> > tools.  I know this was because of back-patching various issues in the past,
> > but I really think we need to unify these data structures/functions in HEAD.
> 
> The lists are duplicated because we have never really figured out how
> to combine this code in one place.  The idea was to have all the data
> folder path logic and the lists within one header shared between the
> frontend and backend but there was not much support for that on HEAD.
> 
> >> For now, my proposal is to fix the prefix first, and then let's look
> >> at the business with tablespaces where needed.
> > 
> > OK.
> 
> I'll let this patch round for a couple of extra day, and revisit it at
> the beginning of next week.


Thank you for the version.
I didn't look it closer bat it looks in the direction I wanted.
At a quick look, the following section attracted my eyes.

+   if (strncmp(de->d_name, 
excludeFiles[excludeIdx].name,
+   
strlen(excludeFiles[excludeIdx].name)) == 0)
+   {
+   elog(DEBUG1, "file \"%s\" matching 
prefix \"%s\" excluded from backup",
+de->d_name, 
excludeFiles[excludeIdx].name);
+   excludeFound = true;
+   break;
+   }
+   }
+   else
+   {
+   if (strcmp(de->d_name, 
excludeFiles[excludeIdx].name) == 0)
+   {
+   elog(DEBUG1, "file \"%s\" excluded from 
backup", de->d_name);
+   excludeFound = true;
+   break;
+   }

The two str[n]cmps are different only in matching length. I don't
think we don't need to differentiate the two message there, so we
could reduce the code as:

| cmplen = strlen(excludeFiles[].name);
| if (!prefix_patch)
|   cmplen++;
| if (strncmp(d_name, excludeFilep.name, cmplen) == 0)
|   ...
  
regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-20 Thread Michael Paquier
On Thu, Feb 20, 2020 at 05:38:15PM +0100, Bernd Helmle wrote:
> So i propose a different approach like the attached patch tries to
> implement: instead of just blindly iterating over directory contents
> and filter them out, reference the tablespace location and
> TABLESPACE_VERSION_DIRECTORY directly. This is done by a new function
> scan_tablespaces() which is specialized in just follow the
> symlinks/junctions in pg_tblspc and call scan_directory() with just
> what it has found there. It will also honour directories, just in case
> an experienced DBA has copied over the tablespace into pg_tblspc
> directly.

+   if (S_ISREG(st.st_mode))
+   {
+   pg_log_debug("ignoring file %s in pg_tblspc", de->d_name);
+   continue;
+   }
We don't do that for the normal directory scan path, so it does not
strike me as a good idea on consistency ground.  As a whole, I don't
see much point in having a separate routine which is just roughly a
duplicate of scan_directory(), and I think that we had better just add
the check looking for matches with TABLESPACE_VERSION_DIRECTORY
directly when having a directory, if subdir is "pg_tblspc".  That
also makes the patch much shorter.

+ * the direct path to it and check via lstat wether it exists.
s/wether/whether/, repeated three times.

We should have some TAP tests for that.  The first patch of this
thread from Michael had some, but I would just have added a dummy
tablespace with an empty file in 002_actions.pl, triggering an error
if pg_checksums is not fixed.  Dummy entries around the place where
dummy temp files are added would be fine.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-20 Thread Michael Paquier
 On Thu, Feb 20, 2020 at 07:37:15AM -0600, David Steele wrote:
> But since the name includes the backend's pid you would need to get lucky
> and have a new backend with the same pid create the file after a restart.  I
> tried it and the old temp file was left behind after restart and first
> connection to the database.
> 
> I doubt this is a big issue in the field, but it seems like it would be nice
> to do something about it.

The natural area to do that would be around ResetUnloggedRelations().
Still that would require combine both operations to not do any
unnecessary lookups at the data folder paths.

> I'm not excited about the amount of code duplication between these three
> tools.  I know this was because of back-patching various issues in the past,
> but I really think we need to unify these data structures/functions in HEAD.

The lists are duplicated because we have never really figured out how
to combine this code in one place.  The idea was to have all the data
folder path logic and the lists within one header shared between the
frontend and backend but there was not much support for that on HEAD.

>> For now, my proposal is to fix the prefix first, and then let's look
>> at the business with tablespaces where needed.
> 
> OK.

I'll let this patch round for a couple of extra day, and revisit it at
the beginning of next week.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-20 Thread Bernd Helmle
Am Dienstag, den 18.02.2020, 15:15 +0900 schrieb Michael Paquier:
> Fair point.  Now, while the proposed patch is right to use
> TABLESPACE_VERSION_DIRECTORY, shouldn't we use strncmp based on the
> length of TABLESPACE_VERSION_DIRECTORY instead of de->d_name?  It
> seems also to me that the code as proposed is rather fragile, and
> that
> we had better be sure that the check only happens when we are
> scanning
> entries within pg_tblspc.
> 

Yes, after thinking and playing around with it a little i share your
position. You can still easily cause pg_checksums to error out by just
having arbitrary files around in the reference tablespace locations.
Though i don't think this is something of a big issue, it looks strange
and misleading if pg_checksums just complains about files not belonging
to the scanned PostgreSQL data directory (even we explicitly note in
the docs that even tablespace locations are somehow taboo for DBAs to
put other files and/or directories in there).

So i propose a different approach like the attached patch tries to
implement: instead of just blindly iterating over directory contents
and filter them out, reference the tablespace location and
TABLESPACE_VERSION_DIRECTORY directly. This is done by a new function
scan_tablespaces() which is specialized in just follow the
symlinks/junctions in pg_tblspc and call scan_directory() with just
what it has found there. It will also honour directories, just in case
an experienced DBA has copied over the tablespace into pg_tblspc
directly.

> The issue with pg_internal.init.XX is quite different, so I think
> that
> it would be better to commit that separately first.

Agreed.

Thanks,
Bernd
From 4b6d369f731f111216f9ed6613ad1b41872fb0ea Mon Sep 17 00:00:00 2001
From: Bernd Helmle 
Date: Thu, 20 Feb 2020 17:18:10 +0100
Subject: [PATCH] Make scanning pg_tblspc more robust.

Currently we dive blindly into the subdirectories referenced by
the symlinks (or under Windows via file junctions) located in pg_tblspc,
which has various problems. We need to filter out any other possible tablespace
location found in the referenced subdirectory, which could likely happen
if you have e.g. an upgraded instance via pg_upgrade or other installations using
the same tablespace location.

Instead of filtering out any of these possible foreign directories, try to
resolve the locations in pg_tblspc directly. This is done with the new
scan_tablespaces() helper function, which indirectly calls scan_directory()
after resolving the links in pg_tblspc. It explicitely follows directories found
there too, since it might happen that users copy over tablespace locations directly
into pg_tblspc.
---
 src/bin/pg_checksums/pg_checksums.c | 99 -
 1 file changed, 97 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 46ee1f1dc3..4e2e9cb3e2 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -108,6 +108,15 @@ static const char *const skip[] = {
 	NULL,
 };
 
+/*
+ * Forwarded declarations.
+ */
+static int64
+scan_directory(const char *basedir, const char *subdir, bool sizeonly);
+
+static int64
+scan_tablespaces(const char *basedir, bool sizeonly);
+
 /*
  * Report current progress status.  Parts borrowed from
  * src/bin/pg_basebackup/pg_basebackup.c.
@@ -267,6 +276,91 @@ scan_file(const char *fn, BlockNumber segmentno)
 	close(f);
 }
 
+/*
+ * Scans pg_tblspc of the given base directory for either links
+ * or directories and examines wether an existing tablespace version
+ * directory exists there. If true, scan_directory() will do the checksum
+ * legwork. sizeonly specifies wether we want to compute the directory size
+ * only, required for progress reporting.
+ */
+static int64
+scan_tablespaces(const char *basedir, bool sizeonly)
+{
+	int64  dirsize = 0;
+	char   path[MAXPGPATH];
+	DIR   *dir;
+	struct dirent *de;
+
+	snprintf(path, sizeof(path), "%s/%s", basedir, "pg_tblspc");
+	dir = opendir(path);
+
+	if (!dir)
+	{
+		pg_log_error("could not open directory \"%s\": %m", path);
+		exit(1);
+	}
+
+	/*
+	 * Scan through the pg_tblspc contents. We expect either a symlink
+	 * (or Windows junction) or a directory. The latter might exist due
+	 * systems where administrators copy over the tablespace directory
+	 * directly into PGDATA, e.g. on recovery systems.
+	 */
+	while((de = readdir(dir)) != NULL)
+	{
+		char tblspc_path[MAXPGPATH];
+		char fn[MAXPGPATH];
+		struct stat st;
+
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
+			continue;
+
+		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
+		if (lstat(fn, ) < 0)
+		{
+			pg_log_error("could not stat file \"%s\": %m", fn);
+			exit(1);
+		}
+
+		if (S_ISREG(st.st_mode))
+		{
+			pg_log_debug("ignoring file %s in pg_tblspc", de->d_name);
+			continue;
+		}
+
+#ifndef WIN32
+		if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))

Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-20 Thread David Steele

On 2/20/20 12:55 AM, Michael Paquier wrote:

On Wed, Feb 19, 2020 at 12:37:00PM -0600, David Steele wrote:


As far as I can see, the pg_internal.init.XX will not be cleaned up by
PostgreSQL on startup.  I've only tested this in 9.6 so far, but I don't see
any differences in the code since then that would lead to better behavior.
Perhaps that's also something we should fix?


Not sure that it is worth spending cycles on that at the beginning of
recovery as when a mapping file is written its temporary entry
truncates any existing one present matching its name.


But since the name includes the backend's pid you would need to get 
lucky and have a new backend with the same pid create the file after a 
restart.  I tried it and the old temp file was left behind after restart 
and first connection to the database.


I doubt this is a big issue in the field, but it seems like it would be 
nice to do something about it.



I'm really not a fan of a blind prefix match.  I think we should stick with
only excluding files that are created by Postgres.


Thinking more on that, excluding any backup_label with a custom suffix
worries me as it could cause a potential breakage for exiting backup
solutions.  So attached is an updated patch making things in a
smarter way: I have added to the exclusion lists the possibility to
match an entry based on its prefix, or not, the choice being optional.
This solves the problem with pg_internal.PID and is careful to not
exclude unnecessary entries like suffixed backup labels or such.  This
leads to some extra duplication within pg_rewind, basebackup.c and
pg_checksums but I think we can live with that, and that makes
back-patching simpler.  Refactoring is still tricky though as it
relates to the use of paths across the backend and the frontend..


I'm not excited about the amount of code duplication between these three 
tools.  I know this was because of back-patching various issues in the 
past, but I really think we need to unify these data 
structures/functions in HEAD.



So backup_label.old and
tablespace_map.old should just be added to the exclude list.  That's how we
have it in pgBackRest.


That would be a behavior change.  We could change that on HEAD, but I
don't think that this can be back-patched as this does not cause an
actual problem.


Right, that should be in HEAD.


For now, my proposal is to fix the prefix first, and then let's look
at the business with tablespaces where needed.


OK.

Regards,
--
-David
da...@pgmasters.net




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-19 Thread Michael Paquier
On Wed, Feb 19, 2020 at 12:37:00PM -0600, David Steele wrote:
> On 2/19/20 2:13 AM, Michael Paquier wrote:
>> Please note that pg_internal.init is listed within noChecksumFiles in
>> basebackup.c, so we would miss any temporary pg_internal.init.PID if
>> we don't check after the file prefix and the base backup would issue
>> extra WARNING messages, potentially masking messages that could
>> matter.  So let's fix that as well.
> 
> Agreed.  Though, I think pg_internal.init.XX should be excluded from the
> backup as well.

Sure.  That's the intention.  pg_rewind, pg_checksums and basebackup.c
are all the things on my list.

> As far as I can see, the pg_internal.init.XX will not be cleaned up by
> PostgreSQL on startup.  I've only tested this in 9.6 so far, but I don't see
> any differences in the code since then that would lead to better behavior.
> Perhaps that's also something we should fix?

Not sure that it is worth spending cycles on that at the beginning of
recovery as when a mapping file is written its temporary entry
truncates any existing one present matching its name.

> I'm really not a fan of a blind prefix match.  I think we should stick with
> only excluding files that are created by Postgres.

Thinking more on that, excluding any backup_label with a custom suffix
worries me as it could cause a potential breakage for exiting backup
solutions.  So attached is an updated patch making things in a
smarter way: I have added to the exclusion lists the possibility to
match an entry based on its prefix, or not, the choice being optional.
This solves the problem with pg_internal.PID and is careful to not
exclude unnecessary entries like suffixed backup labels or such.  This
leads to some extra duplication within pg_rewind, basebackup.c and
pg_checksums but I think we can live with that, and that makes
back-patching simpler.  Refactoring is still tricky though as it
relates to the use of paths across the backend and the frontend..

> So backup_label.old and
> tablespace_map.old should just be added to the exclude list.  That's how we
> have it in pgBackRest.

That would be a behavior change.  We could change that on HEAD, but I
don't think that this can be back-patched as this does not cause an
actual problem.

For now, my proposal is to fix the prefix first, and then let's look
at the business with tablespaces where needed. 
--
Michael
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index dea8aab45e..c03bc0c00b 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -121,6 +121,18 @@ static long long int total_checksum_failures;
 /* Do not verify checksums. */
 static bool noverify_checksums = false;
 
+/*
+ * Definition of one item part of an exclusion list, used for checksum
+ * or base backup items.  "name" is the name of the file or path to
+ * check for exclusion.  If "match_prefix" is true, any items matching the
+ * name as prefix are excluded.
+ */
+struct exclude_list_item
+{
+	const char *name;
+	bool		match_prefix;
+};
+
 /*
  * The contents of these directories are removed or recreated during server
  * start so they are not included in backups.  The directories themselves are
@@ -170,16 +182,19 @@ static const char *const excludeDirContents[] =
 /*
  * List of files excluded from backups.
  */
-static const char *const excludeFiles[] =
+static const struct exclude_list_item excludeFiles[] =
 {
 	/* Skip auto conf temporary file. */
-	PG_AUTOCONF_FILENAME ".tmp",
+	{PG_AUTOCONF_FILENAME ".tmp", false},
 
 	/* Skip current log file temporary file */
-	LOG_METAINFO_DATAFILE_TMP,
+	{LOG_METAINFO_DATAFILE_TMP, false},
 
-	/* Skip relation cache because it is rebuilt on startup */
-	RELCACHE_INIT_FILENAME,
+	/*
+	 * Skip relation cache because it is rebuilt on startup.  This includes
+	 * temporary files.
+	 */
+	{RELCACHE_INIT_FILENAME, true},
 
 	/*
 	 * If there's a backup_label or tablespace_map file, it belongs to a
@@ -187,14 +202,14 @@ static const char *const excludeFiles[] =
 	 * for this backup.  Our backup_label/tablespace_map is injected into the
 	 * tar separately.
 	 */
-	BACKUP_LABEL_FILE,
-	TABLESPACE_MAP,
+	{BACKUP_LABEL_FILE, false},
+	{TABLESPACE_MAP, false},
 
-	"postmaster.pid",
-	"postmaster.opts",
+	{"postmaster.pid", false},
+	{"postmaster.opts", false},
 
 	/* end of list */
-	NULL
+	{NULL, false}
 };
 
 /*
@@ -203,16 +218,15 @@ static const char *const excludeFiles[] =
  * Note: this list should be kept in sync with what pg_checksums.c
  * includes.
  */
-static const char *const noChecksumFiles[] = {
-	"pg_control",
-	"pg_filenode.map",
-	"pg_internal.init",
-	"PG_VERSION",
+static const struct exclude_list_item noChecksumFiles[] = {
+	{"pg_control", false},
+	{"pg_filenode.map", false},
+	{"pg_internal.init", true},
+	{"PG_VERSION", false},
 #ifdef EXEC_BACKEND
-	"config_exec_params",
-	"config_exec_params.new",
+	{"config_exec_params", true},
 #endif
-	NULL,
+	{NULL, false}
 };
 
 /*

Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-19 Thread David Steele

On 1/31/20 3:59 AM, Michael Banck wrote:

Hi,

Am Freitag, den 31.01.2020, 13:53 +0900 schrieb Michael Paquier:

On Thu, Jan 30, 2020 at 06:11:22PM +0100, Michael Banck wrote:
Having a past tablespace version left
around after an upgrade is a pilot error in my opinion because
pg_upgrade generates a script to cleanup past tablespaces, no?  So
your patch does not look like a good idea to me.


Not sure I agree with it, but if that (i.e. after pg_upgrade in copy
mode, you have no business to use the old cluster as well as the new
one) is project policy, fair enough.


I don't see how this is project policy.  The directories for other 
versions of Postgres should be ignored as they are in other utilities, 
e.g. pg_basebackup.



However, Postgres does not disallow to just create tablespaces in the
same location from two different versions, so you don't need the
pg_upgade scenario to get into this (pg_checksums checking the wrong
cluster's data) problem:


Exactly.

Regards,
--
-David
da...@pgmasters.net




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-19 Thread David Steele

On 2/19/20 2:13 AM, Michael Paquier wrote:

On Fri, Jan 31, 2020 at 05:39:36PM +0900, Kyotaro Horiguchi wrote:

At Fri, 31 Jan 2020 17:30:43 +0900 (JST), Kyotaro Horiguchi 
 wrote in

I don't think that is a problem right away, of course. It looks good
to me except for the possible excessive exclusion.  So, I don't object
it if we don't mind that.


That's a bit wrong.  All the discussion is only on excludeFiles.  I
think we should refrain from letting more files match to
nohecksumFiles.


I am not sure what you are saying here.  Are you saying that we should
not use a prefix matching for that part?  Or are you saying that we
should not touch this list at all?


Perhaps he is saying that if it is already excluded it won't be 
checksummed.  So, if pg_internal.init* is excluded from the backup, that 
is all that is needed.  If so, I agree.  This might not help 
pg_verify_checksums, though, except that it should be applying the same 
rules.



Please note that pg_internal.init is listed within noChecksumFiles in
basebackup.c, so we would miss any temporary pg_internal.init.PID if
we don't check after the file prefix and the base backup would issue
extra WARNING messages, potentially masking messages that could
matter.  So let's fix that as well.


Agreed.  Though, I think pg_internal.init.XX should be excluded from the 
backup as well.


As far as I can see, the pg_internal.init.XX will not be cleaned up by 
PostgreSQL on startup.  I've only tested this in 9.6 so far, but I don't 
see any differences in the code since then that would lead to better 
behavior.  Perhaps that's also something we should fix?



I agree that a side effect of this change would be to discard anything
prefixed with "backup_label" or "tablespace_map", including any old,
renamed files.  Do you know of any backup solutions which could be
impacted by that?  I am adding David Steele and Stephen Frost in CC so
as they can comment based on their experience in this area.  I recall
that backrest stuff uses the replication protocol, but I may be
wrong.


I'm really not a fan of a blind prefix match.  I think we should stick 
with only excluding files that are created by Postgres.  So 
backup_label.old and tablespace_map.old should just be added to the 
exclude list.  That's how we have it in pgBackRest.


Regards,
--
-David
da...@pgmasters.net




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-19 Thread Michael Paquier
On Fri, Jan 31, 2020 at 05:39:36PM +0900, Kyotaro Horiguchi wrote:
> At Fri, 31 Jan 2020 17:30:43 +0900 (JST), Kyotaro Horiguchi 
>  wrote in 
>> I don't think that is a problem right away, of course. It looks good
>> to me except for the possible excessive exclusion.  So, I don't object
>> it if we don't mind that.
> 
> That's a bit wrong.  All the discussion is only on excludeFiles.  I
> think we should refrain from letting more files match to
> nohecksumFiles.

I am not sure what you are saying here.  Are you saying that we should
not use a prefix matching for that part?  Or are you saying that we
should not touch this list at all?

Please note that pg_internal.init is listed within noChecksumFiles in
basebackup.c, so we would miss any temporary pg_internal.init.PID if
we don't check after the file prefix and the base backup would issue
extra WARNING messages, potentially masking messages that could
matter.  So let's fix that as well.

I agree that a side effect of this change would be to discard anything
prefixed with "backup_label" or "tablespace_map", including any old,
renamed files.  Do you know of any backup solutions which could be
impacted by that?  I am adding David Steele and Stephen Frost in CC so
as they can comment based on their experience in this area.  I recall
that backrest stuff uses the replication protocol, but I may be
wrong.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-02-17 Thread Michael Paquier
On Fri, Jan 31, 2020 at 11:33:34AM +0100, Bernd Helmle wrote:
> And to be honest, even PostgreSQL itself allows you to reuse tablespace
> locations for multiple instances as well, so the described problem
> should exist not in upgraded clusters only.

Fair point.  Now, while the proposed patch is right to use
TABLESPACE_VERSION_DIRECTORY, shouldn't we use strncmp based on the
length of TABLESPACE_VERSION_DIRECTORY instead of de->d_name?  It
seems also to me that the code as proposed is rather fragile, and that
we had better be sure that the check only happens when we are scanning
entries within pg_tblspc.

The issue with pg_internal.init.XX is quite different, so I think that
it would be better to commit that separately first.
--
Michael


signature.asc
Description: PGP signature


Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-01-31 Thread Bernd Helmle
Am Freitag, den 31.01.2020, 13:53 +0900 schrieb Michael Paquier:
> Indeed, with a bad timing and a crash in the middle of
> write_relcache_init_file(), it could be possible to have such a file
> left around in the data folder.  Having a past tablespace version
> left
> around after an upgrade is a pilot error in my opinion because
> pg_upgrade generates a script to cleanup past tablespaces, no?

I'm suprised, why should that be a problem in copy mode? For me this is
a fair use case to test upgrades, e.g. for development purposes, if
someone want's to still have application tests against the current old
version, for fallback and whatever. And people might not want such
upgrades as a "fire-and-forget" task. We even have the --clone feature
now, making this even faster.

If our project policy is to never ever touch an pg_upgrade'd PostgreSQL
instance again in copy mode, i wasn't aware of it.

And to be honest, even PostgreSQL itself allows you to reuse tablespace
locations for multiple instances as well, so the described problem
should exist not in upgraded clusters only.


Bernd






Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-01-31 Thread Michael Banck
Hi,

Am Freitag, den 31.01.2020, 13:53 +0900 schrieb Michael Paquier:
> On Thu, Jan 30, 2020 at 06:11:22PM +0100, Michael Banck wrote:
> Having a past tablespace version left
> around after an upgrade is a pilot error in my opinion because
> pg_upgrade generates a script to cleanup past tablespaces, no?  So
> your patch does not look like a good idea to me. 

Not sure I agree with it, but if that (i.e. after pg_upgrade in copy
mode, you have no business to use the old cluster as well as the new
one) is project policy, fair enough.

However, Postgres does not disallow to just create tablespaces in the
same location from two different versions, so you don't need the
pg_upgade scenario to get into this (pg_checksums checking the wrong
cluster's data) problem:

postgres@kohn:~$ psql -p 5437 -c "CREATE TABLESPACE bar LOCATION 
'/var/lib/postgresql/bar'"
CREATE TABLESPACE
postgres@kohn:~$ psql -p 5444 -c "CREATE TABLESPACE bar LOCATION 
'/var/lib/postgresql/bar'"
CREATE TABLESPACE
postgres@kohn:~$ ls bar
PG_11_201809051  PG_12_201909212
postgres@kohn:~$ touch bar/PG_11_201809051/pg_internal.init.123
postgres@kohn:~$ pg_ctlcluster 12 main stop
  sudo systemctl stop postgresql@12-main
postgres@kohn:~$ LANG=C /usr/lib/postgresql/12/bin/pg_checksums -D 
/var/lib/postgresql/12/main
pg_checksums: error: invalid segment number 0 in file name
"/var/lib/postgresql/12/main/pg_tblspc/16396/PG_11_201809051/pg_internal
.init.123"

I believe this is in order to allow pg_upgrade to run in the first
place. But is this pilot error as well? In any case, it is a situation
we allow to happen so IMO we should fix pg_checksums to skip the foreign
tablespaces.

As an aside, I would advocate to just skip files which fail the segment
number determination step (and maybe log a warning), not bail out.


Michael

-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer

Unser Umgang mit personenbezogenen Daten unterliegt
folgenden Bestimmungen: https://www.credativ.de/datenschutz





Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-01-31 Thread Kyotaro Horiguchi
At Fri, 31 Jan 2020 17:30:43 +0900 (JST), Kyotaro Horiguchi 
 wrote in 
> I don't think that is a problem right away, of course. It looks good
> to me except for the possible excessive exclusion.  So, I don't object
> it if we don't mind that.

That's a bit wrong.  All the discussion is only on excludeFiles.  I
think we should refrain from letting more files match to
nohecksumFiles.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-01-31 Thread Kyotaro Horiguchi
At Fri, 31 Jan 2020 13:53:52 +0900, Michael Paquier  wrote 
in 
> On Thu, Jan 30, 2020 at 06:11:22PM +0100, Michael Banck wrote:
> > The other question is whether it is possible to end up with a
> > pg_internal.init.$PID file in a running cluster. E.g. if an instance
> > crashes and gets started up again - are those cleaned up during crash
> > recovery, or should pg_checksums ignore them? Right now pg_checksums
> > only checks against a list of filenames and only skips on exact matches
> > not prefixes so that might take a bit of work.
> 
> Indeed, with a bad timing and a crash in the middle of
> write_relcache_init_file(), it could be possible to have such a file
> left around in the data folder.  Having a past tablespace version left
> around after an upgrade is a pilot error in my opinion because
> pg_upgrade generates a script to cleanup past tablespaces, no?  So
> your patch does not look like a good idea to me.  And now that I look
> at it, if we happen to leave behind a temporary file for
> pg_internal.init, backups fail with the following error:
> 2020-01-31 13:26:18.345 JST [102076] 010_pg_basebackup.pl ERROR:
> invalid segment number 0 in file "pg_internal.init.123"

Agreed.

> So, I think that it would be better to change basebackup.c,
> pg_checksums and pg_rewind so as files are excluded if there is a
> prefix match with the exclude lists.  Please see the attached.

Agreed that the tools should ignore such files.  Looking excludeFile,
it seems to me that basebackup makes sure to exclude only files that
should harm.  I'm not sure whether it's explicitly, but
tablespace_map.old and backup_label.old are not excluded.

The patch excludes harmless files such like "backup_label.20200131"
and the two files above.

I don't think that is a problem right away, of course. It looks good
to me except for the possible excessive exclusion.  So, I don't object
it if we don't mind that.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: [Patch] Make pg_checksums skip foreign tablespace directories

2020-01-30 Thread Michael Paquier
On Thu, Jan 30, 2020 at 06:11:22PM +0100, Michael Banck wrote:
> The other question is whether it is possible to end up with a
> pg_internal.init.$PID file in a running cluster. E.g. if an instance
> crashes and gets started up again - are those cleaned up during crash
> recovery, or should pg_checksums ignore them? Right now pg_checksums
> only checks against a list of filenames and only skips on exact matches
> not prefixes so that might take a bit of work.

Indeed, with a bad timing and a crash in the middle of
write_relcache_init_file(), it could be possible to have such a file
left around in the data folder.  Having a past tablespace version left
around after an upgrade is a pilot error in my opinion because
pg_upgrade generates a script to cleanup past tablespaces, no?  So
your patch does not look like a good idea to me.  And now that I look
at it, if we happen to leave behind a temporary file for
pg_internal.init, backups fail with the following error:
2020-01-31 13:26:18.345 JST [102076] 010_pg_basebackup.pl ERROR:
invalid segment number 0 in file "pg_internal.init.123"

So, I think that it would be better to change basebackup.c,
pg_checksums and pg_rewind so as files are excluded if there is a
prefix match with the exclude lists.  Please see the attached.
--
Michael
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index dea8aab45e..37e4adb57b 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -168,7 +168,8 @@ static const char *const excludeDirContents[] =
 };
 
 /*
- * List of files excluded from backups.
+ * List of files excluded from backups.  Files are excluded if their
+ * prefix match.
  */
 static const char *const excludeFiles[] =
 {
@@ -198,7 +199,8 @@ static const char *const excludeFiles[] =
 };
 
 /*
- * List of files excluded from checksum validation.
+ * List of files excluded from checksum validation.  Files are excluded
+ * if their prefix match.
  *
  * Note: this list should be kept in sync with what pg_checksums.c
  * includes.
@@ -210,7 +212,6 @@ static const char *const noChecksumFiles[] = {
 	"PG_VERSION",
 #ifdef EXEC_BACKEND
 	"config_exec_params",
-	"config_exec_params.new",
 #endif
 	NULL,
 };
@@ -1084,7 +1085,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 		excludeFound = false;
 		for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
 		{
-			if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
+			if (strncmp(de->d_name, excludeFiles[excludeIdx],
+		strlen(excludeFiles[excludeIdx])) == 0)
 			{
 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
 excludeFound = true;
@@ -1326,7 +1328,7 @@ is_checksummed_file(const char *fullpath, const char *filename)
 	{
 		/* Compare file against noChecksumFiles skiplist */
 		for (f = noChecksumFiles; *f; f++)
-			if (strcmp(*f, filename) == 0)
+			if (strncmp(*f, filename, strlen(*f)) == 0)
 return false;
 
 		return true;
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index b7d36b65dd..1c14317099 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 106;
+use Test::More tests => 107;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -65,7 +65,8 @@ $node->restart;
 
 # Write some files to test that they are not copied.
 foreach my $filename (
-	qw(backup_label tablespace_map postgresql.auto.conf.tmp current_logfiles.tmp)
+	qw(backup_label tablespace_map postgresql.auto.conf.tmp
+	current_logfiles.tmp global/pg_internal.init.123)
   )
 {
 	open my $file, '>>', "$pgdata/$filename";
@@ -135,7 +136,7 @@ foreach my $dirname (
 # These files should not be copied.
 foreach my $filename (
 	qw(postgresql.auto.conf.tmp postmaster.opts postmaster.pid tablespace_map current_logfiles.tmp
-	global/pg_internal.init))
+	global/pg_internal.init global/pg_internal.init.123))
 {
 	ok(!-f "$tempdir/backup/$filename", "$filename not copied");
 }
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 46ee1f1dc3..cd1a4bcd60 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -92,7 +92,8 @@ usage(void)
 }
 
 /*
- * List of files excluded from checksum validation.
+ * List of files excluded from checksum validation.  Files are excluded
+ * if their prefix match.
  *
  * Note: this list should be kept in sync with what basebackup.c includes.
  */
@@ -103,7 +104,6 @@ static const char *const skip[] = {
 	"PG_VERSION",
 #ifdef EXEC_BACKEND
 	"config_exec_params",
-	"config_exec_params.new",
 #endif
 	NULL,
 };
@@ -160,7 +160,7 @@ skipfile(const char *fn)
 	const char *const *f;
 
 	for (f = skip; *f; f++)
-		if (strcmp(*f, fn) == 0)
+