Hi,

Am Dienstag, den 22.09.2020, 16:26 +0200 schrieb Michael Banck:
> Am Mittwoch, den 02.09.2020, 16:50 +0300 schrieb Anastasia Lubennikova:
> > I've looked through the previous discussion. As far as I got it, most of 
> > the controversy was about online checksums improvements.
> > 
> > The warning about pd_upper inconsistency that you've added is a good 
> > addition. The patch is a bit messy, though, because a huge code block 
> > was shifted.
> > 
> > Will it be different, if you just leave
> >      "if (!PageIsNew(page) && PageGetLSN(page) < startptr)"
> > block as it was, and add
> >      "else if (PageIsNew(page) && !PageIsZero(page))" ?
> 
> Thanks, that does indeed look better as a patch and I think it's fine
> as-is for the code as well, I've attached a v2.

Sorry, forgot to add you as reviewer in the proposed commit message,
I've fixed that up now in V3.


Michael

-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer

Unser Umgang mit personenbezogenen Daten unterliegt
folgenden Bestimmungen: https://www.credativ.de/datenschutz
From a6706ec63709137881d415a8acf98c390a39ee56 Mon Sep 17 00:00:00 2001
From: Michael Banck <michael.ba...@credativ.de>
Date: Tue, 22 Sep 2020 16:09:36 +0200
Subject: [PATCH] Fix checksum verification in base backups for zero page
 headers

We currently do not checksum a page if it is considered new by PageIsNew().
However, this means that if the whole page header is zero, PageIsNew() will
consider that page new due to the pd_upper field being zero and no subsequent
checksum verification is done.

To fix, factor out the all-zeroes check from PageIsVerified() into a new method
PageIsZero() and call that for pages considered new by PageIsNew(). If a page
is all zero, consider that a checksum failure.

Add one more test to the pg_basebackup TAP tests to check this error.

Reported-By: Andres Freund
Reviewed-By: Asif Rehman, Anastasia Lubennikova
Discussion: https://postgr.es/m/20190319200050.ncuxejradurja...@alap3.anarazel.de
---
 src/backend/replication/basebackup.c         | 30 +++++++++++++++++
 src/backend/storage/page/bufpage.c           | 35 +++++++++++---------
 src/bin/pg_basebackup/t/010_pg_basebackup.pl | 18 +++++++++-
 src/include/storage/bufpage.h                | 11 +++---
 4 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index b89df01fa7..c82765a429 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1746,6 +1746,36 @@ sendFile(const char *readfilename, const char *tarfilename,
 											"be reported", readfilename)));
 					}
 				}
+				else
+				{
+					/*
+					 * We skip completely new pages after checking they are
+					 * all-zero, since they don't have a checksum yet.
+					 */
+					if (PageIsNew(page) && !PageIsZero(page))
+					{
+						/*
+						 * pd_upper is zero, but the page is not all zero.  We
+						 * cannot run pg_checksum_page() on the page as it
+						 * would throw an assertion failure.  Consider this a
+						 * checksum failure.
+						 */
+						checksum_failures++;
+
+						if (checksum_failures <= 5)
+							ereport(WARNING,
+									(errmsg("checksum verification failed in "
+											"file \"%s\", block %d: pd_upper "
+											"is zero but page is not all-zero",
+											readfilename, blkno)));
+						if (checksum_failures == 5)
+							ereport(WARNING,
+									(errmsg("further checksum verification "
+											"failures in file \"%s\" will not "
+											"be reported", readfilename)));
+					}
+				}
+
 				block_retry = false;
 				blkno++;
 			}
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 4bc2bf955d..8be3cd6190 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -82,11 +82,8 @@ bool
 PageIsVerified(Page page, BlockNumber blkno)
 {
 	PageHeader	p = (PageHeader) page;
-	size_t	   *pagebytes;
-	int			i;
 	bool		checksum_failure = false;
 	bool		header_sane = false;
-	bool		all_zeroes = false;
 	uint16		checksum = 0;
 
 	/*
@@ -120,18 +117,7 @@ PageIsVerified(Page page, BlockNumber blkno)
 	}
 
 	/* Check all-zeroes case */
-	all_zeroes = true;
-	pagebytes = (size_t *) page;
-	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
-	{
-		if (pagebytes[i] != 0)
-		{
-			all_zeroes = false;
-			break;
-		}
-	}
-
-	if (all_zeroes)
+	if (PageIsZero(page))
 		return true;
 
 	/*
@@ -154,6 +140,25 @@ PageIsVerified(Page page, BlockNumber blkno)
 	return false;
 }
 
+/*
+ * PageIsZero
+ *		Check that the page consists only of zero bytes.
+ *
+ */
+bool
+PageIsZero(Page page)
+{
+	int			i;
+	size_t	   *pagebytes = (size_t *) page;
+
+	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
+	{
+		if (pagebytes[i] != 0)
+			return false;
+	}
+
+	return true;
+}
 
 /*
  *	PageAddItemExtended
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index f674a7c94e..f5735569c5 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 109;
+use Test::More tests => 112;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -528,6 +528,22 @@ $node->command_checks_all(
 	'pg_basebackup reports checksum mismatch');
 rmtree("$tempdir/backup_corrupt");
 
+# zero out the pageheader completely
+open $file, '+<', "$pgdata/$file_corrupt1";
+system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
+my $zero_data = "\0"x$pageheader_size;
+syswrite($file, $zero_data);
+close $file;
+system_or_bail 'pg_ctl', '-D', $pgdata, 'start';
+
+$node->command_checks_all(
+	[ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1a" ],
+	1,
+	[qr{^$}],
+	[qr/^WARNING.*checksum verification failed/s],
+	"pg_basebackup reports checksum mismatch for zeroed pageheader");
+rmtree("$tempdir/backup_corrupt1a");
+
 # induce further corruption in 5 more blocks
 system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
 open $file, '+<', "$pgdata/$file_corrupt1";
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 51b8f994ac..f3de0b36af 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -413,17 +413,18 @@ do { \
 						((is_heap) ? PAI_IS_HEAP : 0))
 
 /*
- * Check that BLCKSZ is a multiple of sizeof(size_t).  In PageIsVerified(),
- * it is much faster to check if a page is full of zeroes using the native
- * word size.  Note that this assertion is kept within a header to make
- * sure that StaticAssertDecl() works across various combinations of
- * platforms and compilers.
+ * Check that BLCKSZ is a multiple of sizeof(size_t).  In PageIsZero(), it is
+ * much faster to check if a page is full of zeroes using the native word size.
+ * Note that this assertion is kept within a header to make sure that
+ * StaticAssertDecl() works across various combinations of platforms and
+ * compilers.
  */
 StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
 				 "BLCKSZ has to be a multiple of sizeof(size_t)");
 
 extern void PageInit(Page page, Size pageSize, Size specialSize);
 extern bool PageIsVerified(Page page, BlockNumber blkno);
+extern bool PageIsZero(Page page);
 extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
 										OffsetNumber offsetNumber, int flags);
 extern Page PageGetTempPage(Page page);
-- 
2.20.1

Reply via email to