On Fri, Oct 16, 2020 at 06:02:52PM +0300, Anastasia Lubennikova wrote:
> In the current patch, PageIsVerifed called from pgbasebackup simply doesn't
> Should we change this too? I don't see any difference.

I considered that, but now that does not seem worth bothering here.

> Done.

Thanks for the updated patch.  I have played with dd and some fake
files to check the validity of the zero-case (as long as pd_lsn does
not go wild).  Here are some comments, with an updated patch attached:
- Added a PageIsVerifiedExtended to make this patch back-patchable,
with the original routine keeping its original shape.
- I did not like much to show the WARNING from PageIsVerified() for
the report, and we'd lose some context related to the base backups.
The important part is to know which blocks from which files are found
as problematic.
- Switched the checks to have PageIsVerified() placed first in the
hierarchy, so as we do all the basic validity checks before a look at
the LSN.  This is not really change things logically.
- The meaning of block_retry is also the opposite of what it should be
in the original code?  IMO, the flag should be set to true if we still
are fine to retry a block, and set it to false once the one-time retry
has failed.
- Moved the hardcoded failure report threshold of 5 into its own
variable.
- The error strings are not really consistent with the project style
in this area.  These are usually not spawned across multiple lines to
ease searches with grep or such.

Anastasia, Michael B, does that look OK to you?

NB: This patch fixes only one problem, the zero-page case, as it was
the intention of Michael B to split this part into its own thread.  We
still have, of course, a second problem when it comes to a random LSN
put into the page header which could mask an actual checksum failure
so this only takes care of half the issues.  Having a correct LSN
approximation is a tricky problem IMO, but we could improve things by
having some delta with an extra WAL page on top of GetInsertRecPtr().
And this function cannot be used for base backups taken from
standbys.
--
Michael
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 51b8f994ac..1a28ee1130 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -404,26 +404,33 @@ do { \
  *		extern declarations
  * ----------------------------------------------------------------
  */
+
+/* flags for PageAddItemExtended() */
 #define PAI_OVERWRITE			(1 << 0)
 #define PAI_IS_HEAP				(1 << 1)
 
+/* flags for PageIsVerifiedExtended() */
+#define PIV_LOG_WARNING			(1 << 0)
+#define PIV_REPORT_STAT			(1 << 1)
+
 #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
 	PageAddItemExtended(page, item, size, offsetNumber, \
 						((overwrite) ? PAI_OVERWRITE : 0) | \
 						((is_heap) ? PAI_IS_HEAP : 0))
 
 /*
- * Check that BLCKSZ is a multiple of sizeof(size_t).  In PageIsVerified(),
- * it is much faster to check if a page is full of zeroes using the native
- * word size.  Note that this assertion is kept within a header to make
- * sure that StaticAssertDecl() works across various combinations of
- * platforms and compilers.
+ * Check that BLCKSZ is a multiple of sizeof(size_t).  In
+ * PageIsVerifiedExtended(), it is much faster to check if a page is
+ * full of zeroes using the native word size.  Note that this assertion
+ * is kept within a header to make sure that StaticAssertDecl() works
+ * across various combinations of platforms and compilers.
  */
 StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
 				 "BLCKSZ has to be a multiple of sizeof(size_t)");
 
 extern void PageInit(Page page, Size pageSize, Size specialSize);
 extern bool PageIsVerified(Page page, BlockNumber blkno);
+extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
 extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
 										OffsetNumber offsetNumber, int flags);
 extern Page PageGetTempPage(Page page);
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index dbbd3aa31f..d538f25726 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -443,7 +443,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 
 		smgrread(src, forkNum, blkno, buf.data);
 
-		if (!PageIsVerified(page, blkno))
+		if (!PageIsVerifiedExtended(page, blkno,
+									PIV_LOG_WARNING | PIV_REPORT_STAT))
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg("invalid page in block %u of relation %s",
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index b89df01fa7..d1eb350b60 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1569,21 +1569,22 @@ sendFile(const char *readfilename, const char *tarfilename,
 {
 	int			fd;
 	BlockNumber blkno = 0;
-	bool		block_retry = false;
+	bool		block_retry = true;
 	char		buf[TAR_SEND_SIZE];
-	uint16		checksum;
 	int			checksum_failures = 0;
 	off_t		cnt;
 	int			i;
 	pgoff_t		len = 0;
 	char	   *page;
 	size_t		pad;
-	PageHeader	phdr;
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
 	pg_checksum_context checksum_ctx;
 
+	/* Maximum number of checksum failures reported for this file */
+#define CHECKSUM_REPORT_THRESHOLD		5
+
 	pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
 	fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
@@ -1661,9 +1662,7 @@ sendFile(const char *readfilename, const char *tarfilename,
 		if (verify_checksum && (cnt % BLCKSZ != 0))
 		{
 			ereport(WARNING,
-					(errmsg("could not verify checksum in file \"%s\", block "
-							"%d: read buffer size %d and page size %d "
-							"differ",
+					(errmsg("could not verify checksum in file \"%s\", block %u: read buffer size %d and page size %d differ",
 							readfilename, blkno, (int) cnt, BLCKSZ)));
 			verify_checksum = false;
 		}
@@ -1676,77 +1675,70 @@ sendFile(const char *readfilename, const char *tarfilename,
 
 				/*
 				 * Only check pages which have not been modified since the
-				 * start of the base backup. Otherwise, they might have been
+				 * start of the base backup.  Otherwise, they might have been
 				 * written only halfway and the checksum would not be valid.
 				 * However, replaying WAL would reinstate the correct page in
-				 * this case. We also skip completely new pages, since they
+				 * this case.  Note that PageIsVerifiedExtended() skips
+				 * completely new pages filled only with zeros, since they
 				 * don't have a checksum yet.
+				 *
+				 * Note that checksum failures are not reported to pgstat here
+				 * within PageIsVerifiedExtended() as this is done in a single
+				 * batch once this single file is completed.
 				 */
-				if (!PageIsNew(page) && PageGetLSN(page) < startptr)
+				if (PageIsVerifiedExtended(page, blkno, 0) ||
+					PageGetLSN(page) >= startptr)
 				{
-					checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
-					phdr = (PageHeader) page;
-					if (phdr->pd_checksum != checksum)
+					/* success, move on to the next block */
+					blkno++;
+					block_retry = true;
+					continue;
+				}
+
+				/* The verification of a page has failed, retry once */
+				if (block_retry)
+				{
+					int			reread_cnt;
+
+					/* Reread the failed block */
+					reread_cnt =
+						basebackup_read_file(fd, buf + BLCKSZ * i,
+											 BLCKSZ, len + BLCKSZ * i,
+											 readfilename,
+											 false);
+					if (reread_cnt == 0)
 					{
 						/*
-						 * Retry the block on the first failure.  It's
-						 * possible that we read the first 4K page of the
-						 * block just before postgres updated the entire block
-						 * so it ends up looking torn to us.  We only need to
-						 * retry once because the LSN should be updated to
-						 * something we can ignore on the next pass.  If the
-						 * error happens again then it is a true validation
-						 * failure.
+						 * If we hit end-of-file, a concurrent truncation must
+						 * have occurred, so break out of this loop just as if
+						 * the initial fread() returned 0. We'll drop through
+						 * to the same code that handles that case. (We must
+						 * fix up cnt first, though.)
 						 */
-						if (block_retry == false)
-						{
-							int			reread_cnt;
-
-							/* Reread the failed block */
-							reread_cnt =
-								basebackup_read_file(fd, buf + BLCKSZ * i,
-													 BLCKSZ, len + BLCKSZ * i,
-													 readfilename,
-													 false);
-							if (reread_cnt == 0)
-							{
-								/*
-								 * If we hit end-of-file, a concurrent
-								 * truncation must have occurred, so break out
-								 * of this loop just as if the initial fread()
-								 * returned 0. We'll drop through to the same
-								 * code that handles that case. (We must fix
-								 * up cnt first, though.)
-								 */
-								cnt = BLCKSZ * i;
-								break;
-							}
-
-							/* Set flag so we know a retry was attempted */
-							block_retry = true;
-
-							/* Reset loop to validate the block again */
-							i--;
-							continue;
-						}
-
-						checksum_failures++;
-
-						if (checksum_failures <= 5)
-							ereport(WARNING,
-									(errmsg("checksum verification failed in "
-											"file \"%s\", block %d: calculated "
-											"%X but expected %X",
-											readfilename, blkno, checksum,
-											phdr->pd_checksum)));
-						if (checksum_failures == 5)
-							ereport(WARNING,
-									(errmsg("further checksum verification "
-											"failures in file \"%s\" will not "
-											"be reported", readfilename)));
+						cnt = BLCKSZ * i;
+						break;
 					}
+
+					/* Set flag so we know a retry was attempted */
+					block_retry = false;
+
+					/* Reset loop to validate the block again */
+					i--;
+					continue;
 				}
-				block_retry = false;
+
+				checksum_failures++;
+				if (checksum_failures <= CHECKSUM_REPORT_THRESHOLD)
+					ereport(WARNING,
+							(errmsg("checksum verification failed in block %u of file \"%s\"",
+									blkno, readfilename)));
+				if (checksum_failures == CHECKSUM_REPORT_THRESHOLD)
+					ereport(WARNING,
+							(errmsg("further checksum verification failures in file \"%s\" will not be reported",
+									readfilename)));
+
+				/* move to next block */
+				block_retry = true;
 				blkno++;
 			}
 		}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e549fa1d30..3eee86afe5 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -625,7 +625,8 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
  *
  * In RBM_NORMAL mode, the page is read from disk, and the page header is
  * validated.  An error is thrown if the page header is not valid.  (But
- * note that an all-zero page is considered "valid"; see PageIsVerified().)
+ * note that an all-zero page is considered "valid"; see
+ * PageIsVerifiedExtended().)
  *
  * RBM_ZERO_ON_ERROR is like the normal mode, but if the page header is not
  * valid, the page is zeroed instead of throwing an error. This is intended
@@ -917,7 +918,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			}
 
 			/* check for garbage data */
-			if (!PageIsVerified((Page) bufBlock, blockNum))
+			if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
+										PIV_LOG_WARNING | PIV_REPORT_STAT))
 			{
 				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
 				{
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 4bc2bf955d..3a27dff04d 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -62,6 +62,18 @@ PageInit(Page page, Size pageSize, Size specialSize)
 
 /*
  * PageIsVerified
+ *		Utility wrapper for PageIsVerifiedExtended().
+ */
+bool
+PageIsVerified(Page page, BlockNumber blkno)
+{
+	return PageIsVerifiedExtended(page, blkno,
+								  PIV_LOG_WARNING | PIV_REPORT_STAT);
+}
+
+
+/*
+ * PageIsVerifiedExtended
  *		Check that the page header and checksum (if any) appear valid.
  *
  * This is called when a page has just been read in from disk.  The idea is
@@ -77,9 +89,15 @@ PageInit(Page page, Size pageSize, Size specialSize)
  * allow zeroed pages here, and are careful that the page access macros
  * treat such a page as empty and without free space.  Eventually, VACUUM
  * will clean up such a page and make it usable.
+ *
+ * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
+ * a checksum failure.
+ *
+ * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
+ * to pgstat.
  */
 bool
-PageIsVerified(Page page, BlockNumber blkno)
+PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
 {
 	PageHeader	p = (PageHeader) page;
 	size_t	   *pagebytes;
@@ -140,12 +158,14 @@ PageIsVerified(Page page, BlockNumber blkno)
 	 */
 	if (checksum_failure)
 	{
-		ereport(WARNING,
-				(errcode(ERRCODE_DATA_CORRUPTED),
-				 errmsg("page verification failed, calculated checksum %u but expected %u",
-						checksum, p->pd_checksum)));
+		if ((flags & PIV_LOG_WARNING) != 0)
+			ereport(WARNING,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg("page verification failed, calculated checksum %u but expected %u",
+							checksum, p->pd_checksum)));
 
-		pgstat_report_checksum_failure();
+		if ((flags & PIV_REPORT_STAT) != 0)
+			pgstat_report_checksum_failure();
 
 		if (header_sane && ignore_checksum_failure)
 			return true;
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index f674a7c94e..a46a9ece42 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 109;
+use Test::More tests => 112;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -528,6 +528,24 @@ $node->command_checks_all(
 	'pg_basebackup reports checksum mismatch');
 rmtree("$tempdir/backup_corrupt");
 
+# Create a new type of corruption and zero out one page header
+# completely.
+open $file, '+<', "$pgdata/$file_corrupt1";
+seek($file, $pageheader_size, 0);
+system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
+my $zero_data = '\0' x $pageheader_size;
+syswrite($file, $zero_data);
+close $file;
+system_or_bail 'pg_ctl', '-D', $pgdata, 'start';
+
+$node->command_checks_all(
+	[ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1a" ],
+	1,
+	[qr{^$}],
+	[qr/^WARNING.*checksum verification failed/s],
+	"pg_basebackup reports checksum mismatch for zeroed page headers");
+rmtree("$tempdir/backup_corrupt1a");
+
 # induce further corruption in 5 more blocks
 system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
 open $file, '+<', "$pgdata/$file_corrupt1";

Attachment: signature.asc
Description: PGP signature

Reply via email to