From e8bf51858c4151b957775e0b17b91e3f42b02589 Mon Sep 17 00:00:00 2001
From: Amul Sul <amul.sul@enterprisedb.com>
Date: Wed, 14 Aug 2024 10:42:37 +0530
Subject: [PATCH v15] pg_verifybackup: Verify tar-format backups.

This also works for compressed tar-format backups. However, -n must be
used, because we use pg_waldump to verify WAL, and it doesn't yet know
how to verify WAL that is stored inside of a tarfile.

Amul Sul, reviewed by Sravan Kumar and by me, and substantially
revised by me.
---
 doc/src/sgml/ref/pg_verifybackup.sgml         |  47 +-
 src/bin/pg_verifybackup/Makefile              |   2 +
 src/bin/pg_verifybackup/astreamer_verify.c    | 428 +++++++++++++++++
 src/bin/pg_verifybackup/meson.build           |   1 +
 src/bin/pg_verifybackup/pg_verifybackup.c     | 433 ++++++++++++++++--
 src/bin/pg_verifybackup/pg_verifybackup.h     |   7 +
 src/bin/pg_verifybackup/t/002_algorithm.pl    |  34 +-
 src/bin/pg_verifybackup/t/003_corruption.pl   |  77 +++-
 src/bin/pg_verifybackup/t/004_options.pl      |  17 +
 src/bin/pg_verifybackup/t/008_untar.pl        |  71 +--
 src/bin/pg_verifybackup/t/010_client_untar.pl |  48 +-
 src/fe_utils/simple_list.c                    |  19 +
 src/include/fe_utils/simple_list.h            |   1 +
 src/tools/pgindent/typedefs.list              |   2 +
 14 files changed, 1033 insertions(+), 154 deletions(-)
 create mode 100644 src/bin/pg_verifybackup/astreamer_verify.c

diff --git a/doc/src/sgml/ref/pg_verifybackup.sgml b/doc/src/sgml/ref/pg_verifybackup.sgml
index a3f167f9f6e..53341024cd2 100644
--- a/doc/src/sgml/ref/pg_verifybackup.sgml
+++ b/doc/src/sgml/ref/pg_verifybackup.sgml
@@ -34,8 +34,12 @@ PostgreSQL documentation
    integrity of a database cluster backup taken using
    <command>pg_basebackup</command> against a
    <literal>backup_manifest</literal> generated by the server at the time
-   of the backup.  The backup must be stored in the "plain"
-   format; a "tar" format backup can be checked after extracting it.
+   of the backup. The backup may be stored either in the "plain" or the "tar"
+   format; this includes tar-format backups compressed with any algorithm
+   supported by <application>pg_basebackup</application>. However, at present,
+   <literal>WAL</literal> verification is supported only for plain-format
+   backups. Therefore, if the backup is stored in tar format, the
+   <literal>-n, --no-parse-wal</literal> option should be used.
   </para>
 
   <para>
@@ -168,6 +172,45 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-F <replaceable class="parameter">format</replaceable></option></term>
+      <term><option>--format=<replaceable class="parameter">format</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the format of the backup. <replaceable>format</replaceable>
+        can be one of the following:
+
+        <variablelist>
+         <varlistentry>
+          <term><literal>p</literal></term>
+          <term><literal>plain</literal></term>
+          <listitem>
+           <para>
+            Backup consists of plain files with the same layout as the
+            source server's data directory and tablespaces.
+           </para>
+          </listitem>
+         </varlistentry>
+
+         <varlistentry>
+          <term><literal>t</literal></term>
+          <term><literal>tar</literal></term>
+          <listitem>
+           <para>
+            Backup consists of tar files, which may be compressed.  A valid
+            backup includes the main data directory in a file named
+            <filename>base.tar</filename>, the WAL files in
+            <filename>pg_wal.tar</filename>, and separate tar files for
+            each tablespace, named after the tablespace's OID. If the backup
+            is compressed, the relevant compression extension is added to the
+            end of each file name.
+           </para>
+           </listitem>
+         </varlistentry>
+        </variablelist></para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-n</option></term>
       <term><option>--no-parse-wal</option></term>
diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile
index 7c045f142e8..374d4a8afd1 100644
--- a/src/bin/pg_verifybackup/Makefile
+++ b/src/bin/pg_verifybackup/Makefile
@@ -17,10 +17,12 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 # We need libpq only because fe_utils does.
+override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
 
 OBJS = \
 	$(WIN32RES) \
+	astreamer_verify.o \
 	pg_verifybackup.o
 
 all: pg_verifybackup
diff --git a/src/bin/pg_verifybackup/astreamer_verify.c b/src/bin/pg_verifybackup/astreamer_verify.c
new file mode 100644
index 00000000000..57072fdfe04
--- /dev/null
+++ b/src/bin/pg_verifybackup/astreamer_verify.c
@@ -0,0 +1,428 @@
+/*-------------------------------------------------------------------------
+ *
+ * astreamer_verify.c
+ *
+ * Archive streamer for verification of a tar format backup (including
+ * compressed tar format backups).
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ *
+ * src/bin/pg_verifybackup/astreamer_verify.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "catalog/pg_control.h"
+#include "pg_verifybackup.h"
+
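+/*
+ * Private state for a verification archive streamer.  The first group of
+ * fields identifies the archive being read; the second group tracks the
+ * checksum and pg_control progress of the member currently being streamed.
+ */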
+typedef struct astreamer_verify
+{
+	/* These fields don't change once initialized. */
+	astreamer	base;
+	verifier_context *context;
+	char	   *archive_name;
+	Oid			tblspc_oid;
+
+	/* These fields change for each archive member. */
+	manifest_file *mfile;
+	bool		verify_checksum;
+	bool		verify_control_data;
+	pg_checksum_context *checksum_ctx;
+	uint64		checksum_bytes;
+	ControlFileData control_file;
+	uint64		control_file_bytes;
+} astreamer_verify;
+
+static void astreamer_verify_content(astreamer *streamer,
+									 astreamer_member *member,
+									 const char *data, int len,
+									 astreamer_archive_context context);
+static void astreamer_verify_finalize(astreamer *streamer);
+static void astreamer_verify_free(astreamer *streamer);
+
+static void member_verify_header(astreamer *streamer, astreamer_member *member);
+static void member_compute_checksum(astreamer *streamer,
+									astreamer_member *member,
+									const char *data, int len);
+static void member_verify_checksum(astreamer *streamer);
+static void member_copy_control_data(astreamer *streamer,
+									 astreamer_member *member,
+									 const char *data, int len);
+static void member_verify_control_data(astreamer *streamer);
+static void member_reset_info(astreamer *streamer);
+
+static const astreamer_ops astreamer_verify_ops = {
+	.content = astreamer_verify_content,
+	.finalize = astreamer_verify_finalize,
+	.free = astreamer_verify_free
+};
+
+/*
+ * Create an astreamer that can verify a tar file.
+ */
+astreamer *
+astreamer_verify_content_new(astreamer *next, verifier_context *context,
+							 char *archive_name, Oid tblspc_oid)
+{
+	astreamer_verify *streamer;
+
+	streamer = palloc0(sizeof(astreamer_verify));
+	*((const astreamer_ops **) &streamer->base.bbs_ops) =
+		&astreamer_verify_ops;
+
+	streamer->base.bbs_next = next;
+	streamer->context = context;
+	streamer->archive_name = archive_name;
+	streamer->tblspc_oid = tblspc_oid;
+
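+	/*
+	 * The checksum context is allocated just once here; it is reinitialized
+	 * for each archive member in member_verify_header().
+	 */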
+	if (!context->skip_checksums)
+		streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
+
+	return &streamer->base;
+}
+
+/*
+ * Main entry point of the archive streamer for verifying tar members.
+ */
+static void
+astreamer_verify_content(astreamer *streamer, astreamer_member *member,
+						 const char *data, int len,
+						 astreamer_archive_context context)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+
+	Assert(context != ASTREAMER_UNKNOWN);
+
+	switch (context)
+	{
+		case ASTREAMER_MEMBER_HEADER:
+			/* Initial setup plus decide which checks to perform. */
+			member_verify_header(streamer, member);
+			break;
+
+		case ASTREAMER_MEMBER_CONTENTS:
+			/* Incremental work required to verify file contents. */
+			if (mystreamer->verify_checksum)
+				member_compute_checksum(streamer, member, data, len);
+			if (mystreamer->verify_control_data)
+				member_copy_control_data(streamer, member, data, len);
+			break;
+
+		case ASTREAMER_MEMBER_TRAILER:
+			/* Now we've got all the file data. */
+			if (mystreamer->verify_checksum)
+				member_verify_checksum(streamer);
+			if (mystreamer->verify_control_data)
+				member_verify_control_data(streamer);
+
+			/* Reset for next archive member. */
+			member_reset_info(streamer);
+			break;
+
+		case ASTREAMER_ARCHIVE_TRAILER:
+			break;
+
+		default:
+			/* Shouldn't happen. */
+			pg_fatal("unexpected state while parsing tar file");
+	}
+}
+
+/*
+ * End-of-stream processing for an astreamer_verify stream.
+ */
+static void
+astreamer_verify_finalize(astreamer *streamer)
+{
+	Assert(streamer->bbs_next == NULL);
+}
+
+/*
+ * Free memory associated with an astreamer_verify stream.
+ */
+static void
+astreamer_verify_free(astreamer *streamer)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+
+	if (mystreamer->checksum_ctx)
+		pfree(mystreamer->checksum_ctx);
+
+	pfree(streamer);
+}
+
+/*
+ * Prepare to validate the next archive member.
+ */
+static void
+member_verify_header(astreamer *streamer, astreamer_member *member)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+	manifest_file *m;
+	char		pathname[MAXPGPATH];
+
+	/* We are only interested in normal files. */
+	if (member->is_directory || member->is_link)
+		return;
+
+	/*
+	 * The backup manifest stores a relative path to the base directory for
+	 * files belonging to a tablespace, while the tablespace backup tar
+	 * archive does not include this path.
+	 *
+	 * The pathname taken from the tar file could contain '.' or '..'
+	 * references, which we want to remove, so apply canonicalize_path(). It
+	 * could also be an absolute pathname, which we want to treat as a
+	 * relative path, so prepend "./" if we're not adding a tablespace prefix
+	 * to make sure that canonicalize_path() does what we want.
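+	 *
+	 * For example, a hypothetical member named "foo/bar" in the archive for
+	 * tablespace OID 16385 is looked up in the manifest as
+	 * "pg_tblspc/16385/foo/bar", while the same member in base.tar is looked
+	 * up simply as "foo/bar".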
+	 */
+	if (OidIsValid(mystreamer->tblspc_oid))
+		snprintf(pathname, MAXPGPATH, "%s/%u/%s",
+				 "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
+	else
+		snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
+	canonicalize_path(pathname);
+
+	/* Ignore any files that are listed in the ignore list. */
+	if (should_ignore_relpath(mystreamer->context, pathname))
+		return;
+
+	/* Check whether there's an entry in the manifest hash. */
+	m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
+	if (m == NULL)
+	{
+		report_backup_error(mystreamer->context,
+							"\"%s\" is present in \"%s\" but not in the manifest",
+							member->pathname, mystreamer->archive_name);
+		return;
+	}
+	mystreamer->mfile = m;
+
+	/* Flag this entry as having been encountered in a tar archive. */
+	m->matched = true;
+
+	/* Check that the size matches. */
+	if (m->size != member->size)
+	{
+		report_backup_error(mystreamer->context,
+							"\"%s\" has size %lld in \"%s\" but size %zu in the manifest",
+							member->pathname, (long long int) member->size,
+							mystreamer->archive_name, m->size);
+		m->bad = true;
+		return;
+	}
+
+	/*
+	 * Decide whether we're going to verify the checksum for this file, and
+	 * whether we're going to perform the additional validation that we do
+	 * only for the control file.
+	 */
+	mystreamer->verify_checksum =
+		(!mystreamer->context->skip_checksums && should_verify_checksum(m));
+	mystreamer->verify_control_data =
+		mystreamer->context->manifest->version != 1 &&
+		!m->bad && strcmp(m->pathname, "global/pg_control") == 0;
+
+	/* If we're going to verify the checksum, initialize a checksum context. */
+	if (mystreamer->verify_checksum &&
+		pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
+	{
+		report_backup_error(mystreamer->context,
+							"%s: could not initialize checksum of file \"%s\"",
+							mystreamer->archive_name, m->pathname);
+
+		/*
+		 * Checksum verification cannot be performed without proper context
+		 * initialization.
+		 */
+		mystreamer->verify_checksum = false;
+	}
+}
+
+/*
+ * Computes the checksum incrementally for the received file content.
+ *
+ * The checksum context must already have been initialized; it is used here
+ * for incremental checksum computation.
+ */
+static void
+member_compute_checksum(astreamer *streamer, astreamer_member *member,
+						const char *data, int len)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+	pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
+	manifest_file *m = mystreamer->mfile;
+
+	Assert(mystreamer->verify_checksum);
+	Assert(m->checksum_type == checksum_ctx->type);
+
+	/*
+	 * Update the total count of computed checksum bytes so that we can
+	 * cross-check against the file size.
+	 */
+	mystreamer->checksum_bytes += len;
+
+	/* Feed these bytes to the checksum calculation. */
+	if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
+	{
+		report_backup_error(mystreamer->context,
+							"could not update checksum of file \"%s\"",
+							m->pathname);
+		mystreamer->verify_checksum = false;
+	}
+}
+
+/*
+ * Perform the final computation and checksum verification after the entire
+ * file content has been processed.
+ */
+static void
+member_verify_checksum(astreamer *streamer)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+	manifest_file *m = mystreamer->mfile;
+	uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+	int			checksumlen;
+
+	Assert(mystreamer->verify_checksum);
+
+	/*
+	 * It's unclear how this could fail, but let's check anyway to be safe.
+	 */
+	if (mystreamer->checksum_bytes != m->size)
+	{
+		report_backup_error(mystreamer->context,
+							"file \"%s\" in \"%s\" should contain %zu bytes, but read %zu bytes",
+							m->pathname, mystreamer->archive_name,
+							m->size, mystreamer->checksum_bytes);
+		return;
+	}
+
+	/* Get the final checksum. */
+	checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
+	if (checksumlen < 0)
+	{
+		report_backup_error(mystreamer->context,
+							"could not finalize checksum of file \"%s\"",
+							m->pathname);
+		return;
+	}
+
+	/* And check it against the manifest. */
+	if (checksumlen != m->checksum_length)
+		report_backup_error(mystreamer->context,
+							"file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
+							m->pathname, mystreamer->archive_name,
+							m->checksum_length, checksumlen);
+	else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
+		report_backup_error(mystreamer->context,
+							"checksum mismatch for file \"%s\" in \"%s\"",
+							m->pathname, mystreamer->archive_name);
+}
+
+/*
+ * Stores the pg_control file contents into a local buffer; we need the entire
+ * control file data for verification.
+ */
+static void
+member_copy_control_data(astreamer *streamer, astreamer_member *member,
+						 const char *data, int len)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+
+	/* Should be here only for control file */
+	Assert(mystreamer->verify_control_data);
+
+	/*
+	 * Copy the new data into the control file buffer, but do not overrun the
+	 * buffer. Note that the on-disk length of the control file is expected to
+	 * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
+	 * shorter, just sizeof(ControlFileData).
+	 */
+	if (mystreamer->control_file_bytes <= sizeof(ControlFileData))
+	{
+		int			remaining;
+
+		remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
+		memcpy(((char *) &mystreamer->control_file)
+			   + mystreamer->control_file_bytes,
+			   data, Min(len, remaining));
+	}
+
+	/* Remember how many bytes we saw, even if we didn't buffer them. */
+	mystreamer->control_file_bytes += len;
+}
+
+/*
+ * Computes the CRC of the buffered pg_control data and then performs the
+ * final verification of the control file information.
+ */
+static void
+member_verify_control_data(astreamer *streamer)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+	manifest_data *manifest = mystreamer->context->manifest;
+	pg_crc32c	crc;
+
+	/* Should be here only for control file */
+	Assert(strcmp(mystreamer->mfile->pathname, "global/pg_control") == 0);
+	Assert(mystreamer->verify_control_data);
+
+	/*
+	 * If the control file is not the right length, that's a big problem.
+	 *
+	 * NB: There is a theoretical overflow risk here from casting to int, but
+	 * it isn't likely to be a real problem and this enables us to match the
+	 * same format string that pg_rewind uses for this case. Perhaps both this
+	 * and pg_rewind should use an unsigned 64-bit value, but for now we don't
+	 * worry about it.
+	 */
+	if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
+		report_fatal_error("unexpected control file size %d, expected %d",
+						   (int) mystreamer->control_file_bytes,
+						   PG_CONTROL_FILE_SIZE);
+
+	/* Compute the CRC. */
+	INIT_CRC32C(crc);
+	COMP_CRC32C(crc, &mystreamer->control_file,
+				offsetof(ControlFileData, crc));
+	FIN_CRC32C(crc);
+
+	/* Control file contents not meaningful if CRC is bad. */
+	if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
+		report_fatal_error("%s: %s: CRC is incorrect",
+						   mystreamer->archive_name,
+						   mystreamer->mfile->pathname);
+
+	/* Can't interpret control file if not current version. */
+	if (mystreamer->control_file.pg_control_version != PG_CONTROL_VERSION)
+		report_fatal_error("%s: %s: unexpected control file version",
+						   mystreamer->archive_name,
+						   mystreamer->mfile->pathname);
+
+	/* System identifiers should match. */
+	if (manifest->system_identifier !=
+		mystreamer->control_file.system_identifier)
+		report_fatal_error("%s: %s: manifest system identifier is %llu, but control file has %llu",
+						   mystreamer->archive_name,
+						   mystreamer->mfile->pathname,
+						   (unsigned long long) manifest->system_identifier,
+						   (unsigned long long) mystreamer->control_file.system_identifier);
+}
+
+/*
+ * Reset the per-member verification state so that the next archive member
+ * starts with a clean slate.
+ */
+static void
+member_reset_info(astreamer *streamer)
+{
+	astreamer_verify *mystreamer = (astreamer_verify *) streamer;
+
+	mystreamer->mfile = NULL;
+	mystreamer->verify_checksum = false;
+	mystreamer->verify_control_data = false;
+	mystreamer->checksum_bytes = 0;
+	mystreamer->control_file_bytes = 0;
+}
diff --git a/src/bin/pg_verifybackup/meson.build b/src/bin/pg_verifybackup/meson.build
index 7c7d31a0350..0e09d1379d1 100644
--- a/src/bin/pg_verifybackup/meson.build
+++ b/src/bin/pg_verifybackup/meson.build
@@ -1,6 +1,7 @@
 # Copyright (c) 2022-2024, PostgreSQL Global Development Group
 
 pg_verifybackup_sources = files(
+  'astreamer_verify.c',
   'pg_verifybackup.c'
 )
 
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index 3fcfb167217..1504576303b 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -22,6 +22,7 @@
 #include "common/parse_manifest.h"
 #include "fe_utils/simple_list.h"
 #include "getopt_long.h"
+#include "limits.h"
 #include "pg_verifybackup.h"
 #include "pgtime.h"
 
@@ -44,6 +45,16 @@
  */
 #define READ_CHUNK_SIZE				(128 * 1024)
 
+/*
+ * Tar file information needed for content verification.
+ */
+typedef struct tar_file
+{
+	char	   *relpath;
+	Oid			tblspc_oid;
+	pg_compress_algorithm compress_algorithm;
+} tar_file;
+
 static manifest_data *parse_manifest_file(char *manifest_path);
 static void verifybackup_version_cb(JsonManifestParseContext *context,
 									int manifest_version);
@@ -62,12 +73,18 @@ static void report_manifest_error(JsonManifestParseContext *context,
 								  const char *fmt,...)
 			pg_attribute_printf(2, 3) pg_attribute_noreturn();
 
-static void verify_backup_directory(verifier_context *context,
-									char *relpath, char *fullpath);
-static void verify_backup_file(verifier_context *context,
-							   char *relpath, char *fullpath);
+static void verify_tar_backup(verifier_context *context, DIR *dir);
+static void verify_plain_backup_directory(verifier_context *context,
+										  char *relpath, char *fullpath,
+										  DIR *dir);
+static void verify_plain_backup_file(verifier_context *context, char *relpath,
+									 char *fullpath);
 static void verify_control_file(const char *controlpath,
 								uint64 manifest_system_identifier);
+static void precheck_tar_backup_file(verifier_context *context, char *relpath,
+									 char *fullpath, SimplePtrList *tarfiles);
+static void verify_tar_file(verifier_context *context, char *relpath,
+							char *fullpath, astreamer *streamer);
 static void report_extra_backup_files(verifier_context *context);
 static void verify_backup_checksums(verifier_context *context);
 static void verify_file_checksum(verifier_context *context,
@@ -76,6 +93,10 @@ static void verify_file_checksum(verifier_context *context,
 static void parse_required_wal(verifier_context *context,
 							   char *pg_waldump_path,
 							   char *wal_directory);
+static astreamer *create_archive_verifier(verifier_context *context,
+										  char *archive_name,
+										  Oid tblspc_oid,
+										  pg_compress_algorithm compress_algo);
 
 static void progress_report(bool finished);
 static void usage(void);
@@ -99,6 +120,7 @@ main(int argc, char **argv)
 		{"exit-on-error", no_argument, NULL, 'e'},
 		{"ignore", required_argument, NULL, 'i'},
 		{"manifest-path", required_argument, NULL, 'm'},
+		{"format", required_argument, NULL, 'F'},
 		{"no-parse-wal", no_argument, NULL, 'n'},
 		{"progress", no_argument, NULL, 'P'},
 		{"quiet", no_argument, NULL, 'q'},
@@ -114,6 +136,7 @@ main(int argc, char **argv)
 	bool		quiet = false;
 	char	   *wal_directory = NULL;
 	char	   *pg_waldump_path = NULL;
+	DIR		   *dir;
 
 	pg_logging_init(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup"));
@@ -156,7 +179,7 @@ main(int argc, char **argv)
 	simple_string_list_append(&context.ignore_list, "recovery.signal");
 	simple_string_list_append(&context.ignore_list, "standby.signal");
 
-	while ((c = getopt_long(argc, argv, "ei:m:nPqsw:", long_options, NULL)) != -1)
+	while ((c = getopt_long(argc, argv, "eF:i:m:nPqsw:", long_options, NULL)) != -1)
 	{
 		switch (c)
 		{
@@ -175,6 +198,15 @@ main(int argc, char **argv)
 				manifest_path = pstrdup(optarg);
 				canonicalize_path(manifest_path);
 				break;
+			case 'F':
+				if (strcmp(optarg, "p") == 0 || strcmp(optarg, "plain") == 0)
+					context.format = 'p';
+				else if (strcmp(optarg, "t") == 0 || strcmp(optarg, "tar") == 0)
+					context.format = 't';
+				else
+					pg_fatal("invalid backup format \"%s\", must be \"plain\" or \"tar\"",
+							 optarg);
+				break;
 			case 'n':
 				no_parse_wal = true;
 				break;
@@ -264,25 +296,75 @@ main(int argc, char **argv)
 	context.manifest = parse_manifest_file(manifest_path);
 
 	/*
-	 * Now scan the files in the backup directory. At this stage, we verify
-	 * that every file on disk is present in the manifest and that the sizes
-	 * match. We also set the "matched" flag on every manifest entry that
-	 * corresponds to a file on disk.
+	 * If the backup directory cannot be found, treat this as a fatal error.
+	 */
+	dir = opendir(context.backup_directory);
+	if (dir == NULL)
+		report_fatal_error("could not open directory \"%s\": %m",
+						   context.backup_directory);
+
+	/*
+	 * At this point, we know that the backup directory exists, so it's now
+	 * reasonable to check for files immediately inside it. Thus, before going
+	 * further, if the user did not specify the backup format, check for
+	 * PG_VERSION to distinguish between tar and plain format.
 	 */
-	verify_backup_directory(&context, NULL, context.backup_directory);
+	if (context.format == '\0')
+	{
+		struct stat sb;
+		char	   *path;
+
+		path = psprintf("%s/%s", context.backup_directory, "PG_VERSION");
+		if (stat(path, &sb) == 0)
+			context.format = 'p';
+		else if (errno != ENOENT)
+		{
+			pg_log_error("could not stat file \"%s\": %m", path);
+			exit(1);
+		}
+		else
+		{
+			/* No PG_VERSION, so assume tar format. */
+			context.format = 't';
+		}
+		pfree(path);
+	}
+
+	/*
+	 * XXX: In the future, we should consider enhancing pg_waldump to read
+	 * WAL files from an archive.
+	 */
+	if (!no_parse_wal && context.format == 't')
+	{
+		pg_log_error("pg_waldump cannot read tar files");
+		pg_log_error_hint("You must use -n or --no-parse-wal when verifying a tar-format backup.");
+		exit(1);
+	}
+
+	/*
+	 * Perform the appropriate type of verification based on the backup
+	 * format. This will close 'dir'.
+	 */
+	if (context.format == 'p')
+		verify_plain_backup_directory(&context, NULL, context.backup_directory,
+									  dir);
+	else
+		verify_tar_backup(&context, dir);
 
 	/*
 	 * The "matched" flag should now be set on every entry in the hash table.
 	 * Any entries for which the bit is not set are files mentioned in the
-	 * manifest that don't exist on disk.
+	 * manifest that don't exist on disk (or in the relevant tar files).
 	 */
 	report_extra_backup_files(&context);
 
 	/*
-	 * Now do the expensive work of verifying file checksums, unless we were
-	 * told to skip it.
+	 * If this is a tar-format backup, checksums were already verified above;
+	 * but if it's a plain-format backup, we postpone checksum verification
+	 * until this point, since the earlier checks can be performed just by
+	 * knowing which files are present, without needing to read all of them.
 	 */
-	if (!context.skip_checksums)
+	if (context.format == 'p' && !context.skip_checksums)
 		verify_backup_checksums(&context);
 
 	/*
@@ -517,35 +599,27 @@ verifybackup_per_wal_range_cb(JsonManifestParseContext *context,
 }
 
 /*
- * Verify one directory.
+ * Verify one directory of a plain-format backup.
  *
  * 'relpath' is NULL if we are to verify the top-level backup directory,
  * and otherwise the relative path to the directory that is to be verified.
  *
  * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
  * filesystem path at which it can be found.
+ *
+ * 'dir' is an open directory handle, or NULL if the caller wants us to
+ * open it. If the caller chooses to pass a handle, we'll close it when
+ * we're done with it.
  */
 static void
-verify_backup_directory(verifier_context *context, char *relpath,
-						char *fullpath)
+verify_plain_backup_directory(verifier_context *context, char *relpath,
+							  char *fullpath, DIR *dir)
 {
-	DIR		   *dir;
 	struct dirent *dirent;
 
-	dir = opendir(fullpath);
-	if (dir == NULL)
+	/* Open the directory unless the caller did it. */
+	if (dir == NULL && ((dir = opendir(fullpath)) == NULL))
 	{
-		/*
-		 * If even the toplevel backup directory cannot be found, treat this
-		 * as a fatal error.
-		 */
-		if (relpath == NULL)
-			report_fatal_error("could not open directory \"%s\": %m", fullpath);
-
-		/*
-		 * Otherwise, treat this as a non-fatal error, but ignore any further
-		 * errors related to this path and anything beneath it.
-		 */
 		report_backup_error(context,
 							"could not open directory \"%s\": %m", fullpath);
 		simple_string_list_append(&context->ignore_list, relpath);
@@ -570,7 +644,7 @@ verify_backup_directory(verifier_context *context, char *relpath,
 			newrelpath = psprintf("%s/%s", relpath, filename);
 
 		if (!should_ignore_relpath(context, newrelpath))
-			verify_backup_file(context, newrelpath, newfullpath);
+			verify_plain_backup_file(context, newrelpath, newfullpath);
 
 		pfree(newfullpath);
 		pfree(newrelpath);
@@ -587,11 +661,12 @@ verify_backup_directory(verifier_context *context, char *relpath,
 /*
  * Verify one file (which might actually be a directory or a symlink).
  *
- * The arguments to this function have the same meaning as the arguments to
- * verify_backup_directory.
+ * The arguments to this function have the same meaning as the similarly named
+ * arguments to verify_plain_backup_directory.
  */
 static void
-verify_backup_file(verifier_context *context, char *relpath, char *fullpath)
+verify_plain_backup_file(verifier_context *context, char *relpath,
+						 char *fullpath)
 {
 	struct stat sb;
 	manifest_file *m;
@@ -614,7 +689,7 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath)
 	/* If it's a directory, just recurse. */
 	if (S_ISDIR(sb.st_mode))
 	{
-		verify_backup_directory(context, relpath, fullpath);
+		verify_plain_backup_directory(context, relpath, fullpath, NULL);
 		return;
 	}
 
@@ -703,6 +778,252 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
 	pfree(control_file);
 }
 
+/*
+ * Verify tar backup.
+ *
+ * The caller should pass a handle to the target directory, which we will
+ * close when we're done with it.
+ */
+static void
+verify_tar_backup(verifier_context *context, DIR *dir)
+{
+	struct dirent *dirent;
+	SimplePtrList tarfiles = {NULL, NULL};
+	SimplePtrListCell *cell;
+
+	Assert(context->format != 'p');
+
+	progress_report(false);
+
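+	/*
+	 * Work in two passes: the first pass validates file names and types and
+	 * totals the file sizes for progress reporting; the second pass reads
+	 * each archive and verifies its contents.
+	 */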
+	/* First pass: scan the directory for tar files. */
+	while (errno = 0, (dirent = readdir(dir)) != NULL)
+	{
+		char	   *filename = dirent->d_name;
+
+		/* Skip "." and ".." */
+		if (filename[0] == '.' && (filename[1] == '\0'
+								   || strcmp(filename, "..") == 0))
+			continue;
+
+		/*
+		 * Unless it's something we should ignore, perform prechecks and add
+		 * it to the list.
+		 */
+		if (!should_ignore_relpath(context, filename))
+		{
+			char	   *fullpath;
+
+			fullpath = psprintf("%s/%s", context->backup_directory, filename);
+			precheck_tar_backup_file(context, filename, fullpath, &tarfiles);
+			pfree(fullpath);
+		}
+	}
+
+	if (closedir(dir))
+	{
+		report_backup_error(context,
+							"could not close directory \"%s\": %m",
+							context->backup_directory);
+		return;
+	}
+
+	/* Second pass: Perform the final verification of the tar contents. */
+	for (cell = tarfiles.head; cell != NULL; cell = cell->next)
+	{
+		tar_file   *tar = (tar_file *) cell->ptr;
+		astreamer  *streamer;
+		char	   *fullpath;
+
+		/*
+		 * Prepare the archive streamer stack according to the tar
+		 * compression format.
+		 */
+		streamer = create_archive_verifier(context,
+										   tar->relpath,
+										   tar->tblspc_oid,
+										   tar->compress_algorithm);
+
+		/* Compute the full pathname to the target file. */
+		fullpath = psprintf("%s/%s", context->backup_directory,
+							tar->relpath);
+
+		/* Invoke the streamer for reading, decompressing, and verifying. */
+		verify_tar_file(context, tar->relpath, fullpath, streamer);
+
+		/* Cleanup. */
+		pfree(tar->relpath);
+		pfree(tar);
+		pfree(fullpath);
+
+		astreamer_finalize(streamer);
+		astreamer_free(streamer);
+	}
+	simple_ptr_list_destroy(&tarfiles);
+
+	progress_report(true);
+}
+
+/*
+ * Preparatory steps for verifying files in tar format backups.
+ *
+ * Carries out basic validation of the tar format backup file, detects the
+ * compression type, and appends that information to the tarfiles list. An
+ * error will be reported if the tar file is inaccessible, or if the file type,
+ * name, or compression type is not as expected.
+ *
+ * The arguments to this function have the same meaning as the similarly named
+ * arguments to verify_plain_backup_file. The additional 'tarfiles' argument
+ * collects the list of valid tar files.
+ */
+static void
+precheck_tar_backup_file(verifier_context *context, char *relpath,
+						 char *fullpath, SimplePtrList *tarfiles)
+{
+	struct stat sb;
+	Oid			tblspc_oid = InvalidOid;
+	pg_compress_algorithm compress_algorithm;
+	tar_file   *tar;
+	char	   *suffix = NULL;
+
+	/* Should be tar format backup */
+	Assert(context->format == 't');
+
+	/* Get file information */
+	if (stat(fullpath, &sb) != 0)
+	{
+		report_backup_error(context,
+							"could not stat file or directory \"%s\": %m",
+							relpath);
+		return;
+	}
+
+	/* In a tar format backup, we expect only plain files. */
+	if (!S_ISREG(sb.st_mode))
+	{
+		report_backup_error(context,
+							"\"%s\" is not a plain file",
+							relpath);
+		return;
+	}
+
+	/*
+	 * We expect tar files for backing up the main directory, tablespace, and
+	 * pg_wal directory.
+	 *
+	 * pg_basebackup writes the main data directory to an archive file named
+	 * base.tar, the pg_wal directory to pg_wal.tar, and the tablespace
+	 * directory to <tablespaceoid>.tar, each followed by a compression type
+	 * extension such as .gz, .lz4, or .zst.
+	 */
+	if (strncmp("base", relpath, 4) == 0)
+		suffix = relpath + 4;
+	else if (strncmp("pg_wal", relpath, 6) == 0)
+		suffix = relpath + 6;
+	else
+	{
+		/* Expected a <tablespaceoid>.tar file here. */
+		uint64		num = strtoul(relpath, &suffix, 10);
+
+		/*
+		 * Report an error if we didn't consume at least one character, if the
+		 * result is 0, or if the value is too large to be a valid OID.
+		 */
+		if (suffix == NULL || num <= 0 || num > OID_MAX)
+		{
+			report_backup_error(context,
+								"file \"%s\" is not expected in a tar format backup",
+								relpath);
+			return;
+		}
+		tblspc_oid = (Oid) num;
+	}
+
+	/* Now, check the compression type of the tar */
+	if (strcmp(suffix, ".tar") == 0)
+		compress_algorithm = PG_COMPRESSION_NONE;
+	else if (strcmp(suffix, ".tgz") == 0)
+		compress_algorithm = PG_COMPRESSION_GZIP;
+	else if (strcmp(suffix, ".tar.gz") == 0)
+		compress_algorithm = PG_COMPRESSION_GZIP;
+	else if (strcmp(suffix, ".tar.lz4") == 0)
+		compress_algorithm = PG_COMPRESSION_LZ4;
+	else if (strcmp(suffix, ".tar.zst") == 0)
+		compress_algorithm = PG_COMPRESSION_ZSTD;
+	else
+	{
+		report_backup_error(context,
+							"file \"%s\" is not expected in a tar format backup",
+							relpath);
+		return;
+	}
+
+	/*
+	 * Ignore WALs, as reading and verification will be handled through
+	 * pg_waldump.
+	 */
+	if (strncmp("pg_wal", relpath, 6) == 0)
+		return;
+
+	/*
+	 * Append the information to the list for complete verification at a later
+	 * stage.
+	 */
+	tar = pg_malloc(sizeof(tar_file));
+	tar->relpath = pstrdup(relpath);
+	tar->tblspc_oid = tblspc_oid;
+	tar->compress_algorithm = compress_algorithm;
+
+	simple_ptr_list_append(tarfiles, tar);
+
+	/* Update statistics for progress report, if necessary */
+	if (show_progress)
+		total_size += sb.st_size;
+}
+
+/*
+ * Verification of a single tar file content.
+ *
+ * It reads a given tar archive in predefined chunks and passes it to the
+ * streamer, which initiates routines for decompression (if necessary) and then
+ * verifies each member within the tar file.
+ */
+static void
+verify_tar_file(verifier_context *context, char *relpath, char *fullpath,
+				astreamer *streamer)
+{
+	int			fd;
+	int			rc;
+	char	   *buffer;
+
+	pg_log_debug("reading \"%s\"", fullpath);
+
+	/* Open the target file. */
+	if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
+	{
+		report_backup_error(context, "could not open file \"%s\": %m",
+							relpath);
+		return;
+	}
+
+	buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8));
+
+	/* Perform the reads */
+	while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
+	{
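+		/*
+		 * Feed the raw chunk to the streamer chain; the optional decompressor
+		 * and the tar parser identify archive member boundaries themselves.
+		 */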
+		astreamer_content(streamer, NULL, buffer, rc, ASTREAMER_UNKNOWN);
+
+		/* Report progress */
+		done_size += rc;
+		progress_report(false);
+	}
+
+	if (rc < 0)
+		report_backup_error(context, "could not read file \"%s\": %m",
+							relpath);
+
+	/* Close the file. */
+	if (close(fd) != 0)
+		report_backup_error(context, "could not close file \"%s\": %m",
+							relpath);
+}
+
 /*
  * Scan the hash table for entries where the 'matched' flag is not set; report
  * that such files are present in the manifest but not on disk.
@@ -830,10 +1151,10 @@ verify_file_checksum(verifier_context *context, manifest_file *m,
 
 	/*
 	 * Double-check that we read the expected number of bytes from the file.
-	 * Normally, a file size mismatch would be caught in verify_backup_file
-	 * and this check would never be reached, but this provides additional
-	 * safety and clarity in the event of concurrent modifications or
-	 * filesystem misbehavior.
+	 * Normally, mismatches would be caught in verify_plain_backup_file and
+	 * this check would never be reached, but this provides additional safety
+	 * and clarity in the event of concurrent modifications or filesystem
+	 * misbehavior.
 	 */
 	if (bytes_read != m->size)
 	{
@@ -955,6 +1276,37 @@ should_ignore_relpath(verifier_context *context, const char *relpath)
 	return false;
 }
 
+/*
+ * Create a chain of archive streamers appropriate for verifying a given
+ * archive.
+ */
+static astreamer *
+create_archive_verifier(verifier_context *context, char *archive_name,
+						Oid tblspc_oid, pg_compress_algorithm compress_algo)
+{
+	astreamer  *streamer = NULL;
+
+	/* Should be here only for tar backup */
+	Assert(context->format == 't');
+
+	/* Last step is the actual verification. */
+	streamer = astreamer_verify_content_new(streamer, context, archive_name,
+											tblspc_oid);
+
+	/* Before that we must parse the tar file. */
+	streamer = astreamer_tar_parser_new(streamer);
+
+	/* Before that we must decompress, if archive is compressed. */
+	if (compress_algo == PG_COMPRESSION_GZIP)
+		streamer = astreamer_gzip_decompressor_new(streamer);
+	else if (compress_algo == PG_COMPRESSION_LZ4)
+		streamer = astreamer_lz4_decompressor_new(streamer);
+	else if (compress_algo == PG_COMPRESSION_ZSTD)
+		streamer = astreamer_zstd_decompressor_new(streamer);
+
+	return streamer;
+}
+
 /*
  * Print a progress report based on the global variables.
  *
@@ -1010,6 +1362,7 @@ usage(void)
 	printf(_("Usage:\n  %s [OPTION]... BACKUPDIR\n\n"), progname);
 	printf(_("Options:\n"));
 	printf(_("  -e, --exit-on-error         exit immediately on error\n"));
+	printf(_("  -F, --format=p|t            backup format (plain, tar)\n"));
 	printf(_("  -i, --ignore=RELATIVE_PATH  ignore indicated path\n"));
 	printf(_("  -m, --manifest-path=PATH    use specified path for manifest\n"));
 	printf(_("  -n, --no-parse-wal          do not try to parse WAL files\n"));
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.h b/src/bin/pg_verifybackup/pg_verifybackup.h
index d8c566ed587..183b1d5111b 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.h
+++ b/src/bin/pg_verifybackup/pg_verifybackup.h
@@ -18,6 +18,7 @@
 #include "common/hashfn_unstable.h"
 #include "common/logging.h"
 #include "common/parse_manifest.h"
+#include "fe_utils/astreamer.h"
 #include "fe_utils/simple_list.h"
 
 /*
@@ -88,6 +89,7 @@ typedef struct verifier_context
 	manifest_data *manifest;
 	char	   *backup_directory;
 	SimpleStringList ignore_list;
+	char		format;			/* backup format:  p(lain)/t(ar) */
 	bool		skip_checksums;
 	bool		exit_on_error;
 	bool		saw_any_error;
@@ -101,4 +103,9 @@ extern void report_fatal_error(const char *pg_restrict fmt,...)
 extern bool should_ignore_relpath(verifier_context *context,
 								  const char *relpath);
 
+extern astreamer *astreamer_verify_content_new(astreamer *next,
+											   verifier_context *context,
+											   char *archive_name,
+											   Oid tblspc_oid);
+
 #endif							/* PG_VERIFYBACKUP_H */
diff --git a/src/bin/pg_verifybackup/t/002_algorithm.pl b/src/bin/pg_verifybackup/t/002_algorithm.pl
index fb2a1fd7c4e..4959d5bd0b9 100644
--- a/src/bin/pg_verifybackup/t/002_algorithm.pl
+++ b/src/bin/pg_verifybackup/t/002_algorithm.pl
@@ -14,24 +14,35 @@ my $primary = PostgreSQL::Test::Cluster->new('primary');
 $primary->init(allows_streaming => 1);
 $primary->start;
 
-for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512))
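+# Run a single pg_basebackup/pg_verifybackup cycle for the given backup
+# format ('plain' or 'tar') and manifest checksum algorithm.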
+sub test_checksums
 {
-	my $backup_path = $primary->backup_dir . '/' . $algorithm;
+	my ($format, $algorithm) = @_;
+	my $backup_path = $primary->backup_dir . '/' . $format . '/' . $algorithm;
 	my @backup = (
 		'pg_basebackup', '-D', $backup_path,
 		'--manifest-checksums', $algorithm, '--no-sync', '-cfast');
 	my @verify = ('pg_verifybackup', '-e', $backup_path);
 
+	if ($format eq 'tar')
+	{
+		# Add switch to get a tar-format backup
+		push @backup, ('-F', 't');
+
+		# Add switch to skip WAL verification, which is not yet supported for
+		# tar-format backups
+		push @verify, ('-n');
+	}
+
 	# A backup with a bogus algorithm should fail.
 	if ($algorithm eq 'bogus')
 	{
 		$primary->command_fails(\@backup,
-			"backup fails with algorithm \"$algorithm\"");
-		next;
+			"$format format backup fails with algorithm \"$algorithm\"");
+		return;
 	}
 
 	# A backup with a valid algorithm should work.
-	$primary->command_ok(\@backup, "backup ok with algorithm \"$algorithm\"");
+	$primary->command_ok(\@backup, "$format format backup ok with algorithm \"$algorithm\"");
 
 	# We expect each real checksum algorithm to be mentioned on every line of
 	# the backup manifest file except the first and last; for simplicity, we
@@ -39,7 +50,7 @@ for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512))
 	# is none, we just check that the manifest exists.
 	if ($algorithm eq 'none')
 	{
-		ok(-f "$backup_path/backup_manifest", "backup manifest exists");
+		ok(-f "$backup_path/backup_manifest", "$format format backup manifest exists");
 	}
 	else
 	{
@@ -52,10 +63,19 @@ for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512))
 
 	# Make sure that it verifies OK.
 	$primary->command_ok(\@verify,
-		"verify backup with algorithm \"$algorithm\"");
+		"verify $format format backup with algorithm \"$algorithm\"");
 
 	# Remove backup immediately to save disk space.
 	rmtree($backup_path);
 }
 
+# Run the checks for each backup format and checksum algorithm.
+for my $format (qw(plain tar))
+{
+	for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512))
+	{
+		test_checksums($format, $algorithm);
+	}
+}
+
 done_testing();
diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl
index ae91e043384..8fe911a3aec 100644
--- a/src/bin/pg_verifybackup/t/003_corruption.pl
+++ b/src/bin/pg_verifybackup/t/003_corruption.pl
@@ -5,12 +5,15 @@
 
 use strict;
 use warnings FATAL => 'all';
+use Cwd;
 use File::Path qw(rmtree);
 use File::Copy;
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
 use Test::More;
 
+my $tar = $ENV{TAR};
+
 my $primary = PostgreSQL::Test::Cluster->new('primary');
 $primary->init(allows_streaming => 1);
 $primary->start;
@@ -34,35 +37,35 @@ my @scenario = (
 		'name' => 'extra_file',
 		'mutilate' => \&mutilate_extra_file,
 		'fails_like' =>
-		  qr/extra_file.*present on disk but not in the manifest/
+		  qr/extra_file.*present (on disk|in "[^"]+") but not in the manifest/
 	},
 	{
 		'name' => 'extra_tablespace_file',
 		'mutilate' => \&mutilate_extra_tablespace_file,
 		'fails_like' =>
-		  qr/extra_ts_file.*present on disk but not in the manifest/
+		  qr/extra_ts_file.*present (on disk|in "[^"]+") but not in the manifest/
 	},
 	{
 		'name' => 'missing_file',
 		'mutilate' => \&mutilate_missing_file,
 		'fails_like' =>
-		  qr/pg_xact\/0000.*present in the manifest but not on disk/
+		  qr/pg_xact\/0000.*present in the manifest but not (on disk|in "[^"]+")/
 	},
 	{
 		'name' => 'missing_tablespace',
 		'mutilate' => \&mutilate_missing_tablespace,
 		'fails_like' =>
-		  qr/pg_tblspc.*present in the manifest but not on disk/
+		  qr/pg_tblspc.*present in the manifest but not (on disk|in "[^"]+")/
 	},
 	{
 		'name' => 'append_to_file',
 		'mutilate' => \&mutilate_append_to_file,
-		'fails_like' => qr/has size \d+ on disk but size \d+ in the manifest/
+		'fails_like' => qr/has size \d+ (on disk|in "[^"]+") but size \d+ in the manifest/
 	},
 	{
 		'name' => 'truncate_file',
 		'mutilate' => \&mutilate_truncate_file,
-		'fails_like' => qr/has size 0 on disk but size \d+ in the manifest/
+		'fails_like' => qr/has size 0 (on disk|in "[^"]+") but size \d+ in the manifest/
 	},
 	{
 		'name' => 'replace_file',
@@ -84,21 +87,21 @@ my @scenario = (
 		'name' => 'open_file_fails',
 		'mutilate' => \&mutilate_open_file_fails,
 		'fails_like' => qr/could not open file/,
-		'skip_on_windows' => 1
+		'needs_unix_permissions' => 1
 	},
 	{
 		'name' => 'open_directory_fails',
 		'mutilate' => \&mutilate_open_directory_fails,
 		'cleanup' => \&cleanup_open_directory_fails,
 		'fails_like' => qr/could not open directory/,
-		'skip_on_windows' => 1
+		'needs_unix_permissions' => 1
 	},
 	{
 		'name' => 'search_directory_fails',
 		'mutilate' => \&mutilate_search_directory_fails,
 		'cleanup' => \&cleanup_search_directory_fails,
 		'fails_like' => qr/could not stat file or directory/,
-		'skip_on_windows' => 1
+		'needs_unix_permissions' => 1
 	});
 
 for my $scenario (@scenario)
@@ -108,7 +111,7 @@ for my $scenario (@scenario)
   SKIP:
 	{
 		skip "unix-style permissions not supported on Windows", 4
-		  if ($scenario->{'skip_on_windows'}
+		  if ($scenario->{'needs_unix_permissions'}
 			&& ($windows_os || $Config::Config{osname} eq 'cygwin'));
 
 		# Take a backup and check that it verifies OK.
@@ -140,7 +143,59 @@ for my $scenario (@scenario)
 		$scenario->{'cleanup'}->($backup_path)
 		  if exists $scenario->{'cleanup'};
 
-		# Finally, use rmtree to reclaim space.
+		# Turn it into a tar-format backup and see if we can still detect the
+		# same problem, unless the scenario needs UNIX permissions or we don't
+		# have a TAR program available. Note that this destructively modifies
+		# the backup directory.
+		if (!$scenario->{'needs_unix_permissions'}
+			&& defined $tar && $tar ne '')
+		{
+			my $tar_backup_path = $primary->backup_dir . '/tar_' . $name;
+			mkdir($tar_backup_path) || die "mkdir $tar_backup_path: $!";
+
+			# tar and then remove each tablespace. We remove the original files
+			# so that they don't also end up in base.tar.
+			my @tsoid = grep { $_ ne '.' && $_ ne '..' }
+				slurp_dir("$backup_path/pg_tblspc");
+			my $cwd = getcwd;
+			for my $tsoid (@tsoid)
+			{
+				my $tspath = $backup_path . '/pg_tblspc/' . $tsoid;
+
+				chdir($tspath) || die "chdir: $!";
+				command_ok([ $tar, '-cf', "$tar_backup_path/$tsoid.tar", '.' ]);
+				chdir($cwd) || die "chdir: $!";
+				rmtree($tspath);
+			}
+
+			# tar and remove pg_wal
+			chdir($backup_path . '/pg_wal') || die "chdir: $!";
+			command_ok([ $tar, '-cf', "$tar_backup_path/pg_wal.tar", '.' ]);
+			chdir($cwd) || die "chdir: $!";
+			rmtree($backup_path . '/pg_wal');
+
+			# move the backup manifest
+			move($backup_path . '/backup_manifest',
+				$tar_backup_path . '/backup_manifest')
+				or die "could not move manifest to $tar_backup_path";
+
+			# Construct base.tar with what's left.
+			chdir($backup_path) || die "chdir: $!";
+			command_ok([ $tar, '-cf', "$tar_backup_path/base.tar", '.' ]);
+			chdir($cwd) || die "chdir: $!";
+
+			# Now check that the backup no longer verifies. We must use -n
+			# here, because pg_waldump can't yet read WAL from a tarfile.
+			command_fails_like(
+				[ 'pg_verifybackup', '-n', $tar_backup_path ],
+				$scenario->{'fails_like'},
+				"corrupt backup fails verification: $name");
+
+			# Use rmtree to reclaim space.
+			rmtree($tar_backup_path);
+		}
+
+		# Use rmtree to reclaim space.
 		rmtree($backup_path);
 	}
 }
diff --git a/src/bin/pg_verifybackup/t/004_options.pl b/src/bin/pg_verifybackup/t/004_options.pl
index 8ed2214408e..9dbb8c1a0ac 100644
--- a/src/bin/pg_verifybackup/t/004_options.pl
+++ b/src/bin/pg_verifybackup/t/004_options.pl
@@ -28,6 +28,23 @@ ok($result, "-q succeeds: exit code 0");
 is($stdout, '', "-q succeeds: no stdout");
 is($stderr, '', "-q succeeds: no stderr");
 
+# Should still work if we specify -Fp.
+$primary->command_ok(
+	[ 'pg_verifybackup', '-Fp', $backup_path ],
+	"verifies with -Fp");
+
+# Should not work if we specify -Fy because that's invalid.
+$primary->command_fails_like(
+	[ 'pg_verifybackup', '-Fy', $backup_path ],
+	qr(invalid backup format "y", must be "plain" or "tar"),
+	"does not verify with -Fy");
+
+# Should produce a lengthy list of errors; we test for just one of those.
+$primary->command_fails_like(
+	[ 'pg_verifybackup', '-Ft', '-n', $backup_path ],
+	qr("pg_multixact" is not a plain file),
+	"does not verify with -Ft -n");
+
 # Test invalid options
 command_fails_like(
 	[ 'pg_verifybackup', '--progress', '--quiet', $backup_path ],
diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl
index 7a09f3b75b2..e7ec8369362 100644
--- a/src/bin/pg_verifybackup/t/008_untar.pl
+++ b/src/bin/pg_verifybackup/t/008_untar.pl
@@ -16,6 +16,18 @@ my $primary = PostgreSQL::Test::Cluster->new('primary');
 $primary->init(allows_streaming => 1);
 $primary->start;
 
+# Create a tablespace directory.
+my $source_ts_path = PostgreSQL::Test::Utils::tempdir_short();
+
+# Create a tablespace with a table in it.
+$primary->safe_psql('postgres', qq(
+		CREATE TABLESPACE regress_ts1 LOCATION '$source_ts_path';
+		SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1';
+		CREATE TABLE regress_tbl1(i int) TABLESPACE regress_ts1;
+		INSERT INTO regress_tbl1 VALUES(generate_series(1,5));));
+my $tsoid = $primary->safe_psql('postgres', qq(
+		SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1'));
+
 my $backup_path = $primary->backup_dir . '/server-backup';
 my $extract_path = $primary->backup_dir . '/extracted-backup';
 
@@ -23,39 +35,31 @@ my @test_configuration = (
 	{
 		'compression_method' => 'none',
 		'backup_flags' => [],
-		'backup_archive' => 'base.tar',
+		'backup_archive' => ['base.tar', "$tsoid.tar"],
 		'enabled' => 1
 	},
 	{
 		'compression_method' => 'gzip',
 		'backup_flags' => [ '--compress', 'server-gzip' ],
-		'backup_archive' => 'base.tar.gz',
-		'decompress_program' => $ENV{'GZIP_PROGRAM'},
-		'decompress_flags' => ['-d'],
+		'backup_archive' => [ 'base.tar.gz', "$tsoid.tar.gz" ],
 		'enabled' => check_pg_config("#define HAVE_LIBZ 1")
 	},
 	{
 		'compression_method' => 'lz4',
 		'backup_flags' => [ '--compress', 'server-lz4' ],
-		'backup_archive' => 'base.tar.lz4',
-		'decompress_program' => $ENV{'LZ4'},
-		'decompress_flags' => [ '-d', '-m' ],
+		'backup_archive' => ['base.tar.lz4', "$tsoid.tar.lz4" ],
 		'enabled' => check_pg_config("#define USE_LZ4 1")
 	},
 	{
 		'compression_method' => 'zstd',
 		'backup_flags' => [ '--compress', 'server-zstd' ],
-		'backup_archive' => 'base.tar.zst',
-		'decompress_program' => $ENV{'ZSTD'},
-		'decompress_flags' => ['-d'],
+		'backup_archive' => [ 'base.tar.zst', "$tsoid.tar.zst" ],
 		'enabled' => check_pg_config("#define USE_ZSTD 1")
 	},
 	{
 		'compression_method' => 'zstd',
 		'backup_flags' => [ '--compress', 'server-zstd:level=1,long' ],
-		'backup_archive' => 'base.tar.zst',
-		'decompress_program' => $ENV{'ZSTD'},
-		'decompress_flags' => ['-d'],
+		'backup_archive' => [ 'base.tar.zst', "$tsoid.tar.zst" ],
 		'enabled' => check_pg_config("#define USE_ZSTD 1")
 	});
 
@@ -86,47 +90,16 @@ for my $tc (@test_configuration)
 		my $backup_files = join(',',
 			sort grep { $_ ne '.' && $_ ne '..' } slurp_dir($backup_path));
 		my $expected_backup_files =
-		  join(',', sort ('backup_manifest', $tc->{'backup_archive'}));
+		  join(',', sort ('backup_manifest', @{ $tc->{'backup_archive'} }));
 		is($backup_files, $expected_backup_files,
 			"found expected backup files, compression $method");
 
-		# Decompress.
-		if (exists $tc->{'decompress_program'})
-		{
-			my @decompress = ($tc->{'decompress_program'});
-			push @decompress, @{ $tc->{'decompress_flags'} }
-			  if $tc->{'decompress_flags'};
-			push @decompress, $backup_path . '/' . $tc->{'backup_archive'};
-			system_or_bail(@decompress);
-		}
-
-	  SKIP:
-		{
-			my $tar = $ENV{TAR};
-			# don't check for a working tar here, to accommodate various odd
-			# cases. If tar doesn't work the init_from_backup below will fail.
-			skip "no tar program available", 1
-			  if (!defined $tar || $tar eq '');
-
-			# Untar.
-			mkdir($extract_path);
-			system_or_bail($tar, 'xf', $backup_path . '/base.tar',
-				'-C', $extract_path);
-
-			# Verify.
-			$primary->command_ok(
-				[
-					'pg_verifybackup', '-n',
-					'-m', "$backup_path/backup_manifest",
-					'-e', $extract_path
-				],
-				"verify backup, compression $method");
-		}
+		# Verify tar backup.
+		$primary->command_ok(['pg_verifybackup', '-n', '-e', $backup_path],
+			"verify backup, compression $method");
 
 		# Cleanup.
-		unlink($backup_path . '/backup_manifest');
-		unlink($backup_path . '/base.tar');
-		unlink($backup_path . '/' . $tc->{'backup_archive'});
+		rmtree($backup_path);
 		rmtree($extract_path);
 	}
 }
diff --git a/src/bin/pg_verifybackup/t/010_client_untar.pl b/src/bin/pg_verifybackup/t/010_client_untar.pl
index 8c076d46dee..6b7d7483f6e 100644
--- a/src/bin/pg_verifybackup/t/010_client_untar.pl
+++ b/src/bin/pg_verifybackup/t/010_client_untar.pl
@@ -29,41 +29,30 @@ my @test_configuration = (
 		'compression_method' => 'gzip',
 		'backup_flags' => [ '--compress', 'client-gzip:5' ],
 		'backup_archive' => 'base.tar.gz',
-		'decompress_program' => $ENV{'GZIP_PROGRAM'},
-		'decompress_flags' => ['-d'],
 		'enabled' => check_pg_config("#define HAVE_LIBZ 1")
 	},
 	{
 		'compression_method' => 'lz4',
 		'backup_flags' => [ '--compress', 'client-lz4:5' ],
 		'backup_archive' => 'base.tar.lz4',
-		'decompress_program' => $ENV{'LZ4'},
-		'decompress_flags' => ['-d'],
-		'output_file' => 'base.tar',
 		'enabled' => check_pg_config("#define USE_LZ4 1")
 	},
 	{
 		'compression_method' => 'zstd',
 		'backup_flags' => [ '--compress', 'client-zstd:5' ],
 		'backup_archive' => 'base.tar.zst',
-		'decompress_program' => $ENV{'ZSTD'},
-		'decompress_flags' => ['-d'],
 		'enabled' => check_pg_config("#define USE_ZSTD 1")
 	},
 	{
 		'compression_method' => 'zstd',
 		'backup_flags' => [ '--compress', 'client-zstd:level=1,long' ],
 		'backup_archive' => 'base.tar.zst',
-		'decompress_program' => $ENV{'ZSTD'},
-		'decompress_flags' => ['-d'],
 		'enabled' => check_pg_config("#define USE_ZSTD 1")
 	},
 	{
 		'compression_method' => 'parallel zstd',
 		'backup_flags' => [ '--compress', 'client-zstd:workers=3' ],
 		'backup_archive' => 'base.tar.zst',
-		'decompress_program' => $ENV{'ZSTD'},
-		'decompress_flags' => ['-d'],
 		'enabled' => check_pg_config("#define USE_ZSTD 1"),
 		'possibly_unsupported' =>
 		  qr/could not set compression worker count to 3: Unsupported parameter/
@@ -118,40 +107,9 @@ for my $tc (@test_configuration)
 		is($backup_files, $expected_backup_files,
 			"found expected backup files, compression $method");
 
-		# Decompress.
-		if (exists $tc->{'decompress_program'})
-		{
-			my @decompress = ($tc->{'decompress_program'});
-			push @decompress, @{ $tc->{'decompress_flags'} }
-			  if $tc->{'decompress_flags'};
-			push @decompress, $backup_path . '/' . $tc->{'backup_archive'};
-			push @decompress, $backup_path . '/' . $tc->{'output_file'}
-			  if $tc->{'output_file'};
-			system_or_bail(@decompress);
-		}
-
-	  SKIP:
-		{
-			my $tar = $ENV{TAR};
-			# don't check for a working tar here, to accommodate various odd
-			# cases. If tar doesn't work the init_from_backup below will fail.
-			skip "no tar program available", 1
-			  if (!defined $tar || $tar eq '');
-
-			# Untar.
-			mkdir($extract_path);
-			system_or_bail($tar, 'xf', $backup_path . '/base.tar',
-				'-C', $extract_path);
-
-			# Verify.
-			$primary->command_ok(
-				[
-					'pg_verifybackup', '-n',
-					'-m', "$backup_path/backup_manifest",
-					'-e', $extract_path
-				],
-				"verify backup, compression $method");
-		}
+		# Verify tar backup.
+		$primary->command_ok( [ 'pg_verifybackup', '-n', '-e', $backup_path ],
+			"verify backup, compression $method");
 
 		# Cleanup.
 		rmtree($extract_path);
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index 2d88eb54067..c07e6bd9180 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -173,3 +173,22 @@ simple_ptr_list_append(SimplePtrList *list, void *ptr)
 		list->head = cell;
 	list->tail = cell;
 }
+
+/*
+ * Destroy only the pointer list and not the pointed-to elements
+ */
+void
+simple_ptr_list_destroy(SimplePtrList *list)
+{
+	SimplePtrListCell *cell;
+
+	cell = list->head;
+	while (cell != NULL)
+	{
+		SimplePtrListCell *next;
+
+		next = cell->next;
+		pg_free(cell);
+		cell = next;
+	}
+}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index d42ecded8ed..c83ab6f77e4 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -66,5 +66,6 @@ extern void simple_string_list_destroy(SimpleStringList *list);
 extern const char *simple_string_list_not_touched(SimpleStringList *list);
 
 extern void simple_ptr_list_append(SimplePtrList *list, void *ptr);
+extern void simple_ptr_list_destroy(SimplePtrList *list);
 
 #endif							/* SIMPLE_LIST_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b6135f03479..5fabb127d7e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3336,6 +3336,7 @@ astreamer_plain_writer
 astreamer_recovery_injector
 astreamer_tar_archiver
 astreamer_tar_parser
+astreamer_verify
 astreamer_zstd_frame
 bgworker_main_type
 bh_node_type
@@ -3957,6 +3958,7 @@ substitute_phv_relids_context
 subxids_array_status
 symbol
 tablespaceinfo
+tar_file
 td_entry
 teSection
 temp_tablespaces_extra
-- 
2.39.3 (Apple Git-145)

