On Mon, Aug 5, 2024 at 10:29 PM Robert Haas <robertmh...@gmail.com> wrote: > > On Fri, Aug 2, 2024 at 7:43 AM Amul Sul <sula...@gmail.com> wrote: > > Please consider the attached version for the review. > > Thanks. I committed 0001-0003. The only thing that I changed was that > in 0001, you forgot to pgindent, which actually mattered quite a bit, > because astreamer is one character shorter than bbstreamer. >
Understood. Thanks for tidying up and committing the patches. > Before we proceed with the rest of this patch series, I think we > should fix up the comments for some of the astreamer files. Proposed > patch for that attached; please review. > Looks good to me, except for the following typo that I have fixed in the attached version: s/astreamer_plan_writer/astreamer_plain_writer/ > I also noticed that cfbot was unhappy about this patch set: > > [10:37:55.075] pg_verifybackup.c:100:7: error: no previous extern > declaration for non-static variable 'format' > [-Werror,-Wmissing-variable-declarations] > [10:37:55.075] char format = '\0'; /* p(lain)/t(ar) */ > [10:37:55.075] ^ > [10:37:55.075] pg_verifybackup.c:100:1: note: declare 'static' if the > variable is not intended to be used outside of this translation unit > [10:37:55.075] char format = '\0'; /* p(lain)/t(ar) */ > [10:37:55.075] ^ > [10:37:55.075] pg_verifybackup.c:101:23: error: no previous extern > declaration for non-static variable 'compress_algorithm' > [-Werror,-Wmissing-variable-declarations] > [10:37:55.075] pg_compress_algorithm compress_algorithm = PG_COMPRESSION_NONE; > [10:37:55.075] ^ > [10:37:55.075] pg_verifybackup.c:101:1: note: declare 'static' if the > variable is not intended to be used outside of this translation unit > [10:37:55.075] pg_compress_algorithm compress_algorithm = PG_COMPRESSION_NONE; > [10:37:55.075] ^ > [10:37:55.075] 2 errors generated. > Fixed in the attached version. > Please fix and, after posting future versions of the patch set, try to > remember to check http://cfbot.cputube.org/amul-sul.html Sure. I used to rely on that earlier, but after Cirrus CI in the GitHub repo, I assumed the workflow would be the same as cfbot and started overlooking it. However, cfbot reported a warning that didn't appear in my GitHub run. From now on, I'll make sure to check cfbot as well. Regards, Amul
From 975b994d8ca76f3c16b7eb9119e45ec25dba410b Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Tue, 2 Jul 2024 10:26:35 +0530 Subject: [PATCH v7 10/12] pg_verifybackup: Add backup format and compression option ---- NOTE: This patch is not meant to be committed separately. It should be squashed with the next patch that implements tar format support. ---- --- src/bin/pg_verifybackup/pg_verifybackup.c | 146 +++++++++++++++++++++- 1 file changed, 144 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 801e13886c2..f20b6e2895c 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -18,6 +18,7 @@ #include <sys/stat.h> #include <time.h> +#include "common/compression.h" #include "common/parse_manifest.h" #include "fe_utils/simple_list.h" #include "getopt_long.h" @@ -56,6 +57,8 @@ static void report_manifest_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2, 3) pg_attribute_noreturn(); +static char find_backup_format(verifier_context *context); +static pg_compress_algorithm find_backup_compression(verifier_context *context); static void verify_backup_directory(verifier_context *context, char *relpath, char *fullpath); static void verify_backup_file(verifier_context *context, @@ -74,6 +77,9 @@ static void usage(void); static const char *progname; +static char format = '\0'; /* p(lain)/t(ar) */ +static pg_compress_algorithm compress_algorithm = PG_COMPRESSION_NONE; + /* * Main entry point. */ @@ -84,11 +90,13 @@ main(int argc, char **argv) {"exit-on-error", no_argument, NULL, 'e'}, {"ignore", required_argument, NULL, 'i'}, {"manifest-path", required_argument, NULL, 'm'}, + {"format", required_argument, NULL, 'F'}, {"no-parse-wal", no_argument, NULL, 'n'}, {"progress", no_argument, NULL, 'P'}, {"quiet", no_argument, NULL, 'q'}, {"skip-checksums", no_argument, NULL, 's'}, {"wal-directory", required_argument, NULL, 'w'}, + {"compress", required_argument, NULL, 'Z'}, {NULL, 0, NULL, 0} }; @@ -99,6 +107,7 @@ main(int argc, char **argv) bool quiet = false; char *wal_directory = NULL; char *pg_waldump_path = NULL; + bool tar_compression_specified = false; pg_logging_init(argv[0]); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup")); @@ -141,7 +150,7 @@ main(int argc, char **argv) simple_string_list_append(&context.ignore_list, "recovery.signal"); simple_string_list_append(&context.ignore_list, "standby.signal"); - while ((c = getopt_long(argc, argv, "ei:m:nPqsw:", long_options, NULL)) != -1) + while ((c = getopt_long(argc, argv, "eF:i:m:nPqsw:Z:", long_options, NULL)) != -1) { switch (c) { @@ -160,6 +169,15 @@ main(int argc, char **argv) manifest_path = pstrdup(optarg); canonicalize_path(manifest_path); break; + case 'F': + if (strcmp(optarg, "p") == 0 || strcmp(optarg, "plain") == 0) + format = 'p'; + else if (strcmp(optarg, "t") == 0 || strcmp(optarg, "tar") == 0) + format = 't'; + else + pg_fatal("invalid backup format \"%s\", must be \"plain\" or \"tar\"", + optarg); + break; case 'n': no_parse_wal = true; break; @@ -176,6 +194,12 @@ main(int argc, char **argv) wal_directory = pstrdup(optarg); canonicalize_path(wal_directory); break; + case 'Z': + if (!parse_compress_algorithm(optarg, &compress_algorithm)) + pg_fatal("unrecognized compression algorithm: \"%s\"", + optarg); + tar_compression_specified = true; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -207,11 +231,41 @@ main(int argc, char **argv) pg_fatal("cannot specify both %s and %s", "-P/--progress", "-q/--quiet"); + /* Complain if compression method specified but the format isn't tar. */ + if (format != 't' && tar_compression_specified) + { + pg_log_error("only tar mode backups can be compressed"); + pg_log_error_hint("Try \"%s --help\" for more information.", progname); + exit(1); + } + + /* Determine the backup format if it hasn't been specified. */ + if (format == '\0') + format = find_backup_format(&context); + + /* + * Determine the tar backup compression method if it hasn't been + * specified. + */ + if (format == 't' && !tar_compression_specified) + compress_algorithm = find_backup_compression(&context); + /* Unless --no-parse-wal was specified, we will need pg_waldump. */ if (!no_parse_wal) { int ret; + /* + * XXX: In the future, we should consider enhancing pg_waldump to read + * WAL files from the tar archive. + */ + if (format != 'p') + { + pg_log_error("pg_waldump does not support parsing WAL files from a tar archive."); + pg_log_error_hint("Try \"%s --help\" to skip parse WAL files option.", progname); + exit(1); + } + pg_waldump_path = pg_malloc(MAXPGPATH); ret = find_other_exec(argv[0], "pg_waldump", "pg_waldump (PostgreSQL) " PG_VERSION "\n", @@ -279,7 +333,15 @@ main(int argc, char **argv) */ if (!context.skip_checksums) { - verify_backup_checksums(&context); + /* + * We were only checking the plain backup here. For the tar backup, + * file checksums verification (if requested) will be done immediately + * when the file is accessed, as we don't have random access to the + * files like we do with plain backups. + */ + if (format == 'p') + verify_backup_checksums(&context); + progress_report(&context, true); } @@ -972,6 +1034,84 @@ should_ignore_relpath(verifier_context *context, const char *relpath) return false; } +/* + * To detect the backup format, it checks for the PG_VERSION file in the backup + * directory. If found, it will be considered a plain-format backup; otherwise, + * it will be assumed to be a tar-format backup. + */ +static char +find_backup_format(verifier_context *context) +{ + char result; + char *path; + struct stat sb; + + /* Should be here only if the backup format is unknown */ + Assert(format == '\0'); + + /* Check PG_VERSION file. */ + path = psprintf("%s/%s", context->backup_directory, "PG_VERSION"); + result = (stat(path, &sb) == 0) ? 'p' : 't'; + pfree(path); + + return result; +} + +/* + * To determine the compression format, we will search for the main data + * directory archive and its extension, which starts with base.tar, as + * pg_basebackup writes the main data directory to an archive file named + * base.tar followed by a compression type extension like .gz, .lz4, or .zst. + */ +static pg_compress_algorithm +find_backup_compression(verifier_context *context) +{ + char *path; + struct stat sb; + bool found; + + /* Should be here only for tar backup */ + Assert(format == 't'); + + /* + * Is this a tar archive? + */ + path = psprintf("%s/%s", context->backup_directory, "base.tar"); + found = (stat(path, &sb) == 0); + pfree(path); + if (found) + return PG_COMPRESSION_NONE; + + /* + * Is this a .tar.gz archive? + */ + path = psprintf("%s/%s", context->backup_directory, "base.tar.gz"); + found = (stat(path, &sb) == 0); + pfree(path); + if (found) + return PG_COMPRESSION_GZIP; + + /* + * Is this a .tar.lz4 archive? + */ + path = psprintf("%s/%s", context->backup_directory, "base.tar.lz4"); + found = (stat(path, &sb) == 0); + pfree(path); + if (found) + return PG_COMPRESSION_LZ4; + + /* + * Is this a .tar.zst archive? + */ + path = psprintf("%s/%s", context->backup_directory, "base.tar.zst"); + found = (stat(path, &sb) == 0); + pfree(path); + if (found) + return PG_COMPRESSION_ZSTD; + + return PG_COMPRESSION_NONE; /* placate compiler */ +} + /* * Print a progress report based on the variables in verifier_context. * @@ -1054,11 +1194,13 @@ usage(void) printf(_(" -e, --exit-on-error exit immediately on error\n")); printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n")); printf(_(" -m, --manifest-path=PATH use specified path for manifest\n")); + printf(_(" -F, --format=p|t backup format (plain, tar)\n")); printf(_(" -n, --no-parse-wal do not try to parse WAL files\n")); printf(_(" -P, --progress show progress information\n")); printf(_(" -q, --quiet do not print any output, except for errors\n")); printf(_(" -s, --skip-checksums skip checksum verification\n")); printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n")); + printf(_(" -Z, --compress=METHOD compress method (gzip, lz4, zstd, none) \n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); -- 2.18.0
From c8ca272edb9cc433f7eb680876f5947b98fe010f Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Tue, 2 Jul 2024 17:04:56 +0530 Subject: [PATCH v7 12/12] pg_verifybackup: Tests and document ---- NOTE: This patch is not meant to be committed separately. It should be squashed with the previous patch that implements tar format support. ---- --- doc/src/sgml/ref/pg_verifybackup.sgml | 54 +++++++++++++- src/bin/pg_verifybackup/t/001_basic.pl | 18 ++++- src/bin/pg_verifybackup/t/004_options.pl | 2 +- src/bin/pg_verifybackup/t/005_bad_manifest.pl | 2 +- src/bin/pg_verifybackup/t/008_untar.pl | 73 ++++++------------- src/bin/pg_verifybackup/t/010_client_untar.pl | 48 +----------- 6 files changed, 96 insertions(+), 101 deletions(-) diff --git a/doc/src/sgml/ref/pg_verifybackup.sgml b/doc/src/sgml/ref/pg_verifybackup.sgml index a3f167f9f6e..c743bd89a92 100644 --- a/doc/src/sgml/ref/pg_verifybackup.sgml +++ b/doc/src/sgml/ref/pg_verifybackup.sgml @@ -34,8 +34,10 @@ PostgreSQL documentation integrity of a database cluster backup taken using <command>pg_basebackup</command> against a <literal>backup_manifest</literal> generated by the server at the time - of the backup. The backup must be stored in the "plain" - format; a "tar" format backup can be checked after extracting it. + of the backup. The backup must be stored in the "plain" or "tar" + format. Verification is supported for <literal>gzip</literal>, + <literal>lz4</literal>, and <literal>zstd</literal> compressed tar backup; + any other compressed format backups can be checked after decompressing them. </para> <para> @@ -168,6 +170,42 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>-F <replaceable class="parameter">format</replaceable></option></term> + <term><option>--format=<replaceable class="parameter">format</replaceable></option></term> + <listitem> + <para> + Specifies the format of the backup. <replaceable>format</replaceable> + can be one of the following: + + <variablelist> + <varlistentry> + <term><literal>p</literal></term> + <term><literal>plain</literal></term> + <listitem> + <para> + Backup consists of plain files with the same layout as the + source server's data directory and tablespaces. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>t</literal></term> + <term><literal>tar</literal></term> + <listitem> + <para> + Backup consists of tar files. The main data directory's contents + will be written to a file named <filename>base.tar</filename>, + and each other tablespace will be written to a separate tar file + named after that tablespace's OID. + </para> + </listitem> + </varlistentry> + </variablelist></para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-n</option></term> <term><option>--no-parse-wal</option></term> @@ -227,6 +265,18 @@ PostgreSQL documentation </para> </listitem> </varlistentry> + + <varlistentry> + <term><option>-Z <replaceable class="parameter">method</replaceable></option></term> + <term><option>--compress=<replaceable class="parameter">method</replaceable></option></term> + <listitem> + <para> + The tar backup compression method can be <literal>gzip</literal>, + <literal>lz4</literal>, <literal>zstd</literal>, or + <literal>none</literal> if no compression. + </para> + </listitem> + </varlistentry> </variablelist> </para> diff --git a/src/bin/pg_verifybackup/t/001_basic.pl b/src/bin/pg_verifybackup/t/001_basic.pl index 2f3e52d296f..d47ce1f04fc 100644 --- a/src/bin/pg_verifybackup/t/001_basic.pl +++ b/src/bin/pg_verifybackup/t/001_basic.pl @@ -17,13 +17,25 @@ command_fails_like( qr/no backup directory specified/, 'target directory must be specified'); command_fails_like( - [ 'pg_verifybackup', $tempdir ], + [ 'pg_verifybackup', '-Fp', $tempdir ], qr/could not open file.*\/backup_manifest\"/, 'pg_verifybackup requires a manifest'); command_fails_like( - [ 'pg_verifybackup', $tempdir, $tempdir ], + [ 'pg_verifybackup', '-Fp', $tempdir, $tempdir ], qr/too many command-line arguments/, 'multiple target directories not allowed'); +command_fails_like( + [ 'pg_verifybackup', '-Zgzip', $tempdir ], + qr/only tar mode backups can be compressed/, + 'compression method required format option'); +command_fails_like( + [ 'pg_verifybackup', '-Fp', '-Zlz4', $tempdir ], + qr/only tar mode backups can be compressed/, + 'compression method required tar format option'); +command_fails_like( + [ 'pg_verifybackup', '-Fp', '-Znon_exist', $tempdir ], + qr/unrecognized compression algorithm/, + 'compression method should be valid'); # create fake manifest file open(my $fh, '>', "$tempdir/backup_manifest") || die "open: $!"; @@ -31,7 +43,7 @@ close($fh); # but then try to use an alternate, nonexisting manifest command_fails_like( - [ 'pg_verifybackup', '-m', "$tempdir/not_the_manifest", $tempdir ], + [ 'pg_verifybackup', '-Fp', '-m', "$tempdir/not_the_manifest", $tempdir ], qr/could not open file.*\/not_the_manifest\"/, 'pg_verifybackup respects -m flag'); diff --git a/src/bin/pg_verifybackup/t/004_options.pl b/src/bin/pg_verifybackup/t/004_options.pl index 8ed2214408e..2f197648740 100644 --- a/src/bin/pg_verifybackup/t/004_options.pl +++ b/src/bin/pg_verifybackup/t/004_options.pl @@ -108,7 +108,7 @@ unlike( # Test valid manifest with nonexistent backup directory. command_fails_like( [ - 'pg_verifybackup', '-m', + 'pg_verifybackup', '-Fp', '-m', "$backup_path/backup_manifest", "$backup_path/fake" ], qr/could not open directory/, diff --git a/src/bin/pg_verifybackup/t/005_bad_manifest.pl b/src/bin/pg_verifybackup/t/005_bad_manifest.pl index c4ed64b62d5..28c51b6feb0 100644 --- a/src/bin/pg_verifybackup/t/005_bad_manifest.pl +++ b/src/bin/pg_verifybackup/t/005_bad_manifest.pl @@ -208,7 +208,7 @@ sub test_bad_manifest print $fh $manifest_contents; close($fh); - command_fails_like([ 'pg_verifybackup', $tempdir ], $regexp, $test_name); + command_fails_like([ 'pg_verifybackup', '-Fp', $tempdir ], $regexp, $test_name); return; } diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl index 7a09f3b75b2..9896560adc3 100644 --- a/src/bin/pg_verifybackup/t/008_untar.pl +++ b/src/bin/pg_verifybackup/t/008_untar.pl @@ -16,6 +16,20 @@ my $primary = PostgreSQL::Test::Cluster->new('primary'); $primary->init(allows_streaming => 1); $primary->start; +# Create a couple of directories to use as tablespaces. +my $TS1_LOCATION = $primary->backup_dir .'/ts1'; +$TS1_LOCATION =~ s/\/\.\//\//g; # collapse foo/./bar to foo/bar +mkdir($TS1_LOCATION); + +# Create a tablespace with table in it. +$primary->safe_psql('postgres', qq( + CREATE TABLESPACE regress_ts1 LOCATION '$TS1_LOCATION'; + SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1'; + CREATE TABLE regress_tbl1(i int) TABLESPACE regress_ts1; + INSERT INTO regress_tbl1 VALUES(generate_series(1,5));)); +my $tsoid = $primary->safe_psql('postgres', qq( + SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1')); + my $backup_path = $primary->backup_dir . '/server-backup'; my $extract_path = $primary->backup_dir . '/extracted-backup'; @@ -23,39 +37,31 @@ my @test_configuration = ( { 'compression_method' => 'none', 'backup_flags' => [], - 'backup_archive' => 'base.tar', + 'backup_archive' => ['base.tar', "$tsoid.tar"], 'enabled' => 1 }, { 'compression_method' => 'gzip', 'backup_flags' => [ '--compress', 'server-gzip' ], - 'backup_archive' => 'base.tar.gz', - 'decompress_program' => $ENV{'GZIP_PROGRAM'}, - 'decompress_flags' => ['-d'], + 'backup_archive' => [ 'base.tar.gz', "$tsoid.tar.gz" ], 'enabled' => check_pg_config("#define HAVE_LIBZ 1") }, { 'compression_method' => 'lz4', 'backup_flags' => [ '--compress', 'server-lz4' ], - 'backup_archive' => 'base.tar.lz4', - 'decompress_program' => $ENV{'LZ4'}, - 'decompress_flags' => [ '-d', '-m' ], + 'backup_archive' => ['base.tar.lz4', "$tsoid.tar.lz4" ], 'enabled' => check_pg_config("#define USE_LZ4 1") }, { 'compression_method' => 'zstd', 'backup_flags' => [ '--compress', 'server-zstd' ], - 'backup_archive' => 'base.tar.zst', - 'decompress_program' => $ENV{'ZSTD'}, - 'decompress_flags' => ['-d'], + 'backup_archive' => [ 'base.tar.zst', "$tsoid.tar.zst" ], 'enabled' => check_pg_config("#define USE_ZSTD 1") }, { 'compression_method' => 'zstd', 'backup_flags' => [ '--compress', 'server-zstd:level=1,long' ], - 'backup_archive' => 'base.tar.zst', - 'decompress_program' => $ENV{'ZSTD'}, - 'decompress_flags' => ['-d'], + 'backup_archive' => [ 'base.tar.zst', "$tsoid.tar.zst" ], 'enabled' => check_pg_config("#define USE_ZSTD 1") }); @@ -86,47 +92,16 @@ for my $tc (@test_configuration) my $backup_files = join(',', sort grep { $_ ne '.' && $_ ne '..' } slurp_dir($backup_path)); my $expected_backup_files = - join(',', sort ('backup_manifest', $tc->{'backup_archive'})); + join(',', sort ('backup_manifest', @{ $tc->{'backup_archive'} })); is($backup_files, $expected_backup_files, "found expected backup files, compression $method"); - # Decompress. - if (exists $tc->{'decompress_program'}) - { - my @decompress = ($tc->{'decompress_program'}); - push @decompress, @{ $tc->{'decompress_flags'} } - if $tc->{'decompress_flags'}; - push @decompress, $backup_path . '/' . $tc->{'backup_archive'}; - system_or_bail(@decompress); - } - - SKIP: - { - my $tar = $ENV{TAR}; - # don't check for a working tar here, to accommodate various odd - # cases. If tar doesn't work the init_from_backup below will fail. - skip "no tar program available", 1 - if (!defined $tar || $tar eq ''); - - # Untar. - mkdir($extract_path); - system_or_bail($tar, 'xf', $backup_path . '/base.tar', - '-C', $extract_path); - - # Verify. - $primary->command_ok( - [ - 'pg_verifybackup', '-n', - '-m', "$backup_path/backup_manifest", - '-e', $extract_path - ], - "verify backup, compression $method"); - } + # Verify tar backup. + $primary->command_ok(['pg_verifybackup', '-n', '-e', $backup_path], + "verify backup, compression $method"); # Cleanup. - unlink($backup_path . '/backup_manifest'); - unlink($backup_path . '/base.tar'); - unlink($backup_path . '/' . $tc->{'backup_archive'}); + rmtree($backup_path); rmtree($extract_path); } } diff --git a/src/bin/pg_verifybackup/t/010_client_untar.pl b/src/bin/pg_verifybackup/t/010_client_untar.pl index 8c076d46dee..6b7d7483f6e 100644 --- a/src/bin/pg_verifybackup/t/010_client_untar.pl +++ b/src/bin/pg_verifybackup/t/010_client_untar.pl @@ -29,41 +29,30 @@ my @test_configuration = ( 'compression_method' => 'gzip', 'backup_flags' => [ '--compress', 'client-gzip:5' ], 'backup_archive' => 'base.tar.gz', - 'decompress_program' => $ENV{'GZIP_PROGRAM'}, - 'decompress_flags' => ['-d'], 'enabled' => check_pg_config("#define HAVE_LIBZ 1") }, { 'compression_method' => 'lz4', 'backup_flags' => [ '--compress', 'client-lz4:5' ], 'backup_archive' => 'base.tar.lz4', - 'decompress_program' => $ENV{'LZ4'}, - 'decompress_flags' => ['-d'], - 'output_file' => 'base.tar', 'enabled' => check_pg_config("#define USE_LZ4 1") }, { 'compression_method' => 'zstd', 'backup_flags' => [ '--compress', 'client-zstd:5' ], 'backup_archive' => 'base.tar.zst', - 'decompress_program' => $ENV{'ZSTD'}, - 'decompress_flags' => ['-d'], 'enabled' => check_pg_config("#define USE_ZSTD 1") }, { 'compression_method' => 'zstd', 'backup_flags' => [ '--compress', 'client-zstd:level=1,long' ], 'backup_archive' => 'base.tar.zst', - 'decompress_program' => $ENV{'ZSTD'}, - 'decompress_flags' => ['-d'], 'enabled' => check_pg_config("#define USE_ZSTD 1") }, { 'compression_method' => 'parallel zstd', 'backup_flags' => [ '--compress', 'client-zstd:workers=3' ], 'backup_archive' => 'base.tar.zst', - 'decompress_program' => $ENV{'ZSTD'}, - 'decompress_flags' => ['-d'], 'enabled' => check_pg_config("#define USE_ZSTD 1"), 'possibly_unsupported' => qr/could not set compression worker count to 3: Unsupported parameter/ @@ -118,40 +107,9 @@ for my $tc (@test_configuration) is($backup_files, $expected_backup_files, "found expected backup files, compression $method"); - # Decompress. - if (exists $tc->{'decompress_program'}) - { - my @decompress = ($tc->{'decompress_program'}); - push @decompress, @{ $tc->{'decompress_flags'} } - if $tc->{'decompress_flags'}; - push @decompress, $backup_path . '/' . $tc->{'backup_archive'}; - push @decompress, $backup_path . '/' . $tc->{'output_file'} - if $tc->{'output_file'}; - system_or_bail(@decompress); - } - - SKIP: - { - my $tar = $ENV{TAR}; - # don't check for a working tar here, to accommodate various odd - # cases. If tar doesn't work the init_from_backup below will fail. - skip "no tar program available", 1 - if (!defined $tar || $tar eq ''); - - # Untar. - mkdir($extract_path); - system_or_bail($tar, 'xf', $backup_path . '/base.tar', - '-C', $extract_path); - - # Verify. - $primary->command_ok( - [ - 'pg_verifybackup', '-n', - '-m', "$backup_path/backup_manifest", - '-e', $extract_path - ], - "verify backup, compression $method"); - } + # Verify tar backup. + $primary->command_ok( [ 'pg_verifybackup', '-n', '-e', $backup_path ], + "verify backup, compression $method"); # Cleanup. rmtree($extract_path); -- 2.18.0
From c9415e97006e5765151d7fcd3990bcb0c4a05966 Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Wed, 31 Jul 2024 16:22:07 +0530 Subject: [PATCH v7 11/12] pg_verifybackup: Read tar files and verify its contents --- src/bin/pg_verifybackup/Makefile | 4 +- src/bin/pg_verifybackup/astreamer_verify.c | 367 +++++++++++++++++++++ src/bin/pg_verifybackup/meson.build | 6 +- src/bin/pg_verifybackup/pg_verifybackup.c | 216 +++++++++++- src/bin/pg_verifybackup/pg_verifybackup.h | 9 + src/tools/pgindent/typedefs.list | 1 + 6 files changed, 594 insertions(+), 9 deletions(-) create mode 100644 src/bin/pg_verifybackup/astreamer_verify.c diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile index 7c045f142e8..df7aaabd530 100644 --- a/src/bin/pg_verifybackup/Makefile +++ b/src/bin/pg_verifybackup/Makefile @@ -17,11 +17,13 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global # We need libpq only because fe_utils does. +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) OBJS = \ $(WIN32RES) \ - pg_verifybackup.o + pg_verifybackup.o \ + astreamer_verify.o all: pg_verifybackup diff --git a/src/bin/pg_verifybackup/astreamer_verify.c b/src/bin/pg_verifybackup/astreamer_verify.c new file mode 100644 index 00000000000..be40922c042 --- /dev/null +++ b/src/bin/pg_verifybackup/astreamer_verify.c @@ -0,0 +1,367 @@ +/*------------------------------------------------------------------------- + * + * astreamer_verify.c + * + * Extend fe_utils/astreamer.h archive streaming facility to verify TAR + * backup. + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * + * src/bin/pg_verifybackup/astreamer_verify.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "common/logging.h" +#include "fe_utils/astreamer.h" +#include "pg_verifybackup.h" + +typedef struct astreamer_verify +{ + astreamer base; + verifier_context *context; + char *archiveName; + Oid tblspcOid; + + /* Hold information for a member file verification */ + manifest_file *mfile; + int64 receivedBytes; + bool verifyChecksums; + bool verifyControlData; + pg_checksum_context *checksum_ctx; +} astreamer_verify; + +static void astreamer_verify_content(astreamer *streamer, + astreamer_member *member, + const char *data, int len, + astreamer_archive_context context); +static void astreamer_verify_finalize(astreamer *streamer); +static void astreamer_verify_free(astreamer *streamer); + +static void verify_member_header(astreamer *streamer, astreamer_member *member); +static void verify_member_contents(astreamer *streamer, + astreamer_member *member, + const char *data, int len); +static void verify_content_checksum(astreamer *streamer, + astreamer_member *member, + const char *buffer, int buffer_len); +static void verify_controldata(astreamer *streamer, + astreamer_member *member, + const char *data, int len); +static void reset_member_info(astreamer *streamer); + +static const astreamer_ops astreamer_verify_ops = { + .content = astreamer_verify_content, + .finalize = astreamer_verify_finalize, + .free = astreamer_verify_free +}; + +/* + * Create a astreamer that can verifies content of a TAR file. + */ +astreamer * +astreamer_verify_content_new(astreamer *next, verifier_context *context, + char *archive_name, Oid tblspc_oid) +{ + astreamer_verify *streamer; + + streamer = palloc0(sizeof(astreamer_verify)); + *((const astreamer_ops **) &streamer->base.bbs_ops) = + &astreamer_verify_ops; + + streamer->base.bbs_next = next; + streamer->context = context; + streamer->archiveName = archive_name; + streamer->tblspcOid = tblspc_oid; + initStringInfo(&streamer->base.bbs_buffer); + + return &streamer->base; +} + +/* + * It verifies each TAR member entry against the manifest data and performs + * checksum verification if enabled. Additionally, it validates the backup's + * system identifier against the backup_manifest. + */ +static void +astreamer_verify_content(astreamer *streamer, + astreamer_member *member, const char *data, + int len, astreamer_archive_context context) +{ + Assert(context != ASTREAMER_UNKNOWN); + + switch (context) + { + case ASTREAMER_MEMBER_HEADER: + + /* + * Perform the initial check and setup for the verification. + */ + verify_member_header(streamer, member); + break; + + case ASTREAMER_MEMBER_CONTENTS: + + /* + * Peform the required contants verification. + */ + verify_member_contents(streamer, member, data, len); + break; + + case ASTREAMER_MEMBER_TRAILER: + + /* + * Reset the temporary information stored for a verification. + */ + reset_member_info(streamer); + break; + + case ASTREAMER_ARCHIVE_TRAILER: + break; + + default: + /* Shouldn't happen. */ + pg_fatal("unexpected state while parsing tar archive"); + } +} + +/* + * End-of-stream processing for a astreamer_verify stream. + */ +static void +astreamer_verify_finalize(astreamer *streamer) +{ + Assert(streamer->bbs_next == NULL); +} + +/* + * Free memory associated with a astreamer_verify stream. + */ +static void +astreamer_verify_free(astreamer *streamer) +{ + pfree(streamer->bbs_buffer.data); + pfree(streamer); +} + +/* + * Verify the entry if it is a file in the backup manifest. If the archive being + * processed is a tablespace, prepare the required file path for subsequent + * operations. Finally, check if it needs to perform checksum verification and + * control data verification during file content processing. + */ +static void +verify_member_header(astreamer *streamer, astreamer_member *member) +{ + astreamer_verify *mystreamer = (astreamer_verify *) streamer; + manifest_file *m; + + /* We are only interested in files that are not in the ignore list. */ + if (member->is_directory || member->is_link || + should_ignore_relpath(mystreamer->context, member->pathname)) + return; + + /* + * The backup_manifest stores a relative path to the base directory for + * files belong tablespace, whereas <tablespaceoid>.tar doesn't. Prepare + * the required path, otherwise, the manfiest entry verification will + * fail. + */ + if (OidIsValid(mystreamer->tblspcOid)) + { + char temp[MAXPGPATH]; + + /* Copy original name at temporary space */ + memcpy(temp, member->pathname, MAXPGPATH); + + snprintf(member->pathname, MAXPGPATH, "%s/%d/%s", + "pg_tblspc", mystreamer->tblspcOid, temp); + } + + /* Check the manifest entry */ + m = verify_manifest_entry(mystreamer->context, member->pathname, + member->size); + mystreamer->mfile = (void *) m; + + /* + * Prepare for checksum and manifest system identifier verification. + * + * We could have these checks while receiving contents. However, since + * contents are received in multiple iterations, this would result in + * these lengthy checks being performed multiple times. Instead, having a + * single flag would be more efficient. + */ + mystreamer->verifyChecksums = + (!mystreamer->context->skip_checksums && should_verify_checksum(m)); + mystreamer->verifyControlData = + should_verify_control_data(mystreamer->context->manifest, m); +} + +/* + * Process the member content according to the flags set by the member header + * processing routine for checksum and control data verification. + */ +static void +verify_member_contents(astreamer *streamer, astreamer_member *member, + const char *data, int len) + +{ + astreamer_verify *mystreamer = (astreamer_verify *) streamer; + + /* Verify the checksums */ + if (mystreamer->verifyChecksums) + verify_content_checksum(streamer, member, data, len); + + /* Verify pg_control information */ + if (mystreamer->verifyControlData) + verify_controldata(streamer, member, data, len); +} + +/* + * Similar to verify_file_checksum() but this function computes the checksum + * incrementally for the received file content. Unlike a normal backup + * directory, TAR format files do not allow random access, so checksum + * verification occurs progressively. Additionally, the function calls the + * routine for control data verification if the flags indicate that it is + * required. + * + * On the first visit, the function initializes checksum_ctx, which will be used + * for incremental checksum calculation. Once the complete file content is + * received (tracked using the receivedBytes), the routine that performs the + * final checksum verification is called + */ +static void +verify_content_checksum(astreamer *streamer, astreamer_member *member, + const char *buffer, int buffer_len) +{ + astreamer_verify *mystreamer = (astreamer_verify *) streamer; + pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx; + verifier_context *context = mystreamer->context; + manifest_file *m = mystreamer->mfile; + const char *relpath = m->pathname; + uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; + + /* + * Mark it false to avoid unexpected re-entrance for the same file content + * (e.g. returned in error should not be revisited). + */ + Assert(mystreamer->verifyChecksums); + mystreamer->verifyChecksums = false; + + /* Should have came for the right file */ + Assert(strcmp(member->pathname, relpath) == 0); + + /* If we were first time for this file */ + if (!checksum_ctx) + { + checksum_ctx = pg_malloc(sizeof(pg_checksum_context)); + mystreamer->checksum_ctx = checksum_ctx; + + if (pg_checksum_init(checksum_ctx, m->checksum_type) < 0) + { + report_backup_error(context, + "%s: could not initialize checksum of file \"%s\"", + mystreamer->archiveName, relpath); + return; + } + } + + /* Update the total count of computed checksum bytes. */ + mystreamer->receivedBytes += buffer_len; + + if (pg_checksum_update(checksum_ctx, (uint8 *) buffer, buffer_len) < 0) + { + report_backup_error(context, "could not update checksum of file \"%s\"", + relpath); + return; + } + + /* Report progress */ + context->done_size += buffer_len; + progress_report(context, false); + + /* Yet to receive the full content of the file. */ + if (mystreamer->receivedBytes < m->size) + { + mystreamer->verifyChecksums = true; + return; + } + + /* Do the final computation and verification. */ + verify_checksum(context, m, checksum_ctx, checksumbuf); +} + +/* + * Prepare the control data from the received file contents, which are supposed + * to be from the pg_control file, including CRC calculation. Then, call the + * routines that perform the final verification of the control file information. + */ +static void +verify_controldata(astreamer *streamer, astreamer_member *member, + const char *data, int len) +{ + astreamer_verify *mystreamer = (astreamer_verify *) streamer; + manifest_data *manifest = mystreamer->context->manifest; + ControlFileData control_file; + pg_crc32c crc; + bool crc_ok; + + /* Should be here only for control file */ + Assert(strcmp(member->pathname, "global/pg_control") == 0); + Assert(manifest->version != 1); + + /* Mark it as false to avoid unexpected re-entrance */ + Assert(mystreamer->verifyControlData); + mystreamer->verifyControlData = false; + + /* Should have whole control file data. */ + if (!astreamer_buffer_until(streamer, &data, &len, sizeof(ControlFileData))) + { + mystreamer->verifyControlData = true; + return; + } + + pg_log_debug("%s: reading \"%s\"", mystreamer->archiveName, + member->pathname); + + if (streamer->bbs_buffer.len != sizeof(ControlFileData)) + report_fatal_error("%s: could not read control file: read %d of %zu", + mystreamer->archiveName, streamer->bbs_buffer.len, + sizeof(ControlFileData)); + + memcpy(&control_file, streamer->bbs_buffer.data, + sizeof(ControlFileData)); + + /* Check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, + (char *) (&control_file), + offsetof(ControlFileData, crc)); + FIN_CRC32C(crc); + + crc_ok = EQ_CRC32C(crc, control_file.crc); + + /* Do the final control data verification. */ + verify_control_data(&control_file, member->pathname, crc_ok, + manifest->system_identifier); +} + +/* + * Reset flags and free memory allocations for member file verification. + */ +static void +reset_member_info(astreamer *streamer) +{ + astreamer_verify *mystreamer = (astreamer_verify *) streamer; + + mystreamer->mfile = NULL; + mystreamer->receivedBytes = 0; + mystreamer->verifyChecksums = false; + mystreamer->verifyControlData = false; + + if (mystreamer->checksum_ctx) + pfree(mystreamer->checksum_ctx); + mystreamer->checksum_ctx = NULL; +} diff --git a/src/bin/pg_verifybackup/meson.build b/src/bin/pg_verifybackup/meson.build index 7c7d31a0350..1e3fcf7ee5a 100644 --- a/src/bin/pg_verifybackup/meson.build +++ b/src/bin/pg_verifybackup/meson.build @@ -1,7 +1,8 @@ # Copyright (c) 2022-2024, PostgreSQL Global Development Group pg_verifybackup_sources = files( - 'pg_verifybackup.c' + 'pg_verifybackup.c', + 'astreamer_verify.c' ) if host_system == 'windows' @@ -10,9 +11,10 @@ if host_system == 'windows' '--FILEDESC', 'pg_verifybackup - verify a backup against using a backup manifest']) endif +pg_verifybackup_deps = [frontend_code, libpq, lz4, zlib, zstd] pg_verifybackup = executable('pg_verifybackup', pg_verifybackup_sources, - dependencies: [frontend_code, libpq], + dependencies: pg_verifybackup_deps, kwargs: default_bin_args, ) bin_targets += pg_verifybackup diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index f20b6e2895c..ce2ba7437be 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -20,9 +20,12 @@ #include "common/compression.h" #include "common/parse_manifest.h" +#include "common/relpath.h" +#include "fe_utils/astreamer.h" #include "fe_utils/simple_list.h" #include "getopt_long.h" #include "pg_verifybackup.h" +#include "pgtar.h" #include "pgtime.h" /* @@ -39,6 +42,11 @@ */ #define ESTIMATED_BYTES_PER_MANIFEST_LINE 100 + +static void (*verify_backup_file_cb) (verifier_context *context, + char *relpath, char *fullpath, + size_t filesize); + static manifest_data *parse_manifest_file(char *manifest_path); static void verifybackup_version_cb(JsonManifestParseContext *context, int manifest_version); @@ -63,6 +71,15 @@ static void verify_backup_directory(verifier_context *context, char *relpath, char *fullpath); static void verify_backup_file(verifier_context *context, char *relpath, char *fullpath); +static void verify_plain_file_cb(verifier_context *context, + char *relpath, char *fullpath, + size_t filesize); +static void verify_tar_file_cb(verifier_context *context, + char *relpath, char *fullpath, + size_t filesize); +static void verify_tar_content(verifier_context *context, + char *relpath, char *fullpath, + astreamer *streamer); static void report_extra_backup_files(verifier_context *context); static void verify_backup_checksums(verifier_context *context); static void verify_file_checksum(verifier_context *context, @@ -71,6 +88,9 @@ static void verify_file_checksum(verifier_context *context, static void parse_required_wal(verifier_context *context, char *pg_waldump_path, char *wal_directory); +static astreamer *create_archive_verifier(verifier_context *context, + char *archive_name, + Oid tblspc_oid); static void compute_total_size(verifier_context *context); static void usage(void); @@ -146,6 +166,10 @@ main(int argc, char **argv) */ simple_string_list_append(&context.ignore_list, "backup_manifest"); simple_string_list_append(&context.ignore_list, "pg_wal"); + simple_string_list_append(&context.ignore_list, "pg_wal.tar"); + simple_string_list_append(&context.ignore_list, "pg_wal.tar.gz"); + simple_string_list_append(&context.ignore_list, "pg_wal.tar.lz4"); + simple_string_list_append(&context.ignore_list, "pg_wal.tar.zst"); simple_string_list_append(&context.ignore_list, "postgresql.auto.conf"); simple_string_list_append(&context.ignore_list, "recovery.signal"); simple_string_list_append(&context.ignore_list, "standby.signal"); @@ -250,6 +274,15 @@ main(int argc, char **argv) if (format == 't' && !tar_compression_specified) compress_algorithm = find_backup_compression(&context); + /* + * Setup the required callback function to verify plain or tar backup + * files. + */ + if (format == 'p') + verify_backup_file_cb = verify_plain_file_cb; + else + verify_backup_file_cb = verify_tar_file_cb; + /* Unless --no-parse-wal was specified, we will need pg_waldump. */ if (!no_parse_wal) { @@ -645,7 +678,8 @@ verify_backup_directory(verifier_context *context, char *relpath, } /* - * Verify one file (which might actually be a directory or a symlink). + * Verify one file (which might actually be a directory, a symlink or a + * archive). * * The arguments to this function have the same meaning as the arguments to * verify_backup_directory. @@ -654,7 +688,6 @@ static void verify_backup_file(verifier_context *context, char *relpath, char *fullpath) { struct stat sb; - manifest_file *m; if (stat(fullpath, &sb) != 0) { @@ -687,8 +720,25 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath) return; } + /* Do the further verifications */ + verify_backup_file_cb(context, relpath, fullpath, sb.st_size); +} + +/* + * Verify one plan file or a symlink. + * + * The arguments to this function are mostly the same as the + * verify_backup_directory. The additional argument is the file size for + * verifying against manifest entry. + */ +static void +verify_plain_file_cb(verifier_context *context, char *relpath, + char *fullpath, size_t filesize) +{ + manifest_file *m; + /* Check the backup manifest entry for this file. */ - m = verify_manifest_entry(context, relpath, sb.st_size); + m = verify_manifest_entry(context, relpath, filesize); /* Validate the manifest system identifier */ if (should_verify_control_data(context->manifest, m)) @@ -706,6 +756,124 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath) } } +/* + * Verify one tar file. + * + * The arguments to this function are mostly the same as the + * verify_backup_directory. The additional argument is the file size for + * verifying against manifest entry. + */ +static void +verify_tar_file_cb(verifier_context *context, char *relpath, + char *fullpath, size_t filesize) +{ + astreamer *streamer; + Oid tblspc_oid = InvalidOid; + int file_name_len; + int file_extn_len = 0; /* placate compiler */ + char *file_extn = ""; + + /* Should be tar backup */ + Assert(format == 't'); + + /* Find the tar file extension. */ + if (compress_algorithm == PG_COMPRESSION_NONE) + { + file_extn = ".tar"; + file_extn_len = 4; + + } + else if (compress_algorithm == PG_COMPRESSION_GZIP) + { + file_extn = ".tar.gz"; + file_extn_len = 7; + + } + else if (compress_algorithm == PG_COMPRESSION_LZ4) + { + file_extn = ".tar.lz4"; + file_extn_len = 8; + } + else if (compress_algorithm == PG_COMPRESSION_ZSTD) + { + file_extn = ".tar.zst"; + file_extn_len = 8; + } + + /* + * Ensure that we have the correct file type corresponding to the backup + * format. + */ + file_name_len = strlen(relpath); + if (file_name_len < file_extn_len || + strcmp(relpath + file_name_len - file_extn_len, file_extn) != 0) + { + if (compress_algorithm == PG_COMPRESSION_NONE) + report_backup_error(context, + "\"%s\" is not a valid file, expecting tar file", + relpath); + else + report_backup_error(context, + "\"%s\" is not a valid file, expecting \"%s\" compressed tar file", + relpath, + get_compress_algorithm_name(compress_algorithm)); + return; + } + + /* + * For the tablespace, pg_basebackup writes the data out to + * <tablespaceoid>.tar. If a file matches that format, then extract the + * tablespaceoid, which we need to prepare the paths of the files + * belonging to that tablespace relative to the base directory. + */ + if (strspn(relpath, "0123456789") == (file_name_len - file_extn_len)) + tblspc_oid = strtoi64(relpath, NULL, 10); + + streamer = create_archive_verifier(context, relpath, tblspc_oid); + verify_tar_content(context, relpath, fullpath, streamer); + + /* Cleanup. */ + astreamer_finalize(streamer); + astreamer_free(streamer); +} + +/* + * Reads a given tar file in predefined chunks and pass to astreamer. Which + * initiates routines for decompression (if necessary) then verification + * of each member within the tar archive. + */ +static void +verify_tar_content(verifier_context *context, char *relpath, char *fullpath, + astreamer *streamer) +{ + int fd; + int rc; + char *buffer; + + /* Open the target file. */ + if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0) + { + report_backup_error(context, "could not open file \"%s\": %m", + relpath); + return; + } + + buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8)); + + /* Perform the reads */ + while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0) + astreamer_content(streamer, NULL, buffer, rc, ASTREAMER_UNKNOWN); + + if (rc < 0) + report_backup_error(context, "could not read file \"%s\": %m", + relpath); + + /* Close the file. */ + if (close(fd) != 0) + report_backup_error(context, "could not close file \"%s\": %m", + relpath); +} + /* * Verify file and its size entry in the manifest. */ @@ -1058,10 +1226,10 @@ find_backup_format(verifier_context *context) } /* - * To determine the compression format, we will search for the main data - * directory archive and its extension, which starts with base.tar, as * pg_basebackup writes the main data directory to an archive file named - * base.tar followed by a compression type extension like .gz, .lz4, or .zst. + * base.tar, followed by a compression type extension such as .gz, .lz4, or + * .zst. To determine the compression format, we need to search for this main + * data directory archive file. */ static pg_compress_algorithm find_backup_compression(verifier_context *context) @@ -1112,6 +1280,42 @@ find_backup_compression(verifier_context *context) return PG_COMPRESSION_NONE; /* placate compiler */ } +/* + * Identifies the necessary steps for verifying the contents of the + * provided tar file. + */ +static astreamer * +create_archive_verifier(verifier_context *context, char *archive_name, + Oid tblspc_oid) +{ + astreamer *streamer = NULL; + + /* Should be here only for tar backup */ + Assert(format == 't'); + + /* + * To verify the contents of the tar file, the initial step is to parse + * its content. + */ + streamer = astreamer_verify_content_new(streamer, context, archive_name, + tblspc_oid); + streamer = astreamer_tar_parser_new(streamer); + + /* + * If the tar file is compressed, we must perform the appropriate + * decompression operation before proceeding with the verification of its + * contents. + */ + if (compress_algorithm == PG_COMPRESSION_GZIP) + streamer = astreamer_gzip_decompressor_new(streamer); + else if (compress_algorithm == PG_COMPRESSION_LZ4) + streamer = astreamer_lz4_decompressor_new(streamer); + else if (compress_algorithm == PG_COMPRESSION_ZSTD) + streamer = astreamer_zstd_decompressor_new(streamer); + + return streamer; +} + /* * Print a progress report based on the variables in verifier_context. * diff --git a/src/bin/pg_verifybackup/pg_verifybackup.h b/src/bin/pg_verifybackup/pg_verifybackup.h index c88f71ff14b..f0a7c8918fb 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.h +++ b/src/bin/pg_verifybackup/pg_verifybackup.h @@ -137,4 +137,13 @@ extern bool should_ignore_relpath(verifier_context *context, extern void progress_report(verifier_context *context, bool finished); +/* Forward declarations to avoid fe_utils/astreamer.h include. */ +struct astreamer; +typedef struct astreamer astreamer; + +extern astreamer *astreamer_verify_content_new(astreamer *next, + verifier_context *context, + char *archive_name, + Oid tblspc_oid); + #endif /* PG_VERIFYBACKUP_H */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 547d14b3e7c..d86b28b260e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3329,6 +3329,7 @@ astreamer_plain_writer astreamer_recovery_injector astreamer_tar_archiver astreamer_tar_parser +astreamer_verify astreamer_zstd_frame bgworker_main_type bh_node_type -- 2.18.0
From 44a78699dacfa90f36a37c8868ad11a50d53cb12 Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Thu, 1 Aug 2024 15:47:26 +0530 Subject: [PATCH v7 08/12] Refactor: split verify_control_file. Separated the manifest entry verification code into a new function and introduced the should_verify_control_data() macro, similar to should_verify_checksum(). Note that should_verify_checksum() has been slightly modified to include a NULL check for its argument, maintaining the same code structure as should_verify_control_data(). --- src/bin/pg_verifybackup/pg_verifybackup.c | 44 +++++++++++------------ src/bin/pg_verifybackup/pg_verifybackup.h | 18 +++++++++- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 3eddaa2468e..5f055a23a63 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -18,7 +18,6 @@ #include <sys/stat.h> #include <time.h> -#include "common/logging.h" #include "common/parse_manifest.h" #include "fe_utils/simple_list.h" #include "getopt_long.h" @@ -61,8 +60,6 @@ static void verify_backup_directory(verifier_context *context, char *relpath, char *fullpath); static void verify_backup_file(verifier_context *context, char *relpath, char *fullpath); -static void verify_control_file(const char *controlpath, - uint64 manifest_system_identifier); static void report_extra_backup_files(verifier_context *context); static void verify_backup_checksums(verifier_context *context); static void verify_file_checksum(verifier_context *context, @@ -625,14 +622,20 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath) /* Check the backup manifest entry for this file. */ m = verify_manifest_entry(context, relpath, sb.st_size); - /* - * Validate the manifest system identifier, not available in manifest - * version 1. - */ - if (context->manifest->version != 1 && - strcmp(relpath, "global/pg_control") == 0 && - m != NULL && m->matched && !m->bad) - verify_control_file(fullpath, context->manifest->system_identifier); + /* Validate the manifest system identifier */ + if (should_verify_control_data(context->manifest, m)) + { + ControlFileData *control_file; + bool crc_ok; + + pg_log_debug("reading \"%s\"", fullpath); + control_file = get_controlfile_by_exact_path(fullpath, &crc_ok); + + verify_control_data(control_file, fullpath, crc_ok, + context->manifest->system_identifier); + /* Release memory. */ + pfree(control_file); + } } /* @@ -676,18 +679,14 @@ verify_manifest_entry(verifier_context *context, char *relpath, int64 filesize) } /* - * Sanity check control file and validate system identifier against manifest - * system identifier. + * Sanity check control file data and validate system identifier against + * manifest system identifier. */ -static void -verify_control_file(const char *controlpath, uint64 manifest_system_identifier) +void +verify_control_data(ControlFileData *control_file, + const char *controlpath, bool crc_ok, + uint64 manifest_system_identifier) { - ControlFileData *control_file; - bool crc_ok; - - pg_log_debug("reading \"%s\"", controlpath); - control_file = get_controlfile_by_exact_path(controlpath, &crc_ok); - /* Control file contents not meaningful if CRC is bad. */ if (!crc_ok) report_fatal_error("%s: CRC is incorrect", controlpath); @@ -703,9 +702,6 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier) controlpath, (unsigned long long) manifest_system_identifier, (unsigned long long) control_file->system_identifier); - - /* Release memory. */ - pfree(control_file); } /* diff --git a/src/bin/pg_verifybackup/pg_verifybackup.h b/src/bin/pg_verifybackup/pg_verifybackup.h index 1bc5f7a6b4a..c88f71ff14b 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.h +++ b/src/bin/pg_verifybackup/pg_verifybackup.h @@ -16,6 +16,7 @@ #include "common/controldata_utils.h" #include "common/hashfn_unstable.h" +#include "common/logging.h" #include "common/parse_manifest.h" #include "fe_utils/simple_list.h" @@ -44,7 +45,19 @@ typedef struct manifest_file } manifest_file; #define should_verify_checksum(m) \ - (((m)->matched) && !((m)->bad) && (((m)->checksum_type) != CHECKSUM_TYPE_NONE)) + (((m) != NULL) && ((m)->matched) && !((m)->bad) && \ + (((m)->checksum_type) != CHECKSUM_TYPE_NONE)) + +/* + * Validate the manifest system identifier against the control file; this + * feature is not available in manifest version 1. This validation should be + * carried out only if the manifest entry validation is completed without any + * errors. + */ +#define should_verify_control_data(manifest, m) \ + (((manifest)->version != 1) && \ + ((m) != NULL) && ((m)->matched) && !((m)->bad) && \ + (strcmp((m)->pathname, "global/pg_control") == 0)) /* * Define a hash table which we can use to store information about the files @@ -110,6 +123,9 @@ extern manifest_file *verify_manifest_entry(verifier_context *context, extern void verify_checksum(verifier_context *context, manifest_file *m, pg_checksum_context *checksum_ctx, uint8 *checksumbuf); +extern void verify_control_data(ControlFileData *control_file, + const char *controlpath, bool crc_ok, + uint64 manifest_system_identifier); extern void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) -- 2.18.0
From ee279a6a540a640857599951807438cda02f30d6 Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Fri, 2 Aug 2024 16:37:38 +0530 Subject: [PATCH v7 09/12] Refactor: move first and last progress_report call to Main. --- src/bin/pg_verifybackup/pg_verifybackup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 5f055a23a63..801e13886c2 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -253,7 +253,10 @@ main(int argc, char **argv) * read, which occurs only when checksum verification is enabled. */ if (!context.skip_checksums) + { compute_total_size(&context); + progress_report(&context, false); + } /* * Now scan the files in the backup directory. At this stage, we verify @@ -275,7 +278,10 @@ main(int argc, char **argv) * told to skip it. */ if (!context.skip_checksums) + { verify_backup_checksums(&context); + progress_report(&context, true); + } /* * Try to parse the required ranges of WAL records, unless we were told @@ -736,8 +742,6 @@ verify_backup_checksums(verifier_context *context) manifest_file *m; uint8 *buffer; - progress_report(context, false); - buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8)); manifest_files_start_iterate(manifest->files, &it); @@ -761,8 +765,6 @@ verify_backup_checksums(verifier_context *context) } pfree(buffer); - - progress_report(context, true); } /* -- 2.18.0
From 37148e750aa68092eaa3e13c0c46cb4978c3f67a Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Thu, 1 Aug 2024 12:15:26 +0530 Subject: [PATCH v7 06/12] Refactor: split verify_backup_file() function. Move the manifest entry verification code into a new function as verify_manifest_entry(). And the total size computation code into another new function, compute_total_size(), which is called from the main. --- src/bin/pg_verifybackup/pg_verifybackup.c | 76 ++++++++++++++++++----- src/bin/pg_verifybackup/pg_verifybackup.h | 3 + 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 4e42757c346..ab6bda8c9dc 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -72,6 +72,7 @@ static void parse_required_wal(verifier_context *context, char *pg_waldump_path, char *wal_directory); +static void compute_total_size(verifier_context *context); static void usage(void); static const char *progname; @@ -250,6 +251,13 @@ main(int argc, char **argv) */ context.manifest = parse_manifest_file(manifest_path); + /* + * For the progress report, compute the total size of the files to be + * read, which occurs only when checksum verification is enabled. + */ + if (!context.skip_checksums) + compute_total_size(&context); + /* * Now scan the files in the backup directory. At this stage, we verify * that every file on disk is present in the manifest and that the sizes @@ -614,6 +622,27 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath) return; } + /* Check the backup manifest entry for this file. */ + m = verify_manifest_entry(context, relpath, sb.st_size); + + /* + * Validate the manifest system identifier, not available in manifest + * version 1. + */ + if (context->manifest->version != 1 && + strcmp(relpath, "global/pg_control") == 0 && + m != NULL && m->matched && !m->bad) + verify_control_file(fullpath, context->manifest->system_identifier); +} + +/* + * Verify file and its size entry in the manifest. + */ +manifest_file * +verify_manifest_entry(verifier_context *context, char *relpath, int64 filesize) +{ + manifest_file *m; + /* Check whether there's an entry in the manifest hash. */ m = manifest_files_lookup(context->manifest->files, relpath); if (m == NULL) @@ -621,40 +650,29 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath) report_backup_error(context, "\"%s\" is present on disk but not in the manifest", relpath); - return; + return NULL; } /* Flag this entry as having been encountered in the filesystem. */ m->matched = true; /* Check that the size matches. */ - if (m->size != sb.st_size) + if (m->size != filesize) { report_backup_error(context, "\"%s\" has size %lld on disk but size %zu in the manifest", - relpath, (long long int) sb.st_size, m->size); + relpath, (long long int) filesize, m->size); m->bad = true; } - /* - * Validate the manifest system identifier, not available in manifest - * version 1. - */ - if (context->manifest->version != 1 && - strcmp(relpath, "global/pg_control") == 0) - verify_control_file(fullpath, context->manifest->system_identifier); - - /* Update statistics for progress report, if necessary */ - if (context->show_progress && !context->skip_checksums && - should_verify_checksum(m)) - context->total_size += m->size; - /* * We don't verify checksums at this stage. We first finish verifying that * we have the expected set of files with the expected sizes, and only * afterwards verify the checksums. That's because computing checksums may * take a while, and we'd like to report more obvious problems quickly. */ + + return m; } /* @@ -817,7 +835,7 @@ verify_file_checksum(verifier_context *context, manifest_file *m, /* * Double-check that we read the expected number of bytes from the file. - * Normally, a file size mismatch would be caught in verify_backup_file + * Normally, a file size mismatch would be caught in verify_manifest_entry * and this check would never be reached, but this provides additional * safety and clarity in the event of concurrent modifications or * filesystem misbehavior. @@ -988,6 +1006,30 @@ progress_report(verifier_context *context, bool finished) fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr); } +/* + * Compute the total size of backup files for progress reporting. + */ +static void +compute_total_size(verifier_context *context) +{ + manifest_data *manifest = context->manifest; + manifest_files_iterator it; + manifest_file *m; + uint64 total_size = 0; + + if (!context->show_progress) + return; + + manifest_files_start_iterate(manifest->files, &it); + while ((m = manifest_files_iterate(manifest->files, &it)) != NULL) + { + if (!should_ignore_relpath(context, m->pathname)) + total_size += m->size; + } + + context->total_size = total_size; +} + /* * Print out usage information and exit. */ diff --git a/src/bin/pg_verifybackup/pg_verifybackup.h b/src/bin/pg_verifybackup/pg_verifybackup.h index 90900048547..98c75916255 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.h +++ b/src/bin/pg_verifybackup/pg_verifybackup.h @@ -105,6 +105,9 @@ typedef struct verifier_context uint64 done_size; } verifier_context; +extern manifest_file *verify_manifest_entry(verifier_context *context, + char *relpath, int64 filesize); + extern void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) pg_attribute_printf(2, 3); -- 2.18.0
From 6ec5031183fe3c42f0dbc69ba5374ea302116ef7 Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Thu, 1 Aug 2024 16:45:55 +0530 Subject: [PATCH v7 07/12] Refactor: split verify_file_checksum() function. Move the core functionality of verify_file_checksum to a new function to reuse it instead of duplicating the code. --- src/bin/pg_verifybackup/pg_verifybackup.c | 18 ++++++++++++++++-- src/bin/pg_verifybackup/pg_verifybackup.h | 3 +++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index ab6bda8c9dc..3eddaa2468e 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -782,7 +782,6 @@ verify_file_checksum(verifier_context *context, manifest_file *m, int rc; size_t bytes_read = 0; uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; - int checksumlen; /* Open the target file. */ if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0) @@ -848,8 +847,23 @@ verify_file_checksum(verifier_context *context, manifest_file *m, return; } + /* Do the final computation and verification. */ + verify_checksum(context, m, &checksum_ctx, checksumbuf); +} + +/* + * A helper function to finalize checksum computation and verify it against the + * backup manifest information. + */ +void +verify_checksum(verifier_context *context, manifest_file *m, + pg_checksum_context *checksum_ctx, uint8 *checksumbuf) +{ + int checksumlen; + const char *relpath = m->pathname; + /* Get the final checksum. */ - checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf); + checksumlen = pg_checksum_final(checksum_ctx, checksumbuf); if (checksumlen < 0) { report_backup_error(context, diff --git a/src/bin/pg_verifybackup/pg_verifybackup.h b/src/bin/pg_verifybackup/pg_verifybackup.h index 98c75916255..1bc5f7a6b4a 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.h +++ b/src/bin/pg_verifybackup/pg_verifybackup.h @@ -107,6 +107,9 @@ typedef struct verifier_context extern manifest_file *verify_manifest_entry(verifier_context *context, char *relpath, int64 filesize); +extern void verify_checksum(verifier_context *context, manifest_file *m, + pg_checksum_context *checksum_ctx, + uint8 *checksumbuf); extern void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) -- 2.18.0
From bb127a0c8f25d15ee9c10d111be7611ce58dcb39 Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Thu, 1 Aug 2024 12:10:34 +0530 Subject: [PATCH v7 05/12] Refactor: move some part of pg_verifybackup.c to pg_verifybackup.h --- src/bin/pg_verifybackup/pg_verifybackup.c | 102 +------------------ src/bin/pg_verifybackup/pg_verifybackup.h | 118 ++++++++++++++++++++++ 2 files changed, 123 insertions(+), 97 deletions(-) create mode 100644 src/bin/pg_verifybackup/pg_verifybackup.h diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 71585ffc50e..4e42757c346 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -18,12 +18,11 @@ #include <sys/stat.h> #include <time.h> -#include "common/controldata_utils.h" -#include "common/hashfn_unstable.h" #include "common/logging.h" #include "common/parse_manifest.h" #include "fe_utils/simple_list.h" #include "getopt_long.h" +#include "pg_verifybackup.h" #include "pgtime.h" /* @@ -40,89 +39,6 @@ */ #define ESTIMATED_BYTES_PER_MANIFEST_LINE 100 -/* - * How many bytes should we try to read from a file at once? - */ -#define READ_CHUNK_SIZE (128 * 1024) - -/* - * Each file described by the manifest file is parsed to produce an object - * like this. - */ -typedef struct manifest_file -{ - uint32 status; /* hash status */ - const char *pathname; - size_t size; - pg_checksum_type checksum_type; - int checksum_length; - uint8 *checksum_payload; - bool matched; - bool bad; -} manifest_file; - -#define should_verify_checksum(m) \ - (((m)->matched) && !((m)->bad) && (((m)->checksum_type) != CHECKSUM_TYPE_NONE)) - -/* - * Define a hash table which we can use to store information about the files - * mentioned in the backup manifest. - */ -#define SH_PREFIX manifest_files -#define SH_ELEMENT_TYPE manifest_file -#define SH_KEY_TYPE const char * -#define SH_KEY pathname -#define SH_HASH_KEY(tb, key) hash_string(key) -#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) -#define SH_SCOPE static inline -#define SH_RAW_ALLOCATOR pg_malloc0 -#define SH_DECLARE -#define SH_DEFINE -#include "lib/simplehash.h" - -/* - * Each WAL range described by the manifest file is parsed to produce an - * object like this. - */ -typedef struct manifest_wal_range -{ - TimeLineID tli; - XLogRecPtr start_lsn; - XLogRecPtr end_lsn; - struct manifest_wal_range *next; - struct manifest_wal_range *prev; -} manifest_wal_range; - -/* - * All the data parsed from a backup_manifest file. - */ -typedef struct manifest_data -{ - int version; - uint64 system_identifier; - manifest_files_hash *files; - manifest_wal_range *first_wal_range; - manifest_wal_range *last_wal_range; -} manifest_data; - -/* - * All of the context information we need while checking a backup manifest. - */ -typedef struct verifier_context -{ - manifest_data *manifest; - char *backup_directory; - SimpleStringList ignore_list; - bool skip_checksums; - bool exit_on_error; - bool saw_any_error; - - /* Progress indicators */ - bool show_progress; - uint64 total_size; - uint64 done_size; -} verifier_context; - static manifest_data *parse_manifest_file(char *manifest_path); static void verifybackup_version_cb(JsonManifestParseContext *context, int manifest_version); @@ -156,14 +72,6 @@ static void parse_required_wal(verifier_context *context, char *pg_waldump_path, char *wal_directory); -static void report_backup_error(verifier_context *context, - const char *pg_restrict fmt,...) - pg_attribute_printf(2, 3); -static void report_fatal_error(const char *pg_restrict fmt,...) - pg_attribute_printf(1, 2) pg_attribute_noreturn(); -static bool should_ignore_relpath(verifier_context *context, const char *relpath); - -static void progress_report(verifier_context *context, bool finished); static void usage(void); static const char *progname; @@ -978,7 +886,7 @@ parse_required_wal(verifier_context *context, char *pg_waldump_path, * Update the context to indicate that we saw an error, and exit if the * context says we should. */ -static void +void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) { va_list ap; @@ -995,7 +903,7 @@ report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) /* * Report a fatal error and exit */ -static void +void report_fatal_error(const char *pg_restrict fmt,...) { va_list ap; @@ -1014,7 +922,7 @@ report_fatal_error(const char *pg_restrict fmt,...) * Note that by "prefix" we mean a parent directory; for this purpose, * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc". */ -static bool +bool should_ignore_relpath(verifier_context *context, const char *relpath) { SimpleStringListCell *cell; @@ -1043,7 +951,7 @@ should_ignore_relpath(verifier_context *context, const char *relpath) * If finished is set to true, this is the last progress report. The cursor * is moved to the next line. */ -static void +void progress_report(verifier_context *context, bool finished) { static pg_time_t last_progress_report = 0; diff --git a/src/bin/pg_verifybackup/pg_verifybackup.h b/src/bin/pg_verifybackup/pg_verifybackup.h new file mode 100644 index 00000000000..90900048547 --- /dev/null +++ b/src/bin/pg_verifybackup/pg_verifybackup.h @@ -0,0 +1,118 @@ +/*------------------------------------------------------------------------- + * + * pg_verifybackup.h + * Verify a backup against a backup manifest. + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/bin/pg_verifybackup/pg_verifybackup.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PG_VERIFYBACKUP_H +#define PG_VERIFYBACKUP_H + +#include "common/controldata_utils.h" +#include "common/hashfn_unstable.h" +#include "common/parse_manifest.h" +#include "fe_utils/simple_list.h" + +extern bool show_progress; +extern bool skip_checksums; + +/* + * How many bytes should we try to read from a file at once? + */ +#define READ_CHUNK_SIZE (128 * 1024) + +/* + * Each file described by the manifest file is parsed to produce an object + * like this. + */ +typedef struct manifest_file +{ + uint32 status; /* hash status */ + const char *pathname; + size_t size; + pg_checksum_type checksum_type; + int checksum_length; + uint8 *checksum_payload; + bool matched; + bool bad; +} manifest_file; + +#define should_verify_checksum(m) \ + (((m)->matched) && !((m)->bad) && (((m)->checksum_type) != CHECKSUM_TYPE_NONE)) + +/* + * Define a hash table which we can use to store information about the files + * mentioned in the backup manifest. + */ +#define SH_PREFIX manifest_files +#define SH_ELEMENT_TYPE manifest_file +#define SH_KEY_TYPE const char * +#define SH_KEY pathname +#define SH_HASH_KEY(tb, key) hash_string(key) +#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) +#define SH_SCOPE static inline +#define SH_RAW_ALLOCATOR pg_malloc0 +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +/* + * Each WAL range described by the manifest file is parsed to produce an + * object like this. + */ +typedef struct manifest_wal_range +{ + TimeLineID tli; + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + struct manifest_wal_range *next; + struct manifest_wal_range *prev; +} manifest_wal_range; + +/* + * All the data parsed from a backup_manifest file. + */ +typedef struct manifest_data +{ + int version; + uint64 system_identifier; + manifest_files_hash *files; + manifest_wal_range *first_wal_range; + manifest_wal_range *last_wal_range; +} manifest_data; + +/* + * All of the context information we need while checking a backup manifest. + */ +typedef struct verifier_context +{ + manifest_data *manifest; + char *backup_directory; + SimpleStringList ignore_list; + bool skip_checksums; + bool exit_on_error; + bool saw_any_error; + + /* Progress indicators */ + bool show_progress; + uint64 total_size; + uint64 done_size; +} verifier_context; + +extern void report_backup_error(verifier_context *context, + const char *pg_restrict fmt,...) + pg_attribute_printf(2, 3); +extern void report_fatal_error(const char *pg_restrict fmt,...) + pg_attribute_printf(1, 2) pg_attribute_noreturn(); +extern bool should_ignore_relpath(verifier_context *context, + const char *relpath); + +extern void progress_report(verifier_context *context, bool finished); + +#endif /* PG_VERIFYBACKUP_H */ -- 2.18.0
From 0ef14ab4be362f6ab48c6ebd501d3036ba4d21d9 Mon Sep 17 00:00:00 2001 From: Robert Haas <rhaas@postgresql.org> Date: Tue, 6 Aug 2024 10:23:45 +0530 Subject: [PATCH v7] Improve file header comments for astramer code. Make it clear that "astreamer" stands for "archive streamer". Generalize comments that still believe this code can only be used by pg_basebackup. Add some comments explaining the asymmetry between the gzip, lz4, and zstd astreamers, in the hopes of making life easier for anyone who hacks on this code in the future. --- src/fe_utils/astreamer_file.c | 4 ++++ src/fe_utils/astreamer_gzip.c | 15 +++++++++++++++ src/fe_utils/astreamer_lz4.c | 4 ++++ src/fe_utils/astreamer_zstd.c | 4 ++++ src/include/fe_utils/astreamer.h | 21 +++++++++++++++------ 5 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/fe_utils/astreamer_file.c b/src/fe_utils/astreamer_file.c index 13d1192c6e6..c9a030853bc 100644 --- a/src/fe_utils/astreamer_file.c +++ b/src/fe_utils/astreamer_file.c @@ -2,6 +2,10 @@ * * astreamer_file.c * + * Archive streamers that write to files. astreamer_plain_writer writes + * the whole archive to a single file, and astreamer_extractor writes + * each archive member to a separate file in a given directory. + * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * * IDENTIFICATION diff --git a/src/fe_utils/astreamer_gzip.c b/src/fe_utils/astreamer_gzip.c index dd28defac7b..1c773a23848 100644 --- a/src/fe_utils/astreamer_gzip.c +++ b/src/fe_utils/astreamer_gzip.c @@ -2,6 +2,21 @@ * * astreamer_gzip.c * + * Archive streamers that deal with data compressed using gzip. + * astreamer_gzip_writer applies gzip compression to the input data + * and writes the result to a file. astreamer_gzip_decompressor assumes + * that the input stream is compressed using gzip and decompresses it. + * + * Note that the code in this file is asymmetric with what we do for + * other compression types: for lz4 and zstd, there is a compressor and + * a decompressor, rather than a writer and a decompressor. The approach + * taken here is less flexible, because a writer can only write to a file, + * while a compressor can write to a subsequent astreamer which is free + * to do whatever it likes. The reason it's like this is because this + * code was adapated from old, less-modular pg_basebackup that used the + * same APIs that astreamer_gzip_writer uses, and it didn't seem + * necessary to change anything at the time. + * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * * IDENTIFICATION diff --git a/src/fe_utils/astreamer_lz4.c b/src/fe_utils/astreamer_lz4.c index d8b2a367e47..2bf14084e7f 100644 --- a/src/fe_utils/astreamer_lz4.c +++ b/src/fe_utils/astreamer_lz4.c @@ -2,6 +2,10 @@ * * astreamer_lz4.c * + * Archive streamers that deal with data compressed using lz4. + * astreamer_lz4_compressor applies lz4 compression to the input stream, + * and astreamer_lz4_decompressor does the reverse. + * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * * IDENTIFICATION diff --git a/src/fe_utils/astreamer_zstd.c b/src/fe_utils/astreamer_zstd.c index 45f6cb67363..4b2d42b2311 100644 --- a/src/fe_utils/astreamer_zstd.c +++ b/src/fe_utils/astreamer_zstd.c @@ -2,6 +2,10 @@ * * astreamer_zstd.c * + * Archive streamers that deal with data compressed using zstd. + * astreamer_zstd_compressor applies lz4 compression to the input stream, + * and astreamer_zstd_decompressor does the reverse. + * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * * IDENTIFICATION diff --git a/src/include/fe_utils/astreamer.h b/src/include/fe_utils/astreamer.h index 2c014dbddbe..570cfba3040 100644 --- a/src/include/fe_utils/astreamer.h +++ b/src/include/fe_utils/astreamer.h @@ -2,9 +2,18 @@ * * astreamer.h * - * Each tar archive returned by the server is passed to one or more - * astreamer objects for further processing. The astreamer may do - * something simple, like write the archive to a file, perhaps after + * The "archive streamer" interface is intended to allow frontend code + * to stream from possibly-compressed archive files from any source and + * perform arbitrary actions based on the contents of those archives. + * Archive streamers are intended to be composable, and most tasks will + * require two or more archive streamers to complete. For instance, + * if the input is an uncompressed tar stream, a tar parser astreamer + * could be used to interpret it, and then an extractor astreamer could + * be used to write each archive member out to a file. + * + * In general, each archive streamer is relatively free to take whatever + * action it desires in the stream of chunks provided by the caller. It + * may do something simple, like write the archive to a file, perhaps after * compressing it, but it can also do more complicated things, like * annotating the byte stream to indicate which parts of the data * correspond to tar headers or trailing padding, vs. which parts are @@ -33,9 +42,9 @@ typedef struct astreamer_ops astreamer_ops; /* * Each chunk of archive data passed to a astreamer is classified into one - * of these categories. When data is first received from the remote server, - * each chunk will be categorized as ASTREAMER_UNKNOWN, and the chunks will - * be of whatever size the remote server chose to send. + * of these categories. When data is initially passed to an archive streamer, + * each chunk will be categorized as ASTREAMER_UNKNOWN, and the chunks can + * be of whatever size the caller finds convenient. * * If the archive is parsed (e.g. see astreamer_tar_parser_new()), then all * chunks should be labelled as one of the other types listed here. In -- 2.18.0
From 43fb489aecb872cc6f9a59ebbdca9c5a1110ea72 Mon Sep 17 00:00:00 2001 From: Amul Sul <amul.sul@enterprisedb.com> Date: Wed, 31 Jul 2024 11:43:52 +0530 Subject: [PATCH v7 04/12] Refactor: move few global variable to verifier_context struct Global variables are: 1. show_progress 2. skip_checksums 3. total_size 4. done_size --- src/bin/pg_verifybackup/pg_verifybackup.c | 50 +++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index d77e70fbe38..71585ffc50e 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -113,8 +113,14 @@ typedef struct verifier_context manifest_data *manifest; char *backup_directory; SimpleStringList ignore_list; + bool skip_checksums; bool exit_on_error; bool saw_any_error; + + /* Progress indicators */ + bool show_progress; + uint64 total_size; + uint64 done_size; } verifier_context; static manifest_data *parse_manifest_file(char *manifest_path); @@ -157,19 +163,11 @@ static void report_fatal_error(const char *pg_restrict fmt,...) pg_attribute_printf(1, 2) pg_attribute_noreturn(); static bool should_ignore_relpath(verifier_context *context, const char *relpath); -static void progress_report(bool finished); +static void progress_report(verifier_context *context, bool finished); static void usage(void); static const char *progname; -/* options */ -static bool show_progress = false; -static bool skip_checksums = false; - -/* Progress indicators */ -static uint64 total_size = 0; -static uint64 done_size = 0; - /* * Main entry point. */ @@ -260,13 +258,13 @@ main(int argc, char **argv) no_parse_wal = true; break; case 'P': - show_progress = true; + context.show_progress = true; break; case 'q': quiet = true; break; case 's': - skip_checksums = true; + context.skip_checksums = true; break; case 'w': wal_directory = pstrdup(optarg); @@ -299,7 +297,7 @@ main(int argc, char **argv) } /* Complain if the specified arguments conflict */ - if (show_progress && quiet) + if (context.show_progress && quiet) pg_fatal("cannot specify both %s and %s", "-P/--progress", "-q/--quiet"); @@ -363,7 +361,7 @@ main(int argc, char **argv) * Now do the expensive work of verifying file checksums, unless we were * told to skip it. */ - if (!skip_checksums) + if (!context.skip_checksums) verify_backup_checksums(&context); /* @@ -739,8 +737,9 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath) verify_control_file(fullpath, context->manifest->system_identifier); /* Update statistics for progress report, if necessary */ - if (show_progress && !skip_checksums && should_verify_checksum(m)) - total_size += m->size; + if (context->show_progress && !context->skip_checksums && + should_verify_checksum(m)) + context->total_size += m->size; /* * We don't verify checksums at this stage. We first finish verifying that @@ -815,7 +814,7 @@ verify_backup_checksums(verifier_context *context) manifest_file *m; uint8 *buffer; - progress_report(false); + progress_report(context, false); buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8)); @@ -841,7 +840,7 @@ verify_backup_checksums(verifier_context *context) pfree(buffer); - progress_report(true); + progress_report(context, true); } /* @@ -889,8 +888,8 @@ verify_file_checksum(verifier_context *context, manifest_file *m, } /* Report progress */ - done_size += rc; - progress_report(false); + context->done_size += rc; + progress_report(context, false); } if (rc < 0) report_backup_error(context, "could not read file \"%s\": %m", @@ -1036,7 +1035,7 @@ should_ignore_relpath(verifier_context *context, const char *relpath) } /* - * Print a progress report based on the global variables. + * Print a progress report based on the variables in verifier_context. * * Progress report is written at maximum once per second, unless the finished * parameter is set to true. @@ -1045,7 +1044,7 @@ should_ignore_relpath(verifier_context *context, const char *relpath) * is moved to the next line. */ static void -progress_report(bool finished) +progress_report(verifier_context *context, bool finished) { static pg_time_t last_progress_report = 0; pg_time_t now; @@ -1053,7 +1052,7 @@ progress_report(bool finished) char totalsize_str[32]; char donesize_str[32]; - if (!show_progress) + if (!context->show_progress) return; now = time(NULL); @@ -1061,12 +1060,13 @@ progress_report(bool finished) return; /* Max once per second */ last_progress_report = now; - percent_size = total_size ? (int) ((done_size * 100 / total_size)) : 0; + percent_size = context->total_size ? + (int) ((context->done_size * 100 / context->total_size)) : 0; snprintf(totalsize_str, sizeof(totalsize_str), UINT64_FORMAT, - total_size / 1024); + context->total_size / 1024); snprintf(donesize_str, sizeof(donesize_str), UINT64_FORMAT, - done_size / 1024); + context->done_size / 1024); fprintf(stderr, _("%*s/%s kB (%d%%) verified"), -- 2.18.0