Hi, Attached is a new set of patches adding support for tablespace handling.
This patchset also fixes the issues reported by Vignesh, Robert, Jeevan Ladhe, and Dilip Kumar. Please have a look and let me know if I have missed addressing any of the review comments. Thanks -- Jeevan Chalke Technical Architect, Product Development EnterpriseDB Corporation The Enterprise PostgreSQL Company
From 7cc2fb70188676406ca883055467aff602438fcd Mon Sep 17 00:00:00 2001 From: Jeevan Chalke <jeevan.cha...@enterprisedb.com> Date: Mon, 9 Sep 2019 10:38:01 +0530 Subject: [PATCH v2 1/4] Add support for command line option to pass LSN. This adds [ LSN 'lsn' ] to BASE_BACKUP command and --lsn=LSN to the pg_basebackup binary. Also, add small tests. --- src/backend/replication/basebackup.c | 20 ++++++++++++++++++++ src/backend/replication/repl_gram.y | 6 ++++++ src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 15 +++++++++++++-- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 12 +++++++++++- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 6aab8d7..e72bf8e 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -38,6 +38,7 @@ #include "storage/ipc.h" #include "storage/reinit.h" #include "utils/builtins.h" +#include "utils/pg_lsn.h" #include "utils/ps_status.h" #include "utils/relcache.h" #include "utils/timestamp.h" @@ -52,6 +53,7 @@ typedef struct bool includewal; uint32 maxrate; bool sendtblspcmapfile; + XLogRecPtr lsn; } basebackup_options; @@ -652,6 +654,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_maxrate = false; bool o_tablespace_map = false; bool o_noverify_checksums = false; + bool o_lsn = false; MemSet(opt, 0, sizeof(*opt)); foreach(lopt, options) @@ -740,6 +743,23 @@ parse_basebackup_options(List *options, basebackup_options *opt) noverify_checksums = true; o_noverify_checksums = true; } + else if (strcmp(defel->defname, "lsn") == 0) + { + bool have_error = false; + + if (o_lsn) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + o_lsn = true; + + /* Validate given LSN and convert it into XLogRecPtr. 
*/ + opt->lsn = pg_lsn_in_internal(strVal(defel->arg), &have_error); + if (XLogRecPtrIsInvalid(opt->lsn)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid value for LSN"))); + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index c4e11cc..c24d319 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void); %token K_EXPORT_SNAPSHOT %token K_NOEXPORT_SNAPSHOT %token K_USE_SNAPSHOT +%token K_LSN %type <node> command %type <node> base_backup start_replication start_logical_replication @@ -214,6 +215,11 @@ base_backup_opt: $$ = makeDefElem("noverify_checksums", (Node *)makeInteger(true), -1); } + | K_LSN SCONST + { + $$ = makeDefElem("lsn", + (Node *)makeString($2), -1); + } ; create_replication_slot: diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index 380faeb..77b5af4 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -107,6 +107,7 @@ EXPORT_SNAPSHOT { return K_EXPORT_SNAPSHOT; } NOEXPORT_SNAPSHOT { return K_NOEXPORT_SNAPSHOT; } USE_SNAPSHOT { return K_USE_SNAPSHOT; } WAIT { return K_WAIT; } +LSN { return K_LSN; } "," { return ','; } ";" { return ';'; } diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 7986872..1791853 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -105,6 +105,7 @@ static bool temp_replication_slot = true; static bool create_slot = false; static bool no_slot = false; static bool verify_checksums = true; +static char *lsn = NULL; static bool success = false; static bool made_new_pgdata = false; @@ -341,6 +342,7 @@ usage(void) " include required WAL files with specified method\n")); printf(_(" -z, --gzip compress tar output\n")); printf(_(" -Z, 
--compress=0-9 compress tar output with given compression level\n")); + printf(_(" --lsn=LSN incremental backup, using LSN as threshold\n")); printf(_("\nGeneral options:\n")); printf(_(" -c, --checkpoint=fast|spread\n" " set fast or spread checkpointing\n")); @@ -1805,6 +1807,7 @@ BaseBackup(void) maxServerMajor; int serverVersion, serverMajor; + char *lsn_clause = NULL; Assert(conn != NULL); @@ -1871,8 +1874,11 @@ BaseBackup(void) fprintf(stderr, "\n"); } + if (lsn) + lsn_clause = psprintf("LSN '%s'", lsn); + basebkp = - psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s", + psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s", escaped_label, showprogress ? "PROGRESS" : "", includewal == FETCH_WAL ? "WAL" : "", @@ -1880,7 +1886,8 @@ BaseBackup(void) includewal == NO_WAL ? "" : "NOWAIT", maxrate_clause ? maxrate_clause : "", format == 't' ? "TABLESPACE_MAP" : "", - verify_checksums ? "" : "NOVERIFY_CHECKSUMS"); + verify_checksums ? "" : "NOVERIFY_CHECKSUMS", + lsn_clause ? lsn_clause : ""); if (PQsendQuery(conn, basebkp) == 0) { @@ -2199,6 +2206,7 @@ main(int argc, char **argv) {"waldir", required_argument, NULL, 1}, {"no-slot", no_argument, NULL, 2}, {"no-verify-checksums", no_argument, NULL, 3}, + {"lsn", required_argument, NULL, 4}, {NULL, 0, NULL, 0} }; int c; @@ -2367,6 +2375,9 @@ main(int argc, char **argv) case 3: verify_checksums = false; break; + case 4: + lsn = pg_strdup(optarg); + break; default: /* diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index b7d36b6..fd8e187 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -6,7 +6,7 @@ use File::Basename qw(basename dirname); use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 106; +use Test::More tests => 108; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -556,5 +556,15 @@ $node->command_ok( 'pg_basebackup with -k does not 
report checksum mismatch'); rmtree("$tempdir/backup_corrupt4"); +# check LSN +$node->command_fails( + [ 'pg_basebackup', '-D', "$tempdir/lsn_test", '--lsn', "0/INVALID" ], + 'pg_basebackup with invalid LSN fails'); +$node->command_ok( + [ 'pg_basebackup', '-D', "$tempdir/lsn_test", '--lsn', "0/ABCDEF01", '--no-verify-checksums' ], + 'pg_basebackup with valid LSN'); +rmtree("$tempdir/lsn_test"); + + $node->safe_psql('postgres', "DROP TABLE corrupt1;"); $node->safe_psql('postgres', "DROP TABLE corrupt2;"); -- 1.8.3.1
From 2dce92d30b7de406bffc449642a6f1fc0cb2141e Mon Sep 17 00:00:00 2001 From: Jeevan Chalke <jeevan.cha...@enterprisedb.com> Date: Mon, 9 Sep 2019 12:29:27 +0530 Subject: [PATCH v2 4/4] Add support to combine files using pg_combinebackup. --- doc/src/sgml/ref/allfiles.sgml | 1 + doc/src/sgml/ref/pg_basebackup.sgml | 2 +- doc/src/sgml/ref/pg_combinebackup.sgml | 232 +++++ doc/src/sgml/reference.sgml | 1 + src/bin/Makefile | 1 + src/bin/pg_combinebackup/Makefile | 44 + src/bin/pg_combinebackup/pg_combinebackup.c | 1225 +++++++++++++++++++++++++++ 7 files changed, 1505 insertions(+), 1 deletion(-) create mode 100644 doc/src/sgml/ref/pg_combinebackup.sgml create mode 100644 src/bin/pg_combinebackup/Makefile create mode 100644 src/bin/pg_combinebackup/pg_combinebackup.c diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 8d91f35..f3e90b6 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -200,6 +200,7 @@ Complete list of usable sgml source files in this directory. <!ENTITY pgBasebackup SYSTEM "pg_basebackup.sgml"> <!ENTITY pgbench SYSTEM "pgbench.sgml"> <!ENTITY pgChecksums SYSTEM "pg_checksums.sgml"> +<!ENTITY pgCombinebackup SYSTEM "pg_combinebackup.sgml"> <!ENTITY pgConfig SYSTEM "pg_config-ref.sgml"> <!ENTITY pgControldata SYSTEM "pg_controldata.sgml"> <!ENTITY pgCtl SYSTEM "pg_ctl-ref.sgml"> diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 9e2b9b8..004a4e4 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -415,7 +415,7 @@ PostgreSQL documentation which are modified after this given LSN will be backed up. The file which has these partial blocks has .partial as an extension. Backup taken in this manner has to be combined with the full backup with the - <command>pg_combinebackup</command> utility. The value of LSN should + <xref linkend="app-pgcombinebackup"/> utility. 
The value of LSN should match the <literal>START WAL LOCATION</literal> of previously taken full or incremental backup from <literal>backup_label</literal> file. </para> diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml new file mode 100644 index 0000000..9cccf9e --- /dev/null +++ b/doc/src/sgml/ref/pg_combinebackup.sgml @@ -0,0 +1,232 @@ +<!-- +doc/src/sgml/ref/pg_combinebackup.sgml +PostgreSQL documentation +--> + +<refentry id="app-pgcombinebackup"> + <indexterm zone="app-pgcombinebackup"> + <primary>pg_combinebackup</primary> + </indexterm> + + <refmeta> + <refentrytitle><application>pg_combinebackup</application></refentrytitle> + <manvolnum>1</manvolnum> + <refmiscinfo>Application</refmiscinfo> + </refmeta> + + <refnamediv> + <refname>pg_combinebackup</refname> + <refpurpose>create a synthetic backup from a full backup and one or more incremental backups</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>pg_combinebackup</command> + <arg rep="repeat" choice="opt"><replaceable class="parameter">option</replaceable></arg> + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1 id="r1-app-pg_combinebackup-1"> + <title>Description</title> + <para> + <application>pg_combinebackup</application> combines one or more incremental + backups with the full base-backup to generate a synthetic backup. + </para> + </refsect1> + + <refsect1> + <title>Options</title> + + <para> + The following command-line options are available: + + <variablelist> + <varlistentry> + <term><option>-f <replaceable>directory</replaceable></option></term> + <term><option>--full-backup=<replaceable>directory</replaceable></option></term> + <listitem> + <para> + Specifies the directory where the full backup is stored. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-i <replaceable>directory</replaceable></option></term> + <term><option>--incr-backup=<replaceable>directory</replaceable></option></term> + <listitem> + <para> + Specifies the directory where the incremental backup is stored. If + there are more than one incremental backups to be combined with the + full backup, then they must be provided in the order the backups were + taken. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-T <replaceable class="parameter">olddir</replaceable>=<replaceable class="parameter">newdir</replaceable></option></term> + <term><option>--tablespace-mapping=<replaceable class="parameter">olddir</replaceable>=<replaceable class="parameter">newdir</replaceable></option></term> + <listitem> + <para> + Relocate the tablespace in directory <replaceable>olddir</replaceable> + to <replaceable>newdir</replaceable> during combining the backup. To be + effective, <replaceable>olddir</replaceable> must exactly match the + path specification of the tablespace as it is in the last incremental + backup directory. (But it is not an error if there is no tablespace + in <replaceable>olddir</replaceable> contained in that backup.) + Both <replaceable>olddir</replaceable> + and <replaceable>newdir</replaceable> must be absolute paths. If a + path happens to contain a <literal>=</literal> sign, escape it with a + backslash. This option can be specified multiple times for multiple + tablespaces. + </para> + + <para> + If a tablespace is relocated in this way, the symbolic links inside + the combined data directory are updated to point to the new location. + so the new data directory is ready to be used for a new server instance + with all tablespaces in the updated locations. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-o <replaceable>directory</replaceable></option></term> + <term><option>--output-backup=<replaceable>directory</replaceable></option></term> + <listitem> + <para> + Specifies the output directory where the combined full synthetic backup + to be stored. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-l <replaceable class="parameter">label</replaceable></option></term> + <term><option>--label=<replaceable class="parameter">label</replaceable></option></term> + <listitem> + <para> + Sets the label for the combined backup. If none is specified, a default + value of <quote><literal>pg_combinebackup combined full backup</literal></quote> + will be used. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-n</option></term> + <term><option>--no-clean</option></term> + <listitem> + <para> + By default, when <command>pg_combinebackup</command> aborts with an + error, it removes the output data directories it might have created + before discovering that it cannot finish the job. This option inhibits + tidying-up and is thus useful for debugging. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-v</option></term> + <term><option>--verbose</option></term> + <listitem> + <para> + Enable verbose output. Lists all partial files processed and its + checksum status. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-V</option></term> + <term><option>--version</option></term> + <listitem> + <para> + Print the <application>pg_combinebackup</application> version and exit. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-?</option></term> + <term><option>--help</option></term> + <listitem> + <para> + Show help about <application>pg_combinebackup</application> command line + arguments, and exit. 
+ </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </refsect1> + + <refsect1> + <title>Environment</title> + <variablelist> + <varlistentry> + <term><envar>PG_COLOR</envar></term> + <listitem> + <para> + Specifies whether to use color in diagnostics messages. Possible values + are <literal>always</literal>, <literal>auto</literal>, + <literal>never</literal>. + </para> + </listitem> + </varlistentry> + </variablelist> + </refsect1> + + <refsect1> + <title>Notes</title> + <para> + Output directory, full backup directory, and at-least one incremental backup + directory must be specified. + </para> + + <para> + <literal>PREVIOUS WAL LOCATION</literal> of the incremental backup must + match with the <literal>START WAL LOCATION</literal> of the previous full + or incremental backup in a given sequence. + </para> + </refsect1> + + <refsect1> + <title>Examples</title> + + <para> + To combine a full backup with two incremental backups and store it in the + output directory: +<screen> +<prompt>$</prompt> <userinput>pg_combinebackup -f /data/full/data -i /data/incr/data1 -i /data/incr/data2 -o /data/full/fulldata</userinput> +</screen> + </para> + + <para> + To combine a full backup with an incremental backups and store it in the + output directory along with tablespace relocation from + <filename>/data/incr/ts1</filename> to <filename>/data/full/ts</filename> +<screen> +<prompt>$</prompt> <userinput>pg_combinebackup -f /data/full/data -i /data/incr/data1 -o /data/full/fulldata -T /data/incr/ts1=/data/full/ts</userinput> +</screen> + </para> + + <para> + To combine a full backup with an incremental backups and store it in the + output directory along with various options like, verbose, no-clean etc.: +<screen> +<prompt>$</prompt> <userinput>pg_combinebackup -v --no-clean -f /data/full/data -i /data/incr/data1 -o /data/full/fulldata</userinput> +</screen> + </para> + </refsect1> + + <refsect1> + <title>See Also</title> + + <simplelist type="inline"> + 
<member><xref linkend="app-pgbasebackup"/></member> + </simplelist> + </refsect1> + +</refentry> diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index cef09dd..3513ab4 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -248,6 +248,7 @@ &ecpgRef; &pgBasebackup; &pgbench; + &pgCombinebackup; &pgConfig; &pgDump; &pgDumpall; diff --git a/src/bin/Makefile b/src/bin/Makefile index 903e581..fc3cea4 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -18,6 +18,7 @@ SUBDIRS = \ pg_archivecleanup \ pg_basebackup \ pg_checksums \ + pg_combinebackup \ pg_config \ pg_controldata \ pg_ctl \ diff --git a/src/bin/pg_combinebackup/Makefile b/src/bin/pg_combinebackup/Makefile new file mode 100644 index 0000000..44be044 --- /dev/null +++ b/src/bin/pg_combinebackup/Makefile @@ -0,0 +1,44 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/bin/pg_combinebackup +# +# Copyright (c) 1998-2019, PostgreSQL Global Development Group +# +# src/bin/pg_combinebackup/Makefile +# +#------------------------------------------------------------------------- + +PGFILEDESC = "pg_combinebackup - combine full backup with incremental backups" +PGAPPICON=win32 + +subdir = src/bin/pg_combinebackup +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils + +OBJS= pg_combinebackup.o $(WIN32RES) + +all: pg_combinebackup + +pg_combinebackup: $(OBJS) | submake-libpgport + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + +install: all installdirs + $(INSTALL_PROGRAM) pg_combinebackup$(X) '$(DESTDIR)$(bindir)/pg_combinebackup$(X)' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)' + +uninstall: + rm -f '$(DESTDIR)$(bindir)/pg_combinebackup$(X)' + +clean distclean maintainer-clean: + rm -f pg_combinebackup$(X) $(OBJS) + rm -rf tmp_check + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c new file mode 100644 index 0000000..55c0283 --- /dev/null +++ b/src/bin/pg_combinebackup/pg_combinebackup.c @@ -0,0 +1,1225 @@ +/*------------------------------------------------------------------------- + * + * pg_combinebackup.c + * Combines one or more incremental backups with the full base-backup to + * generate new full base-backup. + * + * Copyright (c) 2010-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/bin/pg_combinebackup/pg_combinebackup.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include <dirent.h> +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> + +#include "access/xlog_internal.h" +#include "common/controldata_utils.h" +#include "common/file_perm.h" +#include "common/logging.h" +#include "fe_utils/simple_list.h" +#include "getopt_long.h" +#include "pg_getopt.h" +#include "replication/basebackup.h" + + +/* Max number of incremental backups to be combined. */ +#define MAX_INCR_BK_COUNT 10 + +/* + * BACKUP_LABEL_FILE is defined in xlog.h which needs postgres.h to be included + * too. Thus to avoid that define it here again. 
+ */ +#define BACKUP_LABEL_FILE "backup_label" + + +typedef struct +{ + FILE *fp; + char filename[MAXPGPATH]; + bool isPartial; +} FileMap; + +typedef struct +{ + FILE *fp; + int offset; +} FileOffset; + +static const char *progname; +static ControlFileData *ControlFile; +static bool verbose = false; +static bool success = false; +static bool noclean = false; +static bool made_new_outputdata = false; +static bool found_existing_outputdata = false; +static bool made_tablespace_dirs = false; +static bool found_tablespace_dirs = false; +static bool checksum_failure = false; +static char *OutputDir = NULL; +static TablespaceList tablespace_dirs = {NULL, NULL}; + +/* Function headers */ +static void usage(void); +static void scan_file(const char *fn, char **IncrDirs, int nIncrDir, + const char *subdirpath); +static void scan_directory(char **IncrDirs, int nIncrDir, + const char *subdirpath); +static void check_compatibility(char *datadir); +static void verify_dir_is_empty_or_create(char *dirname, bool *created, + bool *found); +static void cleanup_directories_atexit(void); +static void combine_partial_files(const char *fn, char **IncrDirs, + int nIncrDir, const char *subdirpath, + const char*outfn); +static void copy_whole_file(const char *fromfn, const char *tofn); +static void cleanup_filemaps(FileMap *filemaps, int nfilemaps); +static void verify_backup_chain(char **IncrDirs, int nIncrDir); +static int create_filemap(const char *fn, char **IncrDirs, int nIncrDir, + const char *subdirpath, FileMap *filemaps); +static void write_backup_label_file(char *InputDir, char *label); + + +int +main(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"full-backup", required_argument, NULL, 'f'}, + {"incr-backup", required_argument, NULL, 'i'}, + {"output-dir", required_argument, NULL, 'o'}, + {"tablespace-mapping", required_argument, NULL, 'T'}, + {"label", required_argument, NULL, 'l'}, + {"no-clean", no_argument, NULL, 'n'}, + {"verbose", no_argument, 
NULL, 'v'}, + {NULL, 0, NULL, 0} + }; + + char *IncrDirs[MAX_INCR_BK_COUNT + 1]; /* Full backup directory is + * stored at index 0 */ + int nIncrDir; + int c; + int option_index; + char *label = "pg_combinebackup combined full backup"; + + pg_logging_init(argv[0]); + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_combinebackup")); + progname = get_progname(argv[0]); + + if (argc > 1) + { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) + { + usage(); + exit(0); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) + { + puts("pg_combinebackup (PostgreSQL) " PG_VERSION); + exit(0); + } + } + + atexit(cleanup_directories_atexit); + + /* Zero index is reserved for full backup directory. */ + IncrDirs[0] = NULL; + nIncrDir = 1; + while ((c = getopt_long(argc, argv, "f:i:l:no:T:v", long_options, &option_index)) != -1) + { + switch (c) + { + case 'f': + IncrDirs[0] = optarg; + break; + case 'i': + if (nIncrDir > MAX_INCR_BK_COUNT) + { + pg_log_error("too many incremental backups to combine"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + + IncrDirs[nIncrDir] = optarg; + nIncrDir++; + break; + case 'o': + OutputDir = optarg; + break; + case 'l': + label = pg_strdup(optarg); + if (strlen(label) > MAXPGPATH) + { + pg_log_error("backup label too long (max %d bytes)", + MAXPGPATH); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + break; + case 'n': + noclean = true; + break; + case 'T': + tablespace_list_append(&tablespace_dirs, optarg); + break; + case 'v': + verbose = true; + break; + default: + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + } + + /* + * Need to have directory paths for full backup, incremental backups, and + * the output directory. Error out if we don't get that. 
+ */ + if (IncrDirs[0] == NULL) + { + pg_log_error("no full backup directory specified"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + if (nIncrDir == 1) + { + pg_log_error("no incremental backup directory specified"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + if (OutputDir == NULL) + { + pg_log_error("no target directory specified"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + else + verify_dir_is_empty_or_create(OutputDir, &made_new_outputdata, + &found_existing_outputdata); + + /* Complain if any arguments remain */ + if (optind < argc) + { + pg_log_error("too many command-line arguments (first is \"%s\")", + argv[optind]); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + + /* Check that we have a valid backup chain */ + verify_backup_chain(IncrDirs, nIncrDir); + + /* Scan whole directory and process all .partial files */ + scan_directory(IncrDirs, nIncrDir, NULL); + + /* Now write a backup label file into the output directory */ + write_backup_label_file(IncrDirs[nIncrDir - 1], label); + + success = true; + return 0; +} + +static void +usage(void) +{ + printf(_("%s combines full backup with one or more incremental backups.\n\n"), progname); + printf(_("Usage:\n")); + printf(_(" %s [OPTION]...\n"), progname); + printf(_("\nOptions:\n")); + printf(_(" -f, --full-backup=DIRECTORY full backup directory\n")); + printf(_(" -i, --incr-backup=DIRECTORY incremental backup directory (maximum %d), " + " must be in the order the backups were taken\n"), MAX_INCR_BK_COUNT); + printf(_(" -o, --output-dir=DIRECTORY combine backup directory\n")); + printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n" + " relocate tablespace in OLDDIR to NEWDIR\n")); + printf(_("\nGeneral options:\n")); + printf(_(" -l, --label=LABEL set combine backup label\n")); + printf(_(" -n, --no-clean do 
not clean up after errors\n")); + printf(_(" -v, --verbose output verbose messages\n")); + printf(_(" -V, --version output version information, then exit\n")); + printf(_(" -?, --help show this help, then exit\n")); + printf(_("\nReport bugs to <pgsql-b...@lists.postgresql.org>.\n")); +} + +/* + * scan_file + * + * Checks whether the given file is a partial file or not. If partial, then + * combines it into a full backup file, else copies as is to the output + * directory. + */ +static void +scan_file(const char *fn, char **IncrDirs, int nIncrDir, + const char *subdirpath) +{ + char *extptr = strstr(fn, ".partial"); + + /* If .partial file, combine them, else copy it as is */ + if (extptr != NULL) + { + char outfn[MAXPGPATH]; + + if (verbose) + pg_log_info("combining partial file \"%s\"", fn); + + if (subdirpath) + snprintf(outfn, MAXPGPATH, "%s/%s/%s", OutputDir, subdirpath, fn); + else + snprintf(outfn, MAXPGPATH, "%s/%s", OutputDir, fn); + + extptr = strstr(outfn, ".partial"); + Assert (extptr != NULL); + extptr[0] = '\0'; + + combine_partial_files(fn, IncrDirs, nIncrDir, subdirpath, outfn); + } + else + { + char infn[MAXPGPATH]; + char outfn[MAXPGPATH]; + + if (verbose) + pg_log_info("copying file \"%s\"", fn); + + if (subdirpath) + { + snprintf(infn, MAXPGPATH, "%s/%s/%s", IncrDirs[nIncrDir - 1], + subdirpath, fn); + snprintf(outfn, MAXPGPATH, "%s/%s/%s", OutputDir, subdirpath, fn); + } + else + { + snprintf(infn, MAXPGPATH, "%s/%s", IncrDirs[nIncrDir - 1], fn); + snprintf(outfn, MAXPGPATH, "%s/%s", OutputDir, fn); + } + + copy_whole_file(infn, outfn); + } +} + +/* + * copy_whole_file + * + * Copy file from source to its destination. 
+ */ +static void +copy_whole_file(const char *fromfn, const char *tofn) +{ + FILE *ifp; + FILE *ofp; + char *buf; + struct stat statbuf; + off_t cnt; + pgoff_t len = 0; + + ifp = fopen(fromfn, "rb"); + if (ifp == NULL) + { + pg_log_error("could not open file \"%s\": %m", fromfn); + exit(1); + } + + if (fstat(fileno(ifp), &statbuf) != 0) + { + pg_log_error("could not stat file \"%s\": %m", fromfn); + fclose(ifp); + exit(1); + } + + if (verbose && statbuf.st_size > (RELSEG_SIZE * BLCKSZ)) + pg_log_info("found big file \"%s\" (size: %.2lfGB): %m", fromfn, + (double) statbuf.st_size / (RELSEG_SIZE * BLCKSZ)); + + ofp = fopen(tofn, "wb"); + if (ofp == NULL) + { + pg_log_error("could not create file \"%s\": %m", tofn); + fclose(ifp); + exit(1); + } + + /* 1GB slice */ + buf = (char *) pg_malloc(RELSEG_SIZE * BLCKSZ); + + /* + * We do read entire 1GB file in memory while taking incremental backup; so + * I don't see any reason why can't we do that here. Also, copying data in + * chunks is expensive. However, for bigger files, we still slice at 1GB + * border. + */ + while ((cnt = fread(buf, 1, Min(RELSEG_SIZE * BLCKSZ, statbuf.st_size - len), ifp)) > 0) + { + /* Write the buf to the output file. */ + if (fwrite(buf, 1, cnt, ofp) != cnt) + { + pg_log_error("could not write to file \"%s\": %m", tofn); + fclose(ifp); + fclose(ofp); + pg_free(buf); + exit(1); + } + + len += cnt; + } + + if (len < statbuf.st_size) + pg_log_error("could not read file \"%s\": %m", fromfn); + + fclose(ifp); + fclose(ofp); + pg_free(buf); +} + +/* + * scan_directory + * + * Scan the input incremental directory and operates on each file. Creates + * corresponding directories in the output directory too. 
+ */ +static void +scan_directory(char **IncrDirs, int nIncrDir, const char *subdirpath) +{ + char path[MAXPGPATH]; + DIR *dir; + struct dirent *de; + + if (subdirpath) + { + char outputpath[MAXPGPATH]; + + snprintf(path, sizeof(path), "%s/%s", IncrDirs[nIncrDir - 1], + subdirpath); + snprintf(outputpath, sizeof(outputpath), "%s/%s", OutputDir, + subdirpath); + + /* Create this sub-directory in output directory */ + if (pg_mkdir_p(outputpath, pg_dir_create_mode) == -1) + { + pg_log_error("could not create directory \"%s\": %m", outputpath); + exit(1); + } + } + else + snprintf(path, sizeof(path), "%s", IncrDirs[nIncrDir - 1]); + + dir = opendir(path); + if (!dir) + { + pg_log_error("could not open directory \"%s\": %m", path); + exit(1); + } + + while ((de = readdir(dir)) != NULL) + { + char fn[MAXPGPATH]; + struct stat st; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name); + if (lstat(fn, &st) < 0) + { + pg_log_error("could not stat file \"%s\": %m", fn); + exit(1); + } + if (S_ISREG(st.st_mode)) + { + /* Skip backup label file. 
*/ + if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0) + continue; + + scan_file(de->d_name, IncrDirs, nIncrDir, subdirpath); + } + else if (subdirpath && strcmp(subdirpath, "pg_tblspc") == 0 && +#ifndef WIN32 + S_ISLNK(st.st_mode) +#else + pgwin32_is_junction(fn) +#endif + ) + { + char newsubdirpath[MAXPGPATH]; + char linkpath[MAXPGPATH]; + char outfn[MAXPGPATH]; + int rllen; + char *mapped_tblspc_path; + + rllen = readlink(fn, linkpath, sizeof(linkpath)); + if (rllen < 0) + { + pg_log_error("could not read symbolic link \"%s\": %m", fn); + exit(1); + } + if (rllen >= sizeof(linkpath)) + { + pg_log_error("symbolic link \"%s\" target is too long", fn); + exit(1); + } + linkpath[rllen] = '\0'; + + snprintf(newsubdirpath, MAXPGPATH, "%s/%s", subdirpath, + de->d_name); + snprintf(outfn, MAXPGPATH, "%s/%s", OutputDir, newsubdirpath); + + mapped_tblspc_path = (char *) get_tablespace_mapping(&tablespace_dirs, + (const char *) linkpath); + + verify_dir_is_empty_or_create(mapped_tblspc_path, + &made_tablespace_dirs, + &found_tablespace_dirs); + + /* Create a symlink in the output directory. 
*/ + if (symlink(mapped_tblspc_path, outfn) != 0) + { + pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m", + outfn, mapped_tblspc_path); + exit(1); + } + + if (verbose) + pg_log_info("mapped tablespace from \"%s\" to \"%s\"", + linkpath, mapped_tblspc_path); + + scan_directory(IncrDirs, nIncrDir, newsubdirpath); + } + else if (S_ISDIR(st.st_mode)) + { + char newsubdirpath[MAXPGPATH]; + + if (subdirpath) + snprintf(newsubdirpath, MAXPGPATH, "%s/%s", subdirpath, + de->d_name); + else + snprintf(newsubdirpath, MAXPGPATH, "%s", de->d_name); + + scan_directory(IncrDirs, nIncrDir, newsubdirpath); + } + } + closedir(dir); + return; +} + +/* + * check_compatibility + * + * Read the control file and check compatibility + */ +static void +check_compatibility(char *datadir) +{ + bool crc_ok; + + ControlFile = get_controlfile(datadir, &crc_ok); + if (!crc_ok) + { + pg_log_error("pg_control CRC value is incorrect"); + exit(1); + } + + if (ControlFile->pg_control_version != PG_CONTROL_VERSION) + { + pg_log_error("cluster is not compatible with this version of pg_combinebackup"); + exit(1); + } + + if (ControlFile->blcksz != BLCKSZ) + { + pg_log_error("database cluster is not compatible"); + fprintf(stderr, _("The database cluster was initialized with block size %u, but pg_combinebackup was compiled with block size %u.\n"), + ControlFile->blcksz, BLCKSZ); + exit(1); + } + + /* When backup was taken, the database should have been in clean state. */ + if (ControlFile->state != DB_IN_PRODUCTION) + { + pg_log_error("cluster must be in production"); + exit(1); + } +} + +/* + * verify_dir_is_empty_or_create + * + * Verify that the given directory exists and is empty. If it does not exists, + * it is created. If it exists but is not empty, an error will be given and + * the process ended. 
+ */ +static void +verify_dir_is_empty_or_create(char *dirname, bool *created, bool *found) +{ + switch (pg_check_dir(dirname)) + { + case 0: + /* + * Does not exist, so create + */ + if (pg_mkdir_p(dirname, pg_dir_create_mode) == -1) + { + pg_log_error("could not create directory \"%s\": %m", dirname); + exit(1); + } + if (created) + *created = true; + return; + + case 1: + /* + * Exists, empty + */ + if (found) + *found = true; + return; + + case 2: + case 3: + case 4: + /* + * Exists, not empty + */ + pg_log_error("directory \"%s\" exists but is not empty", dirname); + exit(1); + + case -1: + /* + * Access problem + */ + pg_log_error("could not access directory \"%s\": %m", dirname); + exit(1); + } +} + +static void +cleanup_directories_atexit(void) +{ + if (success) + return; + + if (!noclean && !checksum_failure) + { + if (made_new_outputdata) + { + pg_log_info("removing target data directory \"%s\"", OutputDir); + if (!rmtree(OutputDir, true)) + pg_log_error("failed to remove data directory"); + } + else if (found_existing_outputdata) + { + pg_log_info("removing contents of target data directory \"%s\"", + OutputDir); + if (!rmtree(OutputDir, false)) + pg_log_error("failed to remove contents of data directory"); + } + } + else + { + if ((made_new_outputdata || found_existing_outputdata) && + !checksum_failure) + pg_log_info("target data directory \"%s\" not removed at user's request", + OutputDir); + } + + if ((made_tablespace_dirs || found_tablespace_dirs) && !checksum_failure) + pg_log_info("changes to tablespace directories will not be undone"); +} + +/* + * combine_partial_files + * + * Combines one or more incremental backups with full backup. The algorithm in + * this function works this way: + * 1. Work backward through the backup chain until we find a complete version + * of the file. We create a filemap in this process. + * 2. 
Loop over all the files within filemap, read the header and check the + * blocks modified, verify the CRC and create a blockmap. + * 3. Create a new file in output directory by writing all the blocks. + */ +static void +combine_partial_files(const char *fn, char **IncrDirs, int nIncrDir, + const char *subdirpath, const char *outfn) +{ + FILE *outfp; + FileOffset outblocks[RELSEG_SIZE] = {{0}}; + int i; + FileMap *filemaps; + int nfilemaps; + bool modifiedblockfound; + uint32 lastblkno; + FileMap *fm; + struct stat statbuf; + uint32 nblocks; + + filemaps = (FileMap *) pg_malloc(sizeof(FileMap) * nIncrDir); + + /* Create file map from the input directories. */ + nfilemaps = create_filemap(fn, IncrDirs, nIncrDir, subdirpath, filemaps); + + /* Process all opened files. */ + lastblkno = 0; + modifiedblockfound = false; + for (i = 0; i < nfilemaps - 1; i++) + { + char *buf; + int hsize; + int k; + int blkstartoffset; + int blknumberssize; + uint32 *blknumbers; + partial_file_header *pfh; + pg_crc32c savedchecksum; + + fm = &filemaps[i]; + Assert(fm->isPartial); + + hsize = offsetof(partial_file_header, blocknumbers); + buf = (char *) pg_malloc(hsize); + + /* Read partial file header. */ + if (fread(buf, 1, hsize, fm->fp) != hsize) + { + pg_log_error("corrupted partial file \"%s\": %m", fm->filename); + checksum_failure = true; + pg_free(filemaps); + pg_free(buf); + exit(1); + } + + pfh = (partial_file_header *) buf; + + /* Check magic */ + if (pfh->magic != INCREMENTAL_BACKUP_MAGIC) + { + pg_log_error("corrupted partial file \"%s\", magic mismatch: %m", fm->filename); + pg_free(filemaps); + pg_free(buf); + exit(1); + } + + blknumberssize = sizeof(uint32) * pfh->nblocks; + blknumbers = (uint32 *) pg_malloc(blknumberssize); + + /* Read all block numbers. 
*/ + if (fread((char *) blknumbers, 1, blknumberssize, fm->fp) != blknumberssize) + { + pg_log_error("corrupted partial file \"%s\": %m", fm->filename); + pg_free(blknumbers); + pg_free(buf); + pg_free(filemaps); + exit(1); + } + + /* Check CRC */ + savedchecksum = pfh->checksum; + INIT_CRC32C(pfh->checksum); + COMP_CRC32C(pfh->checksum, pfh, hsize); + COMP_CRC32C(pfh->checksum, blknumbers, blknumberssize); + if (pfh->checksum != savedchecksum) + { + pg_log_error("corrupted partial file \"%s\", checksum mismatch: %m", fm->filename); + pg_free(blknumbers); + pg_free(filemaps); + pg_free(buf); + exit(1); + } + else if (verbose) + pg_log_info("checksums verified in file \"%s\"", fm->filename); + + blkstartoffset = hsize + blknumberssize; + for (k = 0; k < pfh->nblocks; k++) + { + uint32 blknum = blknumbers[k]; + + /* + * Set this block pointer in outblock array. We skip setting + * it if already set as we are processing from latest file to + * oldest file. If same block is modified across multiple + * incremental backup, then we use the latest one; skipping all + * other. + */ + if (outblocks[blknum].fp == NULL) + { + outblocks[blknum].fp = fm->fp; + outblocks[blknum].offset = blkstartoffset + BLCKSZ * k; + } + + modifiedblockfound = true; + } + + /* Update last block number */ + if (k != 0 && blknumbers[k - 1] > lastblkno) + lastblkno = (int) blknumbers[k - 1]; + } + + /* Read base file */ + Assert(i == (nfilemaps - 1)); + + fm = &filemaps[nfilemaps - 1]; + Assert(fm->isPartial == false); + + /* + * If after processing all .partial files, we end up with no blocks + * modified, then simply copy the base file to the output directory and + * we are done. 
+ */ + if (!modifiedblockfound) + { + copy_whole_file(fm->filename, outfn); + cleanup_filemaps(filemaps, nfilemaps); + return; + } + + /* Write all blocks to the output file */ + + if (fstat(fileno(fm->fp), &statbuf) != 0) + { + pg_log_error("could not stat file \"%s\": %m", fm->filename); + cleanup_filemaps(filemaps, nfilemaps); + exit(1); + } + + Assert((statbuf.st_size % BLCKSZ) == 0); + + nblocks = statbuf.st_size / BLCKSZ; + if ((nblocks - 1) > lastblkno) + lastblkno = nblocks - 1; + + outfp = fopen(outfn, "wb"); + if (!outfp) + { + pg_log_error("could not create file \"%s\": %m", outfn); + cleanup_filemaps(filemaps, nfilemaps); + exit(1); + } + + for (i = 0; i <= lastblkno; i++) + { + char blkdata[BLCKSZ]; + FILE *infp; + int offset; + + /* + * Read block by block from respective file. If outblock has NULL + * file pointer, then fetch that block from the base file. + */ + if (outblocks[i].fp != NULL) + { + infp = outblocks[i].fp; + offset = outblocks[i].offset; + } + else + { + infp = fm->fp; + offset = i * BLCKSZ; + } + + if (fseek(infp, offset, SEEK_SET) == -1) + { + pg_log_error("could not fseek in file: %m"); + fclose(outfp); + cleanup_filemaps(filemaps, nfilemaps); + exit(1); + } + + if (fread(blkdata, 1, BLCKSZ, infp) != BLCKSZ) + { + pg_log_error("could not read from file \"%s\": %m", outfn); + fclose(outfp); + cleanup_filemaps(filemaps, nfilemaps); + exit(1); + } + + /* Finally write one block to the output file */ + if (fwrite(blkdata, 1, BLCKSZ, outfp) != BLCKSZ) + { + pg_log_error("could not write to file \"%s\": %m", outfn); + fclose(outfp); + cleanup_filemaps(filemaps, nfilemaps); + exit(1); + } + } + + fclose(outfp); + cleanup_filemaps(filemaps, nfilemaps); + + return; +} + +static void +cleanup_filemaps(FileMap *filemaps, int nfilemaps) +{ + int i; + + for (i = 0; i < nfilemaps; i++) + fclose(filemaps[i].fp); + + pg_free(filemaps); +} + +/* + * verify_backup_chain + * + * Verifies that the INCREMENTAL BACKUP REFERENCE WAL LOCATION of the + * 
incremental backup matches with the START WAL LOCATION of the previous + * backup, until we reach a full backup in which there is no INCREMENTAL + * BACKUP REFERENCE WAL LOCATION present. + */ +static void +verify_backup_chain(char **IncrDirs, int nIncrDir) +{ + int i; + XLogRecPtr startlsn = InvalidXLogRecPtr; + XLogRecPtr prevlsn = InvalidXLogRecPtr; + TimeLineID tli = 0; + + for (i = (nIncrDir - 1); i >= 0; i--) + { + struct stat statbuf; + char filename[MAXPGPATH]; + FILE *fp; + char *labelfile; + char startxlogfilename[MAXFNAMELEN]; + uint32 hi; + uint32 lo; + char ch; + char *ptr; + TimeLineID tli_from_file; + + check_compatibility(IncrDirs[i]); + + snprintf(filename, MAXPGPATH, "%s/%s", IncrDirs[i], BACKUP_LABEL_FILE); + fp = fopen(filename, "r"); + if (fp == NULL) + { + pg_log_error("could not read file \"%s\": %m", filename); + exit(1); + } + if (fstat(fileno(fp), &statbuf)) + { + pg_log_error("could not stat file \"%s\": %m", filename); + fclose(fp); + exit(1); + } + + labelfile = pg_malloc(statbuf.st_size + 1); + if (fread(labelfile, 1, statbuf.st_size, fp) != statbuf.st_size) + { + pg_log_error("corrupted file \"%s\": %m", filename); + pg_free(labelfile); + fclose(fp); + exit(1); + } + + fclose(fp); + labelfile[statbuf.st_size] = '\0'; + + /* + * Read the START WAL LOCATION from the directory, we skip this for top + * most directory corresponding to the last incremental backup as it is + * not needed to check. + */ + if (i != (nIncrDir - 1)) + { + if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c", + &hi, &lo, startxlogfilename, + &ch) != 4 || ch != '\n') + { + pg_log_error("invalid data in file \"%s\": %m", filename); + pg_free(labelfile); + exit(1); + } + startlsn = ((uint64) hi) << 32 | lo; + + /* + * We end up here from second loop counter, thus prevlsn must have + * been already set. Check that with startlsn fetched above, they + * must match. Otherwise we have a broken chain, bail out. 
+ */ + Assert(!XLogRecPtrIsInvalid(prevlsn)); + if (prevlsn != startlsn) + { + pg_log_error("invalid backup chain"); + pg_free(labelfile); + exit(1); + } + } + + /* + * Read forward until we get START TIMELINE and read it. We must + * ensure that all backups should have same timeline id. + */ + ptr = strstr(labelfile, "START TIMELINE:"); + + if (!ptr || sscanf(ptr, "START TIMELINE: %u\n", &tli_from_file) != 1) + { + pg_log_error("invalid data in file \"%s\": %m", filename); + pg_free(labelfile); + exit(1); + } + if (i != (nIncrDir - 1) && tli_from_file != tli) + { + pg_log_error("invalid timeline"); + pg_free(labelfile); + exit(1); + } + tli = tli_from_file; + + /* + * Fetch the INCREMENTAL BACKUP REFERENCE WAL LOCATION from the + * incremental backup directory. Index 0 is of full backup directory + * where we won't have that, so we skip it. + */ + if (i != 0) + { + ptr = strstr(ptr, "INCREMENTAL BACKUP REFERENCE WAL LOCATION:"); + + if (!ptr || sscanf(ptr, "INCREMENTAL BACKUP REFERENCE WAL LOCATION: %X/%X\n", &hi, &lo) != 2) + { + pg_log_error("invalid data in file \"%s\": %m", filename); + pg_free(labelfile); + exit(1); + } + prevlsn = ((uint64) hi) << 32 | lo; + } + + pg_free(labelfile); + } +} + +/* + * create_filemap + * + * Open all files from all incremental backup directories and create a file + * map. Returns number of files added into the filemaps. 
+ */ +static int +create_filemap(const char *fn, char **IncrDirs, int nIncrDir, + const char *subdirpath, FileMap *filemaps) +{ + int i; + bool basefilefound = false; + FileMap *fm; + int fmindex; + + for (i = (nIncrDir - 1), fmindex = 0; i >= 0; i--, fmindex++) + { + fm = &filemaps[fmindex]; + + if (subdirpath) + snprintf(fm->filename, MAXPGPATH, "%s/%s/%s", IncrDirs[i], + subdirpath, fn); + else + snprintf(fm->filename, MAXPGPATH, "%s/%s", IncrDirs[i], fn); + + fm->fp = fopen(fm->filename, "rb"); + if (fm->fp != NULL) + { + fm->isPartial = true; + continue; + } + + if (errno == ENOENT) + { + char *extptr = strstr(fm->filename, ".partial"); + + Assert (extptr != NULL); + extptr[0] = '\0'; + + /* Check without .partial */ + fm->fp = fopen(fm->filename, "rb"); + if (fm->fp != NULL) + { + fm->isPartial = false; + basefilefound = true; + /* We got a non-partial file, so no need to scan further */ + break; + } + } + + pg_log_error("could not open file \"%s\": %m", fm->filename); + cleanup_filemaps(filemaps, fmindex); + exit(1); + } + + /* We must have found the base file. */ + if (!basefilefound) + { + pg_log_error("could not find base file \"%s\": %m", fn); + cleanup_filemaps(filemaps, fmindex); + exit(1); + } + + /* Number of files = last index + 1 */ + return fmindex + 1; +} + +/* + * write_backup_label_file + * + * From backup label given in the incremental backup directory, write a backup + * label file into the output directory. Note here that, LABEL field is + * modified per user given string and incremental backup reference LSN is not + * added in the output file. + */ +static void +write_backup_label_file(char *InputDir, char *label) +{ + char fromfn[MAXPGPATH]; + char tofn[MAXPGPATH]; + char outputlabel[MAXPGPATH + 7]; /* Room for "LABEL: " */ + FILE *fp; + char *labelfile; + char *ptr; + char *fromptr; + struct stat statbuf; + int len = 0; + + /* + * Read entire backup label file from input directory into in-memory + * buffer. 
*/ + + snprintf(fromfn, MAXPGPATH, "%s/%s", InputDir, BACKUP_LABEL_FILE); + fp = fopen(fromfn, "rb"); + if (fp == NULL) + { + pg_log_error("could not open file \"%s\": %m", fromfn); + exit(1); + } + if (fstat(fileno(fp), &statbuf) != 0) + { + pg_log_error("could not stat file \"%s\": %m", fromfn); + fclose(fp); + exit(1); + } + + labelfile = pg_malloc(statbuf.st_size + 1); + if (fread(labelfile, 1, statbuf.st_size, fp) != statbuf.st_size) + { + pg_log_error("corrupted file \"%s\": %m", fromfn); + pg_free(labelfile); + fclose(fp); + exit(1); + } + + fclose(fp); + labelfile[statbuf.st_size] = '\0'; + fromptr = labelfile; + + /* + * We need to copy all details up-to LABEL as is into the output backup + * label file. Then write a user given label followed by rest of the + * details except incremental backup reference LSN. + */ + + snprintf(tofn, MAXPGPATH, "%s/%s", OutputDir, BACKUP_LABEL_FILE); + fp = fopen(tofn, "wb"); + if (fp == NULL) + { + pg_log_error("could not create file \"%s\": %m", tofn); + pg_free(labelfile); + exit(1); + } + + /* Find start of the label and write up-to that. */ + ptr = strstr(fromptr, "LABEL:"); + if (!ptr) + { + pg_log_error("corrupted file \"%s\": %m", fromfn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + len = ptr - fromptr; + if (fwrite(fromptr, 1, len, fp) != len) + { + pg_log_error("could not write to file \"%s\": %m", tofn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + /* Write label */ + snprintf(outputlabel, MAXPGPATH + 7, "LABEL: %s", label); + + len = strlen(outputlabel); + if (fwrite(outputlabel, 1, len, fp) != len) + { + pg_log_error("could not write to file \"%s\": %m", tofn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + /* Skip label from the input */ + if (sscanf(ptr, "LABEL: %1023[^\n]\n", outputlabel) != 1) + { + pg_log_error("corrupted file \"%s\": %m", fromfn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + /* Move exactly after label. 
*/ + fromptr = ptr + strlen(outputlabel) + strlen("LABEL: "); + + /* Find incremental backup reference LSN, and write up-to that as-is. */ + ptr = strstr(fromptr, "INCREMENTAL BACKUP REFERENCE WAL LOCATION:"); + /* We must find that, else its an error. */ + if (!ptr) + { + pg_log_error("corrupted file \"%s\": %m", fromfn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + len = ptr - fromptr; + if (fwrite(fromptr, 1, len, fp) != len) + { + pg_log_error("could not write to file \"%s\": %m", tofn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + ptr = strstr(ptr, "\n"); + /* We must find that, else its an error. */ + if (!ptr) + { + pg_log_error("corrupted file \"%s\": %m", fromfn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + /* Move past '\n' */ + ptr++; + + /* Move until ptr, skipping incremental backup reference LSN line. */ + fromptr = ptr; + + /* Write rest of the text. */ + len = statbuf.st_size - (ptr - labelfile); + if (len && fwrite(fromptr, 1, len, fp) != len) + { + pg_log_error("invalid backup file \"%s\": %m", tofn); + fclose(fp); + pg_free(labelfile); + exit(1); + } + + fclose(fp); + pg_free(labelfile); +} -- 1.8.3.1
From 031d3fecd3ceb70de173bd63214fd083a547ae32 Mon Sep 17 00:00:00 2001 From: Jeevan Chalke <jeevan.cha...@enterprisedb.com> Date: Mon, 9 Sep 2019 11:05:50 +0530 Subject: [PATCH v2 3/4] Add support for the incremental backup. If file is modified 90% or more, we send a whole file else we send only those blocks which are modified. The file is named .partial and has following header details: - magic number, set to 0x494E4352 (4 bytes) - checksum, of whole file except actual blocks (4 bytes) - number of blocks in this .partial file (4 bytes) - all modified block numbers (4 bytes each) - modified blocks --- doc/src/sgml/protocol.sgml | 50 ++++- doc/src/sgml/ref/pg_basebackup.sgml | 23 ++ src/backend/access/transam/xlog.c | 19 +- src/backend/access/transam/xlogfuncs.c | 6 +- src/backend/replication/basebackup.c | 311 +++++++++++++++++++++++++-- src/backend/storage/file/fd.c | 29 +++ src/bin/pg_basebackup/t/010_pg_basebackup.pl | 2 +- src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 13 ++ src/include/storage/fd.h | 1 + 10 files changed, 438 insertions(+), 19 deletions(-) mode change 100644 => 100755 src/bin/pg_basebackup/t/010_pg_basebackup.pl diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 8027521..21be571 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2466,7 +2466,7 @@ The commands accepted in replication mode are: </varlistentry> <varlistentry> - <term><literal>BASE_BACKUP</literal> [ <literal>LABEL</literal> <replaceable>'label'</replaceable> ] [ <literal>PROGRESS</literal> ] [ <literal>FAST</literal> ] [ <literal>WAL</literal> ] [ <literal>NOWAIT</literal> ] [ <literal>MAX_RATE</literal> <replaceable>rate</replaceable> ] [ <literal>TABLESPACE_MAP</literal> ] [ <literal>NOVERIFY_CHECKSUMS</literal> ] + <term><literal>BASE_BACKUP</literal> [ <literal>LABEL</literal> <replaceable>'label'</replaceable> ] [ <literal>PROGRESS</literal> ] [ <literal>FAST</literal> ] [ <literal>WAL</literal> ] [ 
<literal>NOWAIT</literal> ] [ <literal>MAX_RATE</literal> <replaceable>rate</replaceable> ] [ <literal>TABLESPACE_MAP</literal> ] [ <literal>NOVERIFY_CHECKSUMS</literal> ] [ <literal>LSN</literal> <replaceable>'lsn'</replaceable> ] <indexterm><primary>BASE_BACKUP</primary></indexterm> </term> <listitem> @@ -2576,6 +2576,22 @@ The commands accepted in replication mode are: </para> </listitem> </varlistentry> + + <varlistentry> + <term><literal>LSN</literal> <replaceable>'lsn'</replaceable></term> + <listitem> + <para> + Includes only those data blocks in backup which has LSN greater than + or equal to the given lsn. However, if 90% or more data blocks are + modified in the file, then sends the entire file. Otherwise, creates + a <filename>.partial</filename> file containing only the blocks which + are modified and sends that instead. The <filename>.partial</filename> + file has its own header followed by the actual data blocks. Note that + only relation files are considered here, all other files are sent as + is. + </para> + </listitem> + </varlistentry> </variablelist> </para> <para> @@ -2698,6 +2714,38 @@ The commands accepted in replication mode are: Owner, group, and file mode are set if the underlying file system on the server supports it. 
</para> + <para> + An incremental backup's <filename>.partial</filename> file has the + following format: + <itemizedlist> + <listitem> + <para> + Starts with a 4-byte magic number + </para> + </listitem> + <listitem> + <para> + Followed by a 4-byte CRC of the header (containing a magic number, + count of the number of blocks, and all block numbers) + </para> + </listitem> + <listitem> + <para> + Then a 4-byte count of the number of blocks included in the file + </para> + </listitem> + <listitem> + <para> + Then the block numbers, each as a 4-byte quantity + </para> + </listitem> + <listitem> + <para> + Followed by the actual data blocks in order with the block numbers + </para> + </listitem> + </itemizedlist> + </para> </listitem> </varlistentry> </variablelist> diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index fc9e222..9e2b9b8 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -408,6 +408,21 @@ PostgreSQL documentation </varlistentry> <varlistentry> + <term><option>--lsn=<replaceable class="parameter">LSN</replaceable></option></term> + <listitem> + <para> + Takes an incremental backup, using LSN as a threshold. Only the blocks + which are modified after this given LSN will be backed up. The file + which has these partial blocks has .partial as an extension. Backup + taken in this manner has to be combined with the full backup with the + <command>pg_combinebackup</command> utility. The value of LSN should + match the <literal>START WAL LOCATION</literal> of previously taken + full or incremental backup from <literal>backup_label</literal> file. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> <term><option>-n</option></term> <term><option>--no-clean</option></term> <listitem> @@ -792,6 +807,14 @@ PostgreSQL documentation <prompt>$</prompt> <userinput>pg_basebackup -D backup/data -T /opt/ts=$(pwd)/backup/ts</userinput> </screen> </para> + + <para> + To create an incremental backup having LSN greater than or equal to + <literal>5/19000060</literal>: +<screen> +<prompt>$</prompt> <userinput>pg_basebackup -D incbackup --lsn='5/19000060'</userinput> +</screen> + </para> </refsect1> <refsect1> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6876537..406c13b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10178,7 +10178,8 @@ XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, - bool needtblspcmapfile) + bool needtblspcmapfile, + XLogRecPtr incremental_reference_lsn) { bool exclusive = (labelfile == NULL); bool backup_started_in_recovery = false; @@ -10391,6 +10392,18 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, XLogFileName(xlogfilename, starttli, _logSegNo, wal_segment_size); /* + * If we are doing an incremental backup, then passed in reference LSN + * must be from past i.e. it should be less than the startpoint. 
+ */ + if (!XLogRecPtrIsInvalid(incremental_reference_lsn) && + incremental_reference_lsn >= startpoint) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("incremental backup reference lsn %X/%X is in the future", + (uint32) (incremental_reference_lsn >> 32), + (uint32) incremental_reference_lsn))); + + /* * Construct tablespace_map file */ if (exclusive) @@ -10506,6 +10519,10 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, appendStringInfo(labelfile, "START TIME: %s\n", strfbuf); appendStringInfo(labelfile, "LABEL: %s\n", backupidstr); appendStringInfo(labelfile, "START TIMELINE: %u\n", starttli); + if (!XLogRecPtrIsInvalid(incremental_reference_lsn)) + appendStringInfo(labelfile, "INCREMENTAL BACKUP REFERENCE WAL LOCATION: %X/%X\n", + (uint32) (incremental_reference_lsn >> 32), + (uint32) incremental_reference_lsn); /* * Okay, write the file, or return its contents to caller. diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 8a70503..61598fb 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -89,7 +89,8 @@ pg_start_backup(PG_FUNCTION_ARGS) if (exclusive) { startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL, - NULL, NULL, false, true); + NULL, NULL, false, true, + InvalidXLogRecPtr); } else { @@ -105,7 +106,8 @@ pg_start_backup(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file, - NULL, tblspc_map_file, false, true); + NULL, tblspc_map_file, false, true, + InvalidXLogRecPtr); before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0); } diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index bf15262..90ab3ea 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -83,6 +83,11 @@ static pgoff_t sendCompleteFile(const char *readfilename, const 
char *tarfilename, FILE *fp, struct stat *statbuf, int segmentno, bool verify_checksum, int *checksum_failures); +static pgoff_t sendPartialFile(const char *readfilename, + const char *tarfilename, FILE *fp, + XLogRecPtr incremental_reference_lsn, + struct stat *statbuf, int segmentno, + bool verify_checksum, int *checksum_failures); /* Was the backup currently in-progress initiated in recovery mode? */ static bool backup_started_in_recovery = false; @@ -112,6 +117,11 @@ do { \ (errmsg("could not read from file \"%s\"", filename))); \ } while (0) +/* + * When to send the whole file, % blocks modified (90%) + */ +#define WHOLE_FILE_THRESHOLD 0.9 + /* The actual number of bytes, transfer of which may cause sleep. */ static uint64 throttling_sample; @@ -127,6 +137,9 @@ static TimestampTz throttled_last; /* The starting XLOG position of the base backup. */ static XLogRecPtr startptr; +/* The reference XLOG position for the incremental backup. */ +static XLogRecPtr incremental_reference_lsn; + /* Total number of checksum failures during base backup. 
*/ static long long int total_checksum_failures; @@ -267,7 +280,9 @@ perform_base_backup(basebackup_options *opt) startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, labelfile, &tablespaces, tblspc_map_file, - opt->progress, opt->sendtblspcmapfile); + opt->progress, opt->sendtblspcmapfile, + opt->lsn); + incremental_reference_lsn = opt->lsn; /* * Once do_pg_start_backup has been called, ensure that any failure causes @@ -665,6 +680,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_lsn = false; MemSet(opt, 0, sizeof(*opt)); + opt->lsn = InvalidXLogRecPtr; foreach(lopt, options) { DefElem *defel = (DefElem *) lfirst(lopt); @@ -1407,6 +1423,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf int segmentno = 0; char *segmentpath; bool verify_checksum = false; + char *filename; fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1418,17 +1435,15 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf errmsg("could not open file \"%s\": %m", readfilename))); } + /* + * Get the filename (excluding path). As last_dir_separator() includes + * the last directory separator, we chop that off by incrementing the + * pointer. + */ + filename = last_dir_separator(readfilename) + 1; + if (!noverify_checksums && DataChecksumsEnabled()) { - char *filename; - - /* - * Get the filename (excluding path). As last_dir_separator() - * includes the last directory separator, we chop that off by - * incrementing the pointer. - */ - filename = last_dir_separator(readfilename) + 1; - if (is_checksummed_file(readfilename, filename)) { verify_checksum = true; @@ -1449,9 +1464,25 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf } } - /* Send complete file to the client. 
*/ - len = sendCompleteFile(readfilename, tarfilename, fp, statbuf, segmentno, - verify_checksum, &checksum_failures); + /* + * If incremental backup, see whether the filename is a non-temporary + * relation filename or not and can be sent partially. All other files are + * sent completely. + */ + if (!XLogRecPtrIsInvalid(incremental_reference_lsn) && + OidIsValid(dboid) && looks_like_non_temp_rel_name(filename)) + { + /* Send partial file to the client. */ + len = sendPartialFile(readfilename, tarfilename, fp, + incremental_reference_lsn, statbuf, segmentno, + verify_checksum, &checksum_failures); + } + else + { + /* Send complete file to the client. */ + len = sendCompleteFile(readfilename, tarfilename, fp, statbuf, + segmentno, verify_checksum, &checksum_failures); + } /* If the file was truncated while we were sending it, pad it with zeros */ if (len < statbuf->st_size) @@ -1813,3 +1844,257 @@ sendCompleteFile(const char *readfilename, const char *tarfilename, FILE *fp, return len; } + +/* + * sendPartialFile + * + * Sends a partial file containing only the blocks which are modified after + * given LSN. However, if the file is heavily modified, then we send complete + * file instead. + */ +static pgoff_t +sendPartialFile(const char *readfilename, const char *tarfilename, FILE *fp, + XLogRecPtr incremental_reference_lsn, struct stat *statbuf, + int segmentno, bool verify_checksum, int *checksum_failures) +{ + char *buf; + off_t cnt; + pgoff_t len = 0; + bool sendwholefile = false; + + /* + * Relation file is segmented at size RELSEG_SIZE * BLCKSZ, so we will + * never have size more than that. + */ + Assert(statbuf->st_size <= (RELSEG_SIZE * BLCKSZ)); + + /* + * We want to read the whole file in memory to see how many blocks were + * actually changed. We don't want to do that incrementally as it will + * need to reread the file while sending the blocks. 
palloc() reads + * maximum 1GB - 1, and current max relation segment will be of 1GB, thus + * we use malloc() here. + */ + buf = (char *) malloc(statbuf->st_size); + if (buf == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + if ((cnt = fread(buf, 1, statbuf->st_size, fp)) > 0) + { + Bitmapset *mod_blocks = NULL; + int nmodblocks = 0; + int part_size = 0; + int part_header_size; + int blknum; + int blknocnt; + partial_file_header *pfh; + char *partialtarfilename = NULL; + + /* + * A valid relation size is multiple of BLCKSZ. However, if we read + * some arbitrary size data, then instead of throwing an error, we + * chose to send that file as-is. Inform the same to the client by + * emitting a warning. Also, we cannot verify the checksum, if + * enabled, emit a warning for that too. + */ + if (cnt % BLCKSZ != 0) + { + if (verify_checksum) + { + ereport(WARNING, + (errmsg("cannot verify checksum in file \"%s\"", + readfilename))); + verify_checksum = false; + } + + ereport(WARNING, + (errmsg("file size (%d) not in multiple of page size (%d), sending whole file", + (int) cnt, BLCKSZ))); + + /* File size is not in multiple of BLCKSZ, send as is. */ + sendwholefile = true; + } + + /* + * Check each page LSN and see if it is modified after the given LSN or + * not. Create a bitmap of all such modified blocks and then decide + * whether we want to send a whole file or a partial file. Skip this + * check if we decided to send whole file already. + */ + if (!sendwholefile) + { + XLogRecPtr pglsn; + int i; + int nblocks = (cnt / BLCKSZ); + + for (i = 0; i < nblocks; i++) + { + int page_index_in_buf = (BLCKSZ * i); + char *page = buf + page_index_in_buf; + + pglsn = PageGetLSN(page); + + if (pglsn >= incremental_reference_lsn) + { + /* + * Verify checksum, if requested, for the modified blocks. 
+ */ + if (verify_checksum) + { + verify_page_checksum(readfilename, fp, page, + (cnt - page_index_in_buf), i, + segmentno, checksum_failures); + + /* + * If we hit end-of-file, a concurrent truncation must + * have occurred, so break out of this loop just as if + * the initial fread() returned 0. We'll drop through + * to the same code that handles that case. (We must + * fix up cnt first, though.) + */ + if (feof(fp)) + { + cnt = page_index_in_buf; + break; + } + } + + mod_blocks = bms_add_member(mod_blocks, i); + } + } + + nmodblocks = bms_num_members(mod_blocks); + + /* + * We need to send whole file if the modified block count is equal + * to or greater than the WHOLE_FILE_THRESHOLD. Check that. + */ + if (i > 0 && (nmodblocks / (double) i) >= WHOLE_FILE_THRESHOLD) + sendwholefile = true; + } + + /* + * If sendwholefile is true then we need to send the whole file as is. + * Otherwise send a partial file. Instead of sending entire file at a + * time, we send data in a series of chunks of size CHUNK_SIZE. + * CHUNK_SIZE is arbitrary chosen to 1MB assuming BLCKSZ is of 8K. + */ + if (sendwholefile) + { +#define CHUNK_SIZE (BLCKSZ * 128) + int i; + int nchunks = cnt / CHUNK_SIZE; + off_t sent = 0; + + _tarWriteHeader(tarfilename, NULL, statbuf, false); + + /* Send data in chunks of size CHUNK_SIZE each. */ + for (i = 0; i < nchunks; i++) + { + /* Send the chunk as a CopyData message */ + if (pq_putmessage('d', buf + sent, CHUNK_SIZE)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + + throttle(CHUNK_SIZE); + sent += CHUNK_SIZE; + } + + /* Send remaining data, if present. 
 */
+		if (sent < cnt)
+		{
+			off_t		remaining = cnt - sent;
+
+			/* Send the chunk as a CopyData message */
+			if (pq_putmessage('d', buf + sent, remaining))
+				ereport(ERROR,
+						(errmsg("base backup could not send data, aborting backup")));
+
+			throttle(remaining);
+			sent += remaining;
+		}
+
+		free(buf);
+		Assert(sent == cnt);
+
+		return cnt;
+	}
+
+	/* Create a partial file */
+
+	/* Calculate partial file size. */
+	part_header_size = offsetof(partial_file_header, blocknumbers) +
+		(sizeof(uint32) * nmodblocks);
+	part_size = part_header_size + (BLCKSZ * nmodblocks);
+
+	/* Add .partial to filename */
+	partialtarfilename = (char *) palloc(strlen(tarfilename) + 9);
+	snprintf(partialtarfilename, strlen(tarfilename) + 9, "%s.partial", tarfilename);
+
+	statbuf->st_size = part_size;
+	_tarWriteHeader(partialtarfilename, NULL, statbuf, false);
+	pfree(partialtarfilename);
+
+	pfh = (partial_file_header *) palloc(part_header_size);
+	pfh->magic = INCREMENTAL_BACKUP_MAGIC;
+	pfh->nblocks = nmodblocks;
+
+	blknum = -1;
+	blknocnt = 0;
+	while ((blknum = bms_next_member(mod_blocks, blknum)) >= 0)
+	{
+		pfh->blocknumbers[blknocnt] = blknum;
+		/* No per-block CRC is computed; the header CRC below covers this list.
 */
+		blknocnt++;
+	}
+
+	Assert(blknocnt == nmodblocks);
+
+	/*
+	 * Now calculate CRC for the header.  The checksum field itself must be
+	 * zero while the CRC is computed, and the CRC must be finalized with
+	 * FIN_CRC32C, otherwise the client cannot recompute and verify it.
+	 */
+	{
+		pg_crc32c	hdr_crc;
+
+		pfh->checksum = 0;
+		INIT_CRC32C(hdr_crc);
+		COMP_CRC32C(hdr_crc, pfh, part_header_size);
+		FIN_CRC32C(hdr_crc);
+		pfh->checksum = hdr_crc;
+	}
+
+	/* Send header */
+	if (pq_putmessage('d', (char *) pfh, part_header_size))
+		ereport(ERROR,
+				(errmsg("base backup could not send data, aborting backup")));
+	throttle(part_header_size);
+
+	/* Send data blocks */
+	for (blknocnt = 0; blknocnt < nmodblocks; blknocnt++)
+	{
+		int			offset = BLCKSZ * pfh->blocknumbers[blknocnt];
+
+		if (pq_putmessage('d', buf + offset, BLCKSZ))
+			ereport(ERROR,
+					(errmsg("base backup could not send data, aborting backup")));
+		throttle(BLCKSZ);
+	}
+
+	Assert(blknocnt == nmodblocks && statbuf->st_size == part_size);
+
+	len = part_size;
+	pfree(pfh);
+	}
+	else
+	{
+		/* Check for fread() error. */
+		if (ferror(fp))
+		{
+			free(buf);
+			ereport(ERROR,
+					(errmsg("could not read from file \"%s\"", readfilename)));
+		}
+
+		/* Send empty file as is */
+		_tarWriteHeader(tarfilename, NULL, statbuf, false);
+		len = cnt;
+	}
+
+	/* free buffer allocated */
+	free(buf);
+
+	return len;
+}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 2de2105..3e07d52 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -3111,6 +3111,35 @@ looks_like_temp_rel_name(const char *name)
 	return true;
 }
 
+/* <digits>, or <digits>.<digits> */
+bool
+looks_like_non_temp_rel_name(const char *name)
+{
+	int			pos;
+
+	/* Look for a non-empty string of digits (that isn't too long). */
+	for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
+		;
+	if (pos == 0 || pos > OIDCHARS)
+		return false;
+
+	if (name[pos] == '.')
+	{
+		int			segchar;
+
+		for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
+			;
+		if (segchar <= 1)
+			return false;
+		pos += segchar;
+	}
+
+	/* Now we should be at the end.
*/ + if (name[pos] != '\0') + return false; + return true; +} + /* * Issue fsync recursively on PGDATA and all its contents. diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl old mode 100644 new mode 100755 index fd8e187..a425f14 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -561,7 +561,7 @@ $node->command_fails( [ 'pg_basebackup', '-D', "$tempdir/lsn_test", '--lsn', "0/INVALID" ], 'pg_basebackup with invalid LSN fails'); $node->command_ok( - [ 'pg_basebackup', '-D', "$tempdir/lsn_test", '--lsn', "0/ABCDEF01", '--no-verify-checksums' ], + [ 'pg_basebackup', '-D', "$tempdir/lsn_test", '--lsn', "0/00000001", '--no-verify-checksums' ], 'pg_basebackup with valid LSN'); rmtree("$tempdir/lsn_test"); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d519252..155385d 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -347,7 +347,8 @@ typedef enum SessionBackupState extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, - bool needtblspcmapfile); + bool needtblspcmapfile, + XLogRecPtr ref_lsn); extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p); extern void do_pg_abort_backup(void); diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h index 503a5b9..1b35b08 100644 --- a/src/include/replication/basebackup.h +++ b/src/include/replication/basebackup.h @@ -20,6 +20,9 @@ #define MAX_RATE_LOWER 32 #define MAX_RATE_UPPER 1048576 +/* magic number in incremental backup's .partial file */ +#define INCREMENTAL_BACKUP_MAGIC 0x494E4352 + typedef struct { @@ -29,6 +32,16 @@ typedef struct int64 size; } tablespaceinfo; +/* Definition of the partial file header */ +typedef struct +{ + uint32 magic; + pg_crc32c checksum; + 
uint32 nblocks; + uint32 blocknumbers[FLEXIBLE_ARRAY_MEMBER]; +} partial_file_header; + + extern void SendBaseBackup(BaseBackupCmd *cmd); extern int64 sendTablespace(char *path, bool sizeonly); diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index d2a8c52..070faf1 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -136,6 +136,7 @@ extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, SubTransactionId parentSubid); extern void RemovePgTempFiles(void); extern bool looks_like_temp_rel_name(const char *name); +extern bool looks_like_non_temp_rel_name(const char *name); extern int pg_fsync(int fd); extern int pg_fsync_no_writethrough(int fd); -- 1.8.3.1
From 0da6d78690f8f45a8cf4e1e0caed8e53a9a3d010 Mon Sep 17 00:00:00 2001 From: Jeevan Chalke <jeevan.cha...@enterprisedb.com> Date: Mon, 9 Sep 2019 10:56:43 +0530 Subject: [PATCH v2 2/4] Refactor code in basebackup.c - Refactor full backup code to the separate function. - Refactor checksum verifying logic to the separate function. - Refactor tablespace mapping code. --- src/backend/replication/basebackup.c | 348 +++++++++++++++++++--------------- src/bin/pg_basebackup/pg_basebackup.c | 129 +------------ src/fe_utils/simple_list.c | 104 ++++++++++ src/include/fe_utils/simple_list.h | 21 ++ 4 files changed, 334 insertions(+), 268 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index e72bf8e..bf15262 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -75,6 +75,14 @@ static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); static int compareWalFileNames(const ListCell *a, const ListCell *b); static void throttle(size_t increment); static bool is_checksummed_file(const char *fullpath, const char *filename); +static void verify_page_checksum(const char *readfilename, FILE *fp, + char *page, int backup_distance, + BlockNumber blkno, int segmentno, + int *checksum_failures); +static pgoff_t sendCompleteFile(const char *readfilename, + const char *tarfilename, FILE *fp, + struct stat *statbuf, int segmentno, + bool verify_checksum, int *checksum_failures); /* Was the backup currently in-progress initiated in recovery mode? 
*/ static bool backup_started_in_recovery = false; @@ -1391,17 +1399,11 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf bool missing_ok, Oid dboid) { FILE *fp; - BlockNumber blkno = 0; - bool block_retry = false; char buf[TAR_SEND_SIZE]; - uint16 checksum; int checksum_failures = 0; off_t cnt; - int i; pgoff_t len = 0; - char *page; size_t pad; - PageHeader phdr; int segmentno = 0; char *segmentpath; bool verify_checksum = false; @@ -1416,8 +1418,6 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf errmsg("could not open file \"%s\": %m", readfilename))); } - _tarWriteHeader(tarfilename, NULL, statbuf, false); - if (!noverify_checksums && DataChecksumsEnabled()) { char *filename; @@ -1449,146 +1449,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf } } - while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) - { - /* - * The checksums are verified at block level, so we iterate over the - * buffer in chunks of BLCKSZ, after making sure that - * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of - * BLCKSZ bytes. - */ - Assert(TAR_SEND_SIZE % BLCKSZ == 0); - - if (verify_checksum && (cnt % BLCKSZ != 0)) - { - ereport(WARNING, - (errmsg("cannot verify checksum in file \"%s\", block " - "%d: read buffer size %d and page size %d " - "differ", - readfilename, blkno, (int) cnt, BLCKSZ))); - verify_checksum = false; - } - - if (verify_checksum) - { - for (i = 0; i < cnt / BLCKSZ; i++) - { - page = buf + BLCKSZ * i; - - /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. 
- */ - if (!PageIsNew(page) && PageGetLSN(page) < startptr) - { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) - { - /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. We only need to - * retry once because the LSN should be updated to - * something we can ignore on the next pass. If the - * error happens again then it is a true validation - * failure. - */ - if (block_retry == false) - { - /* Reread the failed block */ - if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) - { - /* - * If we hit end-of-file, a concurrent - * truncation must have occurred, so break out - * of this loop just as if the initial fread() - * returned 0. We'll drop through to the same - * code that handles that case. (We must fix - * up cnt first, though.) 
- */ - if (feof(fp)) - { - cnt = BLCKSZ * i; - break; - } - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not reread block %d of file \"%s\": %m", - blkno, readfilename))); - } - - if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } - - checksum_failures++; - - if (checksum_failures <= 5) - ereport(WARNING, - (errmsg("checksum verification failed in " - "file \"%s\", block %d: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); - if (checksum_failures == 5) - ereport(WARNING, - (errmsg("further checksum verification " - "failures in file \"%s\" will not " - "be reported", readfilename))); - } - } - block_retry = false; - blkno++; - } - } - - /* Send the chunk as a CopyData message */ - if (pq_putmessage('d', buf, cnt)) - ereport(ERROR, - (errmsg("base backup could not send data, aborting backup"))); - - len += cnt; - throttle(cnt); - - if (feof(fp) || len >= statbuf->st_size) - { - /* - * Reached end of file. The file could be longer, if it was - * extended while we were sending it, but for a base backup we can - * ignore such extended data. It will be restored from WAL. - */ - break; - } - } - - CHECK_FREAD_ERROR(fp, readfilename); + /* Send complete file to the client. */ + len = sendCompleteFile(readfilename, tarfilename, fp, statbuf, segmentno, + verify_checksum, &checksum_failures); /* If the file was truncated while we were sending it, pad it with zeros */ if (len < statbuf->st_size) @@ -1761,3 +1624,192 @@ throttle(size_t increment) */ throttled_last = GetCurrentTimestamp(); } + +/* + * verify_page_checksum + * + * Verifies checksum for one page. 
+ */ +static void +verify_page_checksum(const char *readfilename, FILE *fp, char *page, + int backup_distance, BlockNumber blkno, int segmentno, + int *checksum_failures) +{ + uint16 checksum; + bool block_retried = false; + PageHeader phdr; + + while (1) + { + /* + * Only check pages which have not been modified since the start of the + * base backup. Otherwise, they might have been written only halfway + * and the checksum would not be valid. However, replaying WAL would + * reinstate the correct page in this case. We also skip completely + * new pages, since they don't have a checksum yet. + */ + if (PageIsNew(page) || PageGetLSN(page) >= startptr) + return; + + checksum = pg_checksum_page(page, blkno + segmentno * RELSEG_SIZE); + phdr = (PageHeader) page; + if (phdr->pd_checksum == checksum) + return; + + /* + * Retry the block on the first failure. It's possible that we + * read the first 4K page of the block just before postgres updated + * the entire block so it ends up looking torn to us. We only need + * to retry once because the LSN should be updated to something we + * can ignore on the next pass. If the error happens again then it + * is a true validation failure. + */ + if (block_retried == false) + { + /* Reread the failed block */ + if (fseek(fp, -backup_distance, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + if (fread(page, 1, BLCKSZ, fp) != BLCKSZ) + { + /* + * If we hit end-of-file, return from here. Caller will take + * care of the rest. 
+ */ + if (feof(fp)) + return; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not reread block %d of file \"%s\": %m", + blkno, readfilename))); + } + + if (fseek(fp, backup_distance - BLCKSZ, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + /* Set flag so we know a retry was attempted */ + block_retried = true; + + /* Re-validate the block again */ + continue; + } + + (*checksum_failures)++; + + if (*checksum_failures <= 5) + ereport(WARNING, + (errmsg("checksum verification failed in file \"%s\", block %d: calculated %X but expected %X", + readfilename, blkno, checksum, + phdr->pd_checksum))); + if (*checksum_failures == 5) + ereport(WARNING, + (errmsg("further checksum verification failures in file \"%s\" will not be reported", + readfilename))); + + /* + * Note that we want this loop to run only once, so we return from + * here. If we were needed to reread the block, then it was already + * done above. + */ + return; + } +} + +/* + * sendCompleteFile + * + * Sends complete file to the client. + */ +static pgoff_t +sendCompleteFile(const char *readfilename, const char *tarfilename, FILE *fp, + struct stat *statbuf, int segmentno, bool verify_checksum, + int *checksum_failures) +{ + char buf[TAR_SEND_SIZE]; + off_t cnt; + pgoff_t len = 0; + BlockNumber blkno = 0; + int i; + + _tarWriteHeader(tarfilename, NULL, statbuf, false); + + while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) + { + /* + * The checksums are verified at block level, so we iterate over the + * buffer in chunks of BLCKSZ, after making sure that + * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of + * BLCKSZ bytes. 
+ */ + Assert(TAR_SEND_SIZE % BLCKSZ == 0); + + if (verify_checksum && (cnt % BLCKSZ != 0)) + { + ereport(WARNING, + (errmsg("cannot verify checksum in file \"%s\", block " + "%d: read buffer size %d and page size %d " + "differ", + readfilename, blkno, (int) cnt, BLCKSZ))); + verify_checksum = false; + } + + if (verify_checksum) + { + for (i = 0; i < cnt / BLCKSZ; i++) + { + int page_index_in_buf = (BLCKSZ * i); + + verify_page_checksum(readfilename, fp, buf + page_index_in_buf, + (cnt - page_index_in_buf), blkno, + segmentno, checksum_failures); + + /* + * If we hit end-of-file, a concurrent truncation must have + * occurred, so break out of this loop just as if the initial + * fread() returned 0. We'll drop through to the same code that + * handles that case. (We must fix up cnt first, though.) + */ + if (feof(fp)) + { + cnt = page_index_in_buf; + break; + } + + blkno++; + } + } + + /* Send the chunk as a CopyData message */ + if (pq_putmessage('d', buf, cnt)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + + len += cnt; + throttle(cnt); + + if (feof(fp) || len >= statbuf->st_size) + { + /* + * Reached end of file. The file could be longer, if it was + * extended while we were sending it, but for a base backup we can + * ignore such extended data. It will be restored from WAL. 
+ */ + break; + } + } + + CHECK_FREAD_ERROR(fp, readfilename); + + return len; +} diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 1791853..8df453b 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -31,6 +31,7 @@ #include "common/file_utils.h" #include "common/logging.h" #include "common/string.h" +#include "fe_utils/simple_list.h" #include "fe_utils/string_utils.h" #include "getopt_long.h" #include "libpq-fe.h" @@ -43,19 +44,6 @@ #define ERRCODE_DATA_CORRUPTED "XX001" -typedef struct TablespaceListCell -{ - struct TablespaceListCell *next; - char old_dir[MAXPGPATH]; - char new_dir[MAXPGPATH]; -} TablespaceListCell; - -typedef struct TablespaceList -{ - TablespaceListCell *head; - TablespaceListCell *tail; -} TablespaceList; - /* * pg_xlog has been renamed to pg_wal in version 10. This version number * should be compared with PQserverVersion(). @@ -155,9 +143,6 @@ static void BaseBackup(void); static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline, bool segment_finished); -static const char *get_tablespace_mapping(const char *dir); -static void tablespace_list_append(const char *arg); - static void cleanup_directories_atexit(void) @@ -227,84 +212,6 @@ kill_bgchild_atexit(void) } #endif -/* - * Split argument into old_dir and new_dir and append to tablespace mapping - * list. 
- */ -static void -tablespace_list_append(const char *arg) -{ - TablespaceListCell *cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell)); - char *dst; - char *dst_ptr; - const char *arg_ptr; - - dst_ptr = dst = cell->old_dir; - for (arg_ptr = arg; *arg_ptr; arg_ptr++) - { - if (dst_ptr - dst >= MAXPGPATH) - { - pg_log_error("directory name too long"); - exit(1); - } - - if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=') - ; /* skip backslash escaping = */ - else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) - { - if (*cell->new_dir) - { - pg_log_error("multiple \"=\" signs in tablespace mapping"); - exit(1); - } - else - dst = dst_ptr = cell->new_dir; - } - else - *dst_ptr++ = *arg_ptr; - } - - if (!*cell->old_dir || !*cell->new_dir) - { - pg_log_error("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg); - exit(1); - } - - /* - * This check isn't absolutely necessary. But all tablespaces are created - * with absolute directories, so specifying a non-absolute path here would - * just never match, possibly confusing users. It's also good to be - * consistent with the new_dir check. - */ - if (!is_absolute_path(cell->old_dir)) - { - pg_log_error("old directory is not an absolute path in tablespace mapping: %s", - cell->old_dir); - exit(1); - } - - if (!is_absolute_path(cell->new_dir)) - { - pg_log_error("new directory is not an absolute path in tablespace mapping: %s", - cell->new_dir); - exit(1); - } - - /* - * Comparisons done with these values should involve similarly - * canonicalized path values. This is particularly sensitive on Windows - * where path values may not necessarily use Unix slashes. 
- */ - canonicalize_path(cell->old_dir); - canonicalize_path(cell->new_dir); - - if (tablespace_dirs.tail) - tablespace_dirs.tail->next = cell; - else - tablespace_dirs.head = cell; - tablespace_dirs.tail = cell; -} - #ifdef HAVE_LIBZ static const char * @@ -1359,28 +1266,6 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) /* - * Retrieve tablespace path, either relocated or original depending on whether - * -T was passed or not. - */ -static const char * -get_tablespace_mapping(const char *dir) -{ - TablespaceListCell *cell; - char canon_dir[MAXPGPATH]; - - /* Canonicalize path for comparison consistency */ - strlcpy(canon_dir, dir, sizeof(canon_dir)); - canonicalize_path(canon_dir); - - for (cell = tablespace_dirs.head; cell; cell = cell->next) - if (strcmp(canon_dir, cell->old_dir) == 0) - return cell->new_dir; - - return dir; -} - - -/* * Receive a tar format stream from the connection to the server, and unpack * the contents of it into a directory. Only files, directories and * symlinks are supported, no other kinds of special files. 
@@ -1406,7 +1291,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) strlcpy(current_path, basedir, sizeof(current_path)); else strlcpy(current_path, - get_tablespace_mapping(PQgetvalue(res, rownum, 1)), + get_tablespace_mapping(&tablespace_dirs, + PQgetvalue(res, rownum, 1)), sizeof(current_path)); /* @@ -1537,7 +1423,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) */ filename[strlen(filename) - 1] = '\0'; /* Remove trailing slash */ - mapped_tblspc_path = get_tablespace_mapping(©buf[157]); + mapped_tblspc_path = get_tablespace_mapping(&tablespace_dirs, + ©buf[157]); if (symlink(mapped_tblspc_path, filename) != 0) { pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m", @@ -1966,7 +1853,9 @@ BaseBackup(void) */ if (format == 'p' && !PQgetisnull(res, i, 1)) { - char *path = unconstify(char *, get_tablespace_mapping(PQgetvalue(res, i, 1))); + char *path = unconstify(char *, + get_tablespace_mapping(&tablespace_dirs, + PQgetvalue(res, i, 1))); verify_dir_is_empty_or_create(path, &made_tablespace_dirs, &found_tablespace_dirs); } @@ -2276,7 +2165,7 @@ main(int argc, char **argv) no_slot = true; break; case 'T': - tablespace_list_append(optarg); + tablespace_list_append(&tablespace_dirs, optarg); break; case 'X': if (strcmp(optarg, "n") == 0 || diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c index cfdb7dc..90ef114 100644 --- a/src/fe_utils/simple_list.c +++ b/src/fe_utils/simple_list.c @@ -16,6 +16,7 @@ */ #include "postgres_fe.h" +#include "common/logging.h" #include "fe_utils/simple_list.h" @@ -152,3 +153,106 @@ simple_string_list_not_touched(SimpleStringList *list) } return NULL; } + +/* + * Split argument into old_dir and new_dir and append to tablespace mapping + * list. 
+ */ +void +tablespace_list_append(TablespaceList *tablespace_dirs, const char *arg) +{ + TablespaceListCell *cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell)); + char *dst; + char *dst_ptr; + const char *arg_ptr; + + Assert(tablespace_dirs); + + dst_ptr = dst = cell->old_dir; + for (arg_ptr = arg; *arg_ptr; arg_ptr++) + { + if (dst_ptr - dst >= MAXPGPATH) + { + pg_log_error("directory name too long"); + exit(1); + } + + if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=') + ; /* skip backslash escaping = */ + else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) + { + if (*cell->new_dir) + { + pg_log_error("multiple \"=\" signs in tablespace mapping"); + exit(1); + } + else + dst = dst_ptr = cell->new_dir; + } + else + *dst_ptr++ = *arg_ptr; + } + + if (!*cell->old_dir || !*cell->new_dir) + { + pg_log_error("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg); + exit(1); + } + + /* + * This check isn't absolutely necessary. But all tablespaces are created + * with absolute directories, so specifying a non-absolute path here would + * just never match, possibly confusing users. It's also good to be + * consistent with the new_dir check. + */ + if (!is_absolute_path(cell->old_dir)) + { + pg_log_error("old directory is not an absolute path in tablespace mapping: %s", + cell->old_dir); + exit(1); + } + + if (!is_absolute_path(cell->new_dir)) + { + pg_log_error("new directory is not an absolute path in tablespace mapping: %s", + cell->new_dir); + exit(1); + } + + /* + * Comparisons done with these values should involve similarly + * canonicalized path values. This is particularly sensitive on Windows + * where path values may not necessarily use Unix slashes. 
+ */ + canonicalize_path(cell->old_dir); + canonicalize_path(cell->new_dir); + + if (tablespace_dirs->tail) + tablespace_dirs->tail->next = cell; + else + tablespace_dirs->head = cell; + tablespace_dirs->tail = cell; +} + +/* + * Retrieve tablespace path, either relocated or original depending on whether + * -T was passed or not. + */ +const char * +get_tablespace_mapping(TablespaceList *tablespace_dirs, const char *dir) +{ + TablespaceListCell *cell; + char canon_dir[MAXPGPATH]; + + Assert(tablespace_dirs); + + /* Canonicalize path for comparison consistency */ + strlcpy(canon_dir, dir, sizeof(canon_dir)); + canonicalize_path(canon_dir); + + for (cell = tablespace_dirs->head; cell; cell = cell->next) + if (strcmp(canon_dir, cell->old_dir) == 0) + return cell->new_dir; + + return dir; +} diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h index 75738be..436358b 100644 --- a/src/include/fe_utils/simple_list.h +++ b/src/include/fe_utils/simple_list.h @@ -6,6 +6,9 @@ * these is very primitive compared to the backend's List facilities, but * it's all we need in, eg, pg_dump. * + * Also, has data structures for simple lists of tablespace mappings used in + * backups. 
+ * * * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -43,6 +46,19 @@ typedef struct SimpleStringList SimpleStringListCell *tail; } SimpleStringList; +typedef struct TablespaceListCell +{ + struct TablespaceListCell *next; + char old_dir[MAXPGPATH]; + char new_dir[MAXPGPATH]; +} TablespaceListCell; + +typedef struct TablespaceList +{ + TablespaceListCell *head; + TablespaceListCell *tail; +} TablespaceList; + extern void simple_oid_list_append(SimpleOidList *list, Oid val); extern bool simple_oid_list_member(SimpleOidList *list, Oid val); @@ -54,4 +70,9 @@ extern void simple_string_list_destroy(SimpleStringList *list); extern const char *simple_string_list_not_touched(SimpleStringList *list); +extern void tablespace_list_append(TablespaceList *tablespace_dirs, + const char *arg); +extern const char *get_tablespace_mapping(TablespaceList *tablespace_dirs, + const char *dir); + #endif /* SIMPLE_LIST_H */ -- 1.8.3.1