This is an automated email from the ASF dual-hosted git repository. reshke pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 84e8a03e14dac1b7a2c2b8d3bc0f1d51c86cd25f Author: Marbin Tan <[email protected]> AuthorDate: Tue Mar 19 14:57:35 2024 -0700 pg_upgrade: modify `--output-dir` to not create certain sub-directories Previously, when `--output-dir` was specified, pg_upgrade created `<output-dir-option>/pg_upgrade_output.d/<timestamp>/[files and logs]`. This made it very hard to manage the directories when wrapped with gpupgrade. The timestamps are created on the fly, which made each pg_upgrade run have a unique folder. Unfortunately, this made it hard to track down the pg_upgrade runs that were started by gpupgrade. Ideally, all pg_upgrade runs on the segments would have the same timestamp. Modify `--output-dir` such that we only need to give the directory we want to put the files and logs in. The user of pg_upgrade will be the one responsible for creating unique log directories. With this change, pg_upgrade will create `<output-dir-option>/[files and logs]`. The data directory already has a log directory which conflicts with `pg_upgrade`. So by default (without the `--output-dir` option flag), `pg_upgrade` will still create `<data-directory>/pg_upgrade_output.d/<timestamp>/[files and logs]`. 
--- src/bin/pg_upgrade/Makefile | 2 +- .../pg_upgrade/greenplum/pg_upgrade_greenplum.h | 5 +- src/bin/pg_upgrade/greenplum/util.c | 89 ++++++++++++++++++++++ src/bin/pg_upgrade/pg_upgrade.c | 9 ++- 4 files changed, 101 insertions(+), 4 deletions(-) diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index 41ea1bdf8e..2b972aa772 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -28,7 +28,7 @@ OBJS = \ OBJS += greenplum/aotable.o greenplum/version_gp.o \ greenplum/check_gp.o greenplum/reporting.o \ greenplum/aomd_filehandler.o greenplum/option_gp.o \ - greenplum/controldata_gp.o + greenplum/controldata_gp.o greenplum/util.o override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS) LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) diff --git a/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h b/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h index f36b2bd705..94125757a2 100644 --- a/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h +++ b/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h @@ -73,13 +73,11 @@ char *get_output_dir(void); void freeze_master_data(void); void reset_system_identifier(void); - /* aotable.c */ void restore_aosegment_tables(void); bool is_appendonly(char relstorage); - /* version_gp.c */ void check_hash_partition_usage(void); @@ -99,4 +97,7 @@ void report_progress(ClusterInfo *cluster, progress_type op, char *fmt,...) pg_attribute_printf(3, 4); void close_progress(void); +/* util.c */ +void make_outputdirs_gp(char *pgdata); + #endif /* PG_UPGRADE_GREENPLUM_H */ diff --git a/src/bin/pg_upgrade/greenplum/util.c b/src/bin/pg_upgrade/greenplum/util.c new file mode 100644 index 0000000000..a13da5055d --- /dev/null +++ b/src/bin/pg_upgrade/greenplum/util.c @@ -0,0 +1,89 @@ +#include "postgres_fe.h" + +#include "pg_upgrade_greenplum.h" +#include "common/file_perm.h" + +/* + * This is a modified version of make_outputdirs. 
+ * + * Use make_outputdirs_gp() when the user knows the exact directory to put the + * files and logs that pg_upgrade generates. This function no longer creates + * `pg_upgrade_output.d/<timestamp>` within the `pgdata`. + * + * Create and assign proper permissions to the set of output directories + * used to store any data generated internally, filling in log_opts in + * the process. + */ +void +make_outputdirs_gp(char *pgdata) +{ + FILE *fp; + char **filename; + time_t run_time = time(NULL); + char filename_path[MAXPGPATH]; + int len; + + log_opts.rootdir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.rootdir, MAXPGPATH, "%s", pgdata); + if (len >= MAXPGPATH) + pg_fatal("directory path for new cluster is too long\n"); + + /* keep basedir for upstream code compatibility even though rootdir and + * basedir is the same. + */ + log_opts.basedir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.basedir, MAXPGPATH, "%s", log_opts.rootdir); + if (len >= MAXPGPATH) + pg_fatal("directory path for new cluster is too long\n"); + + /* BASE_OUTPUTDIR/dump/ */ + log_opts.dumpdir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s", log_opts.rootdir, + DUMP_OUTPUTDIR); + if (len >= MAXPGPATH) + pg_fatal("directory path for new cluster is too long\n"); + + /* BASE_OUTPUTDIR/log/ */ + log_opts.logdir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.logdir, MAXPGPATH, "%s/%s", log_opts.rootdir, + LOG_OUTPUTDIR); + if (len >= MAXPGPATH) + pg_fatal("directory path for new cluster is too long\n"); + + /* + * Ignore the error case where the root path exists, as it is kept the + * same across runs. 
+ */ + if (mkdir(log_opts.rootdir, pg_dir_create_mode) < 0 && errno != EEXIST) + pg_fatal("could not create directory \"%s\": %m\n", log_opts.rootdir); + if (mkdir(log_opts.dumpdir, pg_dir_create_mode) < 0) + pg_fatal("could not create directory \"%s\": %m\n", log_opts.dumpdir); + if (mkdir(log_opts.logdir, pg_dir_create_mode) < 0) + pg_fatal("could not create directory \"%s\": %m\n", log_opts.logdir); + + len = snprintf(filename_path, sizeof(filename_path), "%s/%s", + log_opts.logdir, INTERNAL_LOG_FILE); + if (len >= sizeof(filename_path)) + pg_fatal("directory path for new cluster is too long\n"); + + if ((log_opts.internal = fopen_priv(filename_path, "a")) == NULL) + pg_fatal("could not open log file \"%s\": %m\n", filename_path); + + /* label start of upgrade in logfiles */ + for (filename = output_files; *filename != NULL; filename++) + { + len = snprintf(filename_path, sizeof(filename_path), "%s/%s", + log_opts.logdir, *filename); + if (len >= sizeof(filename_path)) + pg_fatal("directory path for new cluster is too long\n"); + if ((fp = fopen_priv(filename_path, "a")) == NULL) + pg_fatal("could not write to log file \"%s\": %m\n", filename_path); + + fprintf(fp, + "-----------------------------------------------------------------\n" + " pg_upgrade run on %s" + "-----------------------------------------------------------------\n\n", + ctime(&run_time)); + fclose(fp); + } +} diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 036084400b..eee38da194 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -131,9 +131,16 @@ main(int argc, char **argv) * cluster in adjust_data_dir(). * * GPDB allows for relocateable output with the --output-dir flag + * + * Use make_outputdirs() for the default option; this ensures that there is a + * unique directory for pg_upgrade on the data directory. If not, + * pg_upgrade will fail immediately. 
The default option will create the directory + * `<data-directory>/pg_upgrade_output.d/<timestamp>` for pg_upgrade. Otherwise, use + * make_outputdirs_gp() when the user knows the exact directory to put the + * files and logs that pg_upgrade generates. */ if ((output_dir = get_output_dir()) != NULL) - make_outputdirs(output_dir); + make_outputdirs_gp(output_dir); else make_outputdirs(new_cluster.pgdata); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
