This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 84e8a03e14dac1b7a2c2b8d3bc0f1d51c86cd25f
Author: Marbin Tan <[email protected]>
AuthorDate: Tue Mar 19 14:57:35 2024 -0700

    pg_upgrade: modify `--output-dir` to not create certain sub-directories
    
    Previously when specifying `--output-dir`, pg_upgrade creates
    `<output-dir-option>/pg_upgrade_output.d/<timestamp>/[files and logs]`.
    
    This made it very hard to manage the directories when wrapped with
    gpupgrade. The timestamps are created on the fly, which gave each
    pg_upgrade run its own unique folder. Unfortunately, this made it hard to
    track down the pg_upgrade runs that were started by gpupgrade. Ideally, all
    pg_upgrade runs on the segments would share the same timestamp.
    
    Modify `--output-dir` such that we only need to give the directory we
    want to put the files and logs in. The user of pg_upgrade will be the one
    responsible for creating unique log directories.
    
    With this change, pg_upgrade will create
    `<output-dir-option>/[files and logs]`.
    
    The data directory already has a log directory which conflicts with
    `pg_upgrade`. So by default (without the `--output-dir` option flag),
    `pg_upgrade` will still create
    `<data-directory>/pg_upgrade_output.d/<timestamp>/[files and logs]`.
---
 src/bin/pg_upgrade/Makefile                        |  2 +-
 .../pg_upgrade/greenplum/pg_upgrade_greenplum.h    |  5 +-
 src/bin/pg_upgrade/greenplum/util.c                | 89 ++++++++++++++++++++++
 src/bin/pg_upgrade/pg_upgrade.c                    |  9 ++-
 4 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index 41ea1bdf8e..2b972aa772 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -28,7 +28,7 @@ OBJS = \
 OBJS += greenplum/aotable.o greenplum/version_gp.o \
                greenplum/check_gp.o greenplum/reporting.o \
                greenplum/aomd_filehandler.o greenplum/option_gp.o \
-               greenplum/controldata_gp.o
+               greenplum/controldata_gp.o greenplum/util.o
 
 override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) 
$(CPPFLAGS)
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
diff --git a/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h 
b/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h
index f36b2bd705..94125757a2 100644
--- a/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h
+++ b/src/bin/pg_upgrade/greenplum/pg_upgrade_greenplum.h
@@ -73,13 +73,11 @@ char *get_output_dir(void);
 void freeze_master_data(void);
 void reset_system_identifier(void);
 
-
 /* aotable.c */
 
 void           restore_aosegment_tables(void);
 bool        is_appendonly(char relstorage);
 
-
 /* version_gp.c */
 
 void check_hash_partition_usage(void);
@@ -99,4 +97,7 @@ void report_progress(ClusterInfo *cluster, progress_type op, 
char *fmt,...)
 pg_attribute_printf(3, 4);
 void close_progress(void);
 
+/* util.c */
+void make_outputdirs_gp(char *pgdata);
+
 #endif /* PG_UPGRADE_GREENPLUM_H */
diff --git a/src/bin/pg_upgrade/greenplum/util.c 
b/src/bin/pg_upgrade/greenplum/util.c
new file mode 100644
index 0000000000..a13da5055d
--- /dev/null
+++ b/src/bin/pg_upgrade/greenplum/util.c
@@ -0,0 +1,89 @@
+#include "postgres_fe.h"
+
+#include "pg_upgrade_greenplum.h"
+#include "common/file_perm.h"
+
+/*
+ * make_outputdirs_gp: Greenplum variant of make_outputdirs().
+ *
+ * Used when the caller supplies the exact output directory (--output-dir).
+ * Unlike make_outputdirs(), no `pg_upgrade_output.d/<timestamp>` level is
+ * added: `pgdata` itself becomes log_opts.rootdir and log_opts.basedir.
+ *
+ * Creates the root, dump and log directories, opens the internal log file,
+ * and appends a run-start banner to every file in output_files, filling in
+ * log_opts in the process. Every failure is reported through pg_fatal().
+ */
+void
+make_outputdirs_gp(char *pgdata)
+{
+       FILE       *fp;
+       char      **filename;
+       time_t          run_time = time(NULL);	/* stamped into each banner */
+       char            filename_path[MAXPGPATH];
+       int                     len;	/* snprintf result, for truncation checks */
+
+       log_opts.rootdir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.rootdir, MAXPGPATH, "%s", pgdata);
+       if (len >= MAXPGPATH)
+               pg_fatal("directory path for new cluster is too long\n");
+
+       /* Keep basedir for upstream code compatibility even though rootdir and
+        * basedir are the same path here.
+        */
+       log_opts.basedir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.basedir, MAXPGPATH, "%s", log_opts.rootdir);
+       if (len >= MAXPGPATH)
+               pg_fatal("directory path for new cluster is too long\n");
+
+       /* <rootdir>/DUMP_OUTPUTDIR */
+       log_opts.dumpdir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s", log_opts.rootdir,
+                                  DUMP_OUTPUTDIR);
+       if (len >= MAXPGPATH)
+               pg_fatal("directory path for new cluster is too long\n");
+
+       /* <rootdir>/LOG_OUTPUTDIR: holds the internal log and output_files */
+       log_opts.logdir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.logdir, MAXPGPATH, "%s/%s", log_opts.rootdir,
+                                  LOG_OUTPUTDIR);
+       if (len >= MAXPGPATH)
+               pg_fatal("directory path for new cluster is too long\n");
+
+       /*
+        * The root path may already exist, as it is kept the same across runs,
+        * so EEXIST is tolerated there; the dump and log mkdir calls are not.
+        */
+       if (mkdir(log_opts.rootdir, pg_dir_create_mode) < 0 && errno != EEXIST)
+               pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.rootdir);
+       if (mkdir(log_opts.dumpdir, pg_dir_create_mode) < 0)
+               pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.dumpdir);
+       if (mkdir(log_opts.logdir, pg_dir_create_mode) < 0)
+               pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.logdir);
+
+       len = snprintf(filename_path, sizeof(filename_path), "%s/%s",
+                                  log_opts.logdir, INTERNAL_LOG_FILE);
+       if (len >= sizeof(filename_path))
+               pg_fatal("directory path for new cluster is too long\n");
+
+       if ((log_opts.internal = fopen_priv(filename_path, "a")) == NULL)
+               pg_fatal("could not open log file \"%s\": %m\n", filename_path);
+
+       /* Label the start of this upgrade run in each file of output_files. */
+       for (filename = output_files; *filename != NULL; filename++)
+       {
+               len = snprintf(filename_path, sizeof(filename_path), "%s/%s",
+                                          log_opts.logdir, *filename);
+               if (len >= sizeof(filename_path))
+                       pg_fatal("directory path for new cluster is too 
long\n");
+               if ((fp = fopen_priv(filename_path, "a")) == NULL)
+                       pg_fatal("could not write to log file \"%s\": %m\n", 
filename_path);
+
+               fprintf(fp,
+                               
"-----------------------------------------------------------------\n"
+                               "  pg_upgrade run on %s"	/* ctime() text ends in '\n' */
+                               
"-----------------------------------------------------------------\n\n",
+                               ctime(&run_time));
+               fclose(fp);
+       }
+}
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 036084400b..eee38da194 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -131,9 +131,16 @@ main(int argc, char **argv)
         * cluster in adjust_data_dir().
         *
         * GPDB allows for relocateable output with the --output-dir flag
+        *
+        * Use make_outputdirs() for the default option; this ensures that 
there is a
+        * unique directory for pg_upgrade on the data directory. If not,
+        * pg_upgrade will fail immediately. The default option will create the 
directory
+        * `<data-directory>/pg_upgrade_output.d/<timestamp>` for pg_upgrade. 
Otherwise, use
+        * make_outputdirs_gp() when the user knows the exact directory to put 
the
+        * files and logs that pg_upgrade generates.
         */
        if ((output_dir = get_output_dir()) != NULL)
-               make_outputdirs(output_dir);
+               make_outputdirs_gp(output_dir);
        else
                make_outputdirs(new_cluster.pgdata);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to