This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 90c12321531b958bf9bdc92d95d4e13b3290bd51
Author: Michael Paquier <[email protected]>
AuthorDate: Wed Jun 8 10:53:01 2022 +0900

    Restructure pg_upgrade output directories for better idempotence
    
    38bfae3 has moved the contents written to files by pg_upgrade under a
    new directory called pg_upgrade_output.d/ located in the new cluster's
    data folder, and it used a simple structure made of two subdirectories
    leading to a fixed structure: log/ and dump/.  This design has made
    weaker pg_upgrade on repeated calls, as we could get failures when
    creating one or more of those directories, while potentially losing the
    logs of a previous run (logs are retained automatically on failure, and
    cleaned up on success unless --retain is specified).  So a user would
    need to clean up pg_upgrade_output.d/ as an extra step for any repeated
    calls of pg_upgrade.  The most common scenario here is --check followed
    by the actual upgrade, but one could see a failure when specifying an
    incorrect input argument value.  Removing entirely the logs would have
    the disadvantage of removing all the past information, even if --retain
    was specified at some past step.
    
    This result is annoying for a lot of users and automated upgrade flows.
    So, rather than requiring a manual removal of pg_upgrade_output.d/, this
    redesigns the set of output directories in a more dynamic way, based on
    a suggestion from Tom Lane and Daniel Gustafsson.  pg_upgrade_output.d/
    is still the base path, but a second directory level is added, mostly
    named after an ISO-8601-formatted timestamp (in short human-readable,
    with milliseconds appended to the name to avoid any conflicts).  The
    logs and dumps are saved within the same subdirectories as previously,
    as of log/ and dump/, but these are located inside the subdirectory
    named after the timestamp.
    
    The logs of a given run are removed only after a successful run if
    --retain is not used, and pg_upgrade_output.d/ is kept if there are any
    logs from a previous run.  Note that previously, pg_upgrade would have
    kept the logs even after a successful --check but that was inconsistent
    compared to the case without --check when using --retain.  The code in
    charge of the removal of the output directories is now refactored into a
    single routine.
    
    Two TAP tests are added with some --check commands (one failure case and
    one success case), to look after the issue fixed here.  Note that the
    tests had to be tweaked a bit to fit with the new directory structure so
    as it can find any logs generated on failure.  This is still going to
    require a change in the buildfarm client for the case where pg_upgrade
    is tested without the TAP test, though, but I'll tackle that with a
    separate patch where needed.
    
    Reported-by: Tushar Ahuja
    Author: Michael Paquier
    Reviewed-by: Daniel Gustafsson, Justin Pryzby
    Discussion: 
https://postgr.es/m/[email protected]
    (cherry picked from commit 4fff78f00910af0137f9de7532f8eb21d08ab1c3)
---
 doc/src/sgml/ref/pgupgrade.sgml |  5 ++-
 src/bin/pg_upgrade/check.c      |  2 ++
 src/bin/pg_upgrade/pg_upgrade.c | 68 ++++++++++++++++++++++++++++-------------
 src/bin/pg_upgrade/pg_upgrade.h | 14 ++++++---
 src/bin/pg_upgrade/util.c       | 42 +++++++++++++++++++++++++
 5 files changed, 103 insertions(+), 28 deletions(-)

diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index f43121cb19..f21563fb5b 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -763,7 +763,10 @@ psql --username=postgres --file=script.sql postgres
   <para>
    <application>pg_upgrade</application> creates various working files, such
    as schema dumps, stored within <literal>pg_upgrade_output.d</literal> in
-   the directory of the new cluster.
+   the directory of the new cluster. Each run creates a new subdirectory named
+   with a timestamp formatted as per ISO 8601
+   (<literal>%Y%m%dT%H%M%S</literal>), where all the generated files are
+   stored.
   </para>
 
   <para>
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 34331187be..36424f57ff 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -270,6 +270,8 @@ report_clusters_compatible(void)
 
                /* stops new cluster */
                stop_postmaster(false);
+               cleanup_output_dirs();
+
                if (get_check_fatal_occurred())
                        exit(1);
                exit(0);
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 236dac3fca..2b7e0e4f85 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -59,7 +59,6 @@ static void copy_xact_xlog_xid(void);
 static void set_frozenxids(bool minmxid_only);
 static void make_outputdirs(char *pgdata);
 static void setup(char *argv0, bool *live_check);
-static void cleanup(void);
 
 static void copy_subdir_files(const char *old_subdir, const char *new_subdir);
 
@@ -287,7 +286,7 @@ main(int argc, char **argv)
        pg_free(analyze_script_file_name);
        pg_free(deletion_script_file_name);
 
-       cleanup();
+       cleanup_output_dirs();
 
        return 0;
 }
@@ -413,19 +412,54 @@ make_outputdirs(char *pgdata)
        char      **filename;
        time_t          run_time = time(NULL);
        char            filename_path[MAXPGPATH];
+       char            timebuf[128];
+       struct timeval time;
+       time_t          tt;
+       int                     len;
+
+       log_opts.rootdir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.rootdir, MAXPGPATH, "%s/%s", pgdata, 
BASE_OUTPUTDIR);
+       if (len >= MAXPGPATH)
+               pg_fatal("buffer for root directory too small");
+
+       /* BASE_OUTPUTDIR/$timestamp/ */
+       gettimeofday(&time, NULL);
+       tt = (time_t) time.tv_sec;
+       strftime(timebuf, sizeof(timebuf), "%Y%m%dT%H%M%S", localtime(&tt));
+       /* append milliseconds */
+       snprintf(timebuf + strlen(timebuf), sizeof(timebuf) - strlen(timebuf),
+                        ".%03d", (int) (time.tv_usec / 1000));
+       log_opts.basedir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.basedir, MAXPGPATH, "%s/%s", log_opts.rootdir,
+                                  timebuf);
+       if (len >= MAXPGPATH)
+               pg_fatal("buffer for base directory too small");
+
+       /* BASE_OUTPUTDIR/$timestamp/dump/ */
+       log_opts.dumpdir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s/%s", 
log_opts.rootdir,
+                                  timebuf, DUMP_OUTPUTDIR);
+       if (len >= MAXPGPATH)
+               pg_fatal("buffer for dump directory too small");
+
+       /* BASE_OUTPUTDIR/$timestamp/log/ */
+       log_opts.logdir = (char *) pg_malloc0(MAXPGPATH);
+       len = snprintf(log_opts.logdir, MAXPGPATH, "%s/%s/%s", log_opts.rootdir,
+                                  timebuf, LOG_OUTPUTDIR);
+       if (len >= MAXPGPATH)
+               pg_fatal("buffer for log directory too small");
 
-       log_opts.basedir = (char *) pg_malloc(MAXPGPATH);
-       snprintf(log_opts.basedir, MAXPGPATH, "%s/%s", pgdata, BASE_OUTPUTDIR);
-       log_opts.dumpdir = (char *) pg_malloc(MAXPGPATH);
-       snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s", pgdata, DUMP_OUTPUTDIR);
-       log_opts.logdir = (char *) pg_malloc(MAXPGPATH);
-       snprintf(log_opts.logdir, MAXPGPATH, "%s/%s", pgdata, LOG_OUTPUTDIR);
-
-       if (mkdir(log_opts.basedir, pg_dir_create_mode))
+       /*
+        * Ignore the error case where the root path exists, as it is kept the
+        * same across runs.
+        */
+       if (mkdir(log_opts.rootdir, pg_dir_create_mode) < 0 && errno != EEXIST)
+               pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.rootdir);
+       if (mkdir(log_opts.basedir, pg_dir_create_mode) < 0)
                pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.basedir);
-       if (mkdir(log_opts.dumpdir, pg_dir_create_mode))
+       if (mkdir(log_opts.dumpdir, pg_dir_create_mode) < 0)
                pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.dumpdir);
-       if (mkdir(log_opts.logdir, pg_dir_create_mode))
+       if (mkdir(log_opts.logdir, pg_dir_create_mode) < 0)
                pg_fatal("could not create directory \"%s\": %m\n", 
log_opts.logdir);
 
        snprintf(filename_path, sizeof(filename_path), "%s/%s", log_opts.logdir,
@@ -1004,13 +1038,3 @@ set_frozenxids(bool minmxid_only)
 
        check_ok();
 }
-
-static void
-cleanup(void)
-{
-       fclose(log_opts.internal);
-
-       /* Remove dump and log files? */
-       if (!log_opts.retain)
-               rmtree(log_opts.basedir, true);
-}
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index c18af34793..95543466b1 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -36,12 +36,14 @@
 #define DB_DUMP_FILE_MASK      "pg_upgrade_dump_%u.custom"
 
 /*
- * Base directories that include all the files generated internally,
- * from the root path of the new cluster.
+ * Base directories that include all the files generated internally, from the
+ * root path of the new cluster.  The paths are dynamically built as of
+ * BASE_OUTPUTDIR/$timestamp/{LOG_OUTPUTDIR,DUMP_OUTPUTDIR} to ensure their
+ * uniqueness in each run.
  */
 #define BASE_OUTPUTDIR         "pg_upgrade_output.d"
-#define LOG_OUTPUTDIR          BASE_OUTPUTDIR "/log"
-#define DUMP_OUTPUTDIR         BASE_OUTPUTDIR "/dump"
+#define LOG_OUTPUTDIR           "log"
+#define DUMP_OUTPUTDIR          "dump"
 
 #define DB_DUMP_LOG_FILE_MASK  "pg_upgrade_dump_%u.log"
 #define SERVER_LOG_FILE                "pg_upgrade_server.log"
@@ -384,7 +386,8 @@ typedef struct
        bool            verbose;                /* true -> be verbose in 
messages */
        bool            retain;                 /* retain log files on success 
*/
        /* Set of internal directories for output files */
-       char       *basedir;            /* Base output directory */
+       char       *rootdir;            /* Root directory, aka 
pg_upgrade_output.d */
+       char       *basedir;            /* Base output directory, with 
timestamp */
        char       *dumpdir;            /* Dumps */
        char       *logdir;                     /* Log files */
        bool            isatty;                 /* is stdout a tty */
@@ -547,6 +550,7 @@ void                report_status(eLogType type, const char 
*fmt,...) pg_attribute_printf(2, 3
 void           pg_log(eLogType type, const char *fmt,...) 
pg_attribute_printf(2, 3);
 void           pg_fatal(const char *fmt,...) pg_attribute_printf(1, 2) 
pg_attribute_noreturn();
 void           end_progress_output(void);
+void           cleanup_output_dirs(void);
 void           prep_status(const char *fmt,...) pg_attribute_printf(1, 2);
 void           prep_status_progress(const char *fmt,...) 
pg_attribute_printf(1, 2);
 unsigned int str2uint(const char *str);
diff --git a/src/bin/pg_upgrade/util.c b/src/bin/pg_upgrade/util.c
index a22f6fb735..af48453c6d 100644
--- a/src/bin/pg_upgrade/util.c
+++ b/src/bin/pg_upgrade/util.c
@@ -53,6 +53,48 @@ end_progress_output(void)
                pg_log(PG_REPORT, "%-*s", MESSAGE_WIDTH, "");
 }
 
+/*
+ * Remove any logs generated internally.  To be used once when exiting.
+ */
+void
+cleanup_output_dirs(void)
+{
+       fclose(log_opts.internal);
+
+       /* Remove dump and log files? */
+       if (log_opts.retain)
+               return;
+
+       (void) rmtree(log_opts.basedir, true);
+
+       /* Remove pg_upgrade_output.d only if empty */
+       switch (pg_check_dir(log_opts.rootdir))
+       {
+               case 0:                                 /* non-existent */
+               case 3:                                 /* exists and contains 
a mount point */
+                       Assert(false);
+                       break;
+
+               case 1:                                 /* exists and empty */
+               case 2:                                 /* exists and contains 
only dot files */
+                       (void) rmtree(log_opts.rootdir, true);
+                       break;
+
+               case 4:                                 /* exists */
+
+                       /*
+                        * Keep the root directory as this includes some past 
log
+                        * activity.
+                        */
+                       break;
+
+               default:
+                       /* different failure, just report it */
+                       pg_log(PG_WARNING, "could not access directory \"%s\": 
%m",
+                                  log_opts.rootdir);
+                       break;
+       }
+}
 
 /*
  * prep_status


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to