On Thu, Mar 28, 2019 at 03:53:59PM +0100, Fabien COELHO wrote: > I think that it is good to show the overall impact of the signal stuff, in > particular the fact that the size must always be computed if the progress > may be activated.
Getting to know the total size and the current size are the two important factors that matter when it comes to do progress reporting in my opinion. I have read the patch, and I am not really convinced by the need to show the progress report based on an interval of 250ms as we talk about an operation which could take dozens of minutes. So I have simplified the patch to only show a progress report every second. This also removes the include for the time-related APIs from portability/. A second thing is that I don't think that the speed is much useful. I would expect the speed to be steady, still there is a risk to show incorrect information if the speed of the operation is spiky or irregular leading to an incorrect estimation of the remaining time. In short, I would like to commit the first patch as attached, which is much more simple than what has been sent previously, still it provides the progress information which is useful. -- Michael
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml index 70339eaec9..29507ff98e 100644 --- a/doc/src/sgml/ref/pg_checksums.sgml +++ b/doc/src/sgml/ref/pg_checksums.sgml @@ -135,6 +135,17 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>-P</option></term> + <term><option>--progress</option></term> + <listitem> + <para> + Enable progress reporting. Turning this on will deliver a progress + report while checking or enabling checksums. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-V</option></term> <term><option>--version</option></term> diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 5c185ebed8..f9903a441c 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -15,6 +15,7 @@ #include "postgres_fe.h" #include <dirent.h> +#include <time.h> #include <sys/stat.h> #include <unistd.h> @@ -37,6 +38,7 @@ static ControlFileData *ControlFile; static char *only_relfilenode = NULL; static bool do_sync = true; static bool verbose = false; +static bool showprogress = false; typedef enum { @@ -59,6 +61,13 @@ static PgChecksumMode mode = PG_MODE_CHECK; static const char *progname; +/* + * Progress status information. + */ +int64 total_size = 0; +int64 current_size = 0; +static pg_time_t last_progress_report = 0; + static void usage(void) { @@ -71,6 +80,7 @@ usage(void) printf(_(" -d, --disable disable data checksums\n")); printf(_(" -e, --enable enable data checksums\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" -P, --progress show progress information\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -r RELFILENODE check only relation with specified relfilenode\n")); printf(_(" -V, --version output version information, then exit\n")); @@ -97,6 +107,52 @@ static const char *const skip[] = { NULL, }; +/* + * Report current progress status. Parts borrowed from + * PostgreSQLs' src/bin/pg_basebackup.c + */ +static void +progress_report(bool force) +{ + int percent; + char total_size_str[32]; + char current_size_str[32]; + pg_time_t now; + + if (!showprogress) + return; + + now = time(NULL); + if (now == last_progress_report && !force) + return; /* Max once per second */ + + /* Save current time */ + last_progress_report = now; + + /* Adjust total size if current_size is larger */ + if (current_size > total_size) + total_size = current_size; + + /* Calculate current percentage of size done */ + percent = total_size ? (int) ((current_size) * 100 / total_size) : 0; + + snprintf(total_size_str, sizeof(total_size_str), INT64_FORMAT, + total_size / (1024 * 1024)); + snprintf(current_size_str, sizeof(current_size_str), INT64_FORMAT, + current_size / (1024 * 1024)); + + /* + * Separate step to keep platform-dependent format code out of + * translatable strings. And we only test for INT64_FORMAT availability + * in snprintf, not fprintf. + */ + fprintf(stderr, "%*s/%s MB (%d%%) done", + (int) strlen(current_size_str), current_size_str, total_size_str, + percent); + + fprintf(stderr, "\r"); +} + static bool skipfile(const char *fn) { @@ -153,6 +209,7 @@ scan_file(const char *fn, BlockNumber segmentno) continue; csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE); + current_size += r; if (mode == PG_MODE_CHECK) { if (csum != header->pd_checksum) @@ -183,6 +240,8 @@ scan_file(const char *fn, BlockNumber segmentno) exit(1); } } + + progress_report(false); } if (verbose) @@ -195,12 +254,23 @@ scan_file(const char *fn, BlockNumber segmentno) _("%s: checksums enabled in file \"%s\"\n"), progname, fn); } + /* Make sure progress is reported at least once per file */ + progress_report(false); + close(f); } -static void -scan_directory(const char *basedir, const char *subdir) +/* + * Scan the given directory for items which can be checksummed and + * operate on each one of them. If "sizeonly" is true, the size of + * all the items which have checksums is computed and returned back + * to the caller without operating on the files. This is used to compile + * the total size of the data directory for progress reports. + */ +static int64 +scan_directory(const char *basedir, const char *subdir, bool sizeonly) { + int64 dirsize = 0; char path[MAXPGPATH]; DIR *dir; struct dirent *de; @@ -279,16 +349,24 @@ scan_directory(const char *basedir, const char *subdir) /* Relfilenode not to be included */ continue; - scan_file(fn, segmentno); + dirsize += st.st_size; + + /* + * No need to work on the file when calculating only the size + * of the items in the data folder. + */ + if (!sizeonly) + scan_file(fn, segmentno); } #ifndef WIN32 else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) #else else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn)) #endif - scan_directory(path, de->d_name); + dirsize += scan_directory(path, de->d_name, sizeonly); } closedir(dir); + return dirsize; } int @@ -300,6 +378,7 @@ main(int argc, char *argv[]) {"disable", no_argument, NULL, 'd'}, {"enable", no_argument, NULL, 'e'}, {"no-sync", no_argument, NULL, 'N'}, + {"progress", no_argument, NULL, 'P'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; @@ -327,7 +406,7 @@ main(int argc, char *argv[]) } } - while ((c = getopt_long(argc, argv, "cD:deNr:v", long_options, &option_index)) != -1) + while ((c = getopt_long(argc, argv, "cD:deNPr:v", long_options, &option_index)) != -1) { switch (c) { @@ -357,6 +436,9 @@ main(int argc, char *argv[]) } only_relfilenode = pstrdup(optarg); break; + case 'P': + showprogress = true; + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -436,6 +518,18 @@ main(int argc, char *argv[]) exit(1); } + /* + * If progress status information is requested, we need to scan the + * directory tree twice: once to know how much total data needs to + * be processed and once to do the real work. + */ + if (showprogress) + { + total_size = scan_directory(DataDir, "global", true); + total_size += scan_directory(DataDir, "base", true); + total_size += scan_directory(DataDir, "pg_tblspc", true); + } + if (ControlFile->data_checksum_version == 0 && mode == PG_MODE_DISABLE) { @@ -453,9 +547,15 @@ main(int argc, char *argv[]) /* Operate on all files if checking or enabling checksums */ if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE) { - scan_directory(DataDir, "global"); - scan_directory(DataDir, "base"); - scan_directory(DataDir, "pg_tblspc"); + (void) scan_directory(DataDir, "global", false); + (void) scan_directory(DataDir, "base", false); + (void) scan_directory(DataDir, "pg_tblspc", false); + + if (showprogress) + { + progress_report(true); + fprintf(stderr, "\n"); /* Need to move to next line */ + } printf(_("Checksum operation completed\n")); printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files));
signature.asc
Description: PGP signature