Hi all,

While looking at pg_rewind code, I have been surprised to find that the
final fsync done on the target's data folder is done using initdb -S via
a system() call.  This is in my opinion overcomplicated because we have
a dedicated API in fe_utils able to do a fsync on a data folder, called
fsync_pgdata() that I have implemented when working on data durability
for the other backup tools.  So I would like to simplify the code as
attached.

One difference that this patch introduces is that a failed sync is not
considered as a failure, still failures are reported to stderr.  This
new behavior is actually more consistent with what happens in pg_dump
and pg_basebackup.  And we have decided previously to do so, see here
for more details on the discussion:
https://www.postgresql.org/message-id/CAB7nPqQ_B0j3n1t%3D8c1ZLHXF1b8Tf4XsXoUC9bP9t5Hab--SMg%40mail.gmail.com

An extra thing I have noticed, which is I think an oversight, is that
there is no --no-sync option in pg_rewind.  Like the other binaries,
this is useful to reduce the I/O effort when running tests.

Both things are implemented as attached.  I am of course not pushing for
integrating that patch in v11 even if it is straight-forward, so I'll
park it in the next future commit fest.

Thanks,
--
Michael
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 8e49249826..6f32507466 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -151,6 +151,22 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-N</option></term>
+      <term><option>--no-sync</option></term>
+      <listitem>
+       <para>
+        By default, <command>pg_rewind</command> will wait for all files
+        to be written safely to disk.  This option causes
+        <command>pg_rewind</command> to return without waiting, which is
+        faster, but means that a subsequent operating system crash can leave
+        the synchronized data folder corrupt.  Generally, this option is
+        useful for testing but should not be used when creating a production
+        installation.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-P</option></term>
       <term><option>--progress</option></term>
diff --git a/src/bin/pg_rewind/RewindTest.pm b/src/bin/pg_rewind/RewindTest.pm
index 00b5b42dd7..7811da2d98 100644
--- a/src/bin/pg_rewind/RewindTest.pm
+++ b/src/bin/pg_rewind/RewindTest.pm
@@ -210,6 +210,7 @@ sub run_pg_rewind
 		$node_standby->stop;
 		command_ok(
 			[   'pg_rewind',
+				"--no-sync",
 				"--debug",
 				"--source-pgdata=$standby_pgdata",
 				"--target-pgdata=$master_pgdata" ],
@@ -222,7 +223,7 @@ sub run_pg_rewind
 		command_ok(
 			[   'pg_rewind',       "--debug",
 				"--source-server", $standby_connstr,
-				"--target-pgdata=$master_pgdata" ],
+				"--no-sync",       "--target-pgdata=$master_pgdata" ],
 			'pg_rewind remote');
 	}
 	else
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 72ab2f8d5e..5e3c7ce11f 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -24,6 +24,7 @@
 #include "access/xlog_internal.h"
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
+#include "common/file_utils.h"
 #include "common/restricted_token.h"
 #include "getopt_long.h"
 #include "storage/bufpage.h"
@@ -54,6 +55,7 @@ char	   *connstr_source = NULL;
 bool		debug = false;
 bool		showprogress = false;
 bool		dry_run = false;
+bool		do_sync = true;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -69,6 +71,7 @@ usage(const char *progname)
 	printf(_("      --source-pgdata=DIRECTORY  source data directory to synchronize with\n"));
 	printf(_("      --source-server=CONNSTR    source server to synchronize with\n"));
 	printf(_("  -n, --dry-run                  stop before modifying anything\n"));
+	printf(_("  -N, --no-sync                  do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress                 write progress messages\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
@@ -87,6 +90,7 @@ main(int argc, char **argv)
 		{"source-server", required_argument, NULL, 2},
 		{"version", no_argument, NULL, 'V'},
 		{"dry-run", no_argument, NULL, 'n'},
+		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
 		{NULL, 0, NULL, 0}
@@ -123,7 +127,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "D:nP", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "D:nNP", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -139,6 +143,10 @@ main(int argc, char **argv)
 				dry_run = true;
 				break;
 
+			case 'N':
+				do_sync = false;
+				break;
+
 			case 3:
 				debug = true;
 				break;
@@ -687,50 +695,13 @@ updateControlFile(ControlFileData *ControlFile)
  *
  * We do this once, for the whole data directory, for performance reasons.  At
  * the end of pg_rewind's run, the kernel is likely to already have flushed
- * most dirty buffers to disk. Additionally initdb -S uses a two-pass approach
- * (only initiating writeback in the first pass), which often reduces the
- * overall amount of IO noticeably.
+ * most dirty buffers to disk.
  */
 static void
 syncTargetDirectory(const char *argv0)
 {
-	int			ret;
-#define MAXCMDLEN (2 * MAXPGPATH)
-	char		exec_path[MAXPGPATH];
-	char		cmd[MAXCMDLEN];
-
-	/* locate initdb binary */
-	if ((ret = find_other_exec(argv0, "initdb",
-							   "initdb (PostgreSQL) " PG_VERSION "\n",
-							   exec_path)) < 0)
-	{
-		char		full_path[MAXPGPATH];
-
-		if (find_my_exec(argv0, full_path) < 0)
-			strlcpy(full_path, progname, sizeof(full_path));
-
-		if (ret == -1)
-			pg_fatal("The program \"initdb\" is needed by %s but was\n"
-					 "not found in the same directory as \"%s\".\n"
-					 "Check your installation.\n", progname, full_path);
-		else
-			pg_fatal("The program \"initdb\" was found by \"%s\"\n"
-					 "but was not the same version as %s.\n"
-					 "Check your installation.\n", full_path, progname);
-	}
-
-	/* only skip processing after ensuring presence of initdb */
-	if (dry_run)
+	if (!do_sync || dry_run)
 		return;
 
-	/* finally run initdb -S */
-	if (debug)
-		snprintf(cmd, MAXCMDLEN, "\"%s\" -D \"%s\" -S",
-				 exec_path, datadir_target);
-	else
-		snprintf(cmd, MAXCMDLEN, "\"%s\" -D \"%s\" -S > \"%s\"",
-				 exec_path, datadir_target, DEVNULL);
-
-	if (system(cmd) != 0)
-		pg_fatal("sync of target directory failed\n");
+	fsync_pgdata(datadir_target, progname, PG_VERSION_NUM);
 }

Attachment: signature.asc
Description: PGP signature

Reply via email to