Hello,

Yep. That is the issue I think is preventable by fsyncing updated data
*then* writing & syncing the control file, and that should be done by
pg_checksums.

Well, pg_rewind works similarly: control file gets updated and then
the whole data directory gets flushed.

So it is basically prone to the same potential issue?

In my opinion, the take here is that we log something after the sync of the whole data folder is done, so as in the event of a crash an operator can make sure that everything has happened.

I do not understand. I'm basically only suggesting to reorder 3 lines and add an fsync so that this potential problem goes away, see attached poc (which does not compile because pg_fsync is in the backend only, however it works with fsync but on linux, I'm unsure of the portability, probably pg_fsync should be moved to port or something).

--
Fabien.
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index a7c39ac99a..1d7dd52ad0 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -454,17 +454,16 @@ main(int argc, char *argv[])
 		}
 	}
 
-	/*
-	 * Finally update the control file, flushing the data directory at the
-	 * end.
-	 */
+	/* Flush the data directory and update the control file */
 	if (mode == PG_MODE_ENABLE || mode == PG_MODE_DISABLE)
 	{
+		fsync_pgdata(DataDir, progname, PG_VERSION_NUM);
+
 		/* Update control file */
 		ControlFile->data_checksum_version =
 			(mode == PG_MODE_ENABLE) ? PG_DATA_CHECKSUM_VERSION : 0;
 		update_controlfile(DataDir, progname, ControlFile);
-		fsync_pgdata(DataDir, progname, PG_VERSION_NUM);
+
 		if (verbose)
 			printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version);
 		if (mode == PG_MODE_ENABLE)
diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
index 71e67a2eda..0f599826e0 100644
--- a/src/common/controldata_utils.c
+++ b/src/common/controldata_utils.c
@@ -145,8 +145,8 @@ get_controlfile(const char *DataDir, const char *progname, bool *crc_ok_p)
  *
  * Update controlfile values with the contents given by caller.  The
  * contents to write are included in "ControlFile".  Note that it is up
- * to the caller to fsync the updated file, and to properly lock
- * ControlFileLock when calling this routine in the backend.
+ * to the caller to properly lock ControlFileLock when calling this
+ * routine in the backend.
  */
 void
 update_controlfile(const char *DataDir, const char *progname,
@@ -216,6 +216,20 @@ update_controlfile(const char *DataDir, const char *progname,
 #endif
 	}
 
+	if (pg_fsync(fd) != 0)
+	{
+#ifndef FRONTEND
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m",
+						ControlFilePath)));
+#else
+		fprintf(stderr, _("%s: could not fsync \"%s\": %s\n"),
+				progname, ControlFilePath, strerror(errno));
+		exit(EXIT_FAILURE);
+#endif
+	}
+
 #ifndef FRONTEND
 	if (CloseTransientFile(fd))
 		ereport(PANIC,

Reply via email to