On Fri, Mar 24, 2017 at 5:08 AM, Teodor Sigaev <teo...@sigaev.ru> wrote:
> Hmm, it doesn't work (but appplies) on current HEAD:
> [...]
> Data page checksums are disabled.
>
> fixing permissions on existing directory /spool/pg_data ... ok
> creating subdirectories ... ok
> selecting default max_connections ... 100
> selecting default shared_buffers ... 128MB
> selecting dynamic shared memory implementation ... posix
> creating configuration files ... ok
> running bootstrap script ... 2017-03-23 23:07:14.705 MSK [40286] FATAL:
> could not open file "pg_clog": No such file or directory
> child process exited with exit code 1
> initdb: data directory "/spool/pg_data" not removed at user's request

And the renaming of pg_clog to pg_xact is also my fault. Attached is
an updated patch.
-- 
Michael
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 2d33510930..7a007a6ba5 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -577,6 +577,13 @@ ShutdownCLOG(void)
 	/* Flush dirty CLOG pages to disk */
 	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
 	SimpleLruFlush(ClogCtl, false);
+
+	/*
+	 * fsync pg_xact to ensure that any files flushed previously are durably
+	 * on disk.
+	 */
+	fsync_fname("pg_xact", true);
+
 	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
 }
 
@@ -589,6 +596,13 @@ CheckPointCLOG(void)
 	/* Flush dirty CLOG pages to disk */
 	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
 	SimpleLruFlush(ClogCtl, true);
+
+	/*
+	 * fsync pg_xact to ensure that any files flushed previously are durably
+	 * on disk.
+	 */
+	fsync_fname("pg_xact", true);
+
 	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
 }
 
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 8e1df6e0ea..03ffa20908 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -746,6 +746,12 @@ ShutdownCommitTs(void)
 {
 	/* Flush dirty CommitTs pages to disk */
 	SimpleLruFlush(CommitTsCtl, false);
+
+	/*
+	 * fsync pg_commit_ts to ensure that any files flushed previously are durably
+	 * on disk.
+	 */
+	fsync_fname("pg_commit_ts", true);
 }
 
 /*
@@ -756,6 +762,12 @@ CheckPointCommitTs(void)
 {
 	/* Flush dirty CommitTs pages to disk */
 	SimpleLruFlush(CommitTsCtl, true);
+
+	/*
+	 * fsync pg_commit_ts to ensure that any files flushed previously are durably
+	 * on disk.
+	 */
+	fsync_fname("pg_commit_ts", true);
 }
 
 /*
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 4b4999fd7b..83169cccc3 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
 	}
 	LWLockRelease(TwoPhaseStateLock);
 
+	/*
+	 * Flush unconditionally the parent directory to make any information
+	 * durable on disk.  Two-phase files could have been removed and those
+	 * removals need to be made persistent as well as any files newly created
+	 * previously since the last checkpoint.
+	 */
+	fsync_fname(TWOPHASE_DIR, true);
+
 	TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
 
 	if (log_checkpoints && serialized_xacts > 0)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b99ded5df6..11b1929c94 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3469,7 +3469,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
 	if (!find_free)
 	{
 		/* Force installation: get rid of any pre-existing segment file */
-		unlink(path);
+		durable_unlink(path, DEBUG1);
 	}
 	else
 	{
@@ -4020,16 +4020,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
 					  path)));
 			return;
 		}
-		rc = unlink(newpath);
+		rc = durable_unlink(newpath, LOG);
 #else
-		rc = unlink(path);
+		rc = durable_unlink(path, LOG);
 #endif
 		if (rc != 0)
 		{
-			ereport(LOG,
-					(errcode_for_file_access(),
-			   errmsg("could not remove old transaction log file \"%s\": %m",
-					  path)));
+			/* Message already logged by durable_unlink() */
 			return;
 		}
 		CheckpointStats.ckpt_segs_removed++;
@@ -10752,17 +10749,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 						(errcode_for_file_access(),
 						 errmsg("could not read file \"%s\": %m",
 								BACKUP_LABEL_FILE)));
-			if (unlink(BACKUP_LABEL_FILE) != 0)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not remove file \"%s\": %m",
-								BACKUP_LABEL_FILE)));
+			durable_unlink(BACKUP_LABEL_FILE, ERROR);
 
 			/*
 			 * Remove tablespace_map file if present, it is created only if there
 			 * are tablespaces.
 			 */
-			unlink(TABLESPACE_MAP);
+			durable_unlink(TABLESPACE_MAP, DEBUG1);
 		}
 		PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
 	}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f0ed2e9b5f..b14979496c 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -658,6 +658,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel)
 }
 
 /*
+ * durable_unlink -- remove a file in a durable manner
+ *
+ * This routine ensures that, after returning, the effect of removing file
+ * persists in case of a crash. A crash while this routine is running will
+ * leave the system in no mixed state.
+ *
+ * It does so by using fsync on the parent directory of the file after the
+ * actual removal is done.
+ *
+ * Log errors with the severity specified by caller.
+ *
+ * Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not
+ * valid upon return.
+ */
+int
+durable_unlink(const char *fname, int elevel)
+{
+	if (unlink(fname) < 0)
+	{
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not remove file \"%s\": %m",
+						fname)));
+		return -1;
+	}
+
+	/*
+	 * To guarantee that the removal of the file is persistent, fsync
+	 * its parent directory.
+	 */
+	if (fsync_parent_path(fname, elevel) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
  * durable_link_or_rename -- rename a file in a durable manner.
  *
  * Similar to durable_rename(), except that this routine tries (but does not
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index ac37502928..05680499e4 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -119,6 +119,7 @@ extern int	pg_fdatasync(int fd);
 extern void pg_flush_data(int fd, off_t offset, off_t amount);
 extern void fsync_fname(const char *fname, bool isdir);
 extern int	durable_rename(const char *oldfile, const char *newfile, int loglevel);
+extern int	durable_unlink(const char *fname, int loglevel);
 extern int	durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
 extern void SyncDataDirectory(void);
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to