Hi,

This is the second improvement proposed in the thread [1] about the ext4 data loss issue. It adds another field to the control file, tracking the last known WAL segment. This does not eliminate the data loss, just the silent part of it when the last segment gets lost (due to forgetting the rename, deleting it by mistake, or for any other reason). The patch makes sure the cluster refuses to start if that happens.


[1] http://www.postgresql.org/message-id/56583bdd.9060...@2ndquadrant.com

It's a fairly simple patch, but obviously it touches a very complex part of the code. I'll add it to the 2016-01 CF.


regards

--
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 71fc8ff..50f10a5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2222,6 +2222,16 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 			use_existent = true;
 			openLogFile = XLogFileInit(openLogSegNo, &use_existent, true);
 			openLogOff = 0;
+
+			/* update the last known segment in the control file */
+			LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+			if (ControlFile->lastKnownSegment != openLogSegNo)
+			{
+				elog(WARNING, "updating segment number = %lu", openLogSegNo);
+				ControlFile->lastKnownSegment = openLogSegNo;
+				UpdateControlFile();
+			}
+			LWLockRelease(ControlFileLock);
 		}
 
 		/* Make sure we have the current logfile open */
@@ -5904,6 +5914,7 @@ StartupXLOG(void)
 	XLogPageReadPrivate private;
 	bool		fast_promoted = false;
 	struct stat st;
+	XLogSegNo	lastLogSegNo = 0;
 
 	/*
 	 * Read control file and check XLOG status looks valid.
@@ -6865,6 +6876,9 @@ StartupXLOG(void)
 				/* Remember this record as the last-applied one */
 				LastRec = ReadRecPtr;
 
+				/* Also remember the segment number */
+				XLByteToSeg(ReadRecPtr, lastLogSegNo);
+
 				/* Allow read-only connections if we're consistent now */
 				CheckRecoveryConsistency();
 
@@ -6942,6 +6956,18 @@ StartupXLOG(void)
 					RmgrTable[rmid].rm_cleanup();
 			}
 
+			/*
+			 * Check that we've actually seen all the XLOG segments, i.e. that
+			 * we've reached ControlFile->lastKnownSegment (this may fail for
+			 * example when someone deletes the last XLOG segment, or in case
+			 * of a filesystem issue).
+			 */
+			if (ControlFile->lastKnownSegment != lastLogSegNo)
+				ereport(FATAL,
+						(errmsg("not reached the last known segment (expected %lX/%lX seen %lX/%lX)",
+								(ControlFile->lastKnownSegment >> 8), (ControlFile->lastKnownSegment & 0xFF),
+								(lastLogSegNo >> 8), (lastLogSegNo & 0xFF))));
+
 			ereport(LOG,
 					(errmsg("redo done at %X/%X",
 						 (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr)));
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 32e1d81..44dde42 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -293,6 +293,9 @@ main(int argc, char *argv[])
 		   (uint32) ControlFile.backupEndPoint);
 	printf(_("End-of-backup record required:        %s\n"),
 		   ControlFile.backupEndRequired ? _("yes") : _("no"));
+	printf(_("Last known segment:                   %lX/%X\n"),
+		   (uint64) (ControlFile.lastKnownSegment >> 8),
+		   (uint32) (ControlFile.lastKnownSegment & 0xFF));
 	printf(_("wal_level setting:                    %s\n"),
 		   wal_level_str(ControlFile.wal_level));
 	printf(_("wal_log_hints setting:                %s\n"),
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index ad1eb4b..f0ba450 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -164,12 +164,18 @@ typedef struct ControlFileData
 	 * start up. If it's false, but backupStartPoint is set, a backup_label
 	 * file was found at startup but it may have been a leftover from a stray
 	 * pg_start_backup() call, not accompanied by pg_stop_backup().
+	 *
+	 * lastKnownSegment is the segment sequence number of the last known XLOG
+	 * segment. This is useful to check that the recovery actually processed
+	 * all segments allocated before the crash (serves as a protection against
+	 * accidentally deleted segments etc.)
 	 */
 	XLogRecPtr	minRecoveryPoint;
 	TimeLineID	minRecoveryPointTLI;
 	XLogRecPtr	backupStartPoint;
 	XLogRecPtr	backupEndPoint;
 	bool		backupEndRequired;
+	XLogSegNo	lastKnownSegment;
 
 	/*
 	 * Parameter settings that determine if the WAL can be used for archival
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to