On Thu, Nov 5, 2015 at 3:00 PM, Michael Paquier
<michael.paqu...@gmail.com> wrote:
> On Wed, Nov 4, 2015 at 7:33 PM, Andres Freund wrote:
>> As soon as a single checkpoint ever happened the early-return logic in
>> CreateCheckPoint() will fail to take the LogStandbySnapshot() in
>> CreateCheckPoint() into account. The test is:
>>     if (curInsert == ControlFile->checkPoint +
>>         MAXALIGN(SizeOfXLogRecord + sizeof(CheckPoint)) &&
>>         ControlFile->checkPoint == ControlFile->checkPointCopy.redo)
>> which obviously doesn't work if there's been a WAL record logged after
>> the redo pointer has been determined etc.
>
> Yes. If segment switches are enforced at a pace faster than
> checkpoint_timeout, this check considers that a checkpoint needs to
> happen because a SWITCH_XLOG record is in-between. I am a bit
> surprised that this should happen actually. The segment switch
> triggers a checkpoint record, and vice-versa, even for idle systems.
> Shouldn't we make this check a bit smarter then?

Ah, the documentation clearly explains that setting archive_timeout
will enforce a segment switch if any activity occurred, including a
checkpoint:
http://www.postgresql.org/docs/devel/static/runtime-config-wal.html#RUNTIME-CONFIG-WAL-ARCHIVING
I missed that, sorry.

I have as well thought a bit about adding a space-related constraint
on the standby snapshot generated by the bgwriter, so as to not rely
entirely on the interval of 15s. I finished with the attached that
uses a check based on CheckPointSegments / 8 to be sure that at least
this number of segments has been generated since the last checkpoint
before logging a new snapshot. I guess that's less brittle than the
last patch. Thoughts?
-- 
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 08d1682..3f410b7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -526,6 +526,8 @@ typedef struct XLogCtlData
 	XLogwrtRqst LogwrtRqst;
 	XLogRecPtr	RedoRecPtr;		/* a recent copy of Insert->RedoRecPtr */
 	uint32		ckptXidEpoch;	/* nextXID & epoch of latest checkpoint */
+	XLogRecPtr	ckptPtrRec;		/* LSN position of latest checkpoint
+								 * record */
 	TransactionId ckptXid;
 	XLogRecPtr	asyncXactLSN;	/* LSN of newest async commit/abort */
 	XLogRecPtr	replicationSlotMinLSN;	/* oldest LSN needed by any slot */
@@ -6316,6 +6318,7 @@ StartupXLOG(void)
 					 checkPoint.newestCommitTs);
 	XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 	XLogCtl->ckptXid = checkPoint.nextXid;
+	XLogCtl->ckptPtrRec = checkPointLoc;
 
 	/*
 	 * Initialize replication slots, before there's a chance to remove
@@ -8476,10 +8479,11 @@ CreateCheckPoint(int flags)
 	UpdateControlFile();
 	LWLockRelease(ControlFileLock);
 
-	/* Update shared-memory copy of checkpoint XID/epoch */
+	/* Update shared-memory copy of checkpoint XID/epoch/LSN */
 	SpinLockAcquire(&XLogCtl->info_lck);
 	XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 	XLogCtl->ckptXid = checkPoint.nextXid;
+	XLogCtl->ckptPtrRec = recptr;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	/*
@@ -9278,6 +9282,7 @@ xlog_redo(XLogReaderState *record)
 		SpinLockAcquire(&XLogCtl->info_lck);
 		XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 		XLogCtl->ckptXid = checkPoint.nextXid;
+		XLogCtl->ckptPtrRec = lsn;
 		SpinLockRelease(&XLogCtl->info_lck);
 
 		/*
@@ -9328,6 +9333,7 @@ xlog_redo(XLogReaderState *record)
 		SpinLockAcquire(&XLogCtl->info_lck);
 		XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 		XLogCtl->ckptXid = checkPoint.nextXid;
+		XLogCtl->ckptPtrRec = lsn;
 		SpinLockRelease(&XLogCtl->info_lck);
 
 		/* TLI should not change in an on-line checkpoint */
@@ -10623,6 +10629,21 @@ GetXLogWriteRecPtr(void)
 }
 
 /*
+ * Get latest WAL checkpoint pointer
+ */
+XLogRecPtr
+GetXLogCheckpointRecPtr(void)
+{
+	XLogRecPtr	chkptr;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	chkptr = XLogCtl->ckptPtrRec;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return chkptr;
+}
+
+/*
  * Returns the redo pointer of the last checkpoint or restartpoint. This is
  * the oldest point in WAL that we still need, if we have to restart recovery.
  */
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 65465d6..0706871 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -78,6 +78,12 @@ int			BgWriterDelay = 200;
 #define LOG_SNAPSHOT_INTERVAL_MS 15000
 
 /*
+ * Minimal number of WAL segments that need to be generated before logging
+ * a standby snapshot.
+ */
+#define LOG_SNAPSHOT_MIN_SEGS		(CheckPointSegments / 8)
+
+/*
  * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
  * doing so too often or repeatedly if there has been no other write activity
  * in the system.
@@ -310,16 +316,19 @@ BackgroundWriterMain(void)
 		{
 			TimestampTz timeout = 0;
 			TimestampTz now = GetCurrentTimestamp();
+			XLogRecPtr	checkpoint_lsn = GetXLogCheckpointRecPtr();
+			XLogRecPtr	insert_lsn = GetXLogInsertRecPtr();
 
 			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
 												  LOG_SNAPSHOT_INTERVAL_MS);
 
 			/*
-			 * only log if enough time has passed and some xlog record has
-			 * been inserted.
+			 * Only log if enough time has passed and that enough progress
+			 * has been done since last checkpoint.
 			 */
 			if (now >= timeout &&
-				last_snapshot_lsn != GetXLogInsertRecPtr())
+				last_snapshot_lsn != insert_lsn &&
+				(insert_lsn - checkpoint_lsn) / XLogSegSize > LOG_SNAPSHOT_MIN_SEGS)
 			{
 				last_snapshot_lsn = LogStandbySnapshot();
 				last_snapshot_ts = now;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 790ca66..2d063e5 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -235,6 +235,7 @@ extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
 extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
 extern XLogRecPtr GetXLogInsertRecPtr(void);
 extern XLogRecPtr GetXLogWriteRecPtr(void);
+extern XLogRecPtr GetXLogCheckpointRecPtr(void);
 extern bool RecoveryIsPaused(void);
 extern void SetRecoveryPause(bool recoveryPause);
 extern TimestampTz GetLatestXTime(void);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to