On Fri, Jan 29, 2016 at 9:13 PM, Michael Paquier wrote:
> Well, to put it short, I am just trying to find a way to make the
> backend skip unnecessary checkpoints on an idle system, which results
> in the following WAL pattern if system is completely idle:
> CHECKPOINT_ONLINE
> RUNNING_XACTS
> RUNNING_XACTS
> [etc..]
>
> The thing is that I am lost with the meaning of this condition to
> decide if a checkpoint should be skipped or not:
>         if (prevPtr == ControlFile->checkPointCopy.redo &&
>             prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
>         {
>             WALInsertLockRelease();
>             LWLockRelease(CheckpointLock);
>             return;
>         }
> As at least one standby snapshot is logged before the checkpoint
> record, the redo position is never going to match the previous insert
> LSN, so checkpoints will never be skipped if wal_level >= hot_standby.
> Skipping such unnecessary checkpoints is what you would like to
> address, no? Because that's what I would like to do here first. And
> once we got that right, we could think about addressing the case where
> WAL segments are forcibly archived for idle systems.

I have put a bit more of brain power into that, and finished with the
patch attached. A new field called chkpProgressLSN is attached to
XLogCtl, which is to the current insert position of the checkpoint
when wal_level <= archive, or the LSN position of the standby snapshot
taken after a checkpoint. The bgwriter code is modified as well so as
it uses this progress LSN and compares it with the current insert LSN
to determine if a standby snapshot should be logged or not. On an
instance of Postgres completely idle, no useless checkpoints, and no
useless standby snapshots are generated anymore.
-- 
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a2846c4..ccef3f0 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -527,6 +527,8 @@ typedef struct XLogCtlData
 	TransactionId ckptXid;
 	XLogRecPtr	asyncXactLSN;	/* LSN of newest async commit/abort */
 	XLogRecPtr	replicationSlotMinLSN;	/* oldest LSN needed by any slot */
+	XLogRecPtr	ckptProgressLSN;	/* LSN tracking progress of checkpoint
+									 * and standby snapshots */
 
 	XLogSegNo	lastRemovedSegNo;		/* latest removed/recycled XLOG
 										 * segment */
@@ -6314,6 +6316,7 @@ StartupXLOG(void)
 					 checkPoint.newestCommitTsXid);
 	XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 	XLogCtl->ckptXid = checkPoint.nextXid;
+	XLogCtl->ckptProgressLSN = checkPoint.redo;
 
 	/*
 	 * Initialize replication slots, before there's a chance to remove
@@ -7869,6 +7872,22 @@ GetFlushRecPtr(void)
 }
 
 /*
+ * GetProgressRecPtr -- Returns the reference position of last checkpoint,
+ * taking into account standby snapshots generated by checkpoints.
+ */
+XLogRecPtr
+GetProgressRecPtr(void)
+{
+	XLogRecPtr	progress_lsn;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	progress_lsn = XLogCtl->ckptProgressLSN;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return progress_lsn;
+}
+
+/*
  * Get the time of the last xlog segment switch
  */
 pg_time_t
@@ -8133,6 +8152,7 @@ CreateCheckPoint(int flags)
 	XLogRecPtr	PriorRedoPtr;
 	XLogRecPtr	curInsert;
 	XLogRecPtr	prevPtr;
+	XLogRecPtr	progress_lsn;
 	VirtualTransactionId *vxids;
 	int			nvxids;
 
@@ -8214,10 +8234,13 @@ CreateCheckPoint(int flags)
 
 	/*
 	 * We must block concurrent insertions while examining insert state to
-	 * determine the checkpoint REDO pointer.
+	 * determine the checkpoint REDO pointer. The progress LSN of this
+	 * to-be-created checkpoint is for now initialized as the current insert
+	 * position in WAL. This will get updated later on depending on if
+	 * a standby snapshot is logged.
 	 */
 	WALInsertLockAcquireExclusive();
-	curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
+	curInsert = progress_lsn = XLogBytePosToRecPtr(Insert->CurrBytePos);
 	prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos);
 
 	/*
@@ -8239,7 +8262,7 @@ CreateCheckPoint(int flags)
 	if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
 				  CHECKPOINT_FORCE)) == 0)
 	{
-		if (prevPtr == ControlFile->checkPointCopy.redo &&
+		if (GetProgressRecPtr() == prevPtr &&
 			prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
 		{
 			WALInsertLockRelease();
@@ -8406,9 +8429,15 @@ CreateCheckPoint(int flags)
 	 *
 	 * If we are shutting down, or Startup process is completing crash
 	 * recovery we don't need to write running xact data.
+	 *
+	 * progress_lsn is aimed at tracking the WAL progress that happens
+	 * since this checkpoint. If a standby snapshot is logged, its record
+	 * postion need to be taken into account in the progress LSN position
+	 * that is used to evaluate if checkpoints are necessary or standby
+	 * snapshots need to be logged, and skip them on idle systems.
 	 */
 	if (!shutdown && XLogStandbyInfoActive())
-		LogStandbySnapshot();
+		progress_lsn = LogStandbySnapshot();
 
 	START_CRIT_SECTION();
 
@@ -8478,10 +8507,15 @@ CreateCheckPoint(int flags)
 	UpdateControlFile();
 	LWLockRelease(ControlFileLock);
 
-	/* Update shared-memory copy of checkpoint XID/epoch */
+	/*
+	 * Update shared-memory copy of checkpoint XID/epoch and the LSN
+	 * record tracking progress of checkpoint and standby snapshot
+	 * activity.
+	 */
 	SpinLockAcquire(&XLogCtl->info_lck);
 	XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 	XLogCtl->ckptXid = checkPoint.nextXid;
+	XLogCtl->ckptProgressLSN = progress_lsn;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	/*
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 010b5fc..22374b0 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -78,12 +78,10 @@ int			BgWriterDelay = 200;
 #define LOG_SNAPSHOT_INTERVAL_MS 15000
 
 /*
- * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
- * doing so too often or repeatedly if there has been no other write activity
- * in the system.
+ * Timestamp at which we last issued a LogStandbySnapshot(), to avoid doing
+ * so too often.
  */
 static TimestampTz last_snapshot_ts;
-static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
 
 /*
  * Flags set by interrupt handlers for later service in the main loop.
@@ -301,8 +299,8 @@ BackgroundWriterMain(void)
 		 * check whether there has been any WAL inserted since the last time
 		 * we've logged a running xacts.
 		 *
-		 * We do this logging in the bgwriter as its the only process thats
-		 * run regularly and returns to its mainloop all the time. E.g.
+		 * We do this logging in the bgwriter as it is the only process that
+		 * is run regularly and returns to its mainloop all the time. E.g.
 		 * Checkpointer, when active, is barely ever in its mainloop and thus
 		 * makes it hard to log regularly.
 		 */
@@ -315,13 +313,12 @@ BackgroundWriterMain(void)
 												  LOG_SNAPSHOT_INTERVAL_MS);
 
 			/*
-			 * only log if enough time has passed and some xlog record has
-			 * been inserted.
+			 * only log if enough time has passed.
 			 */
 			if (now >= timeout &&
-				last_snapshot_lsn != GetXLogInsertRecPtr())
+				GetProgressRecPtr() != GetInsertRecPtr())
 			{
-				last_snapshot_lsn = LogStandbySnapshot();
+				(void) LogStandbySnapshot();
 				last_snapshot_ts = now;
 			}
 		}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index ecd30ce..41cb512 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -261,6 +261,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 extern XLogRecPtr GetRedoRecPtr(void);
 extern XLogRecPtr GetInsertRecPtr(void);
 extern XLogRecPtr GetFlushRecPtr(void);
+extern XLogRecPtr GetProgressRecPtr(void);
 extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
 extern void RemovePromoteSignalFiles(void);
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to