On Thu, Nov 5, 2015 at 3:00 PM, Michael Paquier
<[email protected]> wrote:
> On Wed, Nov 4, 2015 at 7:33 PM, Andres Freund wrote:
>> As soon as a single checkpoint ever happened the early-return logic in
>> CreateCheckPoint() will fail to take the LogStandbySnapshot() in
>> CreateCheckPoint() into account. The test is:
>> if (curInsert == ControlFile->checkPoint +
>> MAXALIGN(SizeOfXLogRecord + sizeof(CheckPoint)) &&
>> ControlFile->checkPoint == ControlFile->checkPointCopy.redo)
>> which obviously doesn't work if there's been a WAL record logged after
>> the redo pointer has been determined etc.
>
> Yes. If segment switches are enforced at a pace faster than
> checkpoint_timeout, this check considers that a checkpoint needs to
> happen because a SWITCH_XLOG record is in-between. I am a bit
> surprised that this should happen actually. The segment switch
> triggers a checkpoint record, and vice-versa, even for idle systems.
> Shouldn't we make this check a bit smarter then?
Ah, the documentation clearly explains that setting archive_timeout
will enforce a segment switch if any activity occurred, including a
checkpoint:
http://www.postgresql.org/docs/devel/static/runtime-config-wal.html#RUNTIME-CONFIG-WAL-ARCHIVING
I missed that, sorry.
I have as well thought a bit about adding a space-related constraint
on the standby snapshot generated by the bgwriter, so as to not rely
entirely on the interval of 15s. I finished with the attached that
uses a check based on CheckPointSegments / 8 to be sure that at least
this number of segments has been generated since the last checkpoint
before logging a new snapshot. I guess that's less brittle than the
last patch. Thoughts?
--
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 08d1682..3f410b7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -526,6 +526,8 @@ typedef struct XLogCtlData
XLogwrtRqst LogwrtRqst;
XLogRecPtr RedoRecPtr; /* a recent copy of Insert->RedoRecPtr */
uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */
+ XLogRecPtr ckptPtrRec; /* LSN position of latest checkpoint
+ * record */
TransactionId ckptXid;
XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */
XLogRecPtr replicationSlotMinLSN; /* oldest LSN needed by any slot */
@@ -6316,6 +6318,7 @@ StartupXLOG(void)
checkPoint.newestCommitTs);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
+ XLogCtl->ckptPtrRec = checkPointLoc;
/*
* Initialize replication slots, before there's a chance to remove
@@ -8476,10 +8479,11 @@ CreateCheckPoint(int flags)
UpdateControlFile();
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/epoch/LSN */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
+ XLogCtl->ckptPtrRec = recptr;
SpinLockRelease(&XLogCtl->info_lck);
/*
@@ -9278,6 +9282,7 @@ xlog_redo(XLogReaderState *record)
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
+ XLogCtl->ckptPtrRec = lsn;
SpinLockRelease(&XLogCtl->info_lck);
/*
@@ -9328,6 +9333,7 @@ xlog_redo(XLogReaderState *record)
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
+ XLogCtl->ckptPtrRec = lsn;
SpinLockRelease(&XLogCtl->info_lck);
/* TLI should not change in an on-line checkpoint */
@@ -10623,6 +10629,21 @@ GetXLogWriteRecPtr(void)
}
/*
+ * Get latest WAL checkpoint pointer
+ */
+XLogRecPtr
+GetXLogCheckpointRecPtr(void)
+{
+ XLogRecPtr chkptr;
+
+ SpinLockAcquire(&XLogCtl->info_lck);
+ chkptr = XLogCtl->ckptPtrRec;
+ SpinLockRelease(&XLogCtl->info_lck);
+
+ return chkptr;
+}
+
+/*
* Returns the redo pointer of the last checkpoint or restartpoint. This is
* the oldest point in WAL that we still need, if we have to restart recovery.
*/
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 65465d6..0706871 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -78,6 +78,12 @@ int BgWriterDelay = 200;
#define LOG_SNAPSHOT_INTERVAL_MS 15000
/*
+ * Minimal number of WAL segments that need to be generated before logging
+ * a standby snapshot.
+ */
+#define LOG_SNAPSHOT_MIN_SEGS (CheckPointSegments / 8)
+
+/*
* LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
* doing so too often or repeatedly if there has been no other write activity
* in the system.
@@ -310,16 +316,19 @@ BackgroundWriterMain(void)
{
TimestampTz timeout = 0;
TimestampTz now = GetCurrentTimestamp();
+ XLogRecPtr checkpoint_lsn = GetXLogCheckpointRecPtr();
+ XLogRecPtr insert_lsn = GetXLogInsertRecPtr();
timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
LOG_SNAPSHOT_INTERVAL_MS);
/*
- * only log if enough time has passed and some xlog record has
- * been inserted.
+ * Only log if enough time has passed and that enough progress
+ * has been done since last checkpoint.
*/
if (now >= timeout &&
- last_snapshot_lsn != GetXLogInsertRecPtr())
+ last_snapshot_lsn != insert_lsn &&
+ (insert_lsn - checkpoint_lsn) / XLogSegSize > LOG_SNAPSHOT_MIN_SEGS)
{
last_snapshot_lsn = LogStandbySnapshot();
last_snapshot_ts = now;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 790ca66..2d063e5 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -235,6 +235,7 @@ extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
extern XLogRecPtr GetXLogInsertRecPtr(void);
extern XLogRecPtr GetXLogWriteRecPtr(void);
+extern XLogRecPtr GetXLogCheckpointRecPtr(void);
extern bool RecoveryIsPaused(void);
extern void SetRecoveryPause(bool recoveryPause);
extern TimestampTz GetLatestXTime(void);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers