On 29 December 2016 at 16:51, Craig Ringer <cr...@2ndquadrant.com> wrote:
> On 28 December 2016 at 22:16, Petr Jelinek <petr.jeli...@2ndquadrant.com> 
> wrote:
>
>> About the patch, it looks good to me for master with the minor exception
>> that:
>>> +             appendStringInfo(buf, "pageno %d, xid %u",
>>> +                     trunc.pageno, trunc.oldestXid);
>>
>> This should probably say oldestXid instead of xid in the text description.
>
> Agreed.

Slightly amended attached.

-- 
 Craig Ringer                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
From e1dd1a711d65425df71db63d06ce6b6b934f12a8 Mon Sep 17 00:00:00 2001
From: Craig Ringer <cr...@2ndquadrant.com>
Date: Wed, 28 Dec 2016 21:23:59 +0800
Subject: [PATCH] Fix a minor race between commit_ts slru truncation and
 lookups

vac_truncate_clog was truncating the commit timestamp SLRU before
it advanced the oldest xid limit for commit timestamp lookups. This
created a small race window where a concurrent pg_xact_commit_timestamp
calling TransactionIdGetCommitTsData(...) could check the oldest
xid after the SLRU page containing it is truncated away but before
the threshold is updated.

Fix this by advancing the lower bound before removing the relevant SLRU pages.

A larger race window also existed on a standby. The lower bound for commit
timetamp validity was only advanced on a standby when we replayed a checkpoint.
This could happen a long time after the commit timestamp SLRU truncation.

This race only affects XIDs that vac_truncate_clog has determined are no longer
referenced in the system, so the minimum datfrozenxid has advanced past them.
It's never going to be triggered by code that looks up commit timestamps of
rows it's just read during an in-progress transaction. Also, the worst outcome
should the race be triggered is an I/O error from the SLRU code (where the
commit ts lookup should more correctly return null). So this race is pretty
harmless.

The nearly identical clog code has the same race, but it doesn't matter there
since there's no way for users to pass arbitrary XIDs to look them up in clog.
---
 src/backend/access/rmgrdesc/committsdesc.c |  8 +++++---
 src/backend/access/transam/commit_ts.c     | 23 +++++++++++++++--------
 src/backend/commands/vacuum.c              |  3 ++-
 src/include/access/commit_ts.h             |  8 ++++++++
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c
index 527e5dc..34553d5 100644
--- a/src/backend/access/rmgrdesc/committsdesc.c
+++ b/src/backend/access/rmgrdesc/committsdesc.c
@@ -33,10 +33,12 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record)
 	}
 	else if (info == COMMIT_TS_TRUNCATE)
 	{
-		int			pageno;
+		xl_commit_ts_truncate trunc;
 
-		memcpy(&pageno, rec, sizeof(int));
-		appendStringInfo(buf, "%d", pageno);
+		memcpy(&trunc, rec, SizeOfCommitTsTruncate);
+
+		appendStringInfo(buf, "pageno %d, oldestXid %u",
+			trunc.pageno, trunc.oldestXid);
 	}
 	else if (info == COMMIT_TS_SETTS)
 	{
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index a5b270c..86ac1d6 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -113,7 +113,7 @@ static bool CommitTsPagePrecedes(int page1, int page2);
 static void ActivateCommitTs(void);
 static void DeactivateCommitTs(void);
 static void WriteZeroPageXlogRec(int pageno);
-static void WriteTruncateXlogRec(int pageno);
+static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid);
 static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
 						 TransactionId *subxids, TimestampTz timestamp,
 						 RepOriginId nodeid);
@@ -824,7 +824,7 @@ TruncateCommitTs(TransactionId oldestXact)
 		return;					/* nothing to remove */
 
 	/* Write XLOG record */
-	WriteTruncateXlogRec(cutoffPage);
+	WriteTruncateXlogRec(cutoffPage, oldestXact);
 
 	/* Now we can remove the old CommitTs segment(s) */
 	SimpleLruTruncate(CommitTsCtl, cutoffPage);
@@ -910,10 +910,15 @@ WriteZeroPageXlogRec(int pageno)
  * Write a TRUNCATE xlog record
  */
 static void
-WriteTruncateXlogRec(int pageno)
+WriteTruncateXlogRec(int pageno, TransactionId oldestXid)
 {
+	xl_commit_ts_truncate xlrec;
+
+	xlrec.pageno = pageno;
+	xlrec.oldestXid = oldestXid;
+
 	XLogBeginInsert();
-	XLogRegisterData((char *) (&pageno), sizeof(int));
+	XLogRegisterData((char *) (&xlrec), SizeOfCommitTsTruncate);
 	(void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE);
 }
 
@@ -967,17 +972,19 @@ commit_ts_redo(XLogReaderState *record)
 	}
 	else if (info == COMMIT_TS_TRUNCATE)
 	{
-		int			pageno;
+		xl_commit_ts_truncate trunc;
 
-		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
+		memcpy(&trunc, XLogRecGetData(record), SizeOfCommitTsTruncate);
+
+		AdvanceOldestCommitTsXid(trunc.oldestXid);
 
 		/*
 		 * During XLOG replay, latest_page_number isn't set up yet; insert a
 		 * suitable value to bypass the sanity test in SimpleLruTruncate.
 		 */
-		CommitTsCtl->shared->latest_page_number = pageno;
+		CommitTsCtl->shared->latest_page_number = trunc.pageno;
 
-		SimpleLruTruncate(CommitTsCtl, pageno);
+		SimpleLruTruncate(CommitTsCtl, trunc.pageno);
 	}
 	else if (info == COMMIT_TS_SETTS)
 	{
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index b1be2f7..4336eda 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1152,6 +1152,8 @@ vac_truncate_clog(TransactionId frozenXID,
 	if (bogus)
 		return;
 
+	AdvanceOldestCommitTsXid(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
@@ -1167,7 +1169,6 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
 	SetMultiXactIdLimit(minMulti, minmulti_datoid);
-	AdvanceOldestCommitTsXid(frozenXID);
 }
 
 
diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h
index cf343ca..06991c6 100644
--- a/src/include/access/commit_ts.h
+++ b/src/include/access/commit_ts.h
@@ -61,6 +61,14 @@ typedef struct xl_commit_ts_set
 #define SizeOfCommitTsSet	(offsetof(xl_commit_ts_set, mainxid) + \
 							 sizeof(TransactionId))
 
+typedef struct xl_commit_ts_truncate
+{
+	int pageno;
+	TransactionId oldestXid;
+} xl_commit_ts_truncate;
+
+#define SizeOfCommitTsTruncate	(offsetof(xl_commit_ts_truncate, oldestXid) +\
+								sizeof(TransactionId))
 
 extern void commit_ts_redo(XLogReaderState *record);
 extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
-- 
2.5.5

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to