On Sat, Feb 4, 2012 at 6:37 PM, Simon Riggs <si...@2ndquadrant.com> wrote:

> Patch to do that attached


-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 99a431a..4758931 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -4590,6 +4590,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
 	int			ndead;
 	int			nunused;
 	Size		freespace;
+	bool		hit;
 
 	/*
 	 * We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -4608,7 +4609,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
 	if (record->xl_info & XLR_BKP_BLOCK_1)
 		return;
 
-	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
+	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit);
 	if (!BufferIsValid(buffer))
 		return;
 	LockBufferForCleanup(buffer);
@@ -4664,6 +4665,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
 	TransactionId cutoff_xid = xlrec->cutoff_xid;
 	Buffer		buffer;
 	Page		page;
+	bool		hit;
 
 	/*
 	 * In Hot Standby mode, ensure that there's no queries running which still
@@ -4677,7 +4679,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
 	if (record->xl_info & XLR_BKP_BLOCK_1)
 		return;
 
-	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
+	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit);
 	if (!BufferIsValid(buffer))
 		return;
 	LockBufferForCleanup(buffer);
@@ -4728,6 +4730,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
 	xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
 	Buffer		buffer;
 	Page		page;
+	bool		hit;
 
 	/*
 	 * Read the heap page, if it still exists.  If the heap file has been
@@ -4736,7 +4739,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
 	 * will have to be cleared out at the same time.
 	 */
 	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block,
-									RBM_NORMAL);
+									RBM_NORMAL, &hit);
 	if (!BufferIsValid(buffer))
 		return;
 	page = (Page) BufferGetPage(buffer);
@@ -4806,13 +4809,14 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
 	xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
 	Buffer		buffer;
 	Page		page;
+	bool		hit;
 
 	/*
 	 * Note: the NEWPAGE log record is used for both heaps and indexes, so do
 	 * not do anything that assumes we are touching a heap.
 	 */
 	buffer = XLogReadBufferExtended(xlrec->node, xlrec->forknum, xlrec->blkno,
-									RBM_ZERO);
+									RBM_ZERO, &hit);
 	Assert(BufferIsValid(buffer));
 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 	page = (Page) BufferGetPage(buffer);
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 0f5c113..d10b0b8 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -466,6 +466,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
 	Buffer		buffer;
 	Page		page;
 	BTPageOpaque opaque;
+	bool		hit;
 
 	xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
 
@@ -491,7 +492,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
 			 * Another simple optimization would be to check if there's any
 			 * backends running; if not, we could just skip this.
 			 */
-			buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL);
+			buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL, &hit);
 			if (BufferIsValid(buffer))
 			{
 				LockBufferForCleanup(buffer);
@@ -513,7 +514,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
 	 * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
 	 * page. See nbtree/README for details.
 	 */
-	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
+	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit);
 	if (!BufferIsValid(buffer))
 		return;
 	LockBufferForCleanup(buffer);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index cce87a3..3f4842d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3687,6 +3687,7 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
 	BkpBlock	bkpb;
 	char	   *blk;
 	int			i;
+	bool		hit;
 
 	if (!(record->xl_info & XLR_BKP_BLOCK_MASK))
 		return;
@@ -3700,8 +3701,21 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
 		memcpy(&bkpb, blk, sizeof(BkpBlock));
 		blk += sizeof(BkpBlock);
 
+		hit = false;
 		buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
-										RBM_ZERO);
+										RBM_ZERO, &hit);
+
+		/*
+		 * If we found the block in shared buffers and we have already
+		 * reached consistency, skip applying the backup block. The block
+		 * was already removable anyway, so we can skip it without problems.
+		 * Skipping avoids having to take a cleanup lock every time we apply
+		 * a backup block, which would otherwise be needed because user
+		 * queries expect data on a block to remain constant while reading it.
+		 */
+		if (reachedConsistency && hit)
+			continue;
+
 		Assert(BufferIsValid(buffer));
 		if (cleanup)
 			LockBufferForCleanup(buffer);
@@ -3716,9 +3730,9 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
 		}
 		else
 		{
-			/* must zero-fill the hole */
-			MemSet((char *) page, 0, BLCKSZ);
 			memcpy((char *) page, blk, bkpb.hole_offset);
+			/* must zero-fill the hole */
+			MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
 			memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
 				   blk + bkpb.hole_offset,
 				   BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index f286cdf..b7b9ec8 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -263,9 +263,10 @@ Buffer
 XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
 {
 	Buffer		buf;
+	bool		hit;
 
 	buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
-								 init ? RBM_ZERO : RBM_NORMAL);
+								 init ? RBM_ZERO : RBM_NORMAL, &hit);
 	if (BufferIsValid(buf))
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
@@ -290,7 +291,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
  */
 Buffer
 XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
-					   BlockNumber blkno, ReadBufferMode mode)
+					   BlockNumber blkno, ReadBufferMode mode, bool *hit)
 {
 	BlockNumber lastblock;
 	Buffer		buffer;
@@ -317,7 +318,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
 	{
 		/* page exists in file */
 		buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
-										   mode, NULL);
+										   mode, NULL, hit);
 	}
 	else
 	{
@@ -336,7 +337,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
 			if (buffer != InvalidBuffer)
 				ReleaseBuffer(buffer);
 			buffer = ReadBufferWithoutRelcache(rnode, forknum,
-											   P_NEW, mode, NULL);
+											   P_NEW, mode, NULL, hit);
 			lastblock++;
 		}
 		Assert(BufferGetBlockNumber(buffer) == blkno);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1adb6d3..64293ae 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -264,14 +264,12 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
 Buffer
 ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
 						  BlockNumber blockNum, ReadBufferMode mode,
-						  BufferAccessStrategy strategy)
+						  BufferAccessStrategy strategy, bool *hit)
 {
-	bool		hit;
-
 	SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
 
 	return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
-							 mode, strategy, &hit);
+							 mode, strategy, hit);
 }
 
 
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 7840adb..0aa099a 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -202,13 +202,14 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 	BlockNumber blkno;
 	Buffer		buf;
 	Page		page;
+	bool		hit;
 
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(heapBlk, &slot);
 	blkno = fsm_logical_to_physical(addr);
 
 	/* If the page doesn't exist already, extend */
-	buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
+	buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, &hit);
 	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 	page = BufferGetPage(buf);
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 6ade476..038f94f 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -24,7 +24,7 @@ extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
 
 extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
 extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
-					   BlockNumber blkno, ReadBufferMode mode);
+					   BlockNumber blkno, ReadBufferMode mode, bool *hit);
 
 extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
 extern void FreeFakeRelcacheEntry(Relation fakerel);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index de1bbd0..aa8f77c 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -165,8 +165,8 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
 				   BlockNumber blockNum, ReadBufferMode mode,
 				   BufferAccessStrategy strategy);
 extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
-						  ForkNumber forkNum, BlockNumber blockNum,
-						  ReadBufferMode mode, BufferAccessStrategy strategy);
+					ForkNumber forkNum, BlockNumber blockNum,
+					ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to