On Sat, Feb 4, 2012 at 6:37 PM, Simon Riggs <si...@2ndquadrant.com> wrote:
> Patch to do that attached -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 99a431a..4758931 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -4590,6 +4590,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record) int ndead; int nunused; Size freespace; + bool hit; /* * We're about to remove tuples. In Hot Standby mode, ensure that there's @@ -4608,7 +4609,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record) if (record->xl_info & XLR_BKP_BLOCK_1) return; - buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL); + buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit); if (!BufferIsValid(buffer)) return; LockBufferForCleanup(buffer); @@ -4664,6 +4665,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record) TransactionId cutoff_xid = xlrec->cutoff_xid; Buffer buffer; Page page; + bool hit; /* * In Hot Standby mode, ensure that there's no queries running which still @@ -4677,7 +4679,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record) if (record->xl_info & XLR_BKP_BLOCK_1) return; - buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL); + buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit); if (!BufferIsValid(buffer)) return; LockBufferForCleanup(buffer); @@ -4728,6 +4730,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record); Buffer buffer; Page page; + bool hit; /* * Read the heap page, if it still exists. If the heap file has been @@ -4736,7 +4739,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) * will have to be cleared out at the same time. 
*/ buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, - RBM_NORMAL); + RBM_NORMAL, &hit); if (!BufferIsValid(buffer)) return; page = (Page) BufferGetPage(buffer); @@ -4806,13 +4809,14 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record) xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record); Buffer buffer; Page page; + bool hit; /* * Note: the NEWPAGE log record is used for both heaps and indexes, so do * not do anything that assumes we are touching a heap. */ buffer = XLogReadBufferExtended(xlrec->node, xlrec->forknum, xlrec->blkno, - RBM_ZERO); + RBM_ZERO, &hit); Assert(BufferIsValid(buffer)); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = (Page) BufferGetPage(buffer); diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 0f5c113..d10b0b8 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -466,6 +466,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) Buffer buffer; Page page; BTPageOpaque opaque; + bool hit; xlrec = (xl_btree_vacuum *) XLogRecGetData(record); @@ -491,7 +492,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) * Another simple optimization would be to check if there's any * backends running; if not, we could just skip this. */ - buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL); + buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL, &hit); if (BufferIsValid(buffer)) { LockBufferForCleanup(buffer); @@ -513,7 +514,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) * Like in btvacuumpage(), we need to take a cleanup lock on every leaf * page. See nbtree/README for details. 
*/ - buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL); + buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit); if (!BufferIsValid(buffer)) return; LockBufferForCleanup(buffer); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index cce87a3..3f4842d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3687,6 +3687,7 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup) BkpBlock bkpb; char *blk; int i; + bool hit; if (!(record->xl_info & XLR_BKP_BLOCK_MASK)) return; @@ -3700,8 +3701,21 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup) memcpy(&bkpb, blk, sizeof(BkpBlock)); blk += sizeof(BkpBlock); + hit = false; buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block, - RBM_ZERO); + RBM_ZERO, &hit); + + /* + * If we found the block already present in shared buffers and we have + * reached consistency, skip applying the backup block: the block's + * contents were already valid, so skipping is safe. This avoids + * having to take a cleanup lock every time we restore a backup block, + * which matters because user queries expect the data in a buffer to + * remain constant while they are reading it. 
+ */ + if (reachedConsistency && hit) + continue; + Assert(BufferIsValid(buffer)); if (cleanup) LockBufferForCleanup(buffer); @@ -3716,9 +3730,9 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup) } else { - /* must zero-fill the hole */ - MemSet((char *) page, 0, BLCKSZ); memcpy((char *) page, blk, bkpb.hole_offset); + /* must zero-fill the hole */ + MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length); memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length), blk + bkpb.hole_offset, BLCKSZ - (bkpb.hole_offset + bkpb.hole_length)); diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index f286cdf..b7b9ec8 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -263,9 +263,10 @@ Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) { Buffer buf; + bool hit; buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, - init ? RBM_ZERO : RBM_NORMAL); + init ? RBM_ZERO : RBM_NORMAL, &hit); if (BufferIsValid(buf)) LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); @@ -290,7 +291,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) */ Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, - BlockNumber blkno, ReadBufferMode mode) + BlockNumber blkno, ReadBufferMode mode, bool *hit) { BlockNumber lastblock; Buffer buffer; @@ -317,7 +318,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, { /* page exists in file */ buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, - mode, NULL); + mode, NULL, hit); } else { @@ -336,7 +337,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, if (buffer != InvalidBuffer) ReleaseBuffer(buffer); buffer = ReadBufferWithoutRelcache(rnode, forknum, - P_NEW, mode, NULL); + P_NEW, mode, NULL, hit); lastblock++; } Assert(BufferGetBlockNumber(buffer) == blkno); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 
1adb6d3..64293ae 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -264,14 +264,12 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, Buffer ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, - BufferAccessStrategy strategy) + BufferAccessStrategy strategy, bool *hit) { - bool hit; - SMgrRelation smgr = smgropen(rnode, InvalidBackendId); return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum, - mode, strategy, &hit); + mode, strategy, hit); } diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 7840adb..0aa099a 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -202,13 +202,14 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, BlockNumber blkno; Buffer buf; Page page; + bool hit; /* Get the location of the FSM byte representing the heap block */ addr = fsm_get_location(heapBlk, &slot); blkno = fsm_logical_to_physical(addr); /* If the page doesn't exist already, extend */ - buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR); + buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, &hit); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 6ade476..038f94f 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -24,7 +24,7 @@ extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init); extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, - BlockNumber blkno, ReadBufferMode mode); + BlockNumber blkno, ReadBufferMode mode, bool *hit); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern void 
FreeFakeRelcacheEntry(Relation fakerel); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index de1bbd0..aa8f77c 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -165,8 +165,8 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, - ForkNumber forkNum, BlockNumber blockNum, - ReadBufferMode mode, BufferAccessStrategy strategy); + ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit); extern void ReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer); extern void MarkBufferDirty(Buffer buffer);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers