Simple tuning of btree_xlog_vacuum() using an idea I had a while back
but never implemented. The XXX comments are removed as part of this.
Allows us to avoid reading in blocks during VACUUM replay that are only
required for correctness of index scans.
Objections to commit?
--
Simon Riggs www.2ndQuadrant.com
*** a/src/backend/access/nbtree/nbtxlog.c
--- b/src/backend/access/nbtree/nbtxlog.c
***************
*** 486,505 **** btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
for (; blkno < xlrec->block; blkno++)
{
/*
! * XXX we don't actually need to read the block, we just need to
! * confirm it is unpinned. If we had a special call into the
! * buffer manager we could optimise this so that if the block is
! * not in shared_buffers we confirm it as unpinned.
! *
! * Another simple optimization would be to check if there's any
! * backends running; if not, we could just skip this.
*/
! buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL);
! if (BufferIsValid(buffer))
! {
! LockBufferForCleanup(buffer);
! UnlockReleaseBuffer(buffer);
! }
}
}
--- 486,496 ----
for (; blkno < xlrec->block; blkno++)
{
/*
! * We don't actually need to read the block, we just need to
! * confirm it is unpinned, since if it's not in shared_buffers then
! * we're OK.
*/
! XLogConfirmBufferIsUnpinned(xlrec->node, MAIN_FORKNUM, blkno);
}
}
*** a/src/backend/access/transam/xlogutils.c
--- b/src/backend/access/transam/xlogutils.c
***************
*** 342,347 **** XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
--- 342,377 ----
return buffer;
}
+ void
+ XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum,
+ BlockNumber blkno)
+ {
+ BlockNumber lastblock; /* result of smgrnblocks() for this fork */
+ SMgrRelation smgr;
+
+ Assert(blkno != P_NEW);
+
+ /*
+ * Purpose of this routine: for the block identified by (rnode, forknum,
+ * blkno), hand off to ConfirmBufferIsUnpinned() if the block lies within
+ * the current length of the fork; otherwise do nothing. Nothing is
+ * returned to the caller.
+ *
+ * First step: open the relation at smgr level.
+ */
+ smgr = smgropen(rnode);
+
+ /*
+ * Create the target file if it doesn't already exist. This lets us cope
+ * if the replay sequence contains writes to a relation that is later
+ * deleted. (The original coding of this routine would instead suppress
+ * the writes, but that seems like it risks losing valuable data if the
+ * filesystem loses an inode during a crash. Better to write the data
+ * until we are actually told to delete the file.)
+ *
+ * NOTE(review): the wording above talks about replaying writes and about
+ * an "original coding" of this routine, yet nothing visible in this
+ * routine writes a page. Presumably the file is created here only so
+ * that the smgrnblocks() call below has a file to measure -- confirm
+ * that creating it as a side effect of this check is intended.
+ */
+ smgrcreate(smgr, forknum, true);
+
+ lastblock = smgrnblocks(smgr, forknum);
+
+ if (blkno >= lastblock)
+ return; /* block is past the end of the fork: skip the buffer check */
+
+ /* page exists in file, so go check the buffer that may be holding it */
+ ConfirmBufferIsUnpinned(rnode, forknum, blkno);
+ }
/*
* Struct actually returned by XLogFakeRelcacheEntry, though the declared
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 475,480 **** ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum,
--- 475,520 ----
return BufferDescriptorGetBuffer(bufHdr);
}
+ /*
+ * ConfirmBufferIsUnpinned -- make sure nobody else has the block pinned
+ *
+ * rnode, forkNum and blockNum identify the block. The block is only
+ * looked up in the shared buffer mapping table; it is never read in.
+ *
+ * If no shared buffer holds the block, nobody can have it pinned and we
+ * return immediately. If a buffer does hold it, we take our own pin and
+ * then acquire and release a cleanup lock on that buffer. Merely pinning
+ * and unpinning would say nothing about pins held by other backends, since
+ * a pin can be taken no matter how many already exist; the cleanup lock
+ * is what waits for the others to go away.
+ *
+ * NOTE(review): other PinBuffer() callers appear to reserve space in the
+ * current resource owner before pinning; confirm whether that is needed
+ * on this path as well.
+ */
+ void
+ ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum)
+ {
+     BufferTag bufTag; /* identity of requested block */
+     uint32 bufHash; /* hash value for bufTag */
+     LWLockId bufPartitionLock; /* buffer partition lock for it */
+     int buf_id;
+     volatile BufferDesc *buf;
+     Buffer buffer;
+     SMgrRelation smgr = smgropen(rnode);
+
+     /* create a tag so we can lookup the buffer */
+     INIT_BUFFERTAG(bufTag, smgr->smgr_rnode, forkNum, blockNum);
+
+     /* determine its hash code and partition lock ID */
+     bufHash = BufTableHashCode(&bufTag);
+     bufPartitionLock = BufMappingPartitionLock(bufHash);
+
+     /* see if the block is in the buffer pool already */
+     LWLockAcquire(bufPartitionLock, LW_SHARED);
+
+     buf_id = BufTableLookup(&bufTag, bufHash);
+
+     /*
+      * If buffer isn't present it must be unpinned.
+      */
+     if (buf_id < 0)
+     {
+         LWLockRelease(bufPartitionLock);
+         return;
+     }
+
+     buf = &BufferDescriptors[buf_id];
+
+     /*
+      * Found it. Pin it while we still hold the partition lock so that the
+      * buffer cannot be recycled for another page underneath us. The
+      * result of PinBuffer() only reports whether the page is valid; we
+      * hold a pin either way, so the pin must be released on every path.
+      */
+     (void) PinBuffer(buf, NULL);
+
+     /* Our pin keeps the buffer in place; the mapping lock can go now. */
+     LWLockRelease(bufPartitionLock);
+
+     buffer = BufferDescriptorGetBuffer(buf);
+
+     /*
+      * Getting the cleanup lock is what actually establishes that ours is
+      * the only pin. Once we have it there is nothing more to do, so drop
+      * the lock and our pin straight away.
+      */
+     LockBufferForCleanup(buffer);
+     UnlockReleaseBuffer(buffer);
+ }
+
/*
* BufferAlloc -- subroutine for ReadBuffer. Handles lookup of a shared
* buffer. If no buffer exists already, selects a replacement
*** a/src/include/access/xlogutils.h
--- b/src/include/access/xlogutils.h
***************
*** 28,33 **** extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
--- 28,35 ----
extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode);
+ extern void XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum,
+ BlockNumber blkno);
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 163,168 **** extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
--- 163,170 ----
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy);
+ extern void ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum,
+ BlockNumber blockNum);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers