On Fri, 2013-06-14 at 18:27 +0200, Andres Freund wrote:
> I'd like to see a comment around the memcpys in XLogSaveBufferForHint()
> that mentions that they are safe in a non std buffer due to
> XLogCheckBuffer setting an appropriate hole/offset. Or make an explicit
> change of the copy algorithm there.
Done.
> Btw, if you touch that code, I'd vote for renaming XLOG_HINT to XLOG_FPI
> or something like that. I find the former name confusing...
Also done.
Patch attached. Also, since we branched, I think this should be
back-patched to 9.3 as well.
Regards,
Jeff Davis
*** a/src/backend/access/hash/hash.c
--- b/src/backend/access/hash/hash.c
***************
*** 287,293 **** hashgettuple(PG_FUNCTION_ARGS)
/*
* Since this can be redone later if needed, mark as a hint.
*/
! MarkBufferDirtyHint(buf);
}
/*
--- 287,293 ----
/*
* Since this can be redone later if needed, mark as a hint.
*/
! MarkBufferDirtyHint(buf, true);
}
/*
*** a/src/backend/access/heap/pruneheap.c
--- b/src/backend/access/heap/pruneheap.c
***************
*** 262,268 **** heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
{
((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
PageClearFull(page);
! MarkBufferDirtyHint(buffer);
}
}
--- 262,268 ----
{
((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
PageClearFull(page);
! MarkBufferDirtyHint(buffer, true);
}
}
*** a/src/backend/access/nbtree/nbtinsert.c
--- b/src/backend/access/nbtree/nbtinsert.c
***************
*** 413,421 **** _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
* crucial. Be sure to mark the proper buffer dirty.
*/
if (nbuf != InvalidBuffer)
! MarkBufferDirtyHint(nbuf);
else
! MarkBufferDirtyHint(buf);
}
}
}
--- 413,421 ----
* crucial. Be sure to mark the proper buffer dirty.
*/
if (nbuf != InvalidBuffer)
! MarkBufferDirtyHint(nbuf, true);
else
! MarkBufferDirtyHint(buf, true);
}
}
}
*** a/src/backend/access/nbtree/nbtree.c
--- b/src/backend/access/nbtree/nbtree.c
***************
*** 1052,1058 **** restart:
opaque->btpo_cycleid == vstate->cycleid)
{
opaque->btpo_cycleid = 0;
! MarkBufferDirtyHint(buf);
}
}
--- 1052,1058 ----
opaque->btpo_cycleid == vstate->cycleid)
{
opaque->btpo_cycleid = 0;
! MarkBufferDirtyHint(buf, true);
}
}
*** a/src/backend/access/nbtree/nbtutils.c
--- b/src/backend/access/nbtree/nbtutils.c
***************
*** 1789,1795 **** _bt_killitems(IndexScanDesc scan, bool haveLock)
if (killedsomething)
{
opaque->btpo_flags |= BTP_HAS_GARBAGE;
! MarkBufferDirtyHint(so->currPos.buf);
}
if (!haveLock)
--- 1789,1795 ----
if (killedsomething)
{
opaque->btpo_flags |= BTP_HAS_GARBAGE;
! MarkBufferDirtyHint(so->currPos.buf, true);
}
if (!haveLock)
*** a/src/backend/access/rmgrdesc/xlogdesc.c
--- b/src/backend/access/rmgrdesc/xlogdesc.c
***************
*** 82,92 **** xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "restore point: %s", xlrec->rp_name);
}
! else if (info == XLOG_HINT)
{
BkpBlock *bkp = (BkpBlock *) rec;
! appendStringInfo(buf, "page hint: %s block %u",
relpathperm(bkp->node, bkp->fork),
bkp->block);
}
--- 82,92 ----
appendStringInfo(buf, "restore point: %s", xlrec->rp_name);
}
! else if (info == XLOG_FPI)
{
BkpBlock *bkp = (BkpBlock *) rec;
! appendStringInfo(buf, "full-page image: %s block %u",
relpathperm(bkp->node, bkp->fork),
bkp->block);
}
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 7681,7692 **** XLogRestorePoint(const char *rpName)
* records. In that case, multiple copies of the same block would be recorded
* in separate WAL records by different backends, though that is still OK from
* a correctness perspective.
- *
- * Note that this only works for buffers that fit the standard page model,
- * i.e. those for which buffer_std == true
*/
XLogRecPtr
! XLogSaveBufferForHint(Buffer buffer)
{
XLogRecPtr recptr = InvalidXLogRecPtr;
XLogRecPtr lsn;
--- 7681,7689 ----
* records. In that case, multiple copies of the same block would be recorded
* in separate WAL records by different backends, though that is still OK from
* a correctness perspective.
*/
XLogRecPtr
! XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
{
XLogRecPtr recptr = InvalidXLogRecPtr;
XLogRecPtr lsn;
***************
*** 7708,7714 **** XLogSaveBufferForHint(Buffer buffer)
* and reset rdata for any actual WAL record insert.
*/
rdata[0].buffer = buffer;
! rdata[0].buffer_std = true;
/*
* Check buffer while not holding an exclusive lock.
--- 7705,7711 ----
* and reset rdata for any actual WAL record insert.
*/
rdata[0].buffer = buffer;
! rdata[0].buffer_std = buffer_std;
/*
* Check buffer while not holding an exclusive lock.
***************
*** 7722,7727 **** XLogSaveBufferForHint(Buffer buffer)
--- 7719,7727 ----
* Copy buffer so we don't have to worry about concurrent hint bit or
* lsn updates. We assume pd_lower/upper cannot be changed without an
* exclusive lock, so the contents bkp are not racy.
+ *
+ * With buffer_std set to false, XLogCheckBuffer() sets hole_length and
+ * hole_offset to 0; so the following code is safe for either case.
*/
memcpy(copied_buffer, origdata, bkpb.hole_offset);
memcpy(copied_buffer + bkpb.hole_offset,
***************
*** 7744,7750 **** XLogSaveBufferForHint(Buffer buffer)
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
! recptr = XLogInsert(RM_XLOG_ID, XLOG_HINT, rdata);
}
return recptr;
--- 7744,7750 ----
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
! recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
}
return recptr;
***************
*** 8109,8122 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record)
{
/* nothing to do here */
}
! else if (info == XLOG_HINT)
{
char *data;
BkpBlock bkpb;
/*
! * Hint bit records contain a backup block stored "inline" in the
! * normal data since the locking when writing hint records isn't
* sufficient to use the normal backup block mechanism, which assumes
* exclusive lock on the buffer supplied.
*
--- 8109,8122 ----
{
/* nothing to do here */
}
! else if (info == XLOG_FPI)
{
char *data;
BkpBlock bkpb;
/*
! * Full-page image (FPI) records contain a backup block stored "inline"
! * in the normal data since the locking when writing hint records isn't
* sufficient to use the normal backup block mechanism, which assumes
* exclusive lock on the buffer supplied.
*
*** a/src/backend/commands/sequence.c
--- b/src/backend/commands/sequence.c
***************
*** 1118,1124 **** read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
! MarkBufferDirtyHint(*buf);
}
seq = (Form_pg_sequence) GETSTRUCT(seqtuple);
--- 1118,1124 ----
HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
! MarkBufferDirtyHint(*buf, true);
}
seq = (Form_pg_sequence) GETSTRUCT(seqtuple);
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 2587,2593 **** IncrBufferRefCount(Buffer buffer)
* (due to a race condition), so it cannot be used for important changes.
*/
void
! MarkBufferDirtyHint(Buffer buffer)
{
volatile BufferDesc *bufHdr;
Page page = BufferGetPage(buffer);
--- 2587,2593 ----
* (due to a race condition), so it cannot be used for important changes.
*/
void
! MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
{
volatile BufferDesc *bufHdr;
Page page = BufferGetPage(buffer);
***************
*** 2671,2677 **** MarkBufferDirtyHint(Buffer buffer)
* rather than full transactionids.
*/
MyPgXact->delayChkpt = delayChkpt = true;
! lsn = XLogSaveBufferForHint(buffer);
}
LockBufHdr(bufHdr);
--- 2671,2677 ----
* rather than full transactionids.
*/
MyPgXact->delayChkpt = delayChkpt = true;
! lsn = XLogSaveBufferForHint(buffer, buffer_std);
}
LockBufHdr(bufHdr);
*** a/src/backend/storage/freespace/freespace.c
--- b/src/backend/storage/freespace/freespace.c
***************
*** 216,222 **** XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
PageInit(page, BLCKSZ, 0);
if (fsm_set_avail(page, slot, new_cat))
! MarkBufferDirtyHint(buf);
UnlockReleaseBuffer(buf);
}
--- 216,222 ----
PageInit(page, BLCKSZ, 0);
if (fsm_set_avail(page, slot, new_cat))
! MarkBufferDirtyHint(buf, false);
UnlockReleaseBuffer(buf);
}
***************
*** 286,292 **** FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
return; /* nothing to do; the FSM was already smaller */
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
! MarkBufferDirtyHint(buf);
UnlockReleaseBuffer(buf);
new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
--- 286,292 ----
return; /* nothing to do; the FSM was already smaller */
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
! MarkBufferDirtyHint(buf, false);
UnlockReleaseBuffer(buf);
new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
***************
*** 619,625 **** fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
page = BufferGetPage(buf);
if (fsm_set_avail(page, slot, newValue))
! MarkBufferDirtyHint(buf);
if (minValue != 0)
{
--- 619,625 ----
page = BufferGetPage(buf);
if (fsm_set_avail(page, slot, newValue))
! MarkBufferDirtyHint(buf, false);
if (minValue != 0)
{
***************
*** 770,776 **** fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
{
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
fsm_set_avail(BufferGetPage(buf), slot, child_avail);
! MarkBufferDirtyHint(buf);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
}
--- 770,776 ----
{
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
fsm_set_avail(BufferGetPage(buf), slot, child_avail);
! MarkBufferDirtyHint(buf, false);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
}
*** a/src/backend/storage/freespace/fsmpage.c
--- b/src/backend/storage/freespace/fsmpage.c
***************
*** 284,290 **** restart:
exclusive_lock_held = true;
}
fsm_rebuild_page(page);
! MarkBufferDirtyHint(buf);
goto restart;
}
}
--- 284,290 ----
exclusive_lock_held = true;
}
fsm_rebuild_page(page);
! MarkBufferDirtyHint(buf, false);
goto restart;
}
}
*** a/src/backend/utils/time/tqual.c
--- b/src/backend/utils/time/tqual.c
***************
*** 121,127 **** SetHintBits(HeapTupleHeader tuple, Buffer buffer,
}
tuple->t_infomask |= infomask;
! MarkBufferDirtyHint(buffer);
}
/*
--- 121,127 ----
}
tuple->t_infomask |= infomask;
! MarkBufferDirtyHint(buffer, true);
}
/*
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 267,273 **** extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
extern int XLogFileOpen(XLogSegNo segno);
! extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer);
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
--- 267,273 ----
extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
extern int XLogFileOpen(XLogSegNo segno);
! extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
*** a/src/include/catalog/pg_control.h
--- b/src/include/catalog/pg_control.h
***************
*** 67,73 **** typedef struct CheckPoint
#define XLOG_RESTORE_POINT 0x70
#define XLOG_FPW_CHANGE 0x80
#define XLOG_END_OF_RECOVERY 0x90
! #define XLOG_HINT 0xA0
/*
--- 67,73 ----
#define XLOG_RESTORE_POINT 0x70
#define XLOG_FPW_CHANGE 0x80
#define XLOG_END_OF_RECOVERY 0x90
! #define XLOG_FPI 0xA0
/*
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 204,210 **** extern Size BufferShmemSize(void);
extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
ForkNumber *forknum, BlockNumber *blknum);
! extern void MarkBufferDirtyHint(Buffer buffer);
extern void UnlockBuffers(void);
extern void LockBuffer(Buffer buffer, int mode);
--- 204,210 ----
extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
ForkNumber *forknum, BlockNumber *blknum);
! extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
extern void UnlockBuffers(void);
extern void LockBuffer(Buffer buffer, int mode);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers