On Fri, 2013-06-14 at 18:27 +0200, Andres Freund wrote: > I'd like to see a comment around the memcpys in XLogSaveBufferForHint() > that mentions that they are safe in a non std buffer due to > XLogCheckBuffer setting an appropriate hole/offset. Or make an explicit > change of the copy algorithm there.
Done. > Btw, if you touch that code, I'd vote for renaming XLOG_HINT to XLOG_FPI > or something like that. I find the former name confusing... Also done. Patch attached. Also, since we branched, I think this should be back-patched to 9.3 as well. Regards, Jeff Davis
*** a/src/backend/access/hash/hash.c --- b/src/backend/access/hash/hash.c *************** *** 287,293 **** hashgettuple(PG_FUNCTION_ARGS) /* * Since this can be redone later if needed, mark as a hint. */ ! MarkBufferDirtyHint(buf); } /* --- 287,293 ---- /* * Since this can be redone later if needed, mark as a hint. */ ! MarkBufferDirtyHint(buf, true); } /* *** a/src/backend/access/heap/pruneheap.c --- b/src/backend/access/heap/pruneheap.c *************** *** 262,268 **** heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, { ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; PageClearFull(page); ! MarkBufferDirtyHint(buffer); } } --- 262,268 ---- { ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; PageClearFull(page); ! MarkBufferDirtyHint(buffer, true); } } *** a/src/backend/access/nbtree/nbtinsert.c --- b/src/backend/access/nbtree/nbtinsert.c *************** *** 413,421 **** _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * crucial. Be sure to mark the proper buffer dirty. */ if (nbuf != InvalidBuffer) ! MarkBufferDirtyHint(nbuf); else ! MarkBufferDirtyHint(buf); } } } --- 413,421 ---- * crucial. Be sure to mark the proper buffer dirty. */ if (nbuf != InvalidBuffer) ! MarkBufferDirtyHint(nbuf, true); else ! MarkBufferDirtyHint(buf, true); } } } *** a/src/backend/access/nbtree/nbtree.c --- b/src/backend/access/nbtree/nbtree.c *************** *** 1052,1058 **** restart: opaque->btpo_cycleid == vstate->cycleid) { opaque->btpo_cycleid = 0; ! MarkBufferDirtyHint(buf); } } --- 1052,1058 ---- opaque->btpo_cycleid == vstate->cycleid) { opaque->btpo_cycleid = 0; ! MarkBufferDirtyHint(buf, true); } } *** a/src/backend/access/nbtree/nbtutils.c --- b/src/backend/access/nbtree/nbtutils.c *************** *** 1789,1795 **** _bt_killitems(IndexScanDesc scan, bool haveLock) if (killedsomething) { opaque->btpo_flags |= BTP_HAS_GARBAGE; ! MarkBufferDirtyHint(so->currPos.buf); } if (!haveLock) --- 1789,1795 ---- if (killedsomething) { opaque->btpo_flags |= BTP_HAS_GARBAGE; ! MarkBufferDirtyHint(so->currPos.buf, true); } if (!haveLock) *** a/src/backend/access/rmgrdesc/xlogdesc.c --- b/src/backend/access/rmgrdesc/xlogdesc.c *************** *** 82,92 **** xlog_desc(StringInfo buf, uint8 xl_info, char *rec) appendStringInfo(buf, "restore point: %s", xlrec->rp_name); } ! else if (info == XLOG_HINT) { BkpBlock *bkp = (BkpBlock *) rec; ! appendStringInfo(buf, "page hint: %s block %u", relpathperm(bkp->node, bkp->fork), bkp->block); } --- 82,92 ---- appendStringInfo(buf, "restore point: %s", xlrec->rp_name); } ! else if (info == XLOG_FPI) { BkpBlock *bkp = (BkpBlock *) rec; ! appendStringInfo(buf, "full-page image: %s block %u", relpathperm(bkp->node, bkp->fork), bkp->block); } *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** *** 7681,7692 **** XLogRestorePoint(const char *rpName) * records. In that case, multiple copies of the same block would be recorded * in separate WAL records by different backends, though that is still OK from * a correctness perspective. - * - * Note that this only works for buffers that fit the standard page model, - * i.e. those for which buffer_std == true */ XLogRecPtr ! XLogSaveBufferForHint(Buffer buffer) { XLogRecPtr recptr = InvalidXLogRecPtr; XLogRecPtr lsn; --- 7681,7689 ---- * records. In that case, multiple copies of the same block would be recorded * in separate WAL records by different backends, though that is still OK from * a correctness perspective. */ XLogRecPtr ! XLogSaveBufferForHint(Buffer buffer, bool buffer_std) { XLogRecPtr recptr = InvalidXLogRecPtr; XLogRecPtr lsn; *************** *** 7708,7714 **** XLogSaveBufferForHint(Buffer buffer) * and reset rdata for any actual WAL record insert. */ rdata[0].buffer = buffer; ! rdata[0].buffer_std = true; /* * Check buffer while not holding an exclusive lock. --- 7705,7711 ---- * and reset rdata for any actual WAL record insert. */ rdata[0].buffer = buffer; ! rdata[0].buffer_std = buffer_std; /* * Check buffer while not holding an exclusive lock. *************** *** 7722,7727 **** XLogSaveBufferForHint(Buffer buffer) --- 7719,7727 ---- * Copy buffer so we don't have to worry about concurrent hint bit or * lsn updates. We assume pd_lower/upper cannot be changed without an * exclusive lock, so the contents bkp are not racy. + * + * With buffer_std set to false, XLogCheckBuffer() sets hole_length and + * hole_offset to 0; so the following code is safe for either case. */ memcpy(copied_buffer, origdata, bkpb.hole_offset); memcpy(copied_buffer + bkpb.hole_offset, *************** *** 7744,7750 **** XLogSaveBufferForHint(Buffer buffer) rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; ! recptr = XLogInsert(RM_XLOG_ID, XLOG_HINT, rdata); } return recptr; --- 7744,7750 ---- rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; ! recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata); } return recptr; *************** *** 8109,8122 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record) { /* nothing to do here */ } ! else if (info == XLOG_HINT) { char *data; BkpBlock bkpb; /* ! * Hint bit records contain a backup block stored "inline" in the ! * normal data since the locking when writing hint records isn't * sufficient to use the normal backup block mechanism, which assumes * exclusive lock on the buffer supplied. * --- 8109,8122 ---- { /* nothing to do here */ } ! else if (info == XLOG_FPI) { char *data; BkpBlock bkpb; /* ! * Full-page image (FPI) records contain a backup block stored "inline" ! * in the normal data since the locking when writing hint records isn't * sufficient to use the normal backup block mechanism, which assumes * exclusive lock on the buffer supplied. * *** a/src/backend/commands/sequence.c --- b/src/backend/commands/sequence.c *************** *** 1118,1124 **** read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple) HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId); seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ! MarkBufferDirtyHint(*buf); } seq = (Form_pg_sequence) GETSTRUCT(seqtuple); --- 1118,1124 ---- HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId); seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ! MarkBufferDirtyHint(*buf, true); } seq = (Form_pg_sequence) GETSTRUCT(seqtuple); *** a/src/backend/storage/buffer/bufmgr.c --- b/src/backend/storage/buffer/bufmgr.c *************** *** 2587,2593 **** IncrBufferRefCount(Buffer buffer) * (due to a race condition), so it cannot be used for important changes. */ void ! MarkBufferDirtyHint(Buffer buffer) { volatile BufferDesc *bufHdr; Page page = BufferGetPage(buffer); --- 2587,2593 ---- * (due to a race condition), so it cannot be used for important changes. */ void ! MarkBufferDirtyHint(Buffer buffer, bool buffer_std) { volatile BufferDesc *bufHdr; Page page = BufferGetPage(buffer); *************** *** 2671,2677 **** MarkBufferDirtyHint(Buffer buffer) * rather than full transactionids. */ MyPgXact->delayChkpt = delayChkpt = true; ! lsn = XLogSaveBufferForHint(buffer); } LockBufHdr(bufHdr); --- 2671,2677 ---- * rather than full transactionids. */ MyPgXact->delayChkpt = delayChkpt = true; ! lsn = XLogSaveBufferForHint(buffer, buffer_std); } LockBufHdr(bufHdr); *** a/src/backend/storage/freespace/freespace.c --- b/src/backend/storage/freespace/freespace.c *************** *** 216,222 **** XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, PageInit(page, BLCKSZ, 0); if (fsm_set_avail(page, slot, new_cat)) ! MarkBufferDirtyHint(buf); UnlockReleaseBuffer(buf); } --- 216,222 ---- PageInit(page, BLCKSZ, 0); if (fsm_set_avail(page, slot, new_cat)) ! MarkBufferDirtyHint(buf, false); UnlockReleaseBuffer(buf); } *************** *** 286,292 **** FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) return; /* nothing to do; the FSM was already smaller */ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); fsm_truncate_avail(BufferGetPage(buf), first_removed_slot); ! MarkBufferDirtyHint(buf); UnlockReleaseBuffer(buf); new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1; --- 286,292 ---- return; /* nothing to do; the FSM was already smaller */ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); fsm_truncate_avail(BufferGetPage(buf), first_removed_slot); ! MarkBufferDirtyHint(buf, false); UnlockReleaseBuffer(buf); new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1; *************** *** 619,625 **** fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, page = BufferGetPage(buf); if (fsm_set_avail(page, slot, newValue)) ! MarkBufferDirtyHint(buf); if (minValue != 0) { --- 619,625 ---- page = BufferGetPage(buf); if (fsm_set_avail(page, slot, newValue)) ! MarkBufferDirtyHint(buf, false); if (minValue != 0) { *************** *** 770,776 **** fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) { LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); fsm_set_avail(BufferGetPage(buf), slot, child_avail); ! MarkBufferDirtyHint(buf); LockBuffer(buf, BUFFER_LOCK_UNLOCK); } } --- 770,776 ---- { LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); fsm_set_avail(BufferGetPage(buf), slot, child_avail); ! MarkBufferDirtyHint(buf, false); LockBuffer(buf, BUFFER_LOCK_UNLOCK); } } *** a/src/backend/storage/freespace/fsmpage.c --- b/src/backend/storage/freespace/fsmpage.c *************** *** 284,290 **** restart: exclusive_lock_held = true; } fsm_rebuild_page(page); ! MarkBufferDirtyHint(buf); goto restart; } } --- 284,290 ---- exclusive_lock_held = true; } fsm_rebuild_page(page); ! MarkBufferDirtyHint(buf, false); goto restart; } } *** a/src/backend/utils/time/tqual.c --- b/src/backend/utils/time/tqual.c *************** *** 121,127 **** SetHintBits(HeapTupleHeader tuple, Buffer buffer, } tuple->t_infomask |= infomask; ! MarkBufferDirtyHint(buffer); } /* --- 121,127 ---- } tuple->t_infomask |= infomask; ! MarkBufferDirtyHint(buffer, true); } /* *** a/src/include/access/xlog.h --- b/src/include/access/xlog.h *************** *** 267,273 **** extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock); extern int XLogFileOpen(XLogSegNo segno); ! extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer); extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli); extern void XLogSetAsyncXactLSN(XLogRecPtr record); --- 267,273 ---- extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock); extern int XLogFileOpen(XLogSegNo segno); ! extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli); extern void XLogSetAsyncXactLSN(XLogRecPtr record); *** a/src/include/catalog/pg_control.h --- b/src/include/catalog/pg_control.h *************** *** 67,73 **** typedef struct CheckPoint #define XLOG_RESTORE_POINT 0x70 #define XLOG_FPW_CHANGE 0x80 #define XLOG_END_OF_RECOVERY 0x90 ! #define XLOG_HINT 0xA0 /* --- 67,73 ---- #define XLOG_RESTORE_POINT 0x70 #define XLOG_FPW_CHANGE 0x80 #define XLOG_END_OF_RECOVERY 0x90 ! #define XLOG_FPI 0xA0 /* *** a/src/include/storage/bufmgr.h --- b/src/include/storage/bufmgr.h *************** *** 204,210 **** extern Size BufferShmemSize(void); extern void BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum); ! extern void MarkBufferDirtyHint(Buffer buffer); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode); --- 204,210 ---- extern void BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum); ! extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers