On Fri, 2013-06-14 at 18:27 +0200, Andres Freund wrote:
> I'd like to see a comment around the memcpys in XLogSaveBufferForHint()
> that mentions that they are safe in a non std buffer due to
> XLogCheckBuffer setting an appropriate hole/offset. Or make an explicit
> change of the copy algorithm there.

Done.

> Btw, if you touch that code, I'd vote for renaming XLOG_HINT to XLOG_FPI
> or something like that. I find the former name confusing...

Also done.

Patch attached. Also, since we branched, I think this should be
back-patched to 9.3 as well.

Regards,
        Jeff Davis

*** a/src/backend/access/hash/hash.c
--- b/src/backend/access/hash/hash.c
***************
*** 287,293 **** hashgettuple(PG_FUNCTION_ARGS)
  			/*
  			 * Since this can be redone later if needed, mark as a hint.
  			 */
! 			MarkBufferDirtyHint(buf);
  		}
  
  		/*
--- 287,293 ----
  			/*
  			 * Since this can be redone later if needed, mark as a hint.
  			 */
! 			MarkBufferDirtyHint(buf, true);
  		}
  
  		/*
*** a/src/backend/access/heap/pruneheap.c
--- b/src/backend/access/heap/pruneheap.c
***************
*** 262,268 **** heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
  		{
  			((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
  			PageClearFull(page);
! 			MarkBufferDirtyHint(buffer);
  		}
  	}
  
--- 262,268 ----
  		{
  			((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
  			PageClearFull(page);
! 			MarkBufferDirtyHint(buffer, true);
  		}
  	}
  
*** a/src/backend/access/nbtree/nbtinsert.c
--- b/src/backend/access/nbtree/nbtinsert.c
***************
*** 413,421 **** _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
  					 * crucial. Be sure to mark the proper buffer dirty.
  					 */
  					if (nbuf != InvalidBuffer)
! 						MarkBufferDirtyHint(nbuf);
  					else
! 						MarkBufferDirtyHint(buf);
  				}
  			}
  		}
--- 413,421 ----
  					 * crucial. Be sure to mark the proper buffer dirty.
  					 */
  					if (nbuf != InvalidBuffer)
! 						MarkBufferDirtyHint(nbuf, true);
  					else
! 						MarkBufferDirtyHint(buf, true);
  				}
  			}
  		}
*** a/src/backend/access/nbtree/nbtree.c
--- b/src/backend/access/nbtree/nbtree.c
***************
*** 1052,1058 **** restart:
  				opaque->btpo_cycleid == vstate->cycleid)
  			{
  				opaque->btpo_cycleid = 0;
! 				MarkBufferDirtyHint(buf);
  			}
  		}
  
--- 1052,1058 ----
  				opaque->btpo_cycleid == vstate->cycleid)
  			{
  				opaque->btpo_cycleid = 0;
! 				MarkBufferDirtyHint(buf, true);
  			}
  		}
  
*** a/src/backend/access/nbtree/nbtutils.c
--- b/src/backend/access/nbtree/nbtutils.c
***************
*** 1789,1795 **** _bt_killitems(IndexScanDesc scan, bool haveLock)
  	if (killedsomething)
  	{
  		opaque->btpo_flags |= BTP_HAS_GARBAGE;
! 		MarkBufferDirtyHint(so->currPos.buf);
  	}
  
  	if (!haveLock)
--- 1789,1795 ----
  	if (killedsomething)
  	{
  		opaque->btpo_flags |= BTP_HAS_GARBAGE;
! 		MarkBufferDirtyHint(so->currPos.buf, true);
  	}
  
  	if (!haveLock)
*** a/src/backend/access/rmgrdesc/xlogdesc.c
--- b/src/backend/access/rmgrdesc/xlogdesc.c
***************
*** 82,92 **** xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
  		appendStringInfo(buf, "restore point: %s", xlrec->rp_name);
  
  	}
! 	else if (info == XLOG_HINT)
  	{
  		BkpBlock   *bkp = (BkpBlock *) rec;
  
! 		appendStringInfo(buf, "page hint: %s block %u",
  						 relpathperm(bkp->node, bkp->fork),
  						 bkp->block);
  	}
--- 82,92 ----
  		appendStringInfo(buf, "restore point: %s", xlrec->rp_name);
  
  	}
! 	else if (info == XLOG_FPI)
  	{
  		BkpBlock   *bkp = (BkpBlock *) rec;
  
! 		appendStringInfo(buf, "full-page image: %s block %u",
  						 relpathperm(bkp->node, bkp->fork),
  						 bkp->block);
  	}
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 7681,7692 **** XLogRestorePoint(const char *rpName)
   * records. In that case, multiple copies of the same block would be recorded
   * in separate WAL records by different backends, though that is still OK from
   * a correctness perspective.
-  *
-  * Note that this only works for buffers that fit the standard page model,
-  * i.e. those for which buffer_std == true
   */
  XLogRecPtr
! XLogSaveBufferForHint(Buffer buffer)
  {
  	XLogRecPtr	recptr = InvalidXLogRecPtr;
  	XLogRecPtr	lsn;
--- 7681,7689 ----
   * records. In that case, multiple copies of the same block would be recorded
   * in separate WAL records by different backends, though that is still OK from
   * a correctness perspective.
   */
  XLogRecPtr
! XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
  {
  	XLogRecPtr	recptr = InvalidXLogRecPtr;
  	XLogRecPtr	lsn;
***************
*** 7708,7714 **** XLogSaveBufferForHint(Buffer buffer)
  	 * and reset rdata for any actual WAL record insert.
  	 */
  	rdata[0].buffer = buffer;
! 	rdata[0].buffer_std = true;
  
  	/*
  	 * Check buffer while not holding an exclusive lock.
--- 7705,7711 ----
  	 * and reset rdata for any actual WAL record insert.
  	 */
  	rdata[0].buffer = buffer;
! 	rdata[0].buffer_std = buffer_std;
  
  	/*
  	 * Check buffer while not holding an exclusive lock.
***************
*** 7722,7727 **** XLogSaveBufferForHint(Buffer buffer)
--- 7719,7727 ----
  		 * Copy buffer so we don't have to worry about concurrent hint bit or
  		 * lsn updates. We assume pd_lower/upper cannot be changed without an
  		 * exclusive lock, so the contents bkp are not racy.
+ 		 *
+ 		 * With buffer_std set to false, XLogCheckBuffer() sets hole_length and
+ 		 * hole_offset to 0; so the following code is safe for either case.
  		 */
  		memcpy(copied_buffer, origdata, bkpb.hole_offset);
  		memcpy(copied_buffer + bkpb.hole_offset,
***************
*** 7744,7750 **** XLogSaveBufferForHint(Buffer buffer)
  		rdata[1].buffer = InvalidBuffer;
  		rdata[1].next = NULL;
  
! 		recptr = XLogInsert(RM_XLOG_ID, XLOG_HINT, rdata);
  	}
  
  	return recptr;
--- 7744,7750 ----
  		rdata[1].buffer = InvalidBuffer;
  		rdata[1].next = NULL;
  
! 		recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
  	}
  
  	return recptr;
***************
*** 8109,8122 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record)
  	{
  		/* nothing to do here */
  	}
! 	else if (info == XLOG_HINT)
  	{
  		char	   *data;
  		BkpBlock	bkpb;
  
  		/*
! 		 * Hint bit records contain a backup block stored "inline" in the
! 		 * normal data since the locking when writing hint records isn't
  		 * sufficient to use the normal backup block mechanism, which assumes
  		 * exclusive lock on the buffer supplied.
  		 *
--- 8109,8122 ----
  	{
  		/* nothing to do here */
  	}
! 	else if (info == XLOG_FPI)
  	{
  		char	   *data;
  		BkpBlock	bkpb;
  
  		/*
! 		 * Full-page image (FPI) records contain a backup block stored "inline"
! 		 * in the normal data since the locking when writing hint records isn't
  		 * sufficient to use the normal backup block mechanism, which assumes
  		 * exclusive lock on the buffer supplied.
  		 *
*** a/src/backend/commands/sequence.c
--- b/src/backend/commands/sequence.c
***************
*** 1118,1124 **** read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
  		HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
  		seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
  		seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
! 		MarkBufferDirtyHint(*buf);
  	}
  
  	seq = (Form_pg_sequence) GETSTRUCT(seqtuple);
--- 1118,1124 ----
  		HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
  		seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
  		seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
! 		MarkBufferDirtyHint(*buf, true);
  	}
  
  	seq = (Form_pg_sequence) GETSTRUCT(seqtuple);
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 2587,2593 **** IncrBufferRefCount(Buffer buffer)
   *	  (due to a race condition), so it cannot be used for important changes.
   */
  void
! MarkBufferDirtyHint(Buffer buffer)
  {
  	volatile BufferDesc *bufHdr;
  	Page		page = BufferGetPage(buffer);
--- 2587,2593 ----
   *	  (due to a race condition), so it cannot be used for important changes.
   */
  void
! MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
  {
  	volatile BufferDesc *bufHdr;
  	Page		page = BufferGetPage(buffer);
***************
*** 2671,2677 **** MarkBufferDirtyHint(Buffer buffer)
  			 * rather than full transactionids.
  			 */
  			MyPgXact->delayChkpt = delayChkpt = true;
! 			lsn = XLogSaveBufferForHint(buffer);
  		}
  
  		LockBufHdr(bufHdr);
--- 2671,2677 ----
  			 * rather than full transactionids.
  			 */
  			MyPgXact->delayChkpt = delayChkpt = true;
! 			lsn = XLogSaveBufferForHint(buffer, buffer_std);
  		}
  
  		LockBufHdr(bufHdr);
*** a/src/backend/storage/freespace/freespace.c
--- b/src/backend/storage/freespace/freespace.c
***************
*** 216,222 **** XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
  		PageInit(page, BLCKSZ, 0);
  
  	if (fsm_set_avail(page, slot, new_cat))
! 		MarkBufferDirtyHint(buf);
  	UnlockReleaseBuffer(buf);
  }
  
--- 216,222 ----
  		PageInit(page, BLCKSZ, 0);
  
  	if (fsm_set_avail(page, slot, new_cat))
! 		MarkBufferDirtyHint(buf, false);
  	UnlockReleaseBuffer(buf);
  }
  
***************
*** 286,292 **** FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
  			return;				/* nothing to do; the FSM was already smaller */
  		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
  		fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
! 		MarkBufferDirtyHint(buf);
  		UnlockReleaseBuffer(buf);
  
  		new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
--- 286,292 ----
  			return;				/* nothing to do; the FSM was already smaller */
  		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
  		fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
! 		MarkBufferDirtyHint(buf, false);
  		UnlockReleaseBuffer(buf);
  
  		new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
***************
*** 619,625 **** fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
  	page = BufferGetPage(buf);
  
  	if (fsm_set_avail(page, slot, newValue))
! 		MarkBufferDirtyHint(buf);
  
  	if (minValue != 0)
  	{
--- 619,625 ----
  	page = BufferGetPage(buf);
  
  	if (fsm_set_avail(page, slot, newValue))
! 		MarkBufferDirtyHint(buf, false);
  
  	if (minValue != 0)
  	{
***************
*** 770,776 **** fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
  			{
  				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
  				fsm_set_avail(BufferGetPage(buf), slot, child_avail);
! 				MarkBufferDirtyHint(buf);
  				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
  			}
  		}
--- 770,776 ----
  			{
  				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
  				fsm_set_avail(BufferGetPage(buf), slot, child_avail);
! 				MarkBufferDirtyHint(buf, false);
  				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
  			}
  		}
*** a/src/backend/storage/freespace/fsmpage.c
--- b/src/backend/storage/freespace/fsmpage.c
***************
*** 284,290 **** restart:
  				exclusive_lock_held = true;
  			}
  			fsm_rebuild_page(page);
! 			MarkBufferDirtyHint(buf);
  			goto restart;
  		}
  	}
--- 284,290 ----
  				exclusive_lock_held = true;
  			}
  			fsm_rebuild_page(page);
! 			MarkBufferDirtyHint(buf, false);
  			goto restart;
  		}
  	}
*** a/src/backend/utils/time/tqual.c
--- b/src/backend/utils/time/tqual.c
***************
*** 121,127 **** SetHintBits(HeapTupleHeader tuple, Buffer buffer,
  	}
  
  	tuple->t_infomask |= infomask;
! 	MarkBufferDirtyHint(buffer);
  }
  
  /*
--- 121,127 ----
  	}
  
  	tuple->t_infomask |= infomask;
! 	MarkBufferDirtyHint(buffer, true);
  }
  
  /*
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 267,273 **** extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
  extern int	XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
  extern int	XLogFileOpen(XLogSegNo segno);
  
! extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer);
  
  extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
  extern void XLogSetAsyncXactLSN(XLogRecPtr record);
--- 267,273 ----
  extern int	XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
  extern int	XLogFileOpen(XLogSegNo segno);
  
! extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
  
  extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
  extern void XLogSetAsyncXactLSN(XLogRecPtr record);
*** a/src/include/catalog/pg_control.h
--- b/src/include/catalog/pg_control.h
***************
*** 67,73 **** typedef struct CheckPoint
  #define XLOG_RESTORE_POINT				0x70
  #define XLOG_FPW_CHANGE					0x80
  #define XLOG_END_OF_RECOVERY			0x90
! #define XLOG_HINT						0xA0
  
  
  /*
--- 67,73 ----
  #define XLOG_RESTORE_POINT				0x70
  #define XLOG_FPW_CHANGE					0x80
  #define XLOG_END_OF_RECOVERY			0x90
! #define XLOG_FPI						0xA0
  
  
  /*
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 204,210 **** extern Size BufferShmemSize(void);
  extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
  			 ForkNumber *forknum, BlockNumber *blknum);
  
! extern void MarkBufferDirtyHint(Buffer buffer);
  
  extern void UnlockBuffers(void);
  extern void LockBuffer(Buffer buffer, int mode);
--- 204,210 ----
  extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
  			 ForkNumber *forknum, BlockNumber *blknum);
  
! extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
  
  extern void UnlockBuffers(void);
  extern void LockBuffer(Buffer buffer, int mode);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to