Re: [HACKERS] Avoiding unnecessary reads in recovery

Heikki Linnakangas Wed, 25 Apr 2007 08:08:11 -0700

Gregory Stark wrote:

So in short I think with your patch this piece of code no longer has a role.
Either your patch kicks in and we never even look at the damaged page at all,
or we should be treating it as corrupt data and just check zero_damaged_pages
alone and not do anything special in recovery.


Good point. Adjusted patch attached. I added some comments as well.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com

Index: src/backend/access/transam/xlogutils.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/transam/xlogutils.c,v
retrieving revision 1.49
diff -c -r1.49 xlogutils.c
*** src/backend/access/transam/xlogutils.c	5 Jan 2007 22:19:24 -0000	1.49
--- src/backend/access/transam/xlogutils.c	25 Apr 2007 14:27:05 -0000
***************
*** 226,232 ****
  	if (blkno < lastblock)
  	{
  		/* page exists in file */
! 		buffer = ReadBuffer(reln, blkno);
  	}
  	else
  	{
--- 226,235 ----
  	if (blkno < lastblock)
  	{
  		/* page exists in file */
! 		if (init)
! 			buffer = ZapBuffer(reln, blkno);
! 		else
! 			buffer = ReadBuffer(reln, blkno);
  	}
  	else
  	{
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.216
diff -c -r1.216 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c	30 Mar 2007 18:34:55 -0000	1.216
--- src/backend/storage/buffer/bufmgr.c	25 Apr 2007 14:36:15 -0000
***************
*** 17,22 ****
--- 17,26 ----
   *		and pin it so that no one can destroy it while this process
   *		is using it.
   *
+  * ZapBuffer() -- like ReadBuffer, but destroys the contents of the 
+  *		page. Used in recovery when the page is completely overwritten 
+  *		from WAL.
+  *
   * ReleaseBuffer() -- unpin a buffer
   *
   * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
***************
*** 97,102 ****
--- 101,107 ----
  static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
  				  int set_flag_bits);
  static void buffer_write_error_callback(void *arg);
+ static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum, bool alloc_only);
  static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
  			bool *foundPtr);
  static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
***************
*** 121,126 ****
--- 126,155 ----
  Buffer
  ReadBuffer(Relation reln, BlockNumber blockNum)
  {
+ 	return ReadBuffer_common(reln, blockNum, false);
+ }
+ 
+ /*
+  * ZapBuffer -- like ReadBuffer, but doesn't read the contents of the page
+  *		from disk. The caller is expected to completely rewrite the page,
+  *		regardless of the current contents. This should only be used in
+  *		recovery where there's no concurrent readers that might see the
+  *		contents of the page before the caller rewrites it.
+  */
+ Buffer
+ ZapBuffer(Relation reln, BlockNumber blockNum)
+ {
+ 	Assert(InRecovery);
+ 
+ 	return ReadBuffer_common(reln, blockNum, true);
+ }
+ 
+ /*
+  * ReadBuffer_common -- common logic of ReadBuffer and ZapBuffer.
+  */
+ static Buffer
+ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool alloc_only)
+ {
  	volatile BufferDesc *bufHdr;
  	Block		bufBlock;
  	bool		found;
***************
*** 253,269 ****
  	}
  	else
  	{
  		smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
  		/* check for garbage data */
  		if (!PageHeaderIsValid((PageHeader) bufBlock))
  		{
  			/*
! 			 * During WAL recovery, the first access to any data page should
! 			 * overwrite the whole page from the WAL; so a clobbered page
! 			 * header is not reason to fail.  Hence, when InRecovery we may
! 			 * always act as though zero_damaged_pages is ON.
  			 */
! 			if (zero_damaged_pages || InRecovery)
  			{
  				ereport(WARNING,
  						(errcode(ERRCODE_DATA_CORRUPTED),
--- 282,304 ----
  	}
  	else
  	{
+ 		/* 
+ 		 * Read in the page, unless the caller intends to overwrite it
+ 		 * and just wants us to allocate a buffer.
+ 		 */
+ 		if (!alloc_only)
+ 		{
  		smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
  		/* check for garbage data */
  		if (!PageHeaderIsValid((PageHeader) bufBlock))
  		{
  			/*
! 			 * We used to ignore corrupt pages in WAL recovery, but
! 			 * that was only ever safe if full_page_writes was enabled.
! 			 * Now the caller sets alloc_only to false if he intends to 
! 			 * overwrite the whole page, which is already checked above.
  			 */
! 			if (zero_damaged_pages)
  			{
  				ereport(WARNING,
  						(errcode(ERRCODE_DATA_CORRUPTED),
***************
*** 277,282 ****
--- 312,318 ----
  				 errmsg("invalid page header in block %u of relation \"%s\"",
  						blockNum, RelationGetRelationName(reln))));
  		}
+ 		}
  	}
  
  	if (isLocalBuf)
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.102
diff -c -r1.102 bufmgr.h
*** src/include/storage/bufmgr.h	5 Jan 2007 22:19:57 -0000	1.102
--- src/include/storage/bufmgr.h	25 Apr 2007 14:14:28 -0000
***************
*** 111,116 ****
--- 111,117 ----
   * prototypes for functions in bufmgr.c
   */
  extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+ extern Buffer ZapBuffer(Relation reln, BlockNumber blockNum);
  extern void ReleaseBuffer(Buffer buffer);
  extern void UnlockReleaseBuffer(Buffer buffer);
  extern void MarkBufferDirty(Buffer buffer);

---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Re: [HACKERS] Avoiding unnecessary reads in recovery

Reply via email to