Gregory Stark wrote:
So in short I think with your patch this piece of code no longer has a role.
Either your patch kicks in and we never even look at the damaged page at all,
or we should be treating it as corrupt data and just check zero_damaged_pages
alone and not do anything special in recovery.
Good point. Adjusted patch attached. I added some comments as well.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Index: src/backend/access/transam/xlogutils.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/transam/xlogutils.c,v
retrieving revision 1.49
diff -c -r1.49 xlogutils.c
*** src/backend/access/transam/xlogutils.c 5 Jan 2007 22:19:24 -0000 1.49
--- src/backend/access/transam/xlogutils.c 25 Apr 2007 14:27:05 -0000
***************
*** 226,232 ****
if (blkno < lastblock)
{
/* page exists in file */
! buffer = ReadBuffer(reln, blkno);
}
else
{
--- 226,235 ----
if (blkno < lastblock)
{
/* page exists in file */
! if (init)
! buffer = ZapBuffer(reln, blkno);
! else
! buffer = ReadBuffer(reln, blkno);
}
else
{
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.216
diff -c -r1.216 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 30 Mar 2007 18:34:55 -0000 1.216
--- src/backend/storage/buffer/bufmgr.c 25 Apr 2007 14:36:15 -0000
***************
*** 17,22 ****
--- 17,26 ----
* and pin it so that no one can destroy it while this process
* is using it.
*
+ * ZapBuffer() -- like ReadBuffer, but destroys the contents of the
+ * page. Used in recovery when the page is completely overwritten
+ * from WAL.
+ *
* ReleaseBuffer() -- unpin a buffer
*
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
***************
*** 97,102 ****
--- 101,107 ----
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
int set_flag_bits);
static void buffer_write_error_callback(void *arg);
+ static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum, bool alloc_only);
static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
***************
*** 121,126 ****
--- 126,155 ----
Buffer
ReadBuffer(Relation reln, BlockNumber blockNum)
{
+ return ReadBuffer_common(reln, blockNum, false);
+ }
+
+ /*
+ * ZapBuffer -- like ReadBuffer, but doesn't read the contents of the page
+ * from disk. The caller is expected to completely rewrite the page,
+ * regardless of the current contents. This should only be used in
+ * recovery where there's no concurrent readers that might see the
+ * contents of the page before the caller rewrites it.
+ */
+ Buffer
+ ZapBuffer(Relation reln, BlockNumber blockNum)
+ {
+ Assert(InRecovery);
+
+ return ReadBuffer_common(reln, blockNum, true);
+ }
+
+ /*
+ * ReadBuffer_common -- common logic of ReadBuffer and ZapBuffer.
+ */
+ static Buffer
+ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool alloc_only)
+ {
volatile BufferDesc *bufHdr;
Block bufBlock;
bool found;
***************
*** 253,269 ****
}
else
{
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
/* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock))
{
/*
! * During WAL recovery, the first access to any data page should
! * overwrite the whole page from the WAL; so a clobbered page
! * header is not reason to fail. Hence, when InRecovery we may
! * always act as though zero_damaged_pages is ON.
*/
! if (zero_damaged_pages || InRecovery)
{
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
--- 282,304 ----
}
else
{
+ /*
+ * Read in the page, unless the caller intends to overwrite it
+ * and just wants us to allocate a buffer.
+ */
+ if (!alloc_only)
+ {
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
/* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock))
{
/*
! * We used to ignore corrupt pages in WAL recovery, but
! * that was only ever safe if full_page_writes was enabled.
! * Now the caller sets alloc_only to false if he intends to
! * overwrite the whole page, which is already checked above.
*/
! if (zero_damaged_pages)
{
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
***************
*** 277,282 ****
--- 312,318 ----
errmsg("invalid page header in block %u of relation \"%s\"",
blockNum, RelationGetRelationName(reln))));
}
+ }
}
if (isLocalBuf)
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.102
diff -c -r1.102 bufmgr.h
*** src/include/storage/bufmgr.h 5 Jan 2007 22:19:57 -0000 1.102
--- src/include/storage/bufmgr.h 25 Apr 2007 14:14:28 -0000
***************
*** 111,116 ****
--- 111,117 ----
* prototypes for functions in bufmgr.c
*/
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+ extern Buffer ZapBuffer(Relation reln, BlockNumber blockNum);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster