Gregory Stark wrote:

> I think we're talking past each other. Martin and I are talking about doing
> something like:
> 
> for (...)
>   ...
>   crc(word including hint bits)
>   ...
> for (each line pointer)
>   crc-negated(word & LP_DEAD<<15)
>  
> Because CRC is a cyclic checksum it's possible to add or remove bits
> incrementally.

I see.

Since our CRC implementation is a simple byte loop, and since ItemIdData
fits in a uint32, the attached patch should do mostly the same by
copying the line pointer into a uint32, turning off the lp_flags, and
summing the modified copy.

This patch is also skipping pd_special and the unused area of the page.

I'm still testing this; please beware that this likely has an even
higher bug density than my regular patches (and some debugging printouts
as well).

While reading the pg_filedump code I noticed that there's a way to tell
the different index pages apart, so perhaps we can use that to be able
to checksum the special space as well.

-- 
Alvaro Herrera                                http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.269
diff -c -p -r1.269 heapam.c
*** src/backend/access/heap/heapam.c	6 Nov 2008 20:51:14 -0000	1.269
--- src/backend/access/heap/heapam.c	13 Nov 2008 17:44:23 -0000
***************
*** 4036,4041 ****
--- 4036,4128 ----
  }
  
  /*
+  * Perform XLogInsert for hint bits changes in a page.  This handles hint
+  * bits set in HeapTupleHeaderData (t_infomask and t_infomask2).
+  *
+  * This is intended to be called right before writing a page from shared
+  * buffers to disk.
+  *
+  * The approach used here, instead of WAL-logging every change, is to produce
+  * a complete record of the current state of hint bits in a page just before
+  * flushing it.  There are two downsides to this approach: first, it stores
+  * all hint bits in the page, not only those that changed; and second, that
+  * the flusher of the page needs to flush a lot more of the WAL (namely up
+  * to this new record's LSN) than the original LSN marked on the page.
+  */
+ XLogRecPtr
+ log_hintbits(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
+ 			 Page page)
+ {
+ 	xl_heap_hintbits xlrec;
+ 	OffsetNumber	i;
+ 	XLogRecPtr		recptr;
+ 	XLogRecData		rdata[2];
+ 	char		   *bits;
+ 	int				pos = 0;
+ 	StringInfoData	buf;
+ 
+ 	/*
+ 	 * Scratch array: per tuple with storage, 2 bytes of infomask followed by
+ 	 * 2 bytes of infomask2.  Allocated before the critical section.
+ 	 */
+ 	bits = palloc(MaxHeapTuplesPerPage * 2 * sizeof(uint16));
+ 
+ 	/* debugging printout of everything that goes into the record */
+ 	initStringInfo(&buf);
+ 	appendStringInfo(&buf, "page %u: ", blkno);
+ 
+ 	for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page);
+ 		 i = OffsetNumberNext(i))
+ 	{
+ 		HeapTupleHeader	htup;
+ 		ItemId		lp = PageGetItemId(page, i);
+ 
+ 		/* redo skips the same line pointers, so no offset is stored */
+ 		if (!ItemIdHasStorage(lp))
+ 			continue;
+ 
+ 		appendStringInfo(&buf, "offset %d: ", i);
+ 		htup = (HeapTupleHeader) PageGetItem(page, lp);
+ 
+ 		*((uint16 *) (bits + pos)) = htup->t_infomask & HEAP_XACT_MASK;
+ 		appendStringInfo(&buf, "infomask %04x/%04x ", htup->t_infomask,
+ 			 htup->t_infomask & HEAP_XACT_MASK);
+ 		pos += 2;
+ 		*((uint16 *) (bits + pos)) = htup->t_infomask2 & HEAP2_XACT_MASK;
+ 		appendStringInfo(&buf, "infomask2 %04x/%04x\n", htup->t_infomask2,
+ 			 htup->t_infomask2 & HEAP2_XACT_MASK);
+ 		pos += 2;
+ 	}
+ 
+ 	elog(LOG, "%s", buf.data);
+ 	pfree(buf.data);
+ 
+ 	/* NO ELOG(ERROR) from here till hint bits are logged */
+ 	START_CRIT_SECTION();
+ 
+ 	xlrec.node = *rnode;
+ 	xlrec.block = blkno;
+ 
+ 	rdata[0].data = (char *) &xlrec;
+ 	rdata[0].len = SizeOfHeapHintbits;
+ 	rdata[0].buffer = InvalidBuffer;
+ 	rdata[0].next = &(rdata[1]);
+ 
+ 	rdata[1].data = (char *) bits;
+ 	rdata[1].len = pos;
+ 	rdata[1].buffer = InvalidBuffer;
+ 	rdata[1].next = NULL;
+ 
+ 	recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_HINTBITS, rdata);
+ 	PageSetLSN(page, recptr);
+ 	PageSetTLI(page, ThisTimeLineID);
+ 
+ 	END_CRIT_SECTION();
+ 	pfree(bits);			/* no longer needed once the record is inserted */
+ 	return recptr;
+ }
+ 
+ /*
   * Handles CLEAN and CLEAN_MOVE record types
   */
  static void
***************
*** 4153,4158 ****
--- 4240,4324 ----
  }
  
  static void
+ heap_xlog_hintbits(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_heap_hintbits *xlrec = (xl_heap_hintbits *) XLogRecGetData(record);
+ 	Buffer		buffer;
+ 	Page		page;
+ 
+ 	buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+ 	if (!BufferIsValid(buffer))
+ 		return;
+ 	page = (Page) BufferGetPage(buffer);
+ 
+ 	/* record LSN is not newer than the page LSN: nothing to replay */
+ 	if (XLByteLE(lsn, PageGetLSN(page)))
+ 	{
+ 		UnlockReleaseBuffer(buffer);
+ 		return;
+ 	}
+ 
+ 	if (record->xl_len > SizeOfHeapHintbits)
+ 	{
+ 		char *bits;
+ 		char *bits_end;
+ 		OffsetNumber offset = FirstOffsetNumber;
+ 		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+ 		StringInfoData	buf;
+ 
+ 		/* array of (infomask, infomask2) pairs following the fixed header */
+ 		bits = (char *) xlrec + SizeOfHeapHintbits;
+ 		bits_end = (char *) xlrec + record->xl_len;
+ 
+ 		initStringInfo(&buf);
+ 		appendStringInfo(&buf, "page %u: ", xlrec->block);
+ 
+ 		/* entries are in offset order, one per line pointer with storage */
+ 		while (bits < bits_end)
+ 		{
+ 			HeapTupleHeader	htup;
+ 			ItemId		lp;
+ 
+ 			/* never step past the line pointer array looking for a tuple */
+ 			if (offset > maxoff)
+ 				elog(PANIC, "heap_hintbits_redo: invalid max offset number");
+ 
+ 			lp = PageGetItemId(page, offset);
+ 			if (!ItemIdHasStorage(lp))
+ 			{
+ 				offset++;
+ 				continue;
+ 			}
+ 
+ 			appendStringInfo(&buf, "offset %d: ", offset);
+ 			htup = (HeapTupleHeader) PageGetItem(page, lp);
+ 
+ 			/* set the right bits in infomask */
+ 			htup->t_infomask = *(uint16 *) bits |
+ 				(htup->t_infomask & ~HEAP_XACT_MASK);
+ 			appendStringInfo(&buf, "infomask %04x/%04x ", htup->t_infomask,
+ 							 *(uint16 *) bits);
+ 			bits += 2;
+ 
+ 			/* set the right bits in infomask2 */
+ 			htup->t_infomask2 = *(uint16 *) bits |
+ 				(htup->t_infomask2 & ~HEAP2_XACT_MASK);
+ 			appendStringInfo(&buf, "infomask2 %04x/%04x\n", htup->t_infomask2,
+ 							 *(uint16 *) bits);
+ 			bits += 2;
+ 			offset++;
+ 		}
+ 		elog(LOG, "%s", buf.data);
+ 		pfree(buf.data);
+ 	}
+ 
+ 	PageSetLSN(page, lsn);
+ 	PageSetTLI(page, ThisTimeLineID);
+ 	MarkBufferDirty(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ }
+ 
+ static void
  heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
  {
  	xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
***************
*** 4692,4697 ****
--- 4858,4866 ----
  		case XLOG_HEAP2_CLEAN_MOVE:
  			heap_xlog_clean(lsn, record, true);
  			break;
+ 		case XLOG_HEAP2_HINTBITS:
+ 			heap_xlog_hintbits(lsn, record);
+ 			break;
  		default:
  			elog(PANIC, "heap2_redo: unknown op code %u", info);
  	}
***************
*** 4833,4838 ****
--- 5002,5015 ----
  						 xlrec->node.spcNode, xlrec->node.dbNode,
  						 xlrec->node.relNode, xlrec->block);
  	}
+ 	else if (info == XLOG_HEAP2_HINTBITS)
+ 	{
+ 		xl_heap_hintbits *xlrec = (xl_heap_hintbits *) rec;
+ 
+ 		appendStringInfo(buf, "hintbits: rel %u/%u/%u; blk %u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode, xlrec->block);
+ 	}
  	else
  		appendStringInfo(buf, "UNKNOWN");
  }
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.241
diff -c -p -r1.241 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c	11 Nov 2008 13:19:16 -0000	1.241
--- src/backend/storage/buffer/bufmgr.c	13 Nov 2008 17:44:23 -0000
***************
*** 33,38 ****
--- 33,39 ----
  #include <sys/file.h>
  #include <unistd.h>
  
+ #include "access/heapam.h"
  #include "catalog/catalog.h"
  #include "miscadmin.h"
  #include "pg_trace.h"
***************
*** 43,48 ****
--- 44,50 ----
  #include "storage/ipc.h"
  #include "storage/proc.h"
  #include "storage/smgr.h"
+ #include "utils/memutils.h"
  #include "utils/rel.h"
  #include "utils/resowner.h"
  
***************
*** 1461,1467 ****
   *	BUF_REUSABLE: buffer is available for replacement, ie, it has
   *		pin count 0 and usage count 0.
   *
!  * (BUF_WRITTEN could be set in error if FlushBuffers finds the buffer clean
   * after locking it, but we don't care all that much.)
   *
   * Note: caller must have done ResourceOwnerEnlargeBuffers.
--- 1463,1469 ----
   *	BUF_REUSABLE: buffer is available for replacement, ie, it has
   *		pin count 0 and usage count 0.
   *
!  * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
   * after locking it, but we don't care all that much.)
   *
   * Note: caller must have done ResourceOwnerEnlargeBuffers.
***************
*** 1772,1777 ****
--- 1774,1787 ----
  {
  	XLogRecPtr	recptr;
  	ErrorContextCallback errcontext;
+ 	static char *dblbuf = NULL;
+ 	bool	done = false;
+ 	
+ 	if (enable_block_checksums && dblbuf == NULL)
+ 	{
+ 		dblbuf = MemoryContextAlloc(TopMemoryContext, BLCKSZ + ALIGNOF_BUFFER);
+ 		dblbuf = (char *) BUFFERALIGN(dblbuf);
+ 	}
  
  	/*
  	 * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
***************
*** 1796,1801 ****
--- 1806,1835 ----
  		 reln->smgr_rnode.relNode);
  
  	/*
+ 	 * We make a copy of the buffer to write.
+ 	 */
+ 	if (enable_block_checksums)
+ 		memcpy(dblbuf, BufHdrGetBlock(buf), BLCKSZ);
+ 
+ 	/*
+ 	 * If the page has been modified by a hint bit setter, ensure we WAL-log
+ 	 * their changes before actually writing the page; otherwise the CRC we're
+ 	 * about to store could be invalid if the page is torn.  Note: we check
+ 	 * the flag on the shared-memory copy of the buffer, not the private copy
+ 	 * we just made, to forestall the possibility that hint bits could have
+ 	 * been set in the later parts of the page after we copied the flag in
+ 	 * unset state.
+ 	 */
+ 	if (enable_block_checksums && PageHasUnloggedChange(BufHdrGetBlock(buf)) &&
+ 		!InRecovery)
+ 	{
+ 		/* XXX cast away the "volatile" qualifier */
+ 		log_hintbits(&((BufferDesc *) buf)->tag.rnode, buf->tag.forkNum,
+ 					 buf->tag.blockNum, BufHdrGetBlock(buf));
+ 		done = true;
+ 	}
+ 
+ 	/*
  	 * Force XLOG flush up to buffer's LSN.  This implements the basic WAL
  	 * rule that log updates must hit disk before any of the data-file changes
  	 * they describe do.
***************
*** 1817,1823 ****
  	smgrwrite(reln,
  			  buf->tag.forkNum,
  			  buf->tag.blockNum,
! 			  (char *) BufHdrGetBlock(buf),
  			  false);
  
  	BufferFlushCount++;
--- 1851,1857 ----
  	smgrwrite(reln,
  			  buf->tag.forkNum,
  			  buf->tag.blockNum,
! 			  enable_block_checksums ? dblbuf : BufHdrGetBlock(buf),
  			  false);
  
  	BufferFlushCount++;
Index: src/backend/storage/page/bufpage.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/page/bufpage.c,v
retrieving revision 1.81
diff -c -p -r1.81 bufpage.c
*** src/backend/storage/page/bufpage.c	3 Nov 2008 20:47:48 -0000	1.81
--- src/backend/storage/page/bufpage.c	13 Nov 2008 17:44:23 -0000
***************
*** 41,46 ****
--- 41,47 ----
  	MemSet(p, 0, pageSize);
  
  	/* p->pd_flags = 0;								done by above MemSet */
+ 	p->pd_checksum = PAGE_INVALID_CHECKSUM;
  	p->pd_lower = SizeOfPageHeaderData;
  	p->pd_upper = pageSize - specialSize;
  	p->pd_special = pageSize - specialSize;
***************
*** 84,92 ****
  		page->pd_special == MAXALIGN(page->pd_special))
  		return true;
  
! 	/* Check all-zeroes case */
  	pagebytes = (char *) page;
! 	for (i = 0; i < BLCKSZ; i++)
  	{
  		if (pagebytes[i] != 0)
  			return false;
--- 85,93 ----
  		page->pd_special == MAXALIGN(page->pd_special))
  		return true;
  
! 	/* Check all-zeroes case (skipping the checksum) */
  	pagebytes = (char *) page;
! 	for (i = sizeof(PAGE_CHECKSUM_TYPE); i < BLCKSZ; i++)
  	{
  		if (pagebytes[i] != 0)
  			return false;
Index: src/backend/storage/smgr/smgr.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/smgr/smgr.c,v
retrieving revision 1.113
diff -c -p -r1.113 smgr.c
*** src/backend/storage/smgr/smgr.c	11 Nov 2008 13:19:16 -0000	1.113
--- src/backend/storage/smgr/smgr.c	13 Nov 2008 17:44:23 -0000
***************
*** 28,33 ****
--- 28,36 ----
  #include "utils/memutils.h"
  
  
+ /* Perform block checksumming for corruption detection */
+ bool enable_block_checksums = false;
+ 
  /*
   * This struct of function pointers defines the API between smgr.c and
   * any individual storage manager module.  Note that smgr subfunctions are
***************
*** 504,509 ****
--- 507,518 ----
  smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, 
  		   char *buffer, bool isTemp)
  {
+ 	/* Perform block checksumming for corruption detection */
+ 	if (enable_block_checksums)
+ 		WritePageChecksum(buffer);
+ 	else
+ 		WriteInvalidPageChecksum(buffer);
+ 
  	(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
  											   buffer, isTemp);
  }
***************
*** 521,526 ****
--- 530,557 ----
  		 char *buffer)
  {
  	(*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
+ 
+ 	/* Perform block checksumming for corruption detection */
+ 	if (enable_block_checksums && !PageIsNew(buffer) && !InRecovery &&
+ 		PageGetChecksum(buffer) != PAGE_INVALID_CHECKSUM)
+ 	{
+ 		PAGE_CHECKSUM_TYPE	chksum;
+ 		
+ 		CalcPageChecksum(buffer, chksum);
+ 
+ 		if (chksum != PageGetChecksum(buffer))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_DATA_CORRUPTED),
+ 					 errmsg("invalid checksum on read of block %u of relation %u/%u/%u",
+ 							blocknum,
+ 							reln->smgr_rnode.spcNode,
+ 							reln->smgr_rnode.dbNode,
+ 							reln->smgr_rnode.relNode),
+ 					 errdetail("Got %08x, expected %08x.",
+ 							   chksum, PageGetChecksum(buffer))));
+ 		}
+ 	}
  }
  
  /*
***************
*** 542,547 ****
--- 573,584 ----
  smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, 
  		  char *buffer, bool isTemp)
  {
+ 	/* Perform block checksumming for corruption detection */
+ 	if (enable_block_checksums)
+ 		WritePageChecksum(buffer);
+ 	else
+ 		WriteInvalidPageChecksum(buffer);
+ 
  	(*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
  											  buffer, isTemp);
  }
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.477
diff -c -p -r1.477 guc.c
*** src/backend/utils/misc/guc.c	11 Nov 2008 02:42:32 -0000	1.477
--- src/backend/utils/misc/guc.c	13 Nov 2008 17:44:23 -0000
***************
*** 57,62 ****
--- 57,63 ----
  #include "regex/regex.h"
  #include "storage/bufmgr.h"
  #include "storage/fd.h"
+ #include "storage/smgr.h"
  #include "tcop/tcopprot.h"
  #include "tsearch/ts_cache.h"
  #include "utils/builtins.h"
***************
*** 770,775 ****
--- 771,786 ----
  		false, NULL, NULL
  	},
  	{
+ 		{"perform_checksum", PGC_SIGHUP, UNGROUPED,
+ 			gettext_noop("Forces checksumming of blocks to/from disk."),
+ 			gettext_noop("The server will perform a checksum on the block "
+ 				"when read from or written to disk in order to detect storage-related "
+ 				"corruption.")
+ 		},
+ 		&enable_block_checksums,
+ 		false, NULL, NULL
+ 	},
+ 	{
  		{"log_duration", PGC_SUSET, LOGGING_WHAT,
  			gettext_noop("Logs the duration of each completed SQL statement."),
  			NULL
Index: src/backend/utils/misc/postgresql.conf.sample
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/misc/postgresql.conf.sample,v
retrieving revision 1.247
diff -c -p -r1.247 postgresql.conf.sample
*** src/backend/utils/misc/postgresql.conf.sample	9 Nov 2008 00:28:35 -0000	1.247
--- src/backend/utils/misc/postgresql.conf.sample	12 Nov 2008 13:14:17 -0000
***************
*** 481,486 ****
--- 481,491 ----
  
  #transform_null_equals = off
  
+ #------------------------------------------------------------------------------
+ # CORRUPTION DETECTION
+ #------------------------------------------------------------------------------
+ 
+ #perform_checksum = off		# Perform block checksumming to/from disk
  
  #------------------------------------------------------------------------------
  # CUSTOMIZED OPTIONS
Index: src/backend/utils/time/tqual.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/time/tqual.c,v
retrieving revision 1.110
diff -c -p -r1.110 tqual.c
*** src/backend/utils/time/tqual.c	26 Mar 2008 16:20:47 -0000	1.110
--- src/backend/utils/time/tqual.c	13 Nov 2008 17:44:23 -0000
***************
*** 44,49 ****
--- 44,50 ----
  #include "access/xact.h"
  #include "storage/bufmgr.h"
  #include "storage/procarray.h"
+ #include "storage/smgr.h"
  #include "utils/tqual.h"
  
  
***************
*** 96,101 ****
--- 97,104 ----
  	}
  
  	tuple->t_infomask |= infomask;
+ 	if (enable_block_checksums)
+ 		PageSetUnloggedChange(BufferGetPage(buffer));
  	SetBufferCommitInfoNeedsSave(buffer);
  }
  
Index: src/include/pg_config_manual.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/pg_config_manual.h,v
retrieving revision 1.35
diff -c -p -r1.35 pg_config_manual.h
Index: src/include/access/heapam.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/access/heapam.h,v
retrieving revision 1.140
diff -c -p -r1.140 heapam.h
*** src/include/access/heapam.h	6 Nov 2008 20:51:15 -0000	1.140
--- src/include/access/heapam.h	12 Nov 2008 13:14:17 -0000
***************
*** 140,145 ****
--- 140,147 ----
  				OffsetNumber *offsets, int offcnt);
  extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
  							  BlockNumber blk, Page page);
+ extern XLogRecPtr log_hintbits(RelFileNode *rnode, ForkNumber forkNum,
+ 							   BlockNumber blk, Page page);
  
  /* in heap/pruneheap.c */
  extern void heap_page_prune_opt(Relation relation, Buffer buffer,
Index: src/include/access/htup.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/access/htup.h,v
retrieving revision 1.103
diff -c -p -r1.103 htup.h
*** src/include/access/htup.h	2 Nov 2008 01:45:28 -0000	1.103
--- src/include/access/htup.h	12 Nov 2008 13:14:17 -0000
***************
*** 580,585 ****
--- 580,586 ----
  #define XLOG_HEAP2_FREEZE		0x00
  #define XLOG_HEAP2_CLEAN		0x10
  #define XLOG_HEAP2_CLEAN_MOVE	0x20
+ #define XLOG_HEAP2_HINTBITS		0x30
  
  /*
   * All what we need to find changed tuple
***************
*** 714,719 ****
--- 715,730 ----
  
  #define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_xid) + sizeof(TransactionId))
  
+ /* This is what we need to know about hint bits */
+ typedef struct xl_heap_hintbits
+ {
+ 	RelFileNode node;			/* relation containing the page */
+ 	BlockNumber block;			/* block number of the page */
+ 	/* HINT BIT ARRAY FOLLOWS AT THE END: uint16 infomask, infomask2 pairs */
+ } xl_heap_hintbits;
+ 
+ #define SizeOfHeapHintbits (offsetof(xl_heap_hintbits, block) + sizeof(BlockNumber))
+ 
  /* HeapTupleHeader functions implemented in utils/time/combocid.c */
  extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
  extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
Index: src/include/storage/bufpage.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/bufpage.h,v
retrieving revision 1.84
diff -c -p -r1.84 bufpage.h
*** src/include/storage/bufpage.h	3 Nov 2008 20:47:49 -0000	1.84
--- src/include/storage/bufpage.h	13 Nov 2008 14:42:20 -0000
***************
*** 17,22 ****
--- 17,23 ----
  #include "access/xlogdefs.h"
  #include "storage/item.h"
  #include "storage/off.h"
+ #include "utils/pg_crc.h"
  
  /*
   * A postgres disk page is an abstraction layered on top of a postgres
***************
*** 87,92 ****
--- 88,94 ----
   *
   * space management information generic to any page
   *
+  *		pd_checksum	- the checksum of the page
   *		pd_lsn		- identifies xlog record for last change to this page.
   *		pd_tli		- ditto.
   *		pd_flags	- flag bits.
***************
*** 118,136 ****
   * the constraint on pagesize mod 256 is not an important restriction.
   * On the high end, we can only support pages up to 32KB because lp_off/lp_len
   * are 15 bits.
   */
  typedef struct PageHeaderData
  {
! 	/* XXX LSN is member of *any* block, not only page-organized ones */
  	XLogRecPtr	pd_lsn;			/* LSN: next byte after last byte of xlog
  								 * record for last change to this page */
- 	uint16		pd_tli;			/* least significant bits of the TimeLineID
- 								 * containing the LSN */
- 	uint16		pd_flags;		/* flag bits, see below */
  	LocationIndex pd_lower;		/* offset to start of free space */
  	LocationIndex pd_upper;		/* offset to end of free space */
  	LocationIndex pd_special;	/* offset to start of special space */
  	uint16		pd_pagesize_version;
  	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
  	ItemIdData	pd_linp[1];		/* beginning of line pointer array */
  } PageHeaderData;
--- 120,143 ----
   * the constraint on pagesize mod 256 is not an important restriction.
   * On the high end, we can only support pages up to 32KB because lp_off/lp_len
   * are 15 bits.
+  *
+  * Note that pd_tli appears in a rather awkward position in the struct;
+  * this is because we moved it to accomodate pd_checksum without changing
+  * pg_pagesize_version's offset.
   */
  typedef struct PageHeaderData
  {
! 	/* XXX CRC & LSN are members of *any* block, not only page-organized ones */
! 	pg_crc32	pd_checksum;    /* The block-level checksum */
  	XLogRecPtr	pd_lsn;			/* LSN: next byte after last byte of xlog
  								 * record for last change to this page */
  	LocationIndex pd_lower;		/* offset to start of free space */
  	LocationIndex pd_upper;		/* offset to end of free space */
  	LocationIndex pd_special;	/* offset to start of special space */
  	uint16		pd_pagesize_version;
+ 	uint16		pd_tli;			/* least significant bits of the TimeLineID
+ 								 * containing the LSN */
+ 	uint16		pd_flags;		/* flag bits, see below */
  	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
  	ItemIdData	pd_linp[1];		/* beginning of line pointer array */
  } PageHeaderData;
***************
*** 148,159 ****
   * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
   * page for its new tuple version; this suggests that a prune is needed.
   * Again, this is just a hint.
   */
  #define PD_HAS_FREE_LINES	0x0001		/* are there any unused line pointers? */
  #define PD_PAGE_FULL		0x0002		/* not enough free space for new
  										 * tuple? */
  
! #define PD_VALID_FLAG_BITS	0x0003		/* OR of all valid pd_flags bits */
  
  /*
   * Page layout version number 0 is for pre-7.3 Postgres releases.
--- 155,172 ----
   * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
   * page for its new tuple version; this suggests that a prune is needed.
   * Again, this is just a hint.
+  *
+  * PD_UNLOGGED_CHANGE indicates whether a process has set hint bits on the
+  * page.  This is used to determine whether a WAL message needs to be emitted
+  * before writing the page to disk when page checksums are enabled.
   */
  #define PD_HAS_FREE_LINES	0x0001		/* are there any unused line pointers? */
  #define PD_PAGE_FULL		0x0002		/* not enough free space for new
  										 * tuple? */
+ #define PD_UNLOGGED_CHANGE	0x0004		/* does the page have unlogged hint
+ 										   bits? */
  
! #define PD_VALID_FLAG_BITS	0x0007		/* OR of all valid pd_flags bits */
  
  /*
   * Page layout version number 0 is for pre-7.3 Postgres releases.
***************
*** 163,170 ****
   * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
   *		added the pd_flags field (by stealing some bits from pd_tli),
   *		as well as adding the pd_prune_xid field (which enlarges the header).
   */
! #define PG_PAGE_LAYOUT_VERSION		4
  
  
  /* ----------------------------------------------------------------
--- 176,186 ----
   * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
   *		added the pd_flags field (by stealing some bits from pd_tli),
   *		as well as adding the pd_prune_xid field (which enlarges the header).
+  * Release 8.4 uses 5; it added a checksum to the page header, and moved
+  *		pd_tli and pd_flags so that the page version would keep the same
+  *		offset.
   */
! #define PG_PAGE_LAYOUT_VERSION		5
  
  
  /* ----------------------------------------------------------------
***************
*** 352,357 ****
--- 368,432 ----
  #define PageClearPrunable(page) \
  	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
  
+ /* ----------------------------------------------------------------
+  *      CRC support
+  * ----------------------------------------------------------------
+  */
+ #define PAGE_CHECKSUM_TYPE		pg_crc32
+ #define SIZEOF_PAGE_CHECKSUM	sizeof(PAGE_CHECKSUM_TYPE)
+ #define PAGE_INVALID_CHECKSUM	0xb79a6e9c
+ 
+ /*
+  * Given a page, calculate its checksum.
+  *
+  * We only include: the page header, the line pointers (except lp_flags), and
+  * the area between pd_upper and pd_special.  The unused area is not included,
+  * and neither is the "special space".
+  */
+ #define CalcPageChecksum(buffer, sum)									\
+ 	do {																\
+ 		int		i;														\
+ 		INIT_CRC32(sum);												\
+ 		/* header from pd_lsn up to, but not including, pd_flags */		\
+ 		COMP_CRC32(sum, (char *) (buffer) + sizeof(pg_crc32),			\
+ 				   offsetof(PageHeaderData, pd_flags) - sizeof(pg_crc32)); \
+ 		/* each line pointer, excluding lp_flags */						\
+ 		for (i = 1; i <= PageGetMaxOffsetNumber(buffer); i++) 			\
+ 		{ 																\
+ 			uint32	lpval;												\
+ 			lpval = *(uint32 *) PageGetItemId(buffer, i);				\
+ 			lpval &= ~ITEM_LP_FLAGS_MASK; 								\
+ 			COMP_CRC32(sum, &lpval, sizeof(ItemIdData)); 				\
+ 		} 																\
+ 		/* the space occupied by tuples */								\
+ 		COMP_CRC32(sum, (char *) (buffer) + ((PageHeader) (buffer))->pd_upper,		\
+ 			((PageHeader) (buffer))->pd_special - ((PageHeader) (buffer))->pd_upper); \
+ 		FIN_CRC32(sum);													\
+ 	} while (0)
+ 
+ 
+ /* beware multiple evaluation of argument */
+ #define WritePageChecksum(buffer)				\
+ 	do {										\
+ 		PAGE_CHECKSUM_TYPE    chksum;			\
+ 		CalcPageChecksum(buffer, chksum);		\
+ 		PageSetChecksum(buffer, chksum);		\
+ 	} while (0)
+ 
+ #define WriteInvalidPageChecksum(buffer)		\
+ 	PageSetChecksum((buffer), PAGE_INVALID_CHECKSUM)
+ 
+ #define PageGetChecksum(page) \
+ 	(((PageHeader) (page))->pd_checksum)
+ #define PageSetChecksum(page, checksum) \
+ 	(((PageHeader) (page))->pd_checksum = (checksum))
+ 
+ #define PageHasUnloggedChange(page) \
+ 	(((PageHeader) (page))->pd_flags & PD_UNLOGGED_CHANGE)
+ #define PageSetUnloggedChange(page) \
+ 	(((PageHeader) (page))->pd_flags |= PD_UNLOGGED_CHANGE)
+ #define PageClearUnloggedChange(page) \
+ 	(((PageHeader) (page))->pd_flags &= ~PD_UNLOGGED_CHANGE)
  
  /* ----------------------------------------------------------------
   *		extern declarations
Index: src/include/storage/itemid.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/itemid.h,v
retrieving revision 1.30
diff -c -p -r1.30 itemid.h
*** src/include/storage/itemid.h	1 Jan 2008 19:45:59 -0000	1.30
--- src/include/storage/itemid.h	13 Nov 2008 12:41:07 -0000
***************
*** 39,44 ****
--- 39,47 ----
  #define LP_REDIRECT		2		/* HOT redirect (should have lp_len=0) */
  #define LP_DEAD			3		/* dead, may or may not have storage */
  
+ /* the bits used by lp_flags */
+ #define ITEM_LP_FLAGS_MASK	0x00018000L
+ 
  /*
   * Item offsets and lengths are represented by these types when
   * they're not actually stored in an ItemIdData.
Index: src/include/storage/smgr.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/smgr.h,v
retrieving revision 1.63
diff -c -p -r1.63 smgr.h
*** src/include/storage/smgr.h	11 Aug 2008 11:05:11 -0000	1.63
--- src/include/storage/smgr.h	12 Nov 2008 13:14:17 -0000
***************
*** 20,25 ****
--- 20,28 ----
  #include "storage/relfilenode.h"
  
  
+ /* Perform block checksumming for corruption detection (defined in smgr.c) */
+ extern bool enable_block_checksums;
+ 
  /*
   * smgr.c maintains a table of SMgrRelation objects, which are essentially
   * cached file handles.  An SMgrRelation is created (if not already present)
Index: src/include/utils/pg_crc.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/utils/pg_crc.h,v
retrieving revision 1.18
diff -c -p -r1.18 pg_crc.h
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to