On Sun, Dec 25, 2011 at 1:01 PM, Kevin Grittner
<kevin.gritt...@wicourts.gov> wrote:

> This chicken-and-egg
> problem requires the checksum to be implemented first.

v2 of checksum patch, using a conditional copy if checksumming is
enabled, so locking is removed.

Thanks to Andres for thwacking me with the cluestick, though I have
used a simple copy rather than a copy & calc.

Tested using make installcheck with parameter on/off, then restart and
vacuumdb to validate all pages.

Reviews, objections, user interface tweaks all welcome.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
*** a/doc/src/sgml/config.sgml
--- b/doc/src/sgml/config.sgml
***************
*** 1701,1706 **** SET ENABLE_SEQSCAN TO OFF;
--- 1701,1748 ----
        </listitem>
       </varlistentry>
  
+      <varlistentry id="guc-page-checksums" xreflabel="page_checksums">
+       <indexterm>
+        <primary><varname>page_checksums</> configuration parameter</primary>
+       </indexterm>
+       <term><varname>page_checksums</varname> (<type>boolean</type>)</term>
+       <listitem>
+        <para>
+         When this parameter is on, the <productname>PostgreSQL</> server
+         calculates checksums when it writes main database pages to disk,
+         flagging the page as checksum protected.  When this parameter is off,
+         no checksum is written, only a standard watermark in the page header.
+         The database may thus contain a mix of pages with checksums and pages
+         without checksums.
+        </para>
+ 
+        <para>
+         When pages are read into shared buffers any page flagged with a
+         checksum has the checksum re-calculated and compared against the
+         stored value to provide greatly improved validation of page contents.
+        </para>
+ 
+        <para>
+         Writes via temp_buffers are not checksummed.
+        </para>
+ 
+        <para>
+         Turning this parameter off speeds normal operation, but
+         might allow data corruption to go unnoticed. Each page checksum is
+         16 bits wide and uses the fast Fletcher-16 algorithm. With this
+         parameter enabled there is still a non-zero probability that an error
+         could go undetected, as well as a non-zero probability of false
+         positives.
+        </para>
+ 
+        <para>
+         This parameter can only be set in the <filename>postgresql.conf</>
+         file or on the server command line.
+         The default is <literal>off</>.
+        </para>
+       </listitem>
+      </varlistentry>
+ 
       <varlistentry id="guc-wal-buffers" xreflabel="wal_buffers">
        <term><varname>wal_buffers</varname> (<type>integer</type>)</term>
        <indexterm>
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 440,446 **** ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
  			smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
  
  			/* check for garbage data */
! 			if (!PageHeaderIsValid((PageHeader) bufBlock))
  			{
  				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
  				{
--- 440,446 ----
  			smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
  
  			/* check for garbage data */
! 			if (!PageIsVerified((Page) bufBlock))
  			{
  				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
  				{
***************
*** 1860,1865 **** FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
--- 1860,1867 ----
  {
  	XLogRecPtr	recptr;
  	ErrorContextCallback errcontext;
+ 	Block		bufBlock;
+ 	char		*bufCopy;
  
  	/*
  	 * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
***************
*** 1907,1916 **** FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
  	buf->flags &= ~BM_JUST_DIRTIED;
  	UnlockBufHdr(buf);
  
  	smgrwrite(reln,
  			  buf->tag.forkNum,
  			  buf->tag.blockNum,
! 			  (char *) BufHdrGetBlock(buf),
  			  false);
  
  	pgBufferUsage.shared_blks_written++;
--- 1909,1932 ----
  	buf->flags &= ~BM_JUST_DIRTIED;
  	UnlockBufHdr(buf);
  
+ 	/*
+ 	 * Set page verification info immediately before we write the buffer to disk.
+ 	 * Once we have flushed the buffer is marked clean again, meaning it can
+ 	 * be replaced quickly and silently with another data block, so we must
+ 	 * write verification info now. For efficiency, the process of cleaning
+ 	 * and page replacement is asynchronous, so we can't do this *only* when
+ 	 * we are about to replace the buffer, we need to do this for every flush.
+ 	 */
+ 	bufBlock = BufHdrGetBlock(buf);
+ 	bufCopy = PageSetVerificationInfo((Page) bufBlock);
+ 
+ 	/*
+ 	 * bufCopy is either the shared buffer or a copy, as appropriate.
+ 	 */
  	smgrwrite(reln,
  			  buf->tag.forkNum,
  			  buf->tag.blockNum,
! 			  (char *) bufCopy,
  			  false);
  
  	pgBufferUsage.shared_blks_written++;
***************
*** 1921,1926 **** FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
--- 1937,1944 ----
  	 */
  	TerminateBufferIO(buf, true, 0);
  
+ 	/* XXX Assert(buf is not BM_JUST_DIRTIED) */
+ 
  	TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
  									   buf->tag.blockNum,
  									   reln->smgr_rnode.node.spcNode,
*** a/src/backend/storage/buffer/localbuf.c
--- b/src/backend/storage/buffer/localbuf.c
***************
*** 200,205 **** LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
--- 200,207 ----
  		/* Find smgr relation for buffer */
  		oreln = smgropen(bufHdr->tag.rnode, MyBackendId);
  
+ 		/* XXX do we want to write checksums for local buffers? An option? */
+ 
  		/* And write... */
  		smgrwrite(oreln,
  				  bufHdr->tag.forkNum,
*** a/src/backend/storage/page/bufpage.c
--- b/src/backend/storage/page/bufpage.c
***************
*** 16,21 ****
--- 16,27 ----
  
  #include "access/htup.h"
  
+ bool page_checksums = false;
+ 
+ static char pageCopy[BLCKSZ];	/* temporary buffer to allow checksum calculation */
+ 
+ static bool PageVerificationInfoOK(Page page);
+ static uint16 PageCalcChecksum16(Page page);
  
  /* ----------------------------------------------------------------
   *						Page support functions
***************
*** 25,30 ****
--- 31,40 ----
  /*
   * PageInit
   *		Initializes the contents of a page.
+  *		Note that we don't automatically add a checksum, or flag that the
+  * 		page has a checksum field. We start with a normal page layout and defer
+  *		the decision on what page verification will be written just before
+  *		we write the block to disk.
   */
  void
  PageInit(Page page, Size pageSize, Size specialSize)
***************
*** 67,86 **** PageInit(Page page, Size pageSize, Size specialSize)
   * will clean up such a page and make it usable.
   */
  bool
! PageHeaderIsValid(PageHeader page)
  {
  	char	   *pagebytes;
  	int			i;
  
  	/* Check normal case */
! 	if (PageGetPageSize(page) == BLCKSZ &&
! 		PageGetPageLayoutVersion(page) == PG_PAGE_LAYOUT_VERSION &&
! 		(page->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
! 		page->pd_lower >= SizeOfPageHeaderData &&
! 		page->pd_lower <= page->pd_upper &&
! 		page->pd_upper <= page->pd_special &&
! 		page->pd_special <= BLCKSZ &&
! 		page->pd_special == MAXALIGN(page->pd_special))
  		return true;
  
  	/* Check all-zeroes case */
--- 77,96 ----
   * will clean up such a page and make it usable.
   */
  bool
! PageIsVerified(Page page)
  {
+ 	PageHeader	p = (PageHeader) page;
  	char	   *pagebytes;
  	int			i;
  
  	/* Check normal case */
! 	if (PageVerificationInfoOK(page) &&
! 		(p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
! 		p->pd_lower >= SizeOfPageHeaderData &&
! 		p->pd_lower <= p->pd_upper &&
! 		p->pd_upper <= p->pd_special &&
! 		p->pd_special <= BLCKSZ &&
! 		p->pd_special == MAXALIGN(p->pd_special))
  		return true;
  
  	/* Check all-zeroes case */
***************
*** 827,829 **** PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
--- 837,1050 ----
  
  	pfree(itemidbase);
  }
+ 
+ /*
+  * Test whether the page verification information is correct or not.
+  *
+  * IMPORTANT NOTE -
+  * Verification info is not valid at all times on a data page. We set
+  * verification info before we flush page/buffer, and implicitly invalidate
+  * verification info when we write to the page. A heavily accessed buffer
+  * might then spend most of its life with invalid page verification info,
+  * so testing verification info on random pages in the buffer pool will tell
+  * you nothing. The reason for this is that page verification info protects
+  * Postgres data from errors on the filesystems on which we rely. We do not
+  * protect buffers against uncorrectable memory errors, since these have a
+  * very low measured incidence according to research on large server farms,
+  * http://www.google.com/research/pubs/archive/35162.pdf, discussed 2010/12/22.
+  *
+  * To be clear: this means that WAL-logged changes to a page
+  * do NOT update the page verification info, so full page images may not have
+  * correct verification information on them. But those page images have the
+  * WAL CRC covering them and so are verified separately from this mechanism.
+  * WAL replay ignores page verification info unless it writes out or reads in
+  * blocks from disk; restoring full page writes does not check verification
+  * info via this function.
+  *
+  * The best way to understand this is that WAL CRCs protect records entering
+  * the WAL stream, and page verification protects blocks entering and leaving
+  * the buffer pool. They are similar in purpose, yet completely separate.
+  * Together they ensure we are able to detect errors in data leaving and
+  * re-entering PostgreSQL controlled memory.
+  *
+  * Note also that the verification mechanism can vary from page to page.
+  * All we do here is look at what the page itself says is the verification
+  * mechanism and then apply that test. This allows us to run without the CPU
+  * cost of verification if we choose, as well as to provide an upgrade path
+  * for anyone doing direct upgrades using pg_upgrade.
+  *
+  * There is some concern that trusting page data to say how to check page
+  * data is dangerously self-referential. To ensure no mistakes we set two
+  * non-adjacent bits to signify that the page has a checksum and
+  * should be verified when that block is read back into a buffer.
+  * We use two bits in case a multiple bit error removes one of the checksum
+  * flags *and* destroys data, which would lead to skipping the checksum check
+  * and silently accepting bad data.
+  *
+  * Note also that this returns a boolean, not a full damage assessment.
+  */
+ static bool
+ PageVerificationInfoOK(Page page)
+ {
+ 	PageHeader	p = (PageHeader) page;
+ 	bool		has_flag1 = PageHasChecksumFlag1(p);
+ 	bool		has_flag2 = PageHasChecksumFlag2(p);
+ 
+ 	/* Both flag bits set: recompute the checksum and compare with the stored one. */
+ 	if (has_flag1 && has_flag2)
+ 	{
+ 		uint16		checksum = PageCalcChecksum16(page);
+ 
+ 		if (checksum == p->pd_verify.pd_checksum16)
+ 			return true;
+ 
+ 		elog(LOG, "page verification failed - checksum was %u page checksum field is %u",
+ 			 checksum, p->pd_verify.pd_checksum16);
+ 		return false;
+ 	}
+ 
+ 	/*
+ 	 * Neither bit set: expect the standard watermark.  Nothing is logged on
+ 	 * mismatch because the caller still has its all-zeroes check to try.
+ 	 */
+ 	if (!has_flag1 && !has_flag2)
+ 		return (PageGetPageLayoutVersion(p) == PG_PAGE_LAYOUT_VERSION &&
+ 				PageGetPageSize(p) == BLCKSZ);
+ 
+ 	/* Exactly one bit set: the flag bits themselves are damaged. */
+ 	elog(LOG, "page verification failed - page has one checksum flag set");
+ 	return false;
+ }
+ 
+ /*
+  * Set verification info for page.
+  *
+  * Either we set a new checksum, or we set the standard watermark. We must
+  * not leave an invalid checksum in place. Note that the verification info is
+  * not WAL logged, whereas the data changes to pages are, so data is safe
+  * whether or not we have page_checksums enabled. The purpose of checksums
+  * is to detect page corruption to allow replacement from backup.
+  *
+  * Returns a pointer to the block-sized data that needs to be written. That
+  * allows us to either copy, or not, depending upon whether we checksum.
+  */
+ char *
+ PageSetVerificationInfo(Page page)
+ {
+ 	PageHeader	hdr = (PageHeader) page;
+ 
+ 	/* A new page is handed back untouched. */
+ 	if (PageIsNew(page))
+ 		return (char *) page;
+ 
+ 	if (!page_checksums)
+ 	{
+ 		/*
+ 		 * Checksums are off: if either checksum flag is still set, clear both
+ 		 * and restore the size/version watermark so no stale checksum remains.
+ 		 * NOTE(review): this modifies the caller's page in place, unlike the
+ 		 * checksum path below -- confirm that is safe under the caller's lock.
+ 		 */
+ 		if (PageHasChecksumFlag1(hdr) || PageHasChecksumFlag2(hdr))
+ 		{
+ 			hdr->pd_flags &= ~PD_CHECKSUM;
+ 			PageSetPageSizeAndVersion(hdr, BLCKSZ, PG_PAGE_LAYOUT_VERSION);
+ 		}
+ 
+ 		return (char *) page;
+ 	}
+ 
+ 	/*
+ 	 * Checksums are on: work on a private copy, because the checksum must
+ 	 * match the bytes actually written even if the original page changes
+ 	 * (e.g. hint bits set by other backends) while the write is in progress.
+ 	 */
+ 	memcpy(&pageCopy, page, BLCKSZ);
+ 	hdr = (PageHeader) &pageCopy;
+ 	hdr->pd_flags |= PD_CHECKSUM;
+ 	hdr->pd_verify.pd_checksum16 = PageCalcChecksum16((Page) &pageCopy);
+ 	return (char *) &pageCopy;
+ }
+ 
+ /*
+  * Calculate checksum for a PostgreSQL Page. We do this in 3 steps, first
+  * we calculate the checksum for the header, avoiding the verification
+  * info, which will be added afterwards. Next, we add the line pointers up to
+  * the hole in the middle of the block at pd_lower. Last, we add the tail
+  * of the page from pd_upper to the end of page.
+  */
+ static uint16
+ PageCalcChecksum16(Page page)
+ {
+ #define PAGE_VERIFICATION_USES_FLETCHER16 (true)
+ #ifdef PAGE_VERIFICATION_USES_FLETCHER16
+ 	/*
+ 	 * Following calculation is a Fletcher's 16 checksum. The calc is isolated
+ 	 * here and tuning and/or replacement algorithms are possible.
+ 	 *
+ 	 * XXX present implementation is raw, untuned calculation, please tweak
+ 	 */
+ 	PageHeader	p = (PageHeader) page;
+ 	uint32		page_header_stop = (uint32) (offsetof(PageHeaderData, pd_special) + sizeof(LocationIndex));
+ 	uint32		page_lower_start = (uint32) offsetof(PageHeaderData, pd_prune_xid);
+ 	uint32		page_lower_stop;
+ 	uint16		sum1 = 0;
+ 	uint16		sum2 = 0;
+ 	uint32		i;
+ 
+ 	/*
+ 	 * Avoid calculating checksum if page is new, just return a value that
+ 	 * will cause the check to fail. We may still pass the all-zeroes check.
+ 	 */
+ 	if (PageIsNew(page))
+ 		return 1;
+ 
+ 	/*
+ 	 * Just add in the pd_prune_xid if there are no line pointers yet.  When
+ 	 * verifying a page read from disk, pd_lower has not been sanity-checked
+ 	 * yet, so clamp it to avoid summing bytes beyond the end of the page.
+ 	 */
+ 	page_lower_stop = p->pd_lower;
+ 	if (page_lower_stop == 0)
+ 		page_lower_stop = page_lower_start + sizeof(TransactionId);
+ 	else if (page_lower_stop > BLCKSZ)
+ 		page_lower_stop = BLCKSZ;
+ 
+ 	Assert(p->pd_upper != 0);
+ 
+ #ifdef DEBUG_CHECKSUM
+ 	elog(LOG, "calculating checksum for %u-%u %u-%u %u-%u",
+ 		 0, page_header_stop, page_lower_start, page_lower_stop,
+ 		 p->pd_upper, BLCKSZ);
+ #endif
+ 
+ 	/*
+ 	 * Add bytes [from, to) to the running sums.  Bytes are summed as unsigned
+ 	 * values so the result does not depend on the signedness of char.
+ 	 */
+ #define	COMP_F16(from, to) \
+ do { \
+ 	for (i = (from); i < (to); i++) \
+ 	{ \
+ 		sum1 = (sum1 + (unsigned char) page[i]) % 255; \
+ 		sum2 = (sum1 + sum2) % 255; \
+ 	} \
+ } while (0)
+ 
+ 	/* page header up to, but not including, the verification field */
+ 	COMP_F16(0, page_header_stop);
+ 
+ 	/* pd_prune_xid and line pointers; the verification field is skipped */
+ 	COMP_F16(page_lower_start, page_lower_stop);
+ 
+ 	/* pd_upper through the last byte of the page; the hole is ignored */
+ 	COMP_F16(p->pd_upper, BLCKSZ);
+ #undef COMP_F16
+ 
+ #ifdef DEBUG_CHECKSUM
+ 	elog(LOG, "checksum %u", ((sum2 << 8) | sum1));
+ #endif
+ 
+ 	return ((sum2 << 8) | sum1);
+ #endif
+ }
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 830,835 **** static struct config_bool ConfigureNamesBool[] =
--- 830,849 ----
  		NULL, NULL, NULL
  	},
  	{
+ 		{"page_checksums", PGC_SIGHUP, WAL_SETTINGS,
+ 			gettext_noop("Marks database blocks with a checksum before writing them to disk."),
+ 			gettext_noop("When enabled all database blocks will be marked with a checksum before writing to disk. "
+ 						 "When we read a database block from disk the checksum is checked, if it exists. "
+ 						 "If there is no checksum marked yet then no check is performed, though a "
+ 						 "checksum will be added later when we re-write the database block. "
+ 						 "When disabled, checksums will not be added to database blocks, but any "
+ 						 "block already marked with a checksum is still checked when it is read.")
+ 		},
+ 		&page_checksums,
+ 		false,
+ 		NULL, NULL, NULL
+ 	},
+ 	},
+ 	{
  		{"full_page_writes", PGC_SIGHUP, WAL_SETTINGS,
  			gettext_noop("Writes full pages to WAL when first modified after a checkpoint."),
  			gettext_noop("A page write in process during an operating system crash might be "
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 150,164 ****
  
  
  #------------------------------------------------------------------------------
! # WRITE AHEAD LOG
  #------------------------------------------------------------------------------
  
! # - Settings -
  
! #wal_level = minimal			# minimal, archive, or hot_standby
! 					# (change requires restart)
  #fsync = on				# turns forced synchronization on or off
  #synchronous_commit = on		# synchronization level; on, off, or local
  #wal_sync_method = fsync		# the default is the first option
  					# supported by the operating system:
  					#   open_datasync
--- 150,170 ----
  
  
  #------------------------------------------------------------------------------
! # WRITE AHEAD LOG & RELIABILITY
  #------------------------------------------------------------------------------
  
! # - Reliability -
  
! #page_checksums = off			# calculate checksum before database I/O
! #full_page_writes = on			# recover from partial page writes
  #fsync = on				# turns forced synchronization on or off
+ 
  #synchronous_commit = on		# synchronization level; on, off, or local
+ 
+ # - Write Ahead Log -
+ 
+ #wal_level = minimal			# minimal, archive, or hot_standby
+ 					# (change requires restart)
  #wal_sync_method = fsync		# the default is the first option
  					# supported by the operating system:
  					#   open_datasync
***************
*** 166,172 ****
  					#   fsync
  					#   fsync_writethrough
  					#   open_sync
- #full_page_writes = on			# recover from partial page writes
  #wal_buffers = -1			# min 32kB, -1 sets based on shared_buffers
  					# (change requires restart)
  #wal_writer_delay = 200ms		# 1-10000 milliseconds
--- 172,177 ----
*** a/src/include/storage/bufpage.h
--- b/src/include/storage/bufpage.h
***************
*** 18,23 ****
--- 18,25 ----
  #include "storage/item.h"
  #include "storage/off.h"
  
+ extern bool page_checksums;
+ 
  /*
   * A postgres disk page is an abstraction layered on top of a postgres
   * disk block (which is simply a unit of i/o, see block.h).
***************
*** 93,99 **** typedef uint16 LocationIndex;
   *		pd_lower	- offset to start of free space.
   *		pd_upper	- offset to end of free space.
   *		pd_special	- offset to start of special space.
!  *		pd_pagesize_version - size in bytes and page layout version number.
   *		pd_prune_xid - oldest XID among potentially prunable tuples on page.
   *
   * The LSN is used by the buffer manager to enforce the basic rule of WAL:
--- 95,101 ----
   *		pd_lower	- offset to start of free space.
   *		pd_upper	- offset to end of free space.
   *		pd_special	- offset to start of special space.
!  *		pd_verify	- page verification information of different kinds
   *		pd_prune_xid - oldest XID among potentially prunable tuples on page.
   *
   * The LSN is used by the buffer manager to enforce the basic rule of WAL:
***************
*** 106,112 **** typedef uint16 LocationIndex;
   * pd_prune_xid is a hint field that helps determine whether pruning will be
   * useful.	It is currently unused in index pages.
   *
!  * The page version number and page size are packed together into a single
   * uint16 field.  This is for historical reasons: before PostgreSQL 7.3,
   * there was no concept of a page version number, and doing it this way
   * lets us pretend that pre-7.3 databases have page version number zero.
--- 108,115 ----
   * pd_prune_xid is a hint field that helps determine whether pruning will be
   * useful.	It is currently unused in index pages.
   *
!  * For verification we store either a 16 bit checksum or a watermark of
!  * the page version number and page size packed together into a single
   * uint16 field.  This is for historical reasons: before PostgreSQL 7.3,
   * there was no concept of a page version number, and doing it this way
   * lets us pretend that pre-7.3 databases have page version number zero.
***************
*** 130,136 **** typedef struct PageHeaderData
  	LocationIndex pd_lower;		/* offset to start of free space */
  	LocationIndex pd_upper;		/* offset to end of free space */
  	LocationIndex pd_special;	/* offset to start of special space */
! 	uint16		pd_pagesize_version;
  	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
  	ItemIdData	pd_linp[1];		/* beginning of line pointer array */
  } PageHeaderData;
--- 133,145 ----
  	LocationIndex pd_lower;		/* offset to start of free space */
  	LocationIndex pd_upper;		/* offset to end of free space */
  	LocationIndex pd_special;	/* offset to start of special space */
! 
! 	union
! 	{
! 		uint16		pd_pagesize_version;
! 		uint16		pd_checksum16;
! 	} pd_verify;				/* page verification data */
! 
  	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
  	ItemIdData	pd_linp[1];		/* beginning of line pointer array */
  } PageHeaderData;
***************
*** 155,161 **** typedef PageHeaderData *PageHeader;
  #define PD_ALL_VISIBLE		0x0004		/* all tuples on page are visible to
  										 * everyone */
  
! #define PD_VALID_FLAG_BITS	0x0007		/* OR of all valid pd_flags bits */
  
  /*
   * Page layout version number 0 is for pre-7.3 Postgres releases.
--- 164,179 ----
  #define PD_ALL_VISIBLE		0x0004		/* all tuples on page are visible to
  										 * everyone */
  
! #define PD_VALID_FLAG_BITS	0x800F		/* OR of all valid pd_flags bits, incl. checksum bits */
! 
! #define PD_CHECKSUM1		0x0008		/* First checksum bit */
! #define PD_CHECKSUM2		0x8000		/* Second checksum bit */
! #define PD_CHECKSUM 		0x8008		/* OR of both checksum flags */
! 
! #define PageHasChecksumFlag1(page) \
! 	((((PageHeader) (page))->pd_flags & PD_CHECKSUM1) == PD_CHECKSUM1)
! #define PageHasChecksumFlag2(page) \
! 	((((PageHeader) (page))->pd_flags & PD_CHECKSUM2) == PD_CHECKSUM2)
  
  /*
   * Page layout version number 0 is for pre-7.3 Postgres releases.
***************
*** 165,170 **** typedef PageHeaderData *PageHeader;
--- 183,190 ----
   * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
   *		added the pd_flags field (by stealing some bits from pd_tli),
   *		as well as adding the pd_prune_xid field (which enlarges the header).
+  * Release 9.2 uses 4 as well, though with changed meaning of verification bits.
+  * We deliberately don't bump the page version for that, to allow upgrades.
   */
  #define PG_PAGE_LAYOUT_VERSION		4
  
***************
*** 231,249 **** typedef PageHeaderData *PageHeader;
   * PageGetPageSize
   *		Returns the page size of a page.
   *
!  * this can only be called on a formatted page (unlike
!  * BufferGetPageSize, which can be called on an unformatted page).
!  * however, it can be called on a page that is not stored in a buffer.
   */
! #define PageGetPageSize(page) \
! 	((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))
  
  /*
   * PageGetPageLayoutVersion
   *		Returns the page layout version of a page.
   */
  #define PageGetPageLayoutVersion(page) \
! 	(((PageHeader) (page))->pd_pagesize_version & 0x00FF)
  
  /*
   * PageSetPageSizeAndVersion
--- 251,272 ----
   * PageGetPageSize
   *		Returns the page size of a page.
   *
!  * Since PageSizeIsValid() holds only when pagesize == BLCKSZ, this is simply BLCKSZ.
!  * This can be called on any page, initialised or not, in or out of buffers.
!  * You might think this can vary at runtime but you'd be wrong, since pages
!  * frequently need to occupy buffers and pages are copied from one to another
!  * so there are many hidden assumptions that this simple definition is true.
   */
! #define PageGetPageSize(page) (BLCKSZ)
  
  /*
   * PageGetPageLayoutVersion
   *		Returns the page layout version of a page.
+  *
+  * Must not be used on a page that is flagged for checksums.
   */
  #define PageGetPageLayoutVersion(page) \
! 	(((PageHeader) (page))->pd_verify.pd_pagesize_version & 0x00FF)
  
  /*
   * PageSetPageSizeAndVersion
***************
*** 251,262 **** typedef PageHeaderData *PageHeader;
   *
   * We could support setting these two values separately, but there's
   * no real need for it at the moment.
   */
  #define PageSetPageSizeAndVersion(page, size, version) \
  ( \
  	AssertMacro(((size) & 0xFF00) == (size)), \
  	AssertMacro(((version) & 0x00FF) == (version)), \
! 	((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
  )
  
  /* ----------------
--- 274,287 ----
   *
   * We could support setting these two values separately, but there's
   * no real need for it at the moment.
+  *
+  * Must not be used on a page that is flagged for checksums.
   */
  #define PageSetPageSizeAndVersion(page, size, version) \
  ( \
  	AssertMacro(((size) & 0xFF00) == (size)), \
  	AssertMacro(((version) & 0x00FF) == (version)), \
! 	((PageHeader) (page))->pd_verify.pd_pagesize_version = (size) | (version) \
  )
  
  /* ----------------
***************
*** 368,374 **** do { \
   */
  
  extern void PageInit(Page page, Size pageSize, Size specialSize);
! extern bool PageHeaderIsValid(PageHeader page);
  extern OffsetNumber PageAddItem(Page page, Item item, Size size,
  			OffsetNumber offsetNumber, bool overwrite, bool is_heap);
  extern Page PageGetTempPage(Page page);
--- 393,399 ----
   */
  
  extern void PageInit(Page page, Size pageSize, Size specialSize);
! extern bool PageIsVerified(Page page);
  extern OffsetNumber PageAddItem(Page page, Item item, Size size,
  			OffsetNumber offsetNumber, bool overwrite, bool is_heap);
  extern Page PageGetTempPage(Page page);
***************
*** 381,385 **** extern Size PageGetExactFreeSpace(Page page);
--- 406,411 ----
  extern Size PageGetHeapFreeSpace(Page page);
  extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
  extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
+ extern char *PageSetVerificationInfo(Page page);
  
  #endif   /* BUFPAGE_H */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to