Here's an updated patch that fixes a bug with full_page_writes, and an alignment issue in replay code. Also added a new test case to crashtest.sh that exercises the fixed bug.

Has anyone looked at this? I now consider it ready for committing, if no-one sees a problem with it.

Here's the original description of the patch:

Currently, an index split writes all the data on the split page to WAL. That's a lot of WAL traffic. The tuples that are copied to the right page need to be WAL logged, but the tuples that stay on the original page don't.

Here's a patch to do that. It needs further testing, I have used the attached crude crashtest.sh to test the basics, but we need to test the more obscure cases like splitting non-leaf or root page.

On a test case that inserts 10000 rows in increasing key order with a 100 characters wide text-field as key, the patch reduced the total generated WAL traffic from 45MB to 33MB, or ~ 25%. Your mileage may vary, depending on the tuple and key sizes, and the order of inserts.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com

Attachment: crashtest.sh
Description: application/shellscript

Index: src/backend/access/nbtree/nbtinsert.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtinsert.c,v
retrieving revision 1.146
diff -c -r1.146 nbtinsert.c
*** src/backend/access/nbtree/nbtinsert.c	11 Nov 2006 01:14:18 -0000	1.146
--- src/backend/access/nbtree/nbtinsert.c	2 Jan 2007 15:58:59 -0000
***************
*** 733,738 ****
--- 733,739 ----
  				rightoff;
  	OffsetNumber maxoff;
  	OffsetNumber i;
+ 	bool		isroot;
  
  	rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
  	origpage = BufferGetPage(buf);
***************
*** 747,752 ****
--- 748,755 ----
  	lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage);
  	ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage);
  
+ 	isroot = P_ISROOT(oopaque);
+ 
  	/* if we're splitting this page, it won't be the root when we're done */
  	/* also, clear the SPLIT_END and HAS_GARBAGE flags in both pages */
  	lopaque->btpo_flags = oopaque->btpo_flags;
***************
*** 926,986 ****
  		MarkBufferDirty(sbuf);
  	}
  
  	/* XLOG stuff */
  	if (!rel->rd_istemp)
  	{
  		xl_btree_split xlrec;
  		uint8		xlinfo;
  		XLogRecPtr	recptr;
! 		XLogRecData rdata[4];
  
! 		xlrec.target.node = rel->rd_node;
! 		ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
  		if (newitemonleft)
! 			xlrec.otherblk = BufferGetBlockNumber(rbuf);
  		else
! 			xlrec.otherblk = BufferGetBlockNumber(buf);
! 		xlrec.leftblk = lopaque->btpo_prev;
! 		xlrec.rightblk = ropaque->btpo_next;
! 		xlrec.level = lopaque->btpo.level;
  
! 		/*
  		 * Direct access to page is not good but faster - we should implement
  		 * some new func in page API.  Note we only store the tuples
  		 * themselves, knowing that the item pointers are in the same order
  		 * and can be reconstructed by scanning the tuples.  See comments for
  		 * _bt_restore_page().
  		 */
! 		xlrec.leftlen = ((PageHeader) leftpage)->pd_special -
! 			((PageHeader) leftpage)->pd_upper;
  
! 		rdata[0].data = (char *) &xlrec;
! 		rdata[0].len = SizeOfBtreeSplit;
! 		rdata[0].buffer = InvalidBuffer;
! 		rdata[0].next = &(rdata[1]);
! 
! 		rdata[1].data = (char *) leftpage + ((PageHeader) leftpage)->pd_upper;
! 		rdata[1].len = xlrec.leftlen;
! 		rdata[1].buffer = InvalidBuffer;
! 		rdata[1].next = &(rdata[2]);
! 
! 		rdata[2].data = (char *) rightpage + ((PageHeader) rightpage)->pd_upper;
! 		rdata[2].len = ((PageHeader) rightpage)->pd_special -
  			((PageHeader) rightpage)->pd_upper;
! 		rdata[2].buffer = InvalidBuffer;
! 		rdata[2].next = NULL;
  
  		if (!P_RIGHTMOST(ropaque))
  		{
! 			rdata[2].next = &(rdata[3]);
! 			rdata[3].data = NULL;
! 			rdata[3].len = 0;
! 			rdata[3].buffer = sbuf;
! 			rdata[3].buffer_std = true;
! 			rdata[3].next = NULL;
  		}
  
! 		if (P_ISROOT(oopaque))
  			xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
  		else
  			xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
--- 929,1044 ----
  		MarkBufferDirty(sbuf);
  	}
  
+ 	/*
+ 	 * By here, the original data page has been split into two new halves, and
+ 	 * these are correct.  The algorithm requires that the left page never
+ 	 * move during a split, so we copy the new left page back on top of the
+ 	 * original.  Note that this is not a waste of time, since we also require
+ 	 * (in the page management code) that the center of a page always be
+ 	 * clean, and the most efficient way to guarantee this is just to compact
+ 	 * the data by reinserting it into a new left page.  (XXX the latter
+ 	 * comment is probably obsolete.)
+ 	 *
+ 	 * We need to do this before writing the WAL record, so that XLogInsert can
+ 	 * WAL log an image of the page if necessary.
+ 	 */
+ 	PageRestoreTempPage(leftpage, origpage);
+ 
  	/* XLOG stuff */
  	if (!rel->rd_istemp)
  	{
  		xl_btree_split xlrec;
  		uint8		xlinfo;
  		XLogRecPtr	recptr;
! 		XLogRecData rdata[6];
! 		XLogRecData *lastrdata;
  
! 		xlrec.node = rel->rd_node;
! 		xlrec.leftsib = BufferGetBlockNumber(buf);
! 		xlrec.rightsib = BufferGetBlockNumber(rbuf);
! 		xlrec.firstright = firstright;
! 		xlrec.rnext = ropaque->btpo_next;
! 		xlrec.level = lopaque->btpo.level;
! 
! 		rdata[0].data = (char *) &xlrec;
! 		rdata[0].len = SizeOfBtreeSplit;
! 		rdata[0].buffer = InvalidBuffer;
! 
! 		lastrdata = &rdata[0];
! 
! 		/* Log downlink on non-leaf pages. */
! 		if (lopaque->btpo.level > 0)
! 		{
! 			lastrdata->next = lastrdata + 1;
! 			lastrdata++;
! 
! 			lastrdata->data = (char *) &newitem->t_tid.ip_blkid;
! 			lastrdata->len = sizeof(BlockIdData);
! 			lastrdata->buffer = InvalidBuffer;
! 		}
! 
! 		/* Log the new item, if it was inserted on the left page. If it was 
! 		 * put on the right page, we don't need to explicitly WAL log it 
! 		 * because it's included with all the other items on the right page.
! 		 */
! 		lastrdata->next = lastrdata + 1;
! 		lastrdata++;
  		if (newitemonleft)
! 		{
! 			lastrdata->data = (char *) &newitemoff;
! 			lastrdata->len = sizeof(OffsetNumber);
! 			lastrdata->buffer = buf;		/* backup block 1 */
! 			lastrdata->buffer_std = true;
! 
! 			lastrdata->next = lastrdata + 1;
! 			lastrdata++;
! 			lastrdata->data = (char *)newitem;
! 			lastrdata->len = newitemsz;
! 			lastrdata->buffer = buf;		/* backup block 1 */
! 			lastrdata->buffer_std = true;
! 		}
  		else
! 		{
! 			lastrdata->data = NULL;
! 			lastrdata->len = 0;
! 			lastrdata->buffer = buf;		/* backup block 1 */
! 			lastrdata->buffer_std = true;
! 		}
  
! 		/* Log the contents of the right page in the format understood by
! 		 * _bt_restore_page(). We set lastrdata->buffer to InvalidBuffer,
! 		 * because we're going to recreate the whole page anyway.
! 		 *
  		 * Direct access to page is not good but faster - we should implement
  		 * some new func in page API.  Note we only store the tuples
  		 * themselves, knowing that the item pointers are in the same order
  		 * and can be reconstructed by scanning the tuples.  See comments for
  		 * _bt_restore_page().
  		 */
! 		lastrdata->next = lastrdata + 1;
! 		lastrdata++;
  
! 		lastrdata->data = (char *) rightpage + 
  			((PageHeader) rightpage)->pd_upper;
! 		lastrdata->len = ((PageHeader) rightpage)->pd_special -
! 			((PageHeader) rightpage)->pd_upper;
! 		lastrdata->buffer = InvalidBuffer;
  
+ 		/* Log the right sibling, because we've changed it's prev-pointer. */
  		if (!P_RIGHTMOST(ropaque))
  		{
! 			lastrdata->next = lastrdata + 1;
! 			lastrdata++;
! 
! 			lastrdata->data = NULL;
! 			lastrdata->len = 0;
! 			lastrdata->buffer = sbuf;		/* backup block 2 */
! 			lastrdata->buffer_std = true;
  		}
  
! 		lastrdata->next = NULL;
! 
! 		if (isroot)
  			xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
  		else
  			xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
***************
*** 998,1021 ****
  		}
  	}
  
- 	/*
- 	 * By here, the original data page has been split into two new halves, and
- 	 * these are correct.  The algorithm requires that the left page never
- 	 * move during a split, so we copy the new left page back on top of the
- 	 * original.  Note that this is not a waste of time, since we also require
- 	 * (in the page management code) that the center of a page always be
- 	 * clean, and the most efficient way to guarantee this is just to compact
- 	 * the data by reinserting it into a new left page.  (XXX the latter
- 	 * comment is probably obsolete.)
- 	 *
- 	 * It's a bit weird that we don't fill in the left page till after writing
- 	 * the XLOG entry, but not really worth changing.  Note that we use the
- 	 * origpage data (specifically its BTP_ROOT bit) while preparing the XLOG
- 	 * entry, so simply reshuffling the code won't do.
- 	 */
- 
- 	PageRestoreTempPage(leftpage, origpage);
- 
  	END_CRIT_SECTION();
  
  	/* release the old right sibling */
--- 1056,1061 ----
Index: src/backend/access/nbtree/nbtxlog.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtxlog.c,v
retrieving revision 1.39
diff -c -r1.39 nbtxlog.c
*** src/backend/access/nbtree/nbtxlog.c	1 Nov 2006 19:43:17 -0000	1.39
--- src/backend/access/nbtree/nbtxlog.c	2 Jan 2007 16:02:39 -0000
***************
*** 264,385 ****
  {
  	xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
  	Relation	reln;
! 	BlockNumber targetblk;
! 	OffsetNumber targetoff;
! 	BlockNumber leftsib;
! 	BlockNumber rightsib;
! 	BlockNumber downlink = 0;
! 	Buffer		buffer;
! 	Page		page;
! 	BTPageOpaque pageop;
  
! 	reln = XLogOpenRelation(xlrec->target.node);
! 	targetblk = ItemPointerGetBlockNumber(&(xlrec->target.tid));
! 	targetoff = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
! 	leftsib = (onleft) ? targetblk : xlrec->otherblk;
! 	rightsib = (onleft) ? xlrec->otherblk : targetblk;
  
! 	/* Left (original) sibling */
! 	buffer = XLogReadBuffer(reln, leftsib, true);
! 	Assert(BufferIsValid(buffer));
! 	page = (Page) BufferGetPage(buffer);
  
! 	_bt_pageinit(page, BufferGetPageSize(buffer));
! 	pageop = (BTPageOpaque) PageGetSpecialPointer(page);
  
! 	pageop->btpo_prev = xlrec->leftblk;
! 	pageop->btpo_next = rightsib;
! 	pageop->btpo.level = xlrec->level;
! 	pageop->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
! 	pageop->btpo_cycleid = 0;
  
- 	_bt_restore_page(page,
- 					 (char *) xlrec + SizeOfBtreeSplit,
- 					 xlrec->leftlen);
  
! 	if (onleft && xlrec->level > 0)
  	{
! 		IndexTuple	itup;
  
! 		/* extract downlink in the target tuple */
! 		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, targetoff));
! 		downlink = ItemPointerGetBlockNumber(&(itup->t_tid));
! 		Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
  	}
  
! 	PageSetLSN(page, lsn);
! 	PageSetTLI(page, ThisTimeLineID);
! 	MarkBufferDirty(buffer);
! 	UnlockReleaseBuffer(buffer);
  
! 	/* Right (new) sibling */
! 	buffer = XLogReadBuffer(reln, rightsib, true);
! 	Assert(BufferIsValid(buffer));
! 	page = (Page) BufferGetPage(buffer);
  
! 	_bt_pageinit(page, BufferGetPageSize(buffer));
! 	pageop = (BTPageOpaque) PageGetSpecialPointer(page);
  
! 	pageop->btpo_prev = leftsib;
! 	pageop->btpo_next = xlrec->rightblk;
! 	pageop->btpo.level = xlrec->level;
! 	pageop->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
! 	pageop->btpo_cycleid = 0;
  
! 	_bt_restore_page(page,
! 					 (char *) xlrec + SizeOfBtreeSplit + xlrec->leftlen,
! 					 record->xl_len - SizeOfBtreeSplit - xlrec->leftlen);
  
! 	if (!onleft && xlrec->level > 0)
! 	{
! 		IndexTuple	itup;
  
- 		/* extract downlink in the target tuple */
- 		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, targetoff));
- 		downlink = ItemPointerGetBlockNumber(&(itup->t_tid));
- 		Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
- 	}
  
! 	PageSetLSN(page, lsn);
! 	PageSetTLI(page, ThisTimeLineID);
! 	MarkBufferDirty(buffer);
! 	UnlockReleaseBuffer(buffer);
  
! 	/* Fix left-link of right (next) page */
! 	if (!(record->xl_info & XLR_BKP_BLOCK_1))
  	{
! 		if (xlrec->rightblk != P_NONE)
  		{
! 			buffer = XLogReadBuffer(reln, xlrec->rightblk, false);
! 			if (BufferIsValid(buffer))
  			{
! 				page = (Page) BufferGetPage(buffer);
  
! 				if (XLByteLE(lsn, PageGetLSN(page)))
  				{
! 					UnlockReleaseBuffer(buffer);
  				}
! 				else
! 				{
! 					pageop = (BTPageOpaque) PageGetSpecialPointer(page);
! 					pageop->btpo_prev = rightsib;
  
! 					PageSetLSN(page, lsn);
! 					PageSetTLI(page, ThisTimeLineID);
! 					MarkBufferDirty(buffer);
! 					UnlockReleaseBuffer(buffer);
  				}
  			}
  		}
  	}
  
! 	/* Forget any split this insertion completes */
! 	if (xlrec->level > 0)
! 		forget_matching_split(xlrec->target.node, downlink, false);
  
  	/* The job ain't done till the parent link is inserted... */
! 	log_incomplete_split(xlrec->target.node,
! 						 leftsib, rightsib, isroot);
  }
  
  static void
--- 264,428 ----
  {
  	xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
  	Relation	reln;
! 	Buffer		lbuf, rbuf;
! 	Page		lpage, rpage;
! 	BTPageOpaque ropaque, lopaque;
! 	char	   *datapos;
! 	int			datalen;
! 	bool		bkp_left = record->xl_info & XLR_BKP_BLOCK_1;
! 	bool		bkp_nextsib = record->xl_info & XLR_BKP_BLOCK_2;
! 	OffsetNumber newitemoff;
! 	Item newitem = NULL;
! 	Size newitemsz = 0;
  
! 	reln = XLogOpenRelation(xlrec->node);
  
! 	datapos = (char *) xlrec + SizeOfBtreeSplit;
! 	datalen = record->xl_len - SizeOfBtreeSplit;
  
! 	/* Forget any split this insertion completes */
! 	if (xlrec->level > 0)
! 	{
! 		BlockNumber downlink = BlockIdGetBlockNumber((BlockId) datapos);
  
! 		datapos += sizeof(BlockIdData);
! 		datalen -= sizeof(BlockIdData);
! 
! 		forget_matching_split(xlrec->node, downlink, false);
! 	}
  
  
! 	/* Extract newitem and newitemoff */
! 	if (!bkp_left && onleft)
  	{
! 		IndexTupleData itupdata;
  
! 		/* Extract the offset of the new tuple and it's contents */
! 		memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
! 		datapos += sizeof(OffsetNumber);
! 		datalen -= sizeof(OffsetNumber);
! 
! 		newitem = datapos;
! 		/* Need to copy tuple header due to alignment considerations */
! 		memcpy(&itupdata, datapos, sizeof(IndexTupleData));
! 		newitemsz = IndexTupleDSize(itupdata);
! 		newitemsz = MAXALIGN(newitemsz);
! 		datapos += newitemsz;
! 		datalen -= newitemsz;
  	}
  
! 	/* Reconstruct right (new) sibling */
! 	rbuf = XLogReadBuffer(reln, xlrec->rightsib, true);
! 	Assert(BufferIsValid(rbuf));
! 	rpage = (Page) BufferGetPage(rbuf);
  
! 	_bt_pageinit(rpage, BufferGetPageSize(rbuf));
! 	ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
  
! 	ropaque->btpo_prev = xlrec->leftsib;
! 	ropaque->btpo_next = xlrec->rnext;
! 	ropaque->btpo.level = xlrec->level;
! 	ropaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
! 	ropaque->btpo_cycleid = 0;
  
! 	_bt_restore_page(rpage, datapos, datalen);
  
! 	PageSetLSN(rpage, lsn);
! 	PageSetTLI(rpage, ThisTimeLineID);
! 	MarkBufferDirty(rbuf);
  
! 	/* don't release the buffer yet, because reconstructing the left sibling
! 	 * needs to access the data on the right page 
! 	 */
  
  
! 	/* Reconstruct left (original) sibling */
  
! 	if(!bkp_left)
  	{
! 		lbuf = XLogReadBuffer(reln, xlrec->leftsib, false);
! 
! 		if (BufferIsValid(lbuf))
  		{
! 			lpage = (Page) BufferGetPage(lbuf);
! 			lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
! 
! 			if (!XLByteLE(lsn, PageGetLSN(lpage)))
  			{
! 				/* Remove the items from the left page that were copied to
! 				 * right page, and add the new item if it was inserted to
! 				 * left page.
! 				 */
! 				OffsetNumber off;
! 				OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage);
! 				ItemId hiItemId;
! 				Item hiItem;
  
! 				for(off = maxoff ; off >= xlrec->firstright; off--)
! 					PageIndexTupleDelete(lpage, off);
! 
! 				if (onleft)
  				{
! 					if (PageAddItem(lpage, newitem, newitemsz, newitemoff, 
! 									LP_USED) == InvalidOffsetNumber)
! 						elog(PANIC, "can't add new item to left sibling after split");
  				}
! 				/* Set high key equal to the first key on the right page */
! 				hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
! 				hiItem = PageGetItem(rpage, hiItemId);
  
! 				if(!P_RIGHTMOST(lopaque))
! 				{
! 					/* but remove the old high key first */
! 					PageIndexTupleDelete(lpage, P_HIKEY);
  				}
+ 
+ 				if(PageAddItem(lpage, hiItem, ItemIdGetLength(hiItemId),
+ 							   P_HIKEY, LP_USED) == InvalidOffsetNumber)
+ 					elog(PANIC, "can't add high key after split to left page");
+ 
+ 				/* Fix opaque fields */ 
+ 				lopaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
+ 				lopaque->btpo_next = xlrec->rightsib;
+ 				lopaque->btpo_cycleid = 0;
+ 
+ 				PageSetLSN(lpage, lsn);
+ 				PageSetTLI(lpage, ThisTimeLineID);
+ 				MarkBufferDirty(lbuf);
  			}
+ 
+ 			UnlockReleaseBuffer(lbuf);
  		}
+ 
  	}
  
! 	/* we no longer need the right buffer. */
! 	UnlockReleaseBuffer(rbuf);
! 
! 	/* Fix left-link of the page to the right of the new right sibling */
! 	if (!bkp_nextsib && xlrec->rnext != P_NONE)
! 	{
! 		Buffer buffer = XLogReadBuffer(reln, xlrec->rnext, false);
! 		if (BufferIsValid(buffer))
! 		{
! 			Page page = (Page) BufferGetPage(buffer);
! 
! 			if (!XLByteLE(lsn, PageGetLSN(page)))
! 			{
! 				BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
! 				pageop->btpo_prev = xlrec->rightsib;
! 
! 				PageSetLSN(page, lsn);
! 				PageSetTLI(page, ThisTimeLineID);
! 				MarkBufferDirty(buffer);
! 			}
! 			UnlockReleaseBuffer(buffer);
! 		}
! 	}
  
  	/* The job ain't done till the parent link is inserted... */
! 	log_incomplete_split(xlrec->node,
! 						 xlrec->leftsib, xlrec->rightsib, isroot);
  }
  
  static void
***************
*** 727,766 ****
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_l: ");
! 				out_target(buf, &(xlrec->target));
! 				appendStringInfo(buf, "; oth %u; rgh %u",
! 								 xlrec->otherblk, xlrec->rightblk);
  				break;
  			}
  		case XLOG_BTREE_SPLIT_R:
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_r: ");
! 				out_target(buf, &(xlrec->target));
! 				appendStringInfo(buf, "; oth %u; rgh %u",
! 								 xlrec->otherblk, xlrec->rightblk);
  				break;
  			}
  		case XLOG_BTREE_SPLIT_L_ROOT:
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_l_root: ");
! 				out_target(buf, &(xlrec->target));
! 				appendStringInfo(buf, "; oth %u; rgh %u",
! 								 xlrec->otherblk, xlrec->rightblk);
  				break;
  			}
  		case XLOG_BTREE_SPLIT_R_ROOT:
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_r_root: ");
! 				out_target(buf, &(xlrec->target));
! 				appendStringInfo(buf, "; oth %u; rgh %u",
! 								 xlrec->otherblk, xlrec->rightblk);
  				break;
  			}
  		case XLOG_BTREE_DELETE:
--- 770,817 ----
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_l: rel %u/%u/%u ",  
! 								 xlrec->node.spcNode, xlrec->node.dbNode,
! 								 xlrec->node.relNode);
! 				appendStringInfo(buf, "left %u, right %u off %u level %u",
! 								 xlrec->leftsib, xlrec->rightsib, 
! 								 xlrec->firstright, xlrec->level);
  				break;
  			}
  		case XLOG_BTREE_SPLIT_R:
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_r: rel %u/%u/%u ",  
! 								 xlrec->node.spcNode, xlrec->node.dbNode,
! 								 xlrec->node.relNode);
! 				appendStringInfo(buf, "left %u, right %u off %u level %u",
! 								 xlrec->leftsib, xlrec->rightsib, 
! 								 xlrec->firstright, xlrec->level);
  				break;
  			}
  		case XLOG_BTREE_SPLIT_L_ROOT:
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_l_root: rel %u/%u/%u ",  
! 								 xlrec->node.spcNode, xlrec->node.dbNode,
! 								 xlrec->node.relNode);
! 				appendStringInfo(buf, "left %u, right %u off %u level %u",
! 								 xlrec->leftsib, xlrec->rightsib, 
! 								 xlrec->firstright, xlrec->level);
  				break;
  			}
  		case XLOG_BTREE_SPLIT_R_ROOT:
  			{
  				xl_btree_split *xlrec = (xl_btree_split *) rec;
  
! 				appendStringInfo(buf, "split_r_root: rel %u/%u/%u ",  
! 								 xlrec->node.spcNode, xlrec->node.dbNode,
! 								 xlrec->node.relNode);
! 				appendStringInfo(buf, "left %u, right %u off %u level %u",
! 								 xlrec->leftsib, xlrec->rightsib, 
! 								 xlrec->firstright, xlrec->level);
  				break;
  			}
  		case XLOG_BTREE_DELETE:
Index: src/include/access/nbtree.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/nbtree.h,v
retrieving revision 1.106
diff -c -r1.106 nbtree.h
*** src/include/access/nbtree.h	1 Nov 2006 19:43:17 -0000	1.106
--- src/include/access/nbtree.h	2 Jan 2007 14:07:36 -0000
***************
*** 254,260 ****
   *
   * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
   * The _L and _R variants indicate whether the inserted tuple went into the
!  * left or right split page (and thus, whether otherblk is the right or left
   * page of the split pair).  The _ROOT variants indicate that we are splitting
   * the root page, and thus that a newroot record rather than an insert or
   * split record should follow.	Note that a split record never carries a
--- 254,261 ----
   *
   * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
   * The _L and _R variants indicate whether the inserted tuple went into the
!  * left or right split page (and thus, whether newitemoff and the new item
!  * are stored or not.
   * page of the split pair).  The _ROOT variants indicate that we are splitting
   * the root page, and thus that a newroot record rather than an insert or
   * split record should follow.	Note that a split record never carries a
***************
*** 262,278 ****
   */
  typedef struct xl_btree_split
  {
! 	xl_btreetid target;			/* inserted tuple id */
! 	BlockNumber otherblk;		/* second block participated in split: */
! 	/* first one is stored in target' tid */
! 	BlockNumber leftblk;		/* prev/left block */
! 	BlockNumber rightblk;		/* next/right block */
! 	uint32		level;			/* tree level of page being split */
! 	uint16		leftlen;		/* len of left page items below */
! 	/* LEFT AND RIGHT PAGES TUPLES FOLLOW AT THE END */
  } xl_btree_split;
  
! #define SizeOfBtreeSplit	(offsetof(xl_btree_split, leftlen) + sizeof(uint16))
  
  /*
   * This is what we need to know about delete of individual leaf index tuples.
--- 263,283 ----
   */
  typedef struct xl_btree_split
  {
! 	RelFileNode node;
! 	BlockNumber leftsib;	 /* orig page / new left page */
! 	BlockNumber rightsib;	 /* new right page */
! 	OffsetNumber firstright; /* first item stored on right page */
! 	BlockNumber rnext;		 /* next/right block pointer */
! 	uint32		level;		 /* tree level of page being split */
! 
! 	/* BlockIdData downlink follows if level > 0 */
! 	
! 	/* OffsetNumber newitemoff follows in the  _L variants. */
! 	/* New item follows in the _L variants */
! 	/* RIGHT PAGES TUPLES FOLLOW AT THE END */
  } xl_btree_split;
  
! #define SizeOfBtreeSplit	(offsetof(xl_btree_split, level) + sizeof(uint32))
  
  /*
   * This is what we need to know about delete of individual leaf index tuples.
---------------------------(end of broadcast)---------------------------
TIP 3: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faq

Reply via email to