On Wed, 22 Mar 2006 14:19:48 -0500
Tom Lane <[EMAIL PROTECTED]> wrote:

> Mark Wong <[EMAIL PROTECTED]> writes:
> > I proposed to explore splitting BLCKSZ into separate values for logging
> > and data to see if there might be anything to gain:
> >     http://archives.postgresql.org/pgsql-hackers/2006-03/msg00745.php
> > My first pass was to do more or less a search and replace (attached) and
> > I am already running into trouble with a 'make check' (below).  I'm
> > guessing that when initdb is run, I'm not properly saving the values
> > that I've defined for DATA_BLCKSZ and possibly LOG_BLCKSZ.
> 
> I'd suggest leaving BLCKSZ as-is and inventing XLOG_BLCKSZ to be used
> only within the WAL code; should make for a *far* smaller patch.
> Offhand I don't think that anything except xlog.c knows the WAL block
> size --- it should be fairly closely associated with dependencies on
> XLOG_SEG_SIZE, if you are looking for something to grep for.

OK, I have attached a much smaller patch.  It appears to pass 'make
check', but I'll keep testing to make sure it's really correct and works.

Thanks,
Mark
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.227
diff -c -r1.227 xlog.c
*** src/backend/access/transam/xlog.c	5 Mar 2006 15:58:22 -0000	1.227
--- src/backend/access/transam/xlog.c	23 Mar 2006 19:13:31 -0000
***************
*** 113,122 ****
  
  /*
   * Limitation of buffer-alignment for direct IO depends on OS and filesystem,
!  * but BLCKSZ is assumed to be enough for it.
   */
  #ifdef O_DIRECT
! #define ALIGNOF_XLOG_BUFFER		BLCKSZ
  #else
  #define ALIGNOF_XLOG_BUFFER		ALIGNOF_BUFFER
  #endif
--- 113,122 ----
  
  /*
   * Limitation of buffer-alignment for direct IO depends on OS and filesystem,
!  * but XLOG_BLCKSZ is assumed to be enough for it.
   */
  #ifdef O_DIRECT
! #define ALIGNOF_XLOG_BUFFER		XLOG_BLCKSZ
  #else
  #define ALIGNOF_XLOG_BUFFER		ALIGNOF_BUFFER
  #endif
***************
*** 374,380 ****
  	 * and xlblocks values depends on WALInsertLock and WALWriteLock.
  	 */
  	char	   *pages;			/* buffers for unwritten XLOG pages */
! 	XLogRecPtr *xlblocks;		/* 1st byte ptr-s + BLCKSZ */
  	Size		XLogCacheByte;	/* # bytes in xlog buffers */
  	int			XLogCacheBlck;	/* highest allocated xlog buffer index */
  	TimeLineID	ThisTimeLineID;
--- 374,380 ----
  	 * and xlblocks values depends on WALInsertLock and WALWriteLock.
  	 */
  	char	   *pages;			/* buffers for unwritten XLOG pages */
! 	XLogRecPtr *xlblocks;		/* 1st byte ptr-s + XLOG_BLCKSZ */
  	Size		XLogCacheByte;	/* # bytes in xlog buffers */
  	int			XLogCacheBlck;	/* highest allocated xlog buffer index */
  	TimeLineID	ThisTimeLineID;
***************
*** 397,403 ****
  
  /* Free space remaining in the current xlog page buffer */
  #define INSERT_FREESPACE(Insert)  \
! 	(BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
  
  /* Construct XLogRecPtr value for current insertion point */
  #define INSERT_RECPTR(recptr,Insert,curridx)  \
--- 397,403 ----
  
  /* Free space remaining in the current xlog page buffer */
  #define INSERT_FREESPACE(Insert)  \
! 	(XLOG_BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
  
  /* Construct XLogRecPtr value for current insertion point */
  #define INSERT_RECPTR(recptr,Insert,curridx)  \
***************
*** 441,447 ****
  static uint32 readSeg = 0;
  static uint32 readOff = 0;
  
! /* Buffer for currently read page (BLCKSZ bytes) */
  static char *readBuf = NULL;
  
  /* Buffer for current ReadRecord result (expandable) */
--- 441,447 ----
  static uint32 readSeg = 0;
  static uint32 readOff = 0;
  
! /* Buffer for currently read page (XLOG_BLCKSZ bytes) */
  static char *readBuf = NULL;
  
  /* Buffer for current ReadRecord result (expandable) */
***************
*** 662,668 ****
  			{
  				COMP_CRC32(rdata_crc,
  						   page,
! 						   BLCKSZ);
  			}
  			else
  			{
--- 662,668 ----
  			{
  				COMP_CRC32(rdata_crc,
  						   page,
! 						   XLOG_BLCKSZ);
  			}
  			else
  			{
***************
*** 672,678 ****
  						   bkpb->hole_offset);
  				COMP_CRC32(rdata_crc,
  						   page + (bkpb->hole_offset + bkpb->hole_length),
! 						   BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
  			}
  		}
  	}
--- 672,678 ----
  						   bkpb->hole_offset);
  				COMP_CRC32(rdata_crc,
  						   page + (bkpb->hole_offset + bkpb->hole_length),
! 						   XLOG_BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
  			}
  		}
  	}
***************
*** 705,711 ****
  	 * If cache is half filled then try to acquire write lock and do
  	 * XLogWrite. Ignore any fractional blocks in performing this check.
  	 */
! 	LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ;
  	if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
  		(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
  		 XLogCtl->XLogCacheByte / 2))
--- 705,711 ----
  	 * If cache is half filled then try to acquire write lock and do
  	 * XLogWrite. Ignore any fractional blocks in performing this check.
  	 */
! 	LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % XLOG_BLCKSZ;
  	if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
  		(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
  		 XLogCtl->XLogCacheByte / 2))
***************
*** 795,802 ****
  		if (bkpb->hole_length == 0)
  		{
  			rdt->data = page;
! 			rdt->len = BLCKSZ;
! 			write_len += BLCKSZ;
  			rdt->next = NULL;
  		}
  		else
--- 795,802 ----
  		if (bkpb->hole_length == 0)
  		{
  			rdt->data = page;
! 			rdt->len = XLOG_BLCKSZ;
! 			write_len += XLOG_BLCKSZ;
  			rdt->next = NULL;
  		}
  		else
***************
*** 810,816 ****
  			rdt = rdt->next;
  
  			rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
! 			rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
  			write_len += rdt->len;
  			rdt->next = NULL;
  		}
--- 810,816 ----
  			rdt = rdt->next;
  
  			rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
! 			rdt->len = XLOG_BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
  			write_len += rdt->len;
  			rdt->next = NULL;
  		}
***************
*** 993,999 ****
  
  			if (lower >= SizeOfPageHeaderData &&
  				upper > lower &&
! 				upper <= BLCKSZ)
  			{
  				bkpb->hole_offset = lower;
  				bkpb->hole_length = upper - lower;
--- 993,999 ----
  
  			if (lower >= SizeOfPageHeaderData &&
  				upper > lower &&
! 				upper <= XLOG_BLCKSZ)
  			{
  				bkpb->hole_offset = lower;
  				bkpb->hole_length = upper - lower;
***************
*** 1225,1236 ****
  	{
  		/* crossing a logid boundary */
  		NewPageEndPtr.xlogid += 1;
! 		NewPageEndPtr.xrecoff = BLCKSZ;
  	}
  	else
! 		NewPageEndPtr.xrecoff += BLCKSZ;
  	XLogCtl->xlblocks[nextidx] = NewPageEndPtr;
! 	NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) BLCKSZ);
  
  	Insert->curridx = nextidx;
  	Insert->currpage = NewPage;
--- 1225,1236 ----
  	{
  		/* crossing a logid boundary */
  		NewPageEndPtr.xlogid += 1;
! 		NewPageEndPtr.xrecoff = XLOG_BLCKSZ;
  	}
  	else
! 		NewPageEndPtr.xrecoff += XLOG_BLCKSZ;
  	XLogCtl->xlblocks[nextidx] = NewPageEndPtr;
! 	NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
  
  	Insert->curridx = nextidx;
  	Insert->currpage = NewPage;
***************
*** 1241,1247 ****
  	 * Be sure to re-zero the buffer so that bytes beyond what we've written
  	 * will look like zeroes and not valid XLOG records...
  	 */
! 	MemSet((char *) NewPage, 0, BLCKSZ);
  
  	/*
  	 * Fill the new page's header
--- 1241,1247 ----
  	 * Be sure to re-zero the buffer so that bytes beyond what we've written
  	 * will look like zeroes and not valid XLOG records...
  	 */
! 	MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
  
  	/*
  	 * Fill the new page's header
***************
*** 1251,1257 ****
  	/* NewPage->xlp_info = 0; */	/* done by memset */
  	NewPage   ->xlp_tli = ThisTimeLineID;
  	NewPage   ->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
! 	NewPage   ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ;
  
  	/*
  	 * If first page of an XLOG segment file, make it a long header.
--- 1251,1257 ----
  	/* NewPage->xlp_info = 0; */	/* done by memset */
  	NewPage   ->xlp_tli = ThisTimeLineID;
  	NewPage   ->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
! 	NewPage   ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - XLOG_BLCKSZ;
  
  	/*
  	 * If first page of an XLOG segment file, make it a long header.
***************
*** 1425,1431 ****
  		{
  			/* first of group */
  			startidx = curridx;
! 			startoffset = (LogwrtResult.Write.xrecoff - BLCKSZ) % XLogSegSize;
  		}
  		npages++;
  
--- 1425,1431 ----
  		{
  			/* first of group */
  			startidx = curridx;
! 			startoffset = (LogwrtResult.Write.xrecoff - XLOG_BLCKSZ) % XLogSegSize;
  		}
  		npages++;
  
***************
*** 1436,1442 ****
  		 * segment.
  		 */
  		finishing_seg = !ispartialpage &&
! 			(startoffset + npages * BLCKSZ) >= XLogSegSize;
  
  		if (!XLByteLT(LogwrtResult.Write, WriteRqst.Write) ||
  			curridx == XLogCtl->XLogCacheBlck ||
--- 1436,1442 ----
  		 * segment.
  		 */
  		finishing_seg = !ispartialpage &&
! 			(startoffset + npages * XLOG_BLCKSZ) >= XLogSegSize;
  
  		if (!XLByteLT(LogwrtResult.Write, WriteRqst.Write) ||
  			curridx == XLogCtl->XLogCacheBlck ||
***************
*** 1458,1465 ****
  			}
  
  			/* OK to write the page(s) */
! 			from = XLogCtl->pages + startidx * (Size) BLCKSZ;
! 			nbytes = npages * (Size) BLCKSZ;
  			errno = 0;
  			if (write(openLogFile, from, nbytes) != nbytes)
  			{
--- 1458,1465 ----
  			}
  
  			/* OK to write the page(s) */
! 			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
! 			nbytes = npages * (Size) XLOG_BLCKSZ;
  			errno = 0;
  			if (write(openLogFile, from, nbytes) != nbytes)
  			{
***************
*** 1717,1723 ****
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
! 	char		zbuffer[BLCKSZ];
  	uint32		installed_log;
  	uint32		installed_seg;
  	int			max_advance;
--- 1717,1723 ----
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
! 	char		zbuffer[XLOG_BLCKSZ];
  	uint32		installed_log;
  	uint32		installed_seg;
  	int			max_advance;
***************
*** 1855,1861 ****
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
! 	char		buffer[BLCKSZ];
  	int			srcfd;
  	int			fd;
  	int			nbytes;
--- 1855,1861 ----
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
! 	char		buffer[XLOG_BLCKSZ];
  	int			srcfd;
  	int			fd;
  	int			nbytes;
***************
*** 2516,2531 ****
  
  				if (bkpb.hole_length == 0)
  				{
! 					memcpy((char *) page, blk, BLCKSZ);
  				}
  				else
  				{
  					/* must zero-fill the hole */
! 					MemSet((char *) page, 0, BLCKSZ);
  					memcpy((char *) page, blk, bkpb.hole_offset);
  					memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
  						   blk + bkpb.hole_offset,
! 						   BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
  				}
  
  				PageSetLSN(page, lsn);
--- 2516,2531 ----
  
  				if (bkpb.hole_length == 0)
  				{
! 					memcpy((char *) page, blk, XLOG_BLCKSZ);
  				}
  				else
  				{
  					/* must zero-fill the hole */
! 					MemSet((char *) page, 0, XLOG_BLCKSZ);
  					memcpy((char *) page, blk, bkpb.hole_offset);
  					memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
  						   blk + bkpb.hole_offset,
! 						   XLOG_BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
  				}
  
  				PageSetLSN(page, lsn);
***************
*** 2535,2541 ****
  			}
  		}
  
! 		blk += BLCKSZ - bkpb.hole_length;
  	}
  }
  
--- 2535,2541 ----
  			}
  		}
  
! 		blk += XLOG_BLCKSZ - bkpb.hole_length;
  	}
  }
  
***************
*** 2569,2582 ****
  			continue;
  
  		memcpy(&bkpb, blk, sizeof(BkpBlock));
! 		if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
  		{
  			ereport(emode,
  					(errmsg("incorrect hole size in record at %X/%X",
  							recptr.xlogid, recptr.xrecoff)));
  			return false;
  		}
! 		blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
  		COMP_CRC32(crc, blk, blen);
  		blk += blen;
  	}
--- 2569,2582 ----
  			continue;
  
  		memcpy(&bkpb, blk, sizeof(BkpBlock));
! 		if (bkpb.hole_offset + bkpb.hole_length > XLOG_BLCKSZ)
  		{
  			ereport(emode,
  					(errmsg("incorrect hole size in record at %X/%X",
  							recptr.xlogid, recptr.xrecoff)));
  			return false;
  		}
! 		blen = sizeof(BkpBlock) + XLOG_BLCKSZ - bkpb.hole_length;
  		COMP_CRC32(crc, blk, blen);
  		blk += blen;
  	}
***************
*** 2640,2646 ****
  		 * (2) a static char array isn't guaranteed to have any particular
  		 * alignment, whereas malloc() will provide MAXALIGN'd storage.
  		 */
! 		readBuf = (char *) malloc(BLCKSZ);
  		Assert(readBuf != NULL);
  	}
  
--- 2640,2646 ----
  		 * (2) a static char array isn't guaranteed to have any particular
  		 * alignment, whereas malloc() will provide MAXALIGN'd storage.
  		 */
! 		readBuf = (char *) malloc(XLOG_BLCKSZ);
  		Assert(readBuf != NULL);
  	}
  
***************
*** 2654,2661 ****
  			goto got_record;
  		}
  		/* align old recptr to next page */
! 		if (tmpRecPtr.xrecoff % BLCKSZ != 0)
! 			tmpRecPtr.xrecoff += (BLCKSZ - tmpRecPtr.xrecoff % BLCKSZ);
  		if (tmpRecPtr.xrecoff >= XLogFileSize)
  		{
  			(tmpRecPtr.xlogid)++;
--- 2654,2661 ----
  			goto got_record;
  		}
  		/* align old recptr to next page */
! 		if (tmpRecPtr.xrecoff % XLOG_BLCKSZ != 0)
! 			tmpRecPtr.xrecoff += (XLOG_BLCKSZ - tmpRecPtr.xrecoff % XLOG_BLCKSZ);
  		if (tmpRecPtr.xrecoff >= XLogFileSize)
  		{
  			(tmpRecPtr.xlogid)++;
***************
*** 2699,2705 ****
  		readOff = (uint32) (-1);	/* force read to occur below */
  	}
  
! 	targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
  	if (readOff != targetPageOff)
  	{
  		readOff = targetPageOff;
--- 2699,2705 ----
  		readOff = (uint32) (-1);	/* force read to occur below */
  	}
  
! 	targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
  	if (readOff != targetPageOff)
  	{
  		readOff = targetPageOff;
***************
*** 2711,2717 ****
  							readId, readSeg, readOff)));
  			goto next_record_is_invalid;
  		}
! 		if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
  		{
  			ereport(emode,
  					(errcode_for_file_access(),
--- 2711,2717 ----
  							readId, readSeg, readOff)));
  			goto next_record_is_invalid;
  		}
! 		if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
  		{
  			ereport(emode,
  					(errcode_for_file_access(),
***************
*** 2723,2729 ****
  			goto next_record_is_invalid;
  	}
  	pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! 	targetRecOff = RecPtr->xrecoff % BLCKSZ;
  	if (targetRecOff == 0)
  	{
  		/*
--- 2723,2729 ----
  			goto next_record_is_invalid;
  	}
  	pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! 	targetRecOff = RecPtr->xrecoff % XLOG_BLCKSZ;
  	if (targetRecOff == 0)
  	{
  		/*
***************
*** 2749,2755 ****
  						RecPtr->xlogid, RecPtr->xrecoff)));
  		goto next_record_is_invalid;
  	}
! 	record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
  
  got_record:;
  
--- 2749,2755 ----
  						RecPtr->xlogid, RecPtr->xrecoff)));
  		goto next_record_is_invalid;
  	}
! 	record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % XLOG_BLCKSZ);
  
  got_record:;
  
***************
*** 2766,2772 ****
  	}
  	if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
  		record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
! 		XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
  	{
  		ereport(emode,
  				(errmsg("invalid record length at %X/%X",
--- 2766,2772 ----
  	}
  	if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
  		record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
! 		XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + XLOG_BLCKSZ))
  	{
  		ereport(emode,
  				(errmsg("invalid record length at %X/%X",
***************
*** 2814,2830 ****
  
  	/*
  	 * Allocate or enlarge readRecordBuf as needed.  To avoid useless small
! 	 * increases, round its size to a multiple of BLCKSZ, and make sure it's
! 	 * at least 4*BLCKSZ to start with.  (That is enough for all "normal"
! 	 * records, but very large commit or abort records might need more space.)
  	 */
  	total_len = record->xl_tot_len;
  	if (total_len > readRecordBufSize)
  	{
  		uint32		newSize = total_len;
  
! 		newSize += BLCKSZ - (newSize % BLCKSZ);
! 		newSize = Max(newSize, 4 * BLCKSZ);
  		if (readRecordBuf)
  			free(readRecordBuf);
  		readRecordBuf = (char *) malloc(newSize);
--- 2814,2831 ----
  
  	/*
  	 * Allocate or enlarge readRecordBuf as needed.  To avoid useless small
! 	 * increases, round its size to a multiple of XLOG_BLCKSZ, and make sure
! 	 * it's at least 4*XLOG_BLCKSZ to start with.  (That is enough for all
! 	 * "normal" records, but very large commit or abort records might need
! 	 * more space.)
  	 */
  	total_len = record->xl_tot_len;
  	if (total_len > readRecordBufSize)
  	{
  		uint32		newSize = total_len;
  
! 		newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
! 		newSize = Max(newSize, 4 * XLOG_BLCKSZ);
  		if (readRecordBuf)
  			free(readRecordBuf);
  		readRecordBuf = (char *) malloc(newSize);
***************
*** 2842,2848 ****
  
  	buffer = readRecordBuf;
  	nextRecord = NULL;
! 	len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
  	if (total_len > len)
  	{
  		/* Need to reassemble record */
--- 2843,2849 ----
  
  	buffer = readRecordBuf;
  	nextRecord = NULL;
! 	len = XLOG_BLCKSZ - RecPtr->xrecoff % XLOG_BLCKSZ;
  	if (total_len > len)
  	{
  		/* Need to reassemble record */
***************
*** 2854,2860 ****
  		buffer += len;
  		for (;;)
  		{
! 			readOff += BLCKSZ;
  			if (readOff >= XLogSegSize)
  			{
  				close(readFile);
--- 2855,2861 ----
  		buffer += len;
  		for (;;)
  		{
! 			readOff += XLOG_BLCKSZ;
  			if (readOff >= XLogSegSize)
  			{
  				close(readFile);
***************
*** 2865,2871 ****
  					goto next_record_is_invalid;
  				readOff = 0;
  			}
! 			if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
  			{
  				ereport(emode,
  						(errcode_for_file_access(),
--- 2866,2872 ----
  					goto next_record_is_invalid;
  				readOff = 0;
  			}
! 			if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
  			{
  				ereport(emode,
  						(errcode_for_file_access(),
***************
*** 2893,2899 ****
  								readId, readSeg, readOff)));
  				goto next_record_is_invalid;
  			}
! 			len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
  			if (contrecord->xl_rem_len > len)
  			{
  				memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
--- 2894,2900 ----
  								readId, readSeg, readOff)));
  				goto next_record_is_invalid;
  			}
! 			len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
  			if (contrecord->xl_rem_len > len)
  			{
  				memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
***************
*** 2908,2914 ****
  		if (!RecordIsValid(record, *RecPtr, emode))
  			goto next_record_is_invalid;
  		pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! 		if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
  			MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
  		{
  			nextRecord = (XLogRecord *) ((char *) contrecord +
--- 2909,2915 ----
  		if (!RecordIsValid(record, *RecPtr, emode))
  			goto next_record_is_invalid;
  		pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! 		if (XLOG_BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
  			MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
  		{
  			nextRecord = (XLogRecord *) ((char *) contrecord +
***************
*** 2925,2931 ****
  	/* Record does not cross a page boundary */
  	if (!RecordIsValid(record, *RecPtr, emode))
  		goto next_record_is_invalid;
! 	if (BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % BLCKSZ +
  		MAXALIGN(total_len))
  		nextRecord = (XLogRecord *) ((char *) record + MAXALIGN(total_len));
  	EndRecPtr.xlogid = RecPtr->xlogid;
--- 2926,2932 ----
  	/* Record does not cross a page boundary */
  	if (!RecordIsValid(record, *RecPtr, emode))
  		goto next_record_is_invalid;
! 	if (XLOG_BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % XLOG_BLCKSZ +
  		MAXALIGN(total_len))
  		nextRecord = (XLogRecord *) ((char *) record + MAXALIGN(total_len));
  	EndRecPtr.xlogid = RecPtr->xlogid;
***************
*** 3221,3227 ****
  	char		tmppath[MAXPGPATH];
  	char		histfname[MAXFNAMELEN];
  	char		xlogfname[MAXFNAMELEN];
! 	char		buffer[BLCKSZ];
  	int			srcfd;
  	int			fd;
  	int			nbytes;
--- 3222,3228 ----
  	char		tmppath[MAXPGPATH];
  	char		histfname[MAXFNAMELEN];
  	char		xlogfname[MAXFNAMELEN];
! 	char		buffer[XLOG_BLCKSZ];
  	int			srcfd;
  	int			fd;
  	int			nbytes;
***************
*** 3699,3705 ****
  	/* extra alignment padding for XLOG I/O buffers */
  	size = add_size(size, ALIGNOF_XLOG_BUFFER);
  	/* and the buffers themselves */
! 	size = add_size(size, mul_size(BLCKSZ, XLOGbuffers));
  
  	/*
  	 * Note: we don't count ControlFileData, it comes out of the "slop factor"
--- 3700,3706 ----
  	/* extra alignment padding for XLOG I/O buffers */
  	size = add_size(size, ALIGNOF_XLOG_BUFFER);
  	/* and the buffers themselves */
! 	size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
  
  	/*
  	 * Note: we don't count ControlFileData, it comes out of the "slop factor"
***************
*** 3746,3758 ****
  	 */
  	allocptr = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, allocptr);
  	XLogCtl->pages = allocptr;
! 	memset(XLogCtl->pages, 0, (Size) BLCKSZ * XLOGbuffers);
  
  	/*
  	 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
  	 * in additional info.)
  	 */
! 	XLogCtl->XLogCacheByte = (Size) BLCKSZ *XLOGbuffers;
  
  	XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
  	XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
--- 3747,3759 ----
  	 */
  	allocptr = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, allocptr);
  	XLogCtl->pages = allocptr;
! 	memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
  
  	/*
  	 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
  	 * in additional info.)
  	 */
! 	XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ *XLOGbuffers;
  
  	XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
  	XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
***************
*** 3804,3812 ****
  	ThisTimeLineID = 1;
  
  	/* page buffer must be aligned suitably for O_DIRECT */
! 	buffer = (char *) palloc(BLCKSZ + ALIGNOF_XLOG_BUFFER);
  	page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer);
! 	memset(page, 0, BLCKSZ);
  
  	/* Set up information for the initial checkpoint record */
  	checkPoint.redo.xlogid = 0;
--- 3805,3813 ----
  	ThisTimeLineID = 1;
  
  	/* page buffer must be aligned suitably for O_DIRECT */
! 	buffer = (char *) palloc(XLOG_BLCKSZ + ALIGNOF_XLOG_BUFFER);
  	page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer);
! 	memset(page, 0, XLOG_BLCKSZ);
  
  	/* Set up information for the initial checkpoint record */
  	checkPoint.redo.xlogid = 0;
***************
*** 3858,3864 ****
  
  	/* Write the first page with the initial record */
  	errno = 0;
! 	if (write(openLogFile, page, BLCKSZ) != BLCKSZ)
  	{
  		/* if write didn't set errno, assume problem is no disk space */
  		if (errno == 0)
--- 3859,3865 ----
  
  	/* Write the first page with the initial record */
  	errno = 0;
! 	if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
  	{
  		/* if write didn't set errno, assume problem is no disk space */
  		if (errno == 0)
***************
*** 4702,4718 ****
  	Insert->PrevRecord = LastRec;
  	XLogCtl->xlblocks[0].xlogid = openLogId;
  	XLogCtl->xlblocks[0].xrecoff =
! 		((EndOfLog.xrecoff - 1) / BLCKSZ + 1) * BLCKSZ;
  
  	/*
  	 * Tricky point here: readBuf contains the *last* block that the LastRec
  	 * record spans, not the one it starts in.	The last block is indeed the
  	 * one we want to use.
  	 */
! 	Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - BLCKSZ) % XLogSegSize);
! 	memcpy((char *) Insert->currpage, readBuf, BLCKSZ);
  	Insert->currpos = (char *) Insert->currpage +
! 		(EndOfLog.xrecoff + BLCKSZ - XLogCtl->xlblocks[0].xrecoff);
  
  	LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
  
--- 4703,4719 ----
  	Insert->PrevRecord = LastRec;
  	XLogCtl->xlblocks[0].xlogid = openLogId;
  	XLogCtl->xlblocks[0].xrecoff =
! 		((EndOfLog.xrecoff - 1) / XLOG_BLCKSZ + 1) * XLOG_BLCKSZ;
  
  	/*
  	 * Tricky point here: readBuf contains the *last* block that the LastRec
  	 * record spans, not the one it starts in.	The last block is indeed the
  	 * one we want to use.
  	 */
! 	Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - XLOG_BLCKSZ) % XLogSegSize);
! 	memcpy((char *) Insert->currpage, readBuf, XLOG_BLCKSZ);
  	Insert->currpos = (char *) Insert->currpage +
! 		(EndOfLog.xrecoff + XLOG_BLCKSZ - XLogCtl->xlblocks[0].xrecoff);
  
  	LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
  
Index: src/include/pg_config_manual.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/pg_config_manual.h,v
retrieving revision 1.20
diff -c -r1.20 pg_config_manual.h
*** src/include/pg_config_manual.h	5 Jan 2006 03:01:37 -0000	1.20
--- src/include/pg_config_manual.h	23 Mar 2006 19:13:34 -0000
***************
*** 26,31 ****
--- 26,36 ----
  #define BLCKSZ	8192
  
  /*
+  * XLOG_BLCKSZ is the block size for the WAL.
+  */
+ #define XLOG_BLCKSZ 4096
+ 
+ /*
   * RELSEG_SIZE is the maximum number of blocks allowed in one disk
   * file.  Thus, the maximum size of a single file is RELSEG_SIZE *
   * BLCKSZ; relations bigger than that are divided into multiple files.
***************
*** 45,51 ****
  
  /*
   * XLOG_SEG_SIZE is the size of a single WAL file.	This must be a power of 2
!  * and larger than BLCKSZ (preferably, a great deal larger than BLCKSZ).
   *
   * Changing XLOG_SEG_SIZE requires an initdb.
   */
--- 50,57 ----
  
  /*
   * XLOG_SEG_SIZE is the size of a single WAL file.	This must be a power of 2
!  * and larger than XLOG_BLCKSZ (preferably, a great deal larger than
!  * XLOG_BLCKSZ).
   *
   * Changing XLOG_SEG_SIZE requires an initdb.
   */
Index: src/include/access/xlog_internal.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/access/xlog_internal.h,v
retrieving revision 1.10
diff -c -r1.10 xlog_internal.h
*** src/include/access/xlog_internal.h	5 Mar 2006 15:58:54 -0000	1.10
--- src/include/access/xlog_internal.h	23 Mar 2006 19:13:35 -0000
***************
*** 30,36 ****
   * contains only zero bytes.  If hole_length > 0 then we have removed
   * such a "hole" from the stored data (and it's not counted in the
   * XLOG record's CRC, either).  Hence, the amount of block data actually
!  * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
   *
   * Note that we don't attempt to align either the BkpBlock struct or the
   * block's data.  So, the struct must be copied to aligned local storage
--- 30,36 ----
   * contains only zero bytes.  If hole_length > 0 then we have removed
   * such a "hole" from the stored data (and it's not counted in the
   * XLOG record's CRC, either).  Hence, the amount of block data actually
!  * present following the BkpBlock struct is XLOG_BLCKSZ - hole_length bytes.
   *
   * Note that we don't attempt to align either the BkpBlock struct or the
   * block's data.  So, the struct must be copied to aligned local storage
***************
*** 182,189 ****
  
  /* Check if an xrecoff value is in a plausible range */
  #define XRecOffIsValid(xrecoff) \
! 		((xrecoff) % BLCKSZ >= SizeOfXLogShortPHD && \
! 		(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
  
  /*
   * The XLog directory and control file (relative to $PGDATA)
--- 182,189 ----
  
  /* Check if an xrecoff value is in a plausible range */
  #define XRecOffIsValid(xrecoff) \
! 		((xrecoff) % XLOG_BLCKSZ >= SizeOfXLogShortPHD && \
! 		(XLOG_BLCKSZ - (xrecoff) % XLOG_BLCKSZ) >= SizeOfXLogRecord)
  
  /*
   * The XLog directory and control file (relative to $PGDATA)
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Reply via email to