On Wed, 22 Mar 2006 14:19:48 -0500
Tom Lane <[EMAIL PROTECTED]> wrote:
> Mark Wong <[EMAIL PROTECTED]> writes:
> > I proposed to explore splitting BLCKSZ into separate values for logging
> > and data to see if there might be anything to gain:
> > http://archives.postgresql.org/pgsql-hackers/2006-03/msg00745.php
> > My first pass was to do more or less a search and replace (attached) and
> > I am already running into trouble with a 'make check' (below). I'm
> > guessing that when initdb is run, I'm not properly saving the values
> > that I've defined for DATA_BLCKSZ and possibly LOG_BLCKSZ.
>
> I'd suggest leaving BLCKSZ as-is and inventing XLOG_BLCKSZ to be used
> only within the WAL code; should make for a *far* smaller patch.
> Offhand I don't think that anything except xlog.c knows the WAL block
> size --- it should be fairly closely associated with dependencies on
> XLOG_SEG_SIZE, if you are looking for something to grep for.
Ok, I have attached something much smaller. It appears to pass 'make
check', but I'll keep testing to make sure it's really correct and works.
Thanks,
Mark
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.227
diff -c -r1.227 xlog.c
*** src/backend/access/transam/xlog.c 5 Mar 2006 15:58:22 -0000 1.227
--- src/backend/access/transam/xlog.c 23 Mar 2006 19:13:31 -0000
***************
*** 113,122 ****
/*
* Limitation of buffer-alignment for direct IO depends on OS and filesystem,
! * but BLCKSZ is assumed to be enough for it.
*/
#ifdef O_DIRECT
! #define ALIGNOF_XLOG_BUFFER BLCKSZ
#else
#define ALIGNOF_XLOG_BUFFER ALIGNOF_BUFFER
#endif
--- 113,122 ----
/*
* Limitation of buffer-alignment for direct IO depends on OS and filesystem,
! * but XLOG_BLCKSZ is assumed to be enough for it.
*/
#ifdef O_DIRECT
! #define ALIGNOF_XLOG_BUFFER XLOG_BLCKSZ
#else
#define ALIGNOF_XLOG_BUFFER ALIGNOF_BUFFER
#endif
***************
*** 374,380 ****
* and xlblocks values depends on WALInsertLock and WALWriteLock.
*/
char *pages; /* buffers for unwritten XLOG pages */
! XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */
Size XLogCacheByte; /* # bytes in xlog buffers */
int XLogCacheBlck; /* highest allocated xlog buffer index */
TimeLineID ThisTimeLineID;
--- 374,380 ----
* and xlblocks values depends on WALInsertLock and WALWriteLock.
*/
char *pages; /* buffers for unwritten XLOG pages */
! XLogRecPtr *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */
Size XLogCacheByte; /* # bytes in xlog buffers */
int XLogCacheBlck; /* highest allocated xlog buffer index */
TimeLineID ThisTimeLineID;
***************
*** 397,403 ****
/* Free space remaining in the current xlog page buffer */
#define INSERT_FREESPACE(Insert) \
! (BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
/* Construct XLogRecPtr value for current insertion point */
#define INSERT_RECPTR(recptr,Insert,curridx) \
--- 397,403 ----
/* Free space remaining in the current xlog page buffer */
#define INSERT_FREESPACE(Insert) \
! (XLOG_BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
/* Construct XLogRecPtr value for current insertion point */
#define INSERT_RECPTR(recptr,Insert,curridx) \
***************
*** 441,447 ****
static uint32 readSeg = 0;
static uint32 readOff = 0;
! /* Buffer for currently read page (BLCKSZ bytes) */
static char *readBuf = NULL;
/* Buffer for current ReadRecord result (expandable) */
--- 441,447 ----
static uint32 readSeg = 0;
static uint32 readOff = 0;
! /* Buffer for currently read page (XLOG_BLCKSZ bytes) */
static char *readBuf = NULL;
/* Buffer for current ReadRecord result (expandable) */
***************
*** 662,668 ****
{
COMP_CRC32(rdata_crc,
page,
! BLCKSZ);
}
else
{
--- 662,668 ----
{
COMP_CRC32(rdata_crc,
page,
! XLOG_BLCKSZ);
}
else
{
***************
*** 672,678 ****
bkpb->hole_offset);
COMP_CRC32(rdata_crc,
page + (bkpb->hole_offset + bkpb->hole_length),
! BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
}
}
}
--- 672,678 ----
bkpb->hole_offset);
COMP_CRC32(rdata_crc,
page + (bkpb->hole_offset + bkpb->hole_length),
! XLOG_BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
}
}
}
***************
*** 705,711 ****
* If cache is half filled then try to acquire write lock and do
* XLogWrite. Ignore any fractional blocks in performing this check.
*/
! LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ;
if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
XLogCtl->XLogCacheByte / 2))
--- 705,711 ----
* If cache is half filled then try to acquire write lock and do
* XLogWrite. Ignore any fractional blocks in performing this check.
*/
! LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % XLOG_BLCKSZ;
if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
XLogCtl->XLogCacheByte / 2))
***************
*** 795,802 ****
if (bkpb->hole_length == 0)
{
rdt->data = page;
! rdt->len = BLCKSZ;
! write_len += BLCKSZ;
rdt->next = NULL;
}
else
--- 795,802 ----
if (bkpb->hole_length == 0)
{
rdt->data = page;
! rdt->len = XLOG_BLCKSZ;
! write_len += XLOG_BLCKSZ;
rdt->next = NULL;
}
else
***************
*** 810,816 ****
rdt = rdt->next;
rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
! rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
write_len += rdt->len;
rdt->next = NULL;
}
--- 810,816 ----
rdt = rdt->next;
rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
! rdt->len = XLOG_BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
write_len += rdt->len;
rdt->next = NULL;
}
***************
*** 993,999 ****
if (lower >= SizeOfPageHeaderData &&
upper > lower &&
! upper <= BLCKSZ)
{
bkpb->hole_offset = lower;
bkpb->hole_length = upper - lower;
--- 993,999 ----
if (lower >= SizeOfPageHeaderData &&
upper > lower &&
! upper <= XLOG_BLCKSZ)
{
bkpb->hole_offset = lower;
bkpb->hole_length = upper - lower;
***************
*** 1225,1236 ****
{
/* crossing a logid boundary */
NewPageEndPtr.xlogid += 1;
! NewPageEndPtr.xrecoff = BLCKSZ;
}
else
! NewPageEndPtr.xrecoff += BLCKSZ;
XLogCtl->xlblocks[nextidx] = NewPageEndPtr;
! NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) BLCKSZ);
Insert->curridx = nextidx;
Insert->currpage = NewPage;
--- 1225,1236 ----
{
/* crossing a logid boundary */
NewPageEndPtr.xlogid += 1;
! NewPageEndPtr.xrecoff = XLOG_BLCKSZ;
}
else
! NewPageEndPtr.xrecoff += XLOG_BLCKSZ;
XLogCtl->xlblocks[nextidx] = NewPageEndPtr;
! NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
Insert->curridx = nextidx;
Insert->currpage = NewPage;
***************
*** 1241,1247 ****
* Be sure to re-zero the buffer so that bytes beyond what we've written
* will look like zeroes and not valid XLOG records...
*/
! MemSet((char *) NewPage, 0, BLCKSZ);
/*
* Fill the new page's header
--- 1241,1247 ----
* Be sure to re-zero the buffer so that bytes beyond what we've written
* will look like zeroes and not valid XLOG records...
*/
! MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
/*
* Fill the new page's header
***************
*** 1251,1257 ****
/* NewPage->xlp_info = 0; */ /* done by memset */
NewPage ->xlp_tli = ThisTimeLineID;
NewPage ->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
! NewPage ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ;
/*
* If first page of an XLOG segment file, make it a long header.
--- 1251,1257 ----
/* NewPage->xlp_info = 0; */ /* done by memset */
NewPage ->xlp_tli = ThisTimeLineID;
NewPage ->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
! NewPage ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - XLOG_BLCKSZ;
/*
* If first page of an XLOG segment file, make it a long header.
***************
*** 1425,1431 ****
{
/* first of group */
startidx = curridx;
! startoffset = (LogwrtResult.Write.xrecoff - BLCKSZ) % XLogSegSize;
}
npages++;
--- 1425,1431 ----
{
/* first of group */
startidx = curridx;
! startoffset = (LogwrtResult.Write.xrecoff - XLOG_BLCKSZ) % XLogSegSize;
}
npages++;
***************
*** 1436,1442 ****
* segment.
*/
finishing_seg = !ispartialpage &&
! (startoffset + npages * BLCKSZ) >= XLogSegSize;
if (!XLByteLT(LogwrtResult.Write, WriteRqst.Write) ||
curridx == XLogCtl->XLogCacheBlck ||
--- 1436,1442 ----
* segment.
*/
finishing_seg = !ispartialpage &&
! (startoffset + npages * XLOG_BLCKSZ) >= XLogSegSize;
if (!XLByteLT(LogwrtResult.Write, WriteRqst.Write) ||
curridx == XLogCtl->XLogCacheBlck ||
***************
*** 1458,1465 ****
}
/* OK to write the page(s) */
! from = XLogCtl->pages + startidx * (Size) BLCKSZ;
! nbytes = npages * (Size) BLCKSZ;
errno = 0;
if (write(openLogFile, from, nbytes) != nbytes)
{
--- 1458,1465 ----
}
/* OK to write the page(s) */
! from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
! nbytes = npages * (Size) XLOG_BLCKSZ;
errno = 0;
if (write(openLogFile, from, nbytes) != nbytes)
{
***************
*** 1717,1723 ****
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
! char zbuffer[BLCKSZ];
uint32 installed_log;
uint32 installed_seg;
int max_advance;
--- 1717,1723 ----
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
! char zbuffer[XLOG_BLCKSZ];
uint32 installed_log;
uint32 installed_seg;
int max_advance;
***************
*** 1855,1861 ****
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
! char buffer[BLCKSZ];
int srcfd;
int fd;
int nbytes;
--- 1855,1861 ----
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
! char buffer[XLOG_BLCKSZ];
int srcfd;
int fd;
int nbytes;
***************
*** 2516,2531 ****
if (bkpb.hole_length == 0)
{
! memcpy((char *) page, blk, BLCKSZ);
}
else
{
/* must zero-fill the hole */
! MemSet((char *) page, 0, BLCKSZ);
memcpy((char *) page, blk, bkpb.hole_offset);
memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
blk + bkpb.hole_offset,
! BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
}
PageSetLSN(page, lsn);
--- 2516,2531 ----
if (bkpb.hole_length == 0)
{
! memcpy((char *) page, blk, XLOG_BLCKSZ);
}
else
{
/* must zero-fill the hole */
! MemSet((char *) page, 0, XLOG_BLCKSZ);
memcpy((char *) page, blk, bkpb.hole_offset);
memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
blk + bkpb.hole_offset,
! XLOG_BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
}
PageSetLSN(page, lsn);
***************
*** 2535,2541 ****
}
}
! blk += BLCKSZ - bkpb.hole_length;
}
}
--- 2535,2541 ----
}
}
! blk += XLOG_BLCKSZ - bkpb.hole_length;
}
}
***************
*** 2569,2582 ****
continue;
memcpy(&bkpb, blk, sizeof(BkpBlock));
! if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
{
ereport(emode,
(errmsg("incorrect hole size in record at %X/%X",
recptr.xlogid, recptr.xrecoff)));
return false;
}
! blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
COMP_CRC32(crc, blk, blen);
blk += blen;
}
--- 2569,2582 ----
continue;
memcpy(&bkpb, blk, sizeof(BkpBlock));
! if (bkpb.hole_offset + bkpb.hole_length > XLOG_BLCKSZ)
{
ereport(emode,
(errmsg("incorrect hole size in record at %X/%X",
recptr.xlogid, recptr.xrecoff)));
return false;
}
! blen = sizeof(BkpBlock) + XLOG_BLCKSZ - bkpb.hole_length;
COMP_CRC32(crc, blk, blen);
blk += blen;
}
***************
*** 2640,2646 ****
* (2) a static char array isn't guaranteed to have any particular
* alignment, whereas malloc() will provide MAXALIGN'd storage.
*/
! readBuf = (char *) malloc(BLCKSZ);
Assert(readBuf != NULL);
}
--- 2640,2646 ----
* (2) a static char array isn't guaranteed to have any particular
* alignment, whereas malloc() will provide MAXALIGN'd storage.
*/
! readBuf = (char *) malloc(XLOG_BLCKSZ);
Assert(readBuf != NULL);
}
***************
*** 2654,2661 ****
goto got_record;
}
/* align old recptr to next page */
! if (tmpRecPtr.xrecoff % BLCKSZ != 0)
! tmpRecPtr.xrecoff += (BLCKSZ - tmpRecPtr.xrecoff % BLCKSZ);
if (tmpRecPtr.xrecoff >= XLogFileSize)
{
(tmpRecPtr.xlogid)++;
--- 2654,2661 ----
goto got_record;
}
/* align old recptr to next page */
! if (tmpRecPtr.xrecoff % XLOG_BLCKSZ != 0)
! tmpRecPtr.xrecoff += (XLOG_BLCKSZ - tmpRecPtr.xrecoff % XLOG_BLCKSZ);
if (tmpRecPtr.xrecoff >= XLogFileSize)
{
(tmpRecPtr.xlogid)++;
***************
*** 2699,2705 ****
readOff = (uint32) (-1); /* force read to occur below */
}
! targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
if (readOff != targetPageOff)
{
readOff = targetPageOff;
--- 2699,2705 ----
readOff = (uint32) (-1); /* force read to occur below */
}
! targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
if (readOff != targetPageOff)
{
readOff = targetPageOff;
***************
*** 2711,2717 ****
readId, readSeg, readOff)));
goto next_record_is_invalid;
}
! if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
{
ereport(emode,
(errcode_for_file_access(),
--- 2711,2717 ----
readId, readSeg, readOff)));
goto next_record_is_invalid;
}
! if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
ereport(emode,
(errcode_for_file_access(),
***************
*** 2723,2729 ****
goto next_record_is_invalid;
}
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! targetRecOff = RecPtr->xrecoff % BLCKSZ;
if (targetRecOff == 0)
{
/*
--- 2723,2729 ----
goto next_record_is_invalid;
}
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! targetRecOff = RecPtr->xrecoff % XLOG_BLCKSZ;
if (targetRecOff == 0)
{
/*
***************
*** 2749,2755 ****
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
! record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
got_record:;
--- 2749,2755 ----
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
! record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % XLOG_BLCKSZ);
got_record:;
***************
*** 2766,2772 ****
}
if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
! XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
{
ereport(emode,
(errmsg("invalid record length at %X/%X",
--- 2766,2772 ----
}
if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
! XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + XLOG_BLCKSZ))
{
ereport(emode,
(errmsg("invalid record length at %X/%X",
***************
*** 2814,2830 ****
/*
* Allocate or enlarge readRecordBuf as needed. To avoid useless small
! * increases, round its size to a multiple of BLCKSZ, and make sure it's
! * at least 4*BLCKSZ to start with. (That is enough for all "normal"
! * records, but very large commit or abort records might need more space.)
*/
total_len = record->xl_tot_len;
if (total_len > readRecordBufSize)
{
uint32 newSize = total_len;
! newSize += BLCKSZ - (newSize % BLCKSZ);
! newSize = Max(newSize, 4 * BLCKSZ);
if (readRecordBuf)
free(readRecordBuf);
readRecordBuf = (char *) malloc(newSize);
--- 2814,2831 ----
/*
* Allocate or enlarge readRecordBuf as needed. To avoid useless small
! * increases, round its size to a multiple of XLOG_BLCKSZ, and make sure
! * it's at least 4*XLOG_BLCKSZ to start with. (That is enough for all
! * "normal" records, but very large commit or abort records might need
! * more space.)
*/
total_len = record->xl_tot_len;
if (total_len > readRecordBufSize)
{
uint32 newSize = total_len;
! newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
! newSize = Max(newSize, 4 * XLOG_BLCKSZ);
if (readRecordBuf)
free(readRecordBuf);
readRecordBuf = (char *) malloc(newSize);
***************
*** 2842,2848 ****
buffer = readRecordBuf;
nextRecord = NULL;
! len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
if (total_len > len)
{
/* Need to reassemble record */
--- 2843,2849 ----
buffer = readRecordBuf;
nextRecord = NULL;
! len = XLOG_BLCKSZ - RecPtr->xrecoff % XLOG_BLCKSZ;
if (total_len > len)
{
/* Need to reassemble record */
***************
*** 2854,2860 ****
buffer += len;
for (;;)
{
! readOff += BLCKSZ;
if (readOff >= XLogSegSize)
{
close(readFile);
--- 2855,2861 ----
buffer += len;
for (;;)
{
! readOff += XLOG_BLCKSZ;
if (readOff >= XLogSegSize)
{
close(readFile);
***************
*** 2865,2871 ****
goto next_record_is_invalid;
readOff = 0;
}
! if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
{
ereport(emode,
(errcode_for_file_access(),
--- 2866,2872 ----
goto next_record_is_invalid;
readOff = 0;
}
! if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
ereport(emode,
(errcode_for_file_access(),
***************
*** 2893,2899 ****
readId, readSeg, readOff)));
goto next_record_is_invalid;
}
! len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
if (contrecord->xl_rem_len > len)
{
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
--- 2894,2900 ----
readId, readSeg, readOff)));
goto next_record_is_invalid;
}
! len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
if (contrecord->xl_rem_len > len)
{
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
***************
*** 2908,2914 ****
if (!RecordIsValid(record, *RecPtr, emode))
goto next_record_is_invalid;
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
{
nextRecord = (XLogRecord *) ((char *) contrecord +
--- 2909,2915 ----
if (!RecordIsValid(record, *RecPtr, emode))
goto next_record_is_invalid;
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
! if (XLOG_BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
{
nextRecord = (XLogRecord *) ((char *) contrecord +
***************
*** 2925,2931 ****
/* Record does not cross a page boundary */
if (!RecordIsValid(record, *RecPtr, emode))
goto next_record_is_invalid;
! if (BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % BLCKSZ +
MAXALIGN(total_len))
nextRecord = (XLogRecord *) ((char *) record + MAXALIGN(total_len));
EndRecPtr.xlogid = RecPtr->xlogid;
--- 2926,2932 ----
/* Record does not cross a page boundary */
if (!RecordIsValid(record, *RecPtr, emode))
goto next_record_is_invalid;
! if (XLOG_BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % XLOG_BLCKSZ +
MAXALIGN(total_len))
nextRecord = (XLogRecord *) ((char *) record + MAXALIGN(total_len));
EndRecPtr.xlogid = RecPtr->xlogid;
***************
*** 3221,3227 ****
char tmppath[MAXPGPATH];
char histfname[MAXFNAMELEN];
char xlogfname[MAXFNAMELEN];
! char buffer[BLCKSZ];
int srcfd;
int fd;
int nbytes;
--- 3222,3228 ----
char tmppath[MAXPGPATH];
char histfname[MAXFNAMELEN];
char xlogfname[MAXFNAMELEN];
! char buffer[XLOG_BLCKSZ];
int srcfd;
int fd;
int nbytes;
***************
*** 3699,3705 ****
/* extra alignment padding for XLOG I/O buffers */
size = add_size(size, ALIGNOF_XLOG_BUFFER);
/* and the buffers themselves */
! size = add_size(size, mul_size(BLCKSZ, XLOGbuffers));
/*
* Note: we don't count ControlFileData, it comes out of the "slop factor"
--- 3700,3706 ----
/* extra alignment padding for XLOG I/O buffers */
size = add_size(size, ALIGNOF_XLOG_BUFFER);
/* and the buffers themselves */
! size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
/*
* Note: we don't count ControlFileData, it comes out of the "slop factor"
***************
*** 3746,3758 ****
*/
allocptr = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, allocptr);
XLogCtl->pages = allocptr;
! memset(XLogCtl->pages, 0, (Size) BLCKSZ * XLOGbuffers);
/*
* Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
* in additional info.)
*/
! XLogCtl->XLogCacheByte = (Size) BLCKSZ *XLOGbuffers;
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
--- 3747,3759 ----
*/
allocptr = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, allocptr);
XLogCtl->pages = allocptr;
! memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
/*
* Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
* in additional info.)
*/
! XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ *XLOGbuffers;
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
***************
*** 3804,3812 ****
ThisTimeLineID = 1;
/* page buffer must be aligned suitably for O_DIRECT */
! buffer = (char *) palloc(BLCKSZ + ALIGNOF_XLOG_BUFFER);
page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer);
! memset(page, 0, BLCKSZ);
/* Set up information for the initial checkpoint record */
checkPoint.redo.xlogid = 0;
--- 3805,3813 ----
ThisTimeLineID = 1;
/* page buffer must be aligned suitably for O_DIRECT */
! buffer = (char *) palloc(XLOG_BLCKSZ + ALIGNOF_XLOG_BUFFER);
page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer);
! memset(page, 0, XLOG_BLCKSZ);
/* Set up information for the initial checkpoint record */
checkPoint.redo.xlogid = 0;
***************
*** 3858,3864 ****
/* Write the first page with the initial record */
errno = 0;
! if (write(openLogFile, page, BLCKSZ) != BLCKSZ)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
--- 3859,3865 ----
/* Write the first page with the initial record */
errno = 0;
! if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
***************
*** 4702,4718 ****
Insert->PrevRecord = LastRec;
XLogCtl->xlblocks[0].xlogid = openLogId;
XLogCtl->xlblocks[0].xrecoff =
! ((EndOfLog.xrecoff - 1) / BLCKSZ + 1) * BLCKSZ;
/*
* Tricky point here: readBuf contains the *last* block that the LastRec
* record spans, not the one it starts in. The last block is indeed the
* one we want to use.
*/
! Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - BLCKSZ) % XLogSegSize);
! memcpy((char *) Insert->currpage, readBuf, BLCKSZ);
Insert->currpos = (char *) Insert->currpage +
! (EndOfLog.xrecoff + BLCKSZ - XLogCtl->xlblocks[0].xrecoff);
LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
--- 4703,4719 ----
Insert->PrevRecord = LastRec;
XLogCtl->xlblocks[0].xlogid = openLogId;
XLogCtl->xlblocks[0].xrecoff =
! ((EndOfLog.xrecoff - 1) / XLOG_BLCKSZ + 1) * XLOG_BLCKSZ;
/*
* Tricky point here: readBuf contains the *last* block that the LastRec
* record spans, not the one it starts in. The last block is indeed the
* one we want to use.
*/
! Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - XLOG_BLCKSZ) % XLogSegSize);
! memcpy((char *) Insert->currpage, readBuf, XLOG_BLCKSZ);
Insert->currpos = (char *) Insert->currpage +
! (EndOfLog.xrecoff + XLOG_BLCKSZ - XLogCtl->xlblocks[0].xrecoff);
LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
Index: src/include/pg_config_manual.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/pg_config_manual.h,v
retrieving revision 1.20
diff -c -r1.20 pg_config_manual.h
*** src/include/pg_config_manual.h 5 Jan 2006 03:01:37 -0000 1.20
--- src/include/pg_config_manual.h 23 Mar 2006 19:13:34 -0000
***************
*** 26,31 ****
--- 26,36 ----
#define BLCKSZ 8192
/*
+ * XLOG_BLCKSZ is the block size for the WAL.
+ */
+ #define XLOG_BLCKSZ 4096
+
+ /*
* RELSEG_SIZE is the maximum number of blocks allowed in one disk
* file. Thus, the maximum size of a single file is RELSEG_SIZE *
* BLCKSZ; relations bigger than that are divided into multiple files.
***************
*** 45,51 ****
/*
* XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2
! * and larger than BLCKSZ (preferably, a great deal larger than BLCKSZ).
*
* Changing XLOG_SEG_SIZE requires an initdb.
*/
--- 50,57 ----
/*
* XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2
! * and larger than XLOG_BLCKSZ (preferably, a great deal larger than
! * XLOG_BLCKSZ).
*
* Changing XLOG_SEG_SIZE requires an initdb.
*/
Index: src/include/access/xlog_internal.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/access/xlog_internal.h,v
retrieving revision 1.10
diff -c -r1.10 xlog_internal.h
*** src/include/access/xlog_internal.h 5 Mar 2006 15:58:54 -0000 1.10
--- src/include/access/xlog_internal.h 23 Mar 2006 19:13:35 -0000
***************
*** 30,36 ****
* contains only zero bytes. If hole_length > 0 then we have removed
* such a "hole" from the stored data (and it's not counted in the
* XLOG record's CRC, either). Hence, the amount of block data actually
! * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
*
* Note that we don't attempt to align either the BkpBlock struct or the
* block's data. So, the struct must be copied to aligned local storage
--- 30,36 ----
* contains only zero bytes. If hole_length > 0 then we have removed
* such a "hole" from the stored data (and it's not counted in the
* XLOG record's CRC, either). Hence, the amount of block data actually
! * present following the BkpBlock struct is XLOG_BLCKSZ - hole_length bytes.
*
* Note that we don't attempt to align either the BkpBlock struct or the
* block's data. So, the struct must be copied to aligned local storage
***************
*** 182,189 ****
/* Check if an xrecoff value is in a plausible range */
#define XRecOffIsValid(xrecoff) \
! ((xrecoff) % BLCKSZ >= SizeOfXLogShortPHD && \
! (BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
/*
* The XLog directory and control file (relative to $PGDATA)
--- 182,189 ----
/* Check if an xrecoff value is in a plausible range */
#define XRecOffIsValid(xrecoff) \
! ((xrecoff) % XLOG_BLCKSZ >= SizeOfXLogShortPHD && \
! (XLOG_BLCKSZ - (xrecoff) % XLOG_BLCKSZ) >= SizeOfXLogRecord)
/*
* The XLog directory and control file (relative to $PGDATA)
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster