On Wed, Feb 17, 2010 at 6:00 PM, Fujii Masao <[email protected]> wrote:
> On Wed, Feb 17, 2010 at 4:07 PM, Fujii Masao <[email protected]> wrote:
>> On Wed, Feb 17, 2010 at 3:03 PM, Magnus Hagander <[email protected]> wrote:
>>> In that case, O_DIRECT would be counterproductive, no? It maps to
>>> FILE_FLAG_NO_BUFFERING, which makes sure it doesn't go into the
>>> cache. So the read in the startup proc is actually guaranteed to
>>> require a physical read - of something we just wrote, so it'll almost
>>> certainly end up waiting for a rotation, no?
>>>
>>> Seems like getting rid of O_DIRECT here is the right thing to do,
>>> regardless of this.
>>
>> Agreed. I'll remove O_DIRECT from walreceiver.
>
> Here is the patch to do that.
Oops! I found a bug in the patch. Here is the updated version.
Regards,
--
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 1627,1633 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
/* create/use new log file */
use_existent = true;
openLogFile = XLogFileInit(openLogId, openLogSeg,
! &use_existent, true);
openLogOff = 0;
}
--- 1627,1633 ----
/* create/use new log file */
use_existent = true;
openLogFile = XLogFileInit(openLogId, openLogSeg,
! &use_existent, true, true);
openLogOff = 0;
}
***************
*** 2184,2189 **** XLogNeedsFlush(XLogRecPtr record)
--- 2184,2195 ----
* place. This should be TRUE except during bootstrap log creation. The
* caller must *not* hold the lock at call.
*
+ * allow_direct_io: if TRUE, allow a WAL write to bypass the kernel cache
+ * by using PG_O_DIRECT for opening a file. Otherwise, PG_O_DIRECT is
+ * forcibly removed from the sync flag of open(). This should be FALSE
+ * only when walreceiver process writes WAL data because it's read
+ * immediately by the startup process.
+ *
* Returns FD of opened file.
*
* Note: errors here are ERROR not PANIC because we might or might not be
***************
*** 2193,2199 **** XLogNeedsFlush(XLogRecPtr record)
*/
int
XLogFileInit(uint32 log, uint32 seg,
! bool *use_existent, bool use_lock)
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
--- 2199,2205 ----
*/
int
XLogFileInit(uint32 log, uint32 seg,
! bool *use_existent, bool use_lock, bool allow_direct_io)
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
***************
*** 2203,2208 **** XLogFileInit(uint32 log, uint32 seg,
--- 2209,2219 ----
int max_advance;
int fd;
int nbytes;
+ int sync_bit;
+
+ sync_bit = get_sync_bit(sync_method);
+ if (!allow_direct_io)
+ sync_bit &= ~PG_O_DIRECT;
XLogFilePath(path, ThisTimeLineID, log, seg);
***************
*** 2211,2217 **** XLogFileInit(uint32 log, uint32 seg,
*/
if (*use_existent)
{
! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
S_IRUSR | S_IWUSR);
if (fd < 0)
{
--- 2222,2228 ----
*/
if (*use_existent)
{
! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit,
S_IRUSR | S_IWUSR);
if (fd < 0)
{
***************
*** 2237,2243 **** XLogFileInit(uint32 log, uint32 seg,
unlink(tmppath);
! /* do not use get_sync_bit() here --- want to fsync only at end of fill */
fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR);
if (fd < 0)
--- 2248,2254 ----
unlink(tmppath);
! /* do not use sync_bit here --- want to fsync only at end of fill */
fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR);
if (fd < 0)
***************
*** 2317,2323 **** XLogFileInit(uint32 log, uint32 seg,
*use_existent = false;
/* Now open original target segment (might not be file I just made) */
! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
S_IRUSR | S_IWUSR);
if (fd < 0)
ereport(ERROR,
--- 2328,2334 ----
*use_existent = false;
/* Now open original target segment (might not be file I just made) */
! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit,
S_IRUSR | S_IWUSR);
if (fd < 0)
ereport(ERROR,
***************
*** 3121,3127 **** PreallocXlogFiles(XLogRecPtr endptr)
{
NextLogSeg(_logId, _logSeg);
use_existent = true;
! lf = XLogFileInit(_logId, _logSeg, &use_existent, true);
close(lf);
if (!use_existent)
CheckpointStats.ckpt_segs_added++;
--- 3132,3138 ----
{
NextLogSeg(_logId, _logSeg);
use_existent = true;
! lf = XLogFileInit(_logId, _logSeg, &use_existent, true, true);
close(lf);
if (!use_existent)
CheckpointStats.ckpt_segs_added++;
***************
*** 4794,4800 **** BootStrapXLOG(void)
/* Create first XLOG segment file */
use_existent = false;
! openLogFile = XLogFileInit(0, 0, &use_existent, false);
/* Write the first page with the initial record */
errno = 0;
--- 4805,4811 ----
/* Create first XLOG segment file */
use_existent = false;
! openLogFile = XLogFileInit(0, 0, &use_existent, false, true);
/* Write the first page with the initial record */
errno = 0;
*** a/src/backend/replication/walreceiver.c
--- b/src/backend/replication/walreceiver.c
***************
*** 446,452 **** XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
XLByteToSeg(recptr, recvId, recvSeg);
use_existent = true;
recvFile = XLogFileInit(recvId, recvSeg,
! &use_existent, true);
recvOff = 0;
}
--- 446,452 ----
XLByteToSeg(recptr, recvId, recvSeg);
use_existent = true;
recvFile = XLogFileInit(recvId, recvSeg,
! &use_existent, true, false);
recvOff = 0;
}
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 251,257 **** extern void XLogFlush(XLogRecPtr RecPtr);
extern void XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern int XLogFileInit(uint32 log, uint32 seg,
! bool *use_existent, bool use_lock);
extern int XLogFileOpen(uint32 log, uint32 seg);
--- 251,257 ----
extern void XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern int XLogFileInit(uint32 log, uint32 seg,
! bool *use_existent, bool use_lock, bool allow_direct_io);
extern int XLogFileOpen(uint32 log, uint32 seg);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers