On Wed, Feb 17, 2010 at 6:00 PM, Fujii Masao <masao.fu...@gmail.com> wrote:
> On Wed, Feb 17, 2010 at 4:07 PM, Fujii Masao <masao.fu...@gmail.com> wrote:
>> On Wed, Feb 17, 2010 at 3:03 PM, Magnus Hagander <mag...@hagander.net> wrote:
>>> In that case, O_DIRECT would be counterproductive, no? It maps to
>>> FILE_FLAG_NO_BUFFERING, which makes sure it doesn't go into the
>>> cache. So the read in the startup proc is actually guaranteed to
>>> require a physical read - of something we just wrote, so it'll almost
>>> certainly end up waiting for a rotation, no?
>>>
>>> Seems like getting rid of O_DIRECT here is the right thing to do,
>>> regardless of this.
>>
>> Agreed. I'll remove O_DIRECT from walreceiver.
>
> Here is the patch to do that.

Oops! I found a bug in the patch. Here is the updated version.

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 1627,1633 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
  			/* create/use new log file */
  			use_existent = true;
  			openLogFile = XLogFileInit(openLogId, openLogSeg,
! 									   &use_existent, true);
  			openLogOff = 0;
  		}
  
--- 1627,1633 ----
  			/* create/use new log file */
  			use_existent = true;
  			openLogFile = XLogFileInit(openLogId, openLogSeg,
! 									   &use_existent, true, true);
  			openLogOff = 0;
  		}
  
***************
*** 2184,2189 **** XLogNeedsFlush(XLogRecPtr record)
--- 2184,2195 ----
   * place.  This should be TRUE except during bootstrap log creation.  The
   * caller must *not* hold the lock at call.
   *
+  * allow_direct_io: if TRUE, allow WAL writes to bypass the kernel cache
+  * by opening the file with PG_O_DIRECT. Otherwise, PG_O_DIRECT is
+  * forcibly removed from the sync flags passed to open(). This should be
+  * FALSE only when the walreceiver process writes WAL data, because that
+  * data is read back immediately by the startup process.
+  *
   * Returns FD of opened file.
   *
   * Note: errors here are ERROR not PANIC because we might or might not be
***************
*** 2193,2199 **** XLogNeedsFlush(XLogRecPtr record)
   */
  int
  XLogFileInit(uint32 log, uint32 seg,
! 			 bool *use_existent, bool use_lock)
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
--- 2199,2205 ----
   */
  int
  XLogFileInit(uint32 log, uint32 seg,
! 			 bool *use_existent, bool use_lock, bool allow_direct_io)
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
***************
*** 2203,2208 **** XLogFileInit(uint32 log, uint32 seg,
--- 2209,2219 ----
  	int			max_advance;
  	int			fd;
  	int			nbytes;
+ 	int			sync_bit;
+ 
+ 	sync_bit = get_sync_bit(sync_method);
+ 	if (!allow_direct_io)
+ 		sync_bit &= ~PG_O_DIRECT;
  
  	XLogFilePath(path, ThisTimeLineID, log, seg);
  
***************
*** 2211,2217 **** XLogFileInit(uint32 log, uint32 seg,
  	 */
  	if (*use_existent)
  	{
! 		fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
  						   S_IRUSR | S_IWUSR);
  		if (fd < 0)
  		{
--- 2222,2228 ----
  	 */
  	if (*use_existent)
  	{
! 		fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit,
  						   S_IRUSR | S_IWUSR);
  		if (fd < 0)
  		{
***************
*** 2237,2243 **** XLogFileInit(uint32 log, uint32 seg,
  
  	unlink(tmppath);
  
! 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
  	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
--- 2248,2254 ----
  
  	unlink(tmppath);
  
! 	/* do not use sync_bit here --- want to fsync only at end of fill */
  	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
***************
*** 2317,2323 **** XLogFileInit(uint32 log, uint32 seg,
  	*use_existent = false;
  
  	/* Now open original target segment (might not be file I just made) */
! 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
  		ereport(ERROR,
--- 2328,2334 ----
  	*use_existent = false;
  
  	/* Now open original target segment (might not be file I just made) */
! 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit,
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
  		ereport(ERROR,
***************
*** 3121,3127 **** PreallocXlogFiles(XLogRecPtr endptr)
  	{
  		NextLogSeg(_logId, _logSeg);
  		use_existent = true;
! 		lf = XLogFileInit(_logId, _logSeg, &use_existent, true);
  		close(lf);
  		if (!use_existent)
  			CheckpointStats.ckpt_segs_added++;
--- 3132,3138 ----
  	{
  		NextLogSeg(_logId, _logSeg);
  		use_existent = true;
! 		lf = XLogFileInit(_logId, _logSeg, &use_existent, true, true);
  		close(lf);
  		if (!use_existent)
  			CheckpointStats.ckpt_segs_added++;
***************
*** 4794,4800 **** BootStrapXLOG(void)
  
  	/* Create first XLOG segment file */
  	use_existent = false;
! 	openLogFile = XLogFileInit(0, 0, &use_existent, false);
  
  	/* Write the first page with the initial record */
  	errno = 0;
--- 4805,4811 ----
  
  	/* Create first XLOG segment file */
  	use_existent = false;
! 	openLogFile = XLogFileInit(0, 0, &use_existent, false, true);
  
  	/* Write the first page with the initial record */
  	errno = 0;
*** a/src/backend/replication/walreceiver.c
--- b/src/backend/replication/walreceiver.c
***************
*** 446,452 **** XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
  			XLByteToSeg(recptr, recvId, recvSeg);
  			use_existent = true;
  			recvFile = XLogFileInit(recvId, recvSeg,
! 									&use_existent, true);
  			recvOff = 0;
  		}
  
--- 446,452 ----
  			XLByteToSeg(recptr, recvId, recvSeg);
  			use_existent = true;
  			recvFile = XLogFileInit(recvId, recvSeg,
! 									&use_existent, true, false);
  			recvOff = 0;
  		}
  
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 251,257 **** extern void XLogFlush(XLogRecPtr RecPtr);
  extern void XLogBackgroundFlush(void);
  extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
  extern int	XLogFileInit(uint32 log, uint32 seg,
! 						 bool *use_existent, bool use_lock);
  extern int	XLogFileOpen(uint32 log, uint32 seg);
  
  
--- 251,257 ----
  extern void XLogBackgroundFlush(void);
  extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
  extern int	XLogFileInit(uint32 log, uint32 seg,
! 						 bool *use_existent, bool use_lock, bool allow_direct_io);
  extern int	XLogFileOpen(uint32 log, uint32 seg);
  
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to