On 04.01.2012 17:05, Peter Geoghegan wrote:
On 4 January 2012 07:24, Heikki Linnakangas
<heikki.linnakan...@enterprisedb.com>  wrote:
I think SetBufferCommitInfoNeedsSave() needs the same treatment as
MarkBufferDirty(). And it would probably be good to only set the latch if
the buffer wasn't dirty already. Setting a latch that's already set is fast,
but surely it's even faster to not even try.

That seems reasonable. Revised patch is attached.

Thanks! It occurs to me that it's still a bad idea to call SetLatch() while holding the buffer header spinlock. At least when it's trivial to just move the call after releasing the lock. See attached.

Could you do the sleeping/hibernating logic all in BgWriterNap()?

Yeah, I'd like to see a micro-benchmark of a worst-case scenario. I'm a bit
worried about the impact on systems with a lot of CPUs. If you have a lot of
CPUs writing to the same cache line that contains the latch's flag, that
might get expensive.

Also reasonable, but I don't think that I'll get around to it until
after the final commitfest deadline.

That's still pending. And I admit I haven't tested my updated version besides "make check".

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
*** a/src/backend/postmaster/bgwriter.c
--- b/src/backend/postmaster/bgwriter.c
***************
*** 51,56 ****
--- 51,58 ----
  #include "storage/ipc.h"
  #include "storage/lwlock.h"
  #include "storage/pmsignal.h"
+ #include "storage/proc.h"
+ #include "storage/procsignal.h"
  #include "storage/shmem.h"
  #include "storage/smgr.h"
  #include "storage/spin.h"
***************
*** 73,83 **** static volatile sig_atomic_t shutdown_requested = false;
  /*
   * Private state
   */
  static bool am_bg_writer = false;
  
  /* Prototypes for private functions */
  
! static void BgWriterNap(void);
  
  /* Signal handlers */
  
--- 75,87 ----
  /*
   * Private state
   */
+ #define BGWRITER_HIBERNATE_MS			10000
+ 
  static bool am_bg_writer = false;
  
  /* Prototypes for private functions */
  
! static void BgWriterNap(bool hibernating);
  
  /* Signal handlers */
  
***************
*** 97,102 **** BackgroundWriterMain(void)
--- 101,107 ----
  {
  	sigjmp_buf	local_sigjmp_buf;
  	MemoryContext bgwriter_context;
+ 	bool		hibernating;
  
  	am_bg_writer = true;
  
***************
*** 123,129 **** BackgroundWriterMain(void)
  	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
  	pqsignal(SIGALRM, SIG_IGN);
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, SIG_IGN);			/* reserve for ProcSignal */
  	pqsignal(SIGUSR2, SIG_IGN);
  
  	/*
--- 128,134 ----
  	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
  	pqsignal(SIGALRM, SIG_IGN);
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
  	pqsignal(SIGUSR2, SIG_IGN);
  
  	/*
***************
*** 139,144 **** BackgroundWriterMain(void)
--- 144,155 ----
  	sigdelset(&BlockSig, SIGQUIT);
  
  	/*
+ 	 * Advertise our latch that backends can use to wake us up while we're
+ 	 * sleeping.
+ 	 */
+ 	ProcGlobal->bgwriterLatch = &MyProc->procLatch;
+ 
+ 	/*
  	 * Create a resource owner to keep track of our resources (currently only
  	 * buffer pins).
  	 */
***************
*** 235,242 **** BackgroundWriterMain(void)
--- 246,256 ----
  	/*
  	 * Loop forever
  	 */
+ 	hibernating = false;
  	for (;;)
  	{
+ 		bool lapped;
+ 
  		/*
  		 * Emergency bailout if postmaster has died.  This is to avoid the
  		 * necessity for manual cleanup of all postmaster children.
***************
*** 264,281 **** BackgroundWriterMain(void)
  		/*
  		 * Do one cycle of dirty-buffer writing.
  		 */
! 		BgBufferSync();
  
! 		/* Nap for the configured time. */
! 		BgWriterNap();
  	}
  }
  
  /*
   * BgWriterNap -- Nap for the configured time or until a signal is received.
   */
  static void
! BgWriterNap(void)
  {
  	long		udelay;
  
--- 278,333 ----
  		/*
  		 * Do one cycle of dirty-buffer writing.
  		 */
! 		lapped = BgBufferSync();
! 
! 		if (lapped && !hibernating)
! 		{
! 			/*
! 			 * Initiate hibernation by "arming" the process latch. This usage
! 			 * differs from the standard pattern for latches outlined in
! 			 * latch.h, where the latch is generally reset immediately after
! 			 * WaitLatch returns, in the next iteration of an infinite loop.
! 			 *
! 			 * It should only be possible to *really* set the latch from client
! 			 * backends while the bgwriter is idle, and not during productive
! 			 * cycles where buffers are written, or shortly thereafter. It's
! 			 * important that the SetLatch() call within the buffer manager
! 			 * usually inexpensively returns having found that the latch is
! 			 * already set. For write-heavy workloads, it's quite possible that
! 			 * those calls will never actually get the opportunity to really
! 			 * set the latch.
! 			 *
! 			 * By only giving backends the opportunity to really set the
! 			 * latch at this point via ResetLatch(), we avoid the overhead of
! 			 * sending a signal perhaps as much as once per bgwriter cycle, or
! 			 * maybe even more frequently than that due to the asynchronous
! 			 * nature of signals.
! 			 */
! 			ResetLatch(&MyProc->procLatch);
! 			hibernating = true;
! 		}
! 		else if (!lapped && hibernating)
! 		{
! 			/*
! 			 * Woken up from hibernation. Set own proc latch just in case it's
! 			 * not set yet (usually we wake up from hibernation because a
! 			 * backend already set the latch).
! 			 */
! 			SetLatch(&MyProc->procLatch);
! 			hibernating = false;
! 		}
  
! 		BgWriterNap(hibernating);
  	}
  }
  
  /*
   * BgWriterNap -- Nap for the configured time or until a signal is received.
+  *
+  * Respond to a signal handler having set a flag for us.
   */
  static void
! BgWriterNap(bool hibernating)
  {
  	long		udelay;
  
***************
*** 284,302 **** BgWriterNap(void)
  	 */
  	pgstat_send_bgwriter();
  
  	/*
! 	 * Nap for the configured time, or sleep for 10 seconds if there is no
! 	 * bgwriter activity configured.
  	 *
  	 * On some platforms, signals won't interrupt the sleep.  To ensure we
  	 * respond reasonably promptly when someone signals us, break down the
  	 * sleep into 1-second increments, and check for interrupts after each
  	 * nap.
  	 */
! 	if (bgwriter_lru_maxpages > 0)
! 		udelay = BgWriterDelay * 1000L;
! 	else
! 		udelay = 10000000L;		/* Ten seconds */
  
  	while (udelay > 999999L)
  	{
--- 336,383 ----
  	 */
  	pgstat_send_bgwriter();
  
+ 	if (hibernating)
+ 	{
+ 		/*
+ 		 * Set a timeout so that we can still update BufferAlloc stats
+ 		 * reasonably promptly.
+ 		 */
+ 		int res = WaitLatch(&MyProc->procLatch,
+ 							WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 							BGWRITER_HIBERNATE_MS);
+ 
+ 		/*
+ 		 * Ensure that no less than BgWriterDelay ms has passed between
+ 		 * BgBufferSyncs - WaitLatch() might have returned instantaneously.
+ 		 *
+ 		 * Napping here rather than immediately prior to the WaitLatch()
+ 		 * call ensures that a sudden burst of dirtied buffers won't result
+ 		 * in the initial background writer cycle ineffectually cleaning
+ 		 * only a few pages dirtied earlier in the burst, rather than
+ 		 * cleaning a number that is somewhere closer to optimal for the
+ 		 * cycle, as modeled by the background writing strategy. We'll also
+ 		 * want to nap if we changed our minds, before starting a new cycle.
+ 		 *
+ 		 * We wake on a buffer being dirtied. It's possible that some
+ 		 * useful work will become available for the bgwriter to do without
+ 		 * a buffer actually being dirtied, as when a dirty buffer's usage
+ 		 * count is decremented to zero or it's unpinned. This corner case
+ 		 * is judged as too marginal to justify adding additional SetLatch()
+ 		 * calls in very hot code paths, cheap though those calls may be.
+ 		 */
+ 		if (res & (WL_TIMEOUT | WL_POSTMASTER_DEATH))
+ 			return;
+ 	}
+ 
  	/*
! 	 * Nap for the configured time.
  	 *
  	 * On some platforms, signals won't interrupt the sleep.  To ensure we
  	 * respond reasonably promptly when someone signals us, break down the
  	 * sleep into 1-second increments, and check for interrupts after each
  	 * nap.
  	 */
! 	udelay = BgWriterDelay * 1000L;
  
  	while (udelay > 999999L)
  	{
***************
*** 351,362 **** bg_quickdie(SIGNAL_ARGS)
--- 432,455 ----
  static void
  BgSigHupHandler(SIGNAL_ARGS)
  {
+ 	int save_errno = errno;
+ 
  	got_SIGHUP = true;
+ 	if (MyProc)
+ 		SetLatch(&MyProc->procLatch);
+ 
+ 	errno = save_errno;
  }
  
  /* SIGTERM: set flag to shutdown and exit */
  static void
  ReqShutdownHandler(SIGNAL_ARGS)
  {
+ 	int save_errno = errno;
+ 
  	shutdown_requested = true;
+ 	if (MyProc)
+ 		SetLatch(&MyProc->procLatch);
+ 
+ 	errno = save_errno;
  }
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 953,958 **** void
--- 953,959 ----
  MarkBufferDirty(Buffer buffer)
  {
  	volatile BufferDesc *bufHdr;
+ 	bool	dirtied = false;
  
  	if (!BufferIsValid(buffer))
  		elog(ERROR, "bad buffer ID: %d", buffer);
***************
*** 973,991 **** MarkBufferDirty(Buffer buffer)
  
  	Assert(bufHdr->refcount > 0);
  
  	/*
! 	 * If the buffer was not dirty already, do vacuum accounting.
  	 */
! 	if (!(bufHdr->flags & BM_DIRTY))
  	{
  		VacuumPageDirty++;
  		if (VacuumCostActive)
  			VacuumCostBalance += VacuumCostPageDirty;
  	}
- 
- 	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
- 
- 	UnlockBufHdr(bufHdr);
  }
  
  /*
--- 974,998 ----
  
  	Assert(bufHdr->refcount > 0);
  
+ 	if (!(bufHdr->flags & BM_DIRTY))
+ 		dirtied = true;
+ 
+ 	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+ 
+ 	UnlockBufHdr(bufHdr);
+ 
  	/*
! 	 * If the buffer was not dirty already, do vacuum accounting, and
! 	 * nudge bgwriter.
  	 */
! 	if (dirtied)
  	{
  		VacuumPageDirty++;
  		if (VacuumCostActive)
  			VacuumCostBalance += VacuumCostPageDirty;
+ 		if (ProcGlobal->bgwriterLatch)
+ 			SetLatch(ProcGlobal->bgwriterLatch);
  	}
  }
  
  /*
***************
*** 1307,1314 **** BufferSync(int flags)
   * BgBufferSync -- Write out some dirty buffers in the pool.
   *
   * This is called periodically by the background writer process.
   */
! void
  BgBufferSync(void)
  {
  	/* info obtained from freelist.c */
--- 1314,1325 ----
   * BgBufferSync -- Write out some dirty buffers in the pool.
   *
   * This is called periodically by the background writer process.
+  *
+  * Returns a value indicating if the clocksweep has been "lapped", or if the
+  * bgwriter has been effectively disabled due to finding bgwriter_lru_maxpages
+  * at 0.
   */
! bool
  BgBufferSync(void)
  {
  	/* info obtained from freelist.c */
***************
*** 1365,1371 **** BgBufferSync(void)
  	if (bgwriter_lru_maxpages <= 0)
  	{
  		saved_info_valid = false;
! 		return;
  	}
  
  	/*
--- 1376,1382 ----
  	if (bgwriter_lru_maxpages <= 0)
  	{
  		saved_info_valid = false;
! 		return true;
  	}
  
  	/*
***************
*** 1584,1589 **** BgBufferSync(void)
--- 1595,1602 ----
  			 recent_alloc, strategy_delta, scans_per_alloc, smoothed_density);
  #endif
  	}
+ 
+ 	return bufs_to_lap == 0;
  }
  
  /*
***************
*** 2348,2353 **** SetBufferCommitInfoNeedsSave(Buffer buffer)
--- 2361,2369 ----
  			VacuumPageDirty++;
  			if (VacuumCostActive)
  				VacuumCostBalance += VacuumCostPageDirty;
+ 			/* The bgwriter may need to be woken. */
+ 			if (ProcGlobal->bgwriterLatch)
+ 				SetLatch(ProcGlobal->bgwriterLatch);
  		}
  		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
  		UnlockBufHdr(bufHdr);
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 213,219 **** extern bool HoldingBufferPinThatDelaysRecovery(void);
  extern void AbortBufferIO(void);
  
  extern void BufmgrCommit(void);
! extern void BgBufferSync(void);
  
  extern void AtProcExit_LocalBuffers(void);
  
--- 213,219 ----
  extern void AbortBufferIO(void);
  
  extern void BufmgrCommit(void);
! extern bool BgBufferSync(void);
  
  extern void AtProcExit_LocalBuffers(void);
  
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
***************
*** 188,193 **** typedef struct PROC_HDR
--- 188,195 ----
  	PGPROC	   *freeProcs;
  	/* Head of list of autovacuum's free PGPROC structures */
  	PGPROC	   *autovacFreeProcs;
+ 	/* BGWriter process latch */
+ 	Latch	   *bgwriterLatch;
  	/* Current shared estimate of appropriate spins_per_delay value */
  	int			spins_per_delay;
  	/* The proc of the Startup process, since not in ProcArray */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to