On 4 January 2012 07:24, Heikki Linnakangas
<[email protected]> wrote:
> I think SetBufferCommitInfoNeedsSave() needs the same treatment as
> MarkBufferDirty(). And it would probably be good to only set the latch if
> the buffer wasn't dirty already. Setting a latch that's already set is fast,
> but surely it's even faster to not even try.
That seems reasonable. Revised patch is attached.
> Yeah, I'd like to see a micro-benchmark of a worst-case scenario. I'm a bit
> worried about the impact on systems with a lot of CPUs. If you have a lot of
> CPUs writing to the same cache line that contains the latch's flag, that
> might get expensive.
Also reasonable, but I don't think that I'll get around to it until
after the final commitfest deadline.
--
Peter Geoghegan http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training and Services
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
new file mode 100644
index e3ae92d..0fcaf01
*** a/src/backend/bootstrap/bootstrap.c
--- b/src/backend/bootstrap/bootstrap.c
*************** AuxiliaryProcessMain(int argc, char *arg
*** 416,421 ****
--- 416,422 ----
case BgWriterProcess:
/* don't set signals, bgwriter has its own agenda */
+ ProcGlobal->bgwriterLatch = &MyProc->procLatch; /* Expose */
BackgroundWriterMain();
proc_exit(1); /* should never return */
diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c
new file mode 100644
index fc1a579..a5248ba
*** a/src/backend/port/unix_latch.c
--- b/src/backend/port/unix_latch.c
***************
*** 50,55 ****
--- 50,56 ----
#include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "storage/latch.h"
+ #include "storage/proc.h"
#include "storage/shmem.h"
/* Are we currently in WaitLatch? The signal handler would like to know. */
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
new file mode 100644
index 1f8d2d6..59e5180
*** a/src/backend/postmaster/bgwriter.c
--- b/src/backend/postmaster/bgwriter.c
***************
*** 51,56 ****
--- 51,58 ----
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
+ #include "storage/proc.h"
+ #include "storage/procsignal.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/spin.h"
*************** static volatile sig_atomic_t shutdown_re
*** 73,78 ****
--- 75,82 ----
/*
* Private state
*/
+ #define BGWRITER_HIBERNATE_MS 10000
+
static bool am_bg_writer = false;
/* Prototypes for private functions */
*************** BackgroundWriterMain(void)
*** 123,129 ****
pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
! pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
pqsignal(SIGUSR2, SIG_IGN);
/*
--- 127,133 ----
pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
! pqsignal(SIGUSR1, procsignal_sigusr1_handler);
pqsignal(SIGUSR2, SIG_IGN);
/*
*************** BackgroundWriterMain(void)
*** 238,278 ****
for (;;)
{
/*
! * Emergency bailout if postmaster has died. This is to avoid the
! * necessity for manual cleanup of all postmaster children.
*/
! if (!PostmasterIsAlive())
! exit(1);
!
! if (got_SIGHUP)
{
! got_SIGHUP = false;
! ProcessConfigFile(PGC_SIGHUP);
! /* update global shmem state for sync rep */
}
! if (shutdown_requested)
{
/*
! * From here on, elog(ERROR) should end with exit(1), not send
! * control back to the sigsetjmp block above
*/
! ExitOnAnyError = true;
! /* Normal exit from the bgwriter is here */
! proc_exit(0); /* done */
! }
! /*
! * Do one cycle of dirty-buffer writing.
! */
! BgBufferSync();
! /* Nap for the configured time. */
! BgWriterNap();
}
}
/*
* BgWriterNap -- Nap for the configured time or until a signal is received.
*/
static void
BgWriterNap(void)
--- 242,337 ----
for (;;)
{
/*
! * Do one cycle of dirty-buffer writing, potentially hibernating if
! * there have been no buffers to write.
*/
! if (!BgBufferSync())
{
! /* Clock sweep was not "lapped" - nap for the configured time. */
! BgWriterNap();
}
! else
{
+ int res = 0;
/*
! * Initiate hibernation by "arming" the process latch. This usage
! * differs from the standard pattern for latches outlined in
! * latch.h, where the latch is generally reset immediately after
! * WaitLatch returns, in the next iteration of an infinite loop.
! *
! * It should only be possible to *really* set the latch from client
! * backends while the bgwriter is idle, and not during productive
! * cycles where buffers are written, or shortly thereafter. It's
! * important that the SetLatch() call within the buffer manager
! * usually inexpensively returns having found that the latch is
! * already set. For write-heavy workloads, it's quite possible that
! * those calls will never actually get the opportunity to really
! * set the latch.
! *
! * By only giving backends the opportunity to really set the
! * latch at this point via ResetLatch(), we avoid the overhead of
! * sending a signal perhaps as much as once per bgwriter cycle, or
! * maybe even more frequently than that due to the asynchronous
! * nature of signals.
*/
! ResetLatch(&MyProc->procLatch);
! /*
! * Another nap and sync, in case some buffers were dirtied since
! * the immediately prior BgBufferSync() call and we quite
! * naturally didn't hear about it because the SetLatch calls
! * found the latch already set (i.e. it was "disarmed").
! */
! BgWriterNap();
! /*
! * When syncing, take the opportunity to check one last time if
! * we're still "lapping" the clock sweep, and only proceed if we are.
! */
! if (BgBufferSync())
! /*
! * Set a timeout so that we can still update BufferAlloc stats
! * reasonably promptly.
! */
! res = WaitLatch(&MyProc->procLatch,
! WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
! BGWRITER_HIBERNATE_MS);
! else
! /*
! * Changed our mind, set own proc latch, thereby often avoiding
! * signaling from a backend.
! */
! SetLatch(&MyProc->procLatch);
!
! /*
! * Ensure that no less than BgWriterDelay ms has passed between
! * BgBufferSyncs - WaitLatch() might have returned instantaneously.
! *
! * Napping here rather than immediately prior to the WaitLatch()
! * call ensures that a sudden burst of dirtied buffers won't result
! * in the initial background writer cycle ineffectually cleaning
! * only a few pages dirtied earlier in the burst, rather than
! * cleaning a number that is somewhere closer to optimal for the
! * cycle, as modeled by the background writing strategy. We'll also
! * want to nap if we changed our minds, before starting a new cycle.
! *
! * We wake on a buffer being dirtied. It's possible that some
! * useful work will become available for the bgwriter to do without
! * a buffer actually being dirtied, as when a dirty buffer's usage
! * count is decremented to zero or it's unpinned. This corner case
! * is judged as too marginal to justify adding additional SetLatch()
! * calls in very hot code paths, cheap though those calls may be.
! */
! if (!(res & WL_TIMEOUT))
! BgWriterNap();
! }
}
}
/*
* BgWriterNap -- Nap for the configured time or until a signal is received.
+ *
+ * Respond to a signal handler having set a flag for us.
*/
static void
BgWriterNap(void)
*************** BgWriterNap(void)
*** 285,302 ****
pgstat_send_bgwriter();
/*
! * Nap for the configured time, or sleep for 10 seconds if there is no
! * bgwriter activity configured.
*
* On some platforms, signals won't interrupt the sleep. To ensure we
* respond reasonably promptly when someone signals us, break down the
* sleep into 1-second increments, and check for interrupts after each
* nap.
*/
! if (bgwriter_lru_maxpages > 0)
! udelay = BgWriterDelay * 1000L;
! else
! udelay = 10000000L; /* Ten seconds */
while (udelay > 999999L)
{
--- 344,381 ----
pgstat_send_bgwriter();
/*
! * Emergency bailout if postmaster has died. This is to avoid the
! * necessity for manual cleanup of all postmaster children.
! */
! if (!PostmasterIsAlive())
! exit(1);
!
! if (got_SIGHUP)
! {
! got_SIGHUP = false;
! ProcessConfigFile(PGC_SIGHUP);
! /* update global shmem state for sync rep */
! }
! if (shutdown_requested)
! {
! /*
! * From here on, elog(ERROR) should end with exit(1), not send
! * control back to the sigsetjmp block above
! */
! ExitOnAnyError = true;
! /* Normal exit from the bgwriter is here */
! proc_exit(0); /* done */
! }
!
! /*
! * Nap for the configured time.
*
* On some platforms, signals won't interrupt the sleep. To ensure we
* respond reasonably promptly when someone signals us, break down the
* sleep into 1-second increments, and check for interrupts after each
* nap.
*/
! udelay = BgWriterDelay * 1000L;
while (udelay > 999999L)
{
*************** bg_quickdie(SIGNAL_ARGS)
*** 351,362 ****
--- 430,453 ----
static void
BgSigHupHandler(SIGNAL_ARGS)
{
+ int save_errno = errno;
+
got_SIGHUP = true;
+ if (MyProc)
+ SetLatch(&MyProc->procLatch);
+
+ errno = save_errno;
}
/* SIGTERM: set flag to shutdown and exit */
static void
ReqShutdownHandler(SIGNAL_ARGS)
{
+ int save_errno = errno;
+
shutdown_requested = true;
+ if (MyProc)
+ SetLatch(&MyProc->procLatch);
+
+ errno = save_errno;
}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
new file mode 100644
index 91cc001..15ddf83
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
*************** MarkBufferDirty(Buffer buffer)
*** 981,986 ****
--- 981,989 ----
VacuumPageDirty++;
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
+ /* The bgwriter may need to be woken. */
+ if (ProcGlobal->bgwriterLatch)
+ SetLatch(ProcGlobal->bgwriterLatch);
}
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
*************** BufferSync(int flags)
*** 1307,1314 ****
* BgBufferSync -- Write out some dirty buffers in the pool.
*
* This is called periodically by the background writer process.
*/
! void
BgBufferSync(void)
{
/* info obtained from freelist.c */
--- 1310,1321 ----
* BgBufferSync -- Write out some dirty buffers in the pool.
*
* This is called periodically by the background writer process.
+ *
+ * Returns a value indicating if the clocksweep has been "lapped", or if the
+ * bgwriter has been effectively disabled due to finding bgwriter_lru_maxpages
+ * at 0.
*/
! bool
BgBufferSync(void)
{
/* info obtained from freelist.c */
*************** BgBufferSync(void)
*** 1365,1371 ****
if (bgwriter_lru_maxpages <= 0)
{
saved_info_valid = false;
! return;
}
/*
--- 1372,1378 ----
if (bgwriter_lru_maxpages <= 0)
{
saved_info_valid = false;
! return true;
}
/*
*************** BgBufferSync(void)
*** 1584,1589 ****
--- 1591,1598 ----
recent_alloc, strategy_delta, scans_per_alloc, smoothed_density);
#endif
}
+
+ return bufs_to_lap == 0;
}
/*
*************** SetBufferCommitInfoNeedsSave(Buffer buff
*** 2348,2353 ****
--- 2357,2365 ----
VacuumPageDirty++;
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
+ /* The bgwriter may need to be woken. */
+ if (ProcGlobal->bgwriterLatch)
+ SetLatch(ProcGlobal->bgwriterLatch);
}
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h
new file mode 100644
index 6cc4b62..970faa2
*** a/src/include/postmaster/bgwriter.h
--- b/src/include/postmaster/bgwriter.h
***************
*** 13,18 ****
--- 13,19 ----
#define _BGWRITER_H
#include "storage/block.h"
+ #include "storage/latch.h"
#include "storage/relfilenode.h"
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
new file mode 100644
index a03c068..de1bbd0
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
*************** extern bool HoldingBufferPinThatDelaysRe
*** 213,219 ****
extern void AbortBufferIO(void);
extern void BufmgrCommit(void);
! extern void BgBufferSync(void);
extern void AtProcExit_LocalBuffers(void);
--- 213,219 ----
extern void AbortBufferIO(void);
extern void BufmgrCommit(void);
! extern bool BgBufferSync(void);
extern void AtProcExit_LocalBuffers(void);
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
new file mode 100644
index 358d1a4..4520f27
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
*************** typedef struct PROC_HDR
*** 188,193 ****
--- 188,195 ----
PGPROC *freeProcs;
/* Head of list of autovacuum's free PGPROC structures */
PGPROC *autovacFreeProcs;
+ /* BGWriter process latch */
+ Latch *bgwriterLatch;
/* Current shared estimate of appropriate spins_per_delay value */
int spins_per_delay;
/* The proc of the Startup process, since not in ProcArray */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers