Hi, On 2017-03-30 13:10:41 +1100, Haribabu Kommi wrote: > diff --git a/src/backend/access/transam/xlog.c > b/src/backend/access/transam/xlog.c > index 5d58f09..a29c108 100644 > --- a/src/backend/access/transam/xlog.c > +++ b/src/backend/access/transam/xlog.c > @@ -600,6 +600,9 @@ typedef struct XLogCtlData > */ > XLogwrtResult LogwrtResult; > > + /* Protected by WALWriteLock */ > + PgStat_WalWritesCounts stats;
> /* > + * Check whether the current process is a normal backend or not. > + * This function checks for the background processes that does > + * some WAL write activity only and other background processes > + * are not considered. It considers all the background workers > + * as WAL write activity workers. > + * > + * Returns FALSE - when the current process is a normal backend > + * TRUE - when the current process a background process/worker > + */ I don't think we commonly capitalize true/false these days. > +static bool > +am_background_process() > +{ > + /* check whether current process is a background process/worker? */ > + if (AmBackgroundWriterProcess() || > + AmWalWriterProcess() || > + AmCheckpointerProcess() || > + AmStartupProcess() || > + IsBackgroundWorker || > + am_walsender || > + am_autovacuum_worker) > + { > + return true; > + } > + > + return false; > +} Uhm, wouldn't inverting this be a lot easier and future proof? There's far fewer non-background processes. > +/* > * Write and/or fsync the log at least as far as WriteRqst indicates. > * > * If flexible == TRUE, we don't have to write as far as WriteRqst, but > @@ -2341,6 +2377,9 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) > int npages; > int startidx; > uint32 startoffset; > + instr_time io_start, > + io_time; > + bool is_background_process = am_background_process(); > > /* We should always be inside a critical section here */ > Assert(CritSectionCount > 0); > @@ -2458,6 +2497,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) > /* OK to write the page(s) */ > from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; > nbytes = npages * (Size) XLOG_BLCKSZ; > + > + /* Start timer to acquire start time of the wal write */ > + if (track_io_timing) > + INSTR_TIME_SET_CURRENT(io_start); > + I'm more than a bit hesitant adding overhead to WAL writing - it's already quite a bit of a bottleneck. Normally track_io_timing just makes things a tiny bit slower, but doesn't cause a scalability issue, here it does. This is all done under an already highly contended lock. > nleft = nbytes; > do > { > @@ -2480,6 +2524,34 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) > from += written; > } while (nleft > 0); > > + /* calculate the total time spent for wal writing */ > + if (track_io_timing) > + { > + INSTR_TIME_SET_CURRENT(io_time); > + INSTR_TIME_SUBTRACT(io_time, io_start); > + > + if (is_background_process) > + XLogCtl->stats.total_write_time += > INSTR_TIME_GET_MILLISEC(io_time); > + else > + XLogCtl->stats.backend_total_write_time > += INSTR_TIME_GET_MILLISEC(io_time); > + } > + else > + { > + XLogCtl->stats.total_write_time = 0; > + XLogCtl->stats.backend_total_write_time = 0; > + } > + > + /* check whether writer is a normal backend or not? */ > + if (is_background_process) > + XLogCtl->stats.writes++; > + else > + XLogCtl->stats.backend_writes++; > + > + if (is_background_process) > + XLogCtl->stats.write_blocks += npages; > + else > + XLogCtl->stats.backend_write_blocks += npages; > + > /* Update state for write */ > openLogOff += nbytes; > npages = 0; > @@ -2499,8 +2571,29 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) > */ > if (finishing_seg) > { > + /* Start timer to acquire start time of the wal > sync */ > + if (track_io_timing) > + INSTR_TIME_SET_CURRENT(io_start); > + > issue_xlog_fsync(openLogFile, openLogSegNo); > > + /* calculate the total time spent for wal sync > */ > + if (track_io_timing) > + { > + INSTR_TIME_SET_CURRENT(io_time); > + INSTR_TIME_SUBTRACT(io_time, io_start); > + > + if (is_background_process) > + XLogCtl->stats.total_sync_time > += INSTR_TIME_GET_MILLISEC(io_time); > + else > + > XLogCtl->stats.backend_total_sync_time += INSTR_TIME_GET_MILLISEC(io_time); > + } > + else > + { > + XLogCtl->stats.total_sync_time = 0; > + XLogCtl->stats.backend_total_sync_time > = 0; > + } > + > /* signal that we need to wakeup walsenders > later */ > WalSndWakeupRequest(); > > @@ -2568,7 +2661,28 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) > openLogOff = 0; > } > > + /* Start timer to acquire start time of the wal sync */ > + if (track_io_timing) > + INSTR_TIME_SET_CURRENT(io_start); > + > issue_xlog_fsync(openLogFile, openLogSegNo); > + > + /* calculate the total time spent for wal sync */ > + if (track_io_timing) > + { > + INSTR_TIME_SET_CURRENT(io_time); > + INSTR_TIME_SUBTRACT(io_time, io_start); > + > + if (is_background_process) > + XLogCtl->stats.total_sync_time += > INSTR_TIME_GET_MILLISEC(io_time); > + else > + XLogCtl->stats.backend_total_sync_time > += INSTR_TIME_GET_MILLISEC(io_time); > + } > + else > + { > + XLogCtl->stats.total_sync_time = 0; > + XLogCtl->stats.backend_total_sync_time = 0; > + } > } I'm *very* doubtful about this, but even if we do it, this needs careful benchmarking. > /* signal that we need to wakeup walsenders later */ > @@ -3043,6 +3157,9 @@ XLogBackgroundFlush(void) > { > XLogWrite(WriteRqst, flexible); > } > + > + /* Collect all the wal write shared counters into local, and report it > to stats collector */ > + memcpy(&LocalWalWritesStats.stats, &XLogCtl->stats, > sizeof(PgStat_WalWritesCounts)); > LWLockRelease(WALWriteLock); Hm. I think in a good number of workloads hti sis never reached, because we return early. I think this is an interesting feature, but I don't think it's ready, and it was submitted very late in the v10 release cycle. Therefore I think this should be moved to the next CF. Greetings, Andres Freund -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers