Hi,

On 2017-03-30 13:10:41 +1100, Haribabu Kommi wrote:
> diff --git a/src/backend/access/transam/xlog.c 
> b/src/backend/access/transam/xlog.c
> index 5d58f09..a29c108 100644
> --- a/src/backend/access/transam/xlog.c
> +++ b/src/backend/access/transam/xlog.c
> @@ -600,6 +600,9 @@ typedef struct XLogCtlData
>        */
>       XLogwrtResult LogwrtResult;
>  
> +     /* Protected by WALWriteLock */
> +     PgStat_WalWritesCounts stats;

>  /*
> + * Check whether the current process is a normal backend or not.
> + * This function checks for the background processes that does
> + * some WAL write activity only and other background processes
> + * are not considered. It considers all the background workers
> + * as WAL write activity workers.
> + *
> + * Returns FALSE - when the current process is a normal backend
> + *              TRUE - when the current process a background process/worker
> + */

I don't think we commonly capitalize true/false these days.

> +static bool
> +am_background_process()
> +{
> +     /* check whether current process is a background process/worker? */
> +     if (AmBackgroundWriterProcess() ||
> +             AmWalWriterProcess() ||
> +             AmCheckpointerProcess() ||
> +             AmStartupProcess() ||
> +             IsBackgroundWorker ||
> +             am_walsender ||
> +             am_autovacuum_worker)
> +     {
> +             return true;
> +     }
> +
> +     return false;
> +}

Uhm, wouldn't inverting this be a lot easier and future proof?  There's
far fewer non-background processes.

> +/*
>   * Write and/or fsync the log at least as far as WriteRqst indicates.
>   *
>   * If flexible == TRUE, we don't have to write as far as WriteRqst, but
> @@ -2341,6 +2377,9 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
>       int                     npages;
>       int                     startidx;
>       uint32          startoffset;
> +     instr_time      io_start,
> +                             io_time;
> +     bool            is_background_process = am_background_process();
>  
>       /* We should always be inside a critical section here */
>       Assert(CritSectionCount > 0);
> @@ -2458,6 +2497,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
>                       /* OK to write the page(s) */
>                       from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
>                       nbytes = npages * (Size) XLOG_BLCKSZ;
> +
> +                     /* Start timer to acquire start time of the wal write */
> +                     if (track_io_timing)
> +                             INSTR_TIME_SET_CURRENT(io_start);
> +

I'm more than a bit hesitant adding overhead to WAL writing - it's
already quite a bit of a bottleneck.  Normally track_io_timing just
makes things a tiny bit slower, but doesn't cause a scalability issue,
here it does.  This is all done under an already highly contended lock.


>                       nleft = nbytes;
>                       do
>                       {
> @@ -2480,6 +2524,34 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
>                               from += written;
>                       } while (nleft > 0);
>  
> +                     /* calculate the total time spent for wal writing */
> +                     if (track_io_timing)
> +                     {
> +                             INSTR_TIME_SET_CURRENT(io_time);
> +                             INSTR_TIME_SUBTRACT(io_time, io_start);
> +
> +                             if (is_background_process)
> +                                     XLogCtl->stats.total_write_time += 
> INSTR_TIME_GET_MILLISEC(io_time);
> +                             else
> +                                     XLogCtl->stats.backend_total_write_time 
> += INSTR_TIME_GET_MILLISEC(io_time);
> +                     }
> +                     else
> +                     {
> +                             XLogCtl->stats.total_write_time = 0;
> +                             XLogCtl->stats.backend_total_write_time = 0;
> +                     }
> +
> +                     /* check whether writer is a normal backend or not? */
> +                     if (is_background_process)
> +                             XLogCtl->stats.writes++;
> +                     else
> +                             XLogCtl->stats.backend_writes++;
> +
> +                     if (is_background_process)
> +                             XLogCtl->stats.write_blocks += npages;
> +                     else
> +                             XLogCtl->stats.backend_write_blocks += npages;
> +
>                       /* Update state for write */
>                       openLogOff += nbytes;
>                       npages = 0;
> @@ -2499,8 +2571,29 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
>                        */
>                       if (finishing_seg)
>                       {
> +                             /* Start timer to acquire start time of the wal 
> sync */
> +                             if (track_io_timing)
> +                                     INSTR_TIME_SET_CURRENT(io_start);
> +
>                               issue_xlog_fsync(openLogFile, openLogSegNo);
>  
> +                             /* calculate the total time spent for wal sync 
> */
> +                             if (track_io_timing)
> +                             {
> +                                     INSTR_TIME_SET_CURRENT(io_time);
> +                                     INSTR_TIME_SUBTRACT(io_time, io_start);
> +
> +                                     if (is_background_process)
> +                                             XLogCtl->stats.total_sync_time 
> += INSTR_TIME_GET_MILLISEC(io_time);
> +                                     else
> +                                             
> XLogCtl->stats.backend_total_sync_time += INSTR_TIME_GET_MILLISEC(io_time);
> +                             }
> +                             else
> +                             {
> +                                     XLogCtl->stats.total_sync_time = 0;
> +                                     XLogCtl->stats.backend_total_sync_time 
> = 0;
> +                             }
> +
>                               /* signal that we need to wakeup walsenders 
> later */
>                               WalSndWakeupRequest();
>  
> @@ -2568,7 +2661,28 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
>                               openLogOff = 0;
>                       }
>  
> +                     /* Start timer to acquire start time of the wal sync */
> +                     if (track_io_timing)
> +                             INSTR_TIME_SET_CURRENT(io_start);
> +
>                       issue_xlog_fsync(openLogFile, openLogSegNo);
> +
> +                     /* calculate the total time spent for wal sync */
> +                     if (track_io_timing)
> +                     {
> +                             INSTR_TIME_SET_CURRENT(io_time);
> +                             INSTR_TIME_SUBTRACT(io_time, io_start);
> +
> +                             if (is_background_process)
> +                                     XLogCtl->stats.total_sync_time += 
> INSTR_TIME_GET_MILLISEC(io_time);
> +                             else
> +                                     XLogCtl->stats.backend_total_sync_time 
> += INSTR_TIME_GET_MILLISEC(io_time);
> +                     }
> +                     else
> +                     {
> +                             XLogCtl->stats.total_sync_time = 0;
> +                             XLogCtl->stats.backend_total_sync_time = 0;
> +                     }
>               }

I'm *very* doubtful about this, but even if we do it, this needs careful
benchmarking.

>               /* signal that we need to wakeup walsenders later */
> @@ -3043,6 +3157,9 @@ XLogBackgroundFlush(void)
>       {
>               XLogWrite(WriteRqst, flexible);
>       }
> +
> +     /* Collect all the wal write shared counters into local, and report it 
> to stats collector */
> +     memcpy(&LocalWalWritesStats.stats, &XLogCtl->stats, 
> sizeof(PgStat_WalWritesCounts));
>       LWLockRelease(WALWriteLock);

Hm. I think in a good number of workloads hti sis never reached, because
we return early.


I think this is an interesting feature, but I don't think it's ready,
and it was submitted very late in the v10 release cycle. Therefore I
think this should be moved to the next CF.

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to