[PATCHES] Logging checkpoints and other slowdown causes

Greg Smith Sat, 31 Mar 2007 23:29:36 -0800

This patch puts entries in the log file around some backend events thatregularly cause client pauses, most of which are visible only when runningwith log_min_messages = debug2. The main focus is regularly occuringcheckpoints, from either running out of segments or hitting a timeout.This is an example of what gets written to the logs at checkpoint time:


DEBUG2:  checkpoint required (timeout passed)
DEBUG2:  checkpoint starting
DEBUG2:  checkpoint flushing buffer pool
DEBUG2:  checkpoint database fsync starting
DEBUG2:  checkpoint database fsync complete

DEBUG1: checkpoint buffers dirty=16.7 MB (13.0%) write=174.8 mssync=3716.5 msDEBUG2: checkpoint complete; 0 transaction log file(s) added, 0 removed,0 recycled

The idea here is that if you want to really watch what your system isdoing, either to track down a problem or for benchmarking, you log atDEBUG2 and timestamp the logs with something like log_line_prefix ='[%m]:'. Then line up your timestamped transactions with the logs tofigure out what happened during bad periods. This is very easy to do withthe timestamped pgbench latency logs for example, so you can put them intime sequence order and see something like "oh, those long transactionsall came in between the fsync start and end". The summary line with thewrite/sync timing information has a broader use and suggests when you needto go deeper, which is why I put that one at DEBUG1.

I also adjust some other log messages to make it easier to run the systemat DEBUG2 without going completely crazy, like moving individual WALsegment recycles to DEBUG3 and standardizing the format of the fork/exitmessages so they're easier to filter out.

My main concern about this code is how it adds several gettimeofday callsand complicated string building inside FlushBufferPool, all of which iswasted if this data isn't actually going to get logged. I'd like to havesomething that tells me whether I should even bother, but I didn't want topoke into the elog variables for fear of adding a dependency on itscurrent structure to here. Ideally, it would be nice to call somethinglike WillLog(DEBUG2) and get a boolean back saying whether something atthat log level will be output anywhere; I don't know enough about thelogging code to add such a thing to it myself.


--
* Greg Smith [EMAIL PROTECTED] http://www.gregsmith.com Baltimore, MD

Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.265
diff -c -r1.265 xlog.c
*** src/backend/access/transam/xlog.c   3 Mar 2007 20:02:26 -0000       1.265
--- src/backend/access/transam/xlog.c   1 Apr 2007 04:55:19 -0000
***************
*** 1591,1600 ****
                                        if (new_highbits != old_highbits ||
                                                new_segno >= old_segno + 
(uint32) (CheckPointSegments-1))
                                        {
! #ifdef WAL_DEBUG
!                                               if (XLOG_DEBUG)
!                                                       elog(LOG, "time for a 
checkpoint, signaling bgwriter");
! #endif
                                                RequestCheckpoint(false, true);
                                        }
                                }
--- 1591,1598 ----
                                        if (new_highbits != old_highbits ||
                                                new_segno >= old_segno + 
(uint32) (CheckPointSegments-1))
                                        {
!                                               ereport(DEBUG2,(errmsg(
!                                                       "checkpoint required 
(wrote checkpoint_segments)")));
                                                RequestCheckpoint(false, true);
                                        }
                                }
***************
*** 1858,1863 ****
--- 1856,1863 ----
                                (errcode_for_file_access(),
                                 errmsg("could not create file \"%s\": %m", 
tmppath)));
  
+       ereport(DEBUG2, (errmsg("WAL creating and filling new file on disk")));
+ 
        /*
         * Zero-fill the file.  We have to do this the hard way to ensure that 
all
         * the file space has really been allocated --- on platforms that allow
***************
*** 2565,2571 ****
                                                                                
   true, &max_advance,
                                                                                
   true))
                                {
!                                       ereport(DEBUG2,
                                                        (errmsg("recycled 
transaction log file \"%s\"",
                                                                        
xlde->d_name)));
                                        (*nsegsrecycled)++;
--- 2565,2571 ----
                                                                                
   true, &max_advance,
                                                                                
   true))
                                {
!                                       ereport(DEBUG3,
                                                        (errmsg("recycled 
transaction log file \"%s\"",
                                                                        
xlde->d_name)));
                                        (*nsegsrecycled)++;
***************
*** 5424,5429 ****
--- 5424,5430 ----
                        LWLockRelease(CheckpointStartLock);
                        LWLockRelease(CheckpointLock);
                        END_CRIT_SECTION();
+                       ereport(DEBUG2,(errmsg("checkpoint request ignored (no 
changes)")));
                        return;
                }
        }
***************
*** 6087,6092 ****
--- 6088,6094 ----
                 * have different checkpoint positions and hence different 
history
                 * file names, even if nothing happened in between.
                 */
+               ereport(DEBUG2, (errmsg("checkpoint required (starting 
backup)")));
                RequestCheckpoint(true, false);
  
                /*
Index: src/backend/postmaster/autovacuum.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/autovacuum.c,v
retrieving revision 1.40
diff -c -r1.40 autovacuum.c
*** src/backend/postmaster/autovacuum.c 28 Mar 2007 22:17:12 -0000      1.40
--- src/backend/postmaster/autovacuum.c 1 Apr 2007 04:55:20 -0000
***************
*** 811,816 ****
--- 811,818 ----
                /* And do an appropriate amount of work */
                recentXid = ReadNewTransactionId();
                do_autovacuum();
+               ereport(DEBUG2,
+                               (errmsg("autovacuum: processing database \"%s\" 
complete", dbname)));
        }
  
        /*
Index: src/backend/postmaster/bgwriter.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/bgwriter.c,v
retrieving revision 1.37
diff -c -r1.37 bgwriter.c
*** src/backend/postmaster/bgwriter.c   30 Mar 2007 18:34:55 -0000      1.37
--- src/backend/postmaster/bgwriter.c   1 Apr 2007 04:55:21 -0000
***************
*** 392,397 ****
--- 392,398 ----
                if (elapsed_secs >= CheckPointTimeout)
                {
                        do_checkpoint = true;
+                       ereport(DEBUG2, (errmsg("checkpoint required (timeout 
passed)")));
                        if (!force_checkpoint)
                                BgWriterStats.m_timed_checkpoints++;
                }
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/postmaster.c,v
retrieving revision 1.527
diff -c -r1.527 postmaster.c
*** src/backend/postmaster/postmaster.c 22 Mar 2007 19:53:30 -0000      1.527
--- src/backend/postmaster/postmaster.c 1 Apr 2007 04:55:23 -0000
***************
*** 2630,2636 ****
  
        /* in parent, successful fork */
        ereport(DEBUG2,
!                       (errmsg_internal("forked new backend, pid=%d socket=%d",
                                                         (int) pid, 
port->sock)));
  
        /*
--- 2630,2636 ----
  
        /* in parent, successful fork */
        ereport(DEBUG2,
!                       (errmsg_internal("forked new backend (PID %d) 
socket=%d",
                                                         (int) pid, 
port->sock)));
  
        /*
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.216
diff -c -r1.216 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 30 Mar 2007 18:34:55 -0000      1.216
--- src/backend/storage/buffer/bufmgr.c 1 Apr 2007 04:55:25 -0000
***************
*** 946,957 ****
   *
   * This is called at checkpoint time to write out all dirty shared buffers.
   */
! void
  BufferSync(void)
  {
        int                     buf_id;
        int                     num_to_scan;
        int                     absorb_counter;
  
        /*
         * Find out where to start the circular scan.
--- 946,958 ----
   *
   * This is called at checkpoint time to write out all dirty shared buffers.
   */
! int
  BufferSync(void)
  {
        int                     buf_id;
        int                     num_to_scan;
        int                     absorb_counter;
+       int                     dirty_buffers;
  
        /*
         * Find out where to start the circular scan.
***************
*** 966,976 ****
         */
        num_to_scan = NBuffers;
        absorb_counter = WRITES_PER_ABSORB;
        while (num_to_scan-- > 0)
        {
                if (SyncOneBuffer(buf_id, false))
                {
!                       BgWriterStats.m_buf_written_checkpoints++;
  
                        /*
                         * If in bgwriter, absorb pending fsync requests after 
each
--- 967,978 ----
         */
        num_to_scan = NBuffers;
        absorb_counter = WRITES_PER_ABSORB;
+       dirty_buffers = 0;
        while (num_to_scan-- > 0)
        {
                if (SyncOneBuffer(buf_id, false))
                {
!                       dirty_buffers++;
  
                        /*
                         * If in bgwriter, absorb pending fsync requests after 
each
***************
*** 987,992 ****
--- 989,997 ----
                if (++buf_id >= NBuffers)
                        buf_id = 0;
        }
+ 
+       BgWriterStats.m_buf_written_checkpoints+=dirty_buffers;
+       return dirty_buffers;
  }
  
  /*
***************
*** 1288,1295 ****
  void
  FlushBufferPool(void)
  {
!       BufferSync();
        smgrsync();
  }
  
  
--- 1293,1320 ----
  void
  FlushBufferPool(void)
  {
!       int dirty;
!       struct timeval start_t, sync_t, end_t;
!       long u_sec_for_buffers, u_sec_for_sync;
! 
!       gettimeofday(&start_t, NULL);
!       ereport(DEBUG2, (errmsg("checkpoint flushing buffer pool")));
!       dirty=BufferSync();
!       gettimeofday(&sync_t, NULL);
!       ereport(DEBUG2, (errmsg("checkpoint database fsync starting")));
        smgrsync();
+       gettimeofday(&end_t, NULL);
+       ereport(DEBUG2, (errmsg("checkpoint database fsync complete")));
+ 
+       u_sec_for_buffers = 1000000 * (long) (sync_t.tv_sec - start_t.tv_sec) +
+               (long) (sync_t.tv_usec - start_t.tv_usec);
+       u_sec_for_sync = 1000000 * (long) (end_t.tv_sec - sync_t.tv_sec) +
+               (long) (end_t.tv_usec - sync_t.tv_usec);
+ 
+       ereport(DEBUG1,(errmsg(
+               "checkpoint buffers dirty=%.1f MB (%.1f%%) write=%.1f ms 
sync=%.1f ms",
+               (float) (BLCKSZ * dirty) / (1024*1024), (float) 100 * dirty / 
NBuffers,
+               (float) u_sec_for_buffers / 1000, (float) u_sec_for_sync / 
1000)));
  }
  
  
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.102
diff -c -r1.102 bufmgr.h
*** src/include/storage/bufmgr.h        5 Jan 2007 22:19:57 -0000       1.102
--- src/include/storage/bufmgr.h        1 Apr 2007 04:55:26 -0000
***************
*** 150,156 ****
  extern void AbortBufferIO(void);
  
  extern void BufmgrCommit(void);
! extern void BufferSync(void);
  extern void BgBufferSync(void);
  
  extern void AtProcExit_LocalBuffers(void);
--- 150,156 ----
  extern void AbortBufferIO(void);
  
  extern void BufmgrCommit(void);
! extern int BufferSync(void);
  extern void BgBufferSync(void);
  
  extern void AtProcExit_LocalBuffers(void);

---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

[PATCHES] Logging checkpoints and other slowdown causes

Reply via email to