Re: Add Information during standby recovery conflicts

Fujii Masao Thu, 07 Jan 2021 20:19:37 -0800


On 2021/01/08 11:17, Kyotaro Horiguchi wrote:

At Fri, 8 Jan 2021 01:32:11 +0900, Fujii Masao <masao.fu...@oss.nttdata.com> 
wrote in


Attached is the updated version of the patch. This can be applied to
current master.

With the patch, for example, if the startup process waited longer than
deadlock_timeout for the recovery conflict on the lock, the latter log
message in the followings would be additionally output.

     LOG: recovery still waiting after 1001.223 ms: recovery conflict on
     lock
     LOG: recovery finished waiting after 19004.694 ms: recovery conflict
     on lock


+                       /*
+                        * Emit the log message if recovery conflict on buffer 
pin was resolved but
+                        * the startup process waited longer than 
deadlock_timeout for it.

The first line is beyond the 80th column.


Thanks for pointing out this! This happened because I forgot to run pgindent
for bufmgr.c. Attached is the updated version of the patch.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 7c0a673a8d..82864bbb24 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6961,7 +6961,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
       <listitem>
        <para>
         Controls whether a log message is produced when the startup process
-        is waiting longer than <varname>deadlock_timeout</varname>
+        waits longer than <varname>deadlock_timeout</varname>
         for recovery conflicts.  This is useful in determining if recovery
         conflicts prevent the recovery from applying WAL.
        </para>
diff --git a/src/backend/storage/buffer/bufmgr.c 
b/src/backend/storage/buffer/bufmgr.c
index 71b5852224..c116e75ca1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3846,6 +3846,16 @@ LockBufferForCleanup(Buffer buffer)
                        /* Successfully acquired exclusive lock with pincount 1 
*/
                        UnlockBufHdr(bufHdr, buf_state);
 
+                       /*
+                        * Emit the log message if recovery conflict on buffer 
pin was
+                        * resolved but the startup process waited longer than
+                        * deadlock_timeout for it.
+                        */
+                       if (logged_recovery_conflict)
+                               
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+                                                                       
waitStart, GetCurrentTimestamp(),
+                                                                       NULL, 
false);
+
                        /* Report change to non-waiting status */
                        if (new_status)
                        {
@@ -3900,7 +3910,7 @@ LockBufferForCleanup(Buffer buffer)
                                                                                
           DeadlockTimeout))
                                {
                                        
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
-                                                                               
waitStart, now, NULL);
+                                                                               
waitStart, now, NULL, true);
                                        logged_recovery_conflict = true;
                                }
                        }
diff --git a/src/backend/storage/ipc/standby.c 
b/src/backend/storage/ipc/standby.c
index d4b0f65ba2..39a30c00f7 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
  * wait_start is the timestamp when the caller started to wait.
  * now is the timestamp when this function has been called.
  * wait_list is the list of virtual transaction ids assigned to
- * conflicting processes.
+ * conflicting processes. still_waiting indicates whether
+ * the startup process is still waiting for the recovery conflict
+ * to be resolved or not.
  */
 void
 LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
-                                       TimestampTz now, VirtualTransactionId 
*wait_list)
+                                       TimestampTz now, VirtualTransactionId 
*wait_list,
+                                       bool still_waiting)
 {
        long            secs;
        int                     usecs;
@@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz 
wait_start,
        StringInfoData buf;
        int                     nprocs = 0;
 
+       /*
+        * There must be no conflicting processes when the recovery conflict has
+        * already been resolved.
+        */
+       Assert(still_waiting || wait_list == NULL);
+
        TimestampDifference(wait_start, now, &secs, &usecs);
        msecs = secs * 1000 + usecs / 1000;
        usecs = usecs % 1000;
@@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz 
wait_start,
         * conflicting backends in a detail message. Note that if all the 
backends
         * in the list are not active, no detail message is logged.
         */
-       ereport(LOG,
-                       errmsg("recovery still waiting after %ld.%03d ms: %s",
-                                  msecs, usecs, 
_(get_recovery_conflict_desc(reason))),
-                       nprocs > 0 ? errdetail_log_plural("Conflicting process: 
%s.",
-                                                                               
          "Conflicting processes: %s.",
-                                                                               
          nprocs, buf.data) : 0);
+       if (still_waiting)
+       {
+               ereport(LOG,
+                               errmsg("recovery still waiting after %ld.%03d 
ms: %s",
+                                          msecs, usecs, 
_(get_recovery_conflict_desc(reason))),
+                               nprocs > 0 ? errdetail_log_plural("Conflicting 
process: %s.",
+                                                                               
                  "Conflicting processes: %s.",
+                                                                               
                  nprocs, buf.data) : 0);
+       }
+       else
+       {
+               ereport(LOG,
+                               errmsg("recovery finished waiting after 
%ld.%03d ms: %s",
+                                          msecs, usecs, 
_(get_recovery_conflict_desc(reason))));
+       }
 
        if (nprocs > 0)
                pfree(buf.data);
@@ -375,13 +393,12 @@ 
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 
                                /*
                                 * Emit the log message if the startup process 
is waiting
-                                * longer than deadlock_timeout for recovery 
conflict on
-                                * buffer pin.
+                                * longer than deadlock_timeout for recovery 
conflict.
                                 */
                                if (maybe_log_conflict &&
                                        TimestampDifferenceExceeds(waitStart, 
now, DeadlockTimeout))
                                {
-                                       LogRecoveryConflict(reason, waitStart, 
now, waitlist);
+                                       LogRecoveryConflict(reason, waitStart, 
now, waitlist, true);
                                        logged_recovery_conflict = true;
                                }
                        }
@@ -391,6 +408,14 @@ 
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                waitlist++;
        }
 
+       /*
+        * Emit the log message if recovery conflict was resolved but the 
startup
+        * process waited longer than deadlock_timeout for it.
+        */
+       if (logged_recovery_conflict)
+               LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
+                                                       NULL, false);
+
        /* Reset ps display if we changed it */
        if (new_status)
        {
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index db0cfaa360..c87ffc6549 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod 
lockMethodTable)
                                         * longer than deadlock_timeout.
                                         */
                                        
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
-                                                                               
standbyWaitStart, now, cnt > 0 ? vxids : NULL);
+                                                                               
standbyWaitStart, now,
+                                                                               
cnt > 0 ? vxids : NULL, true);
                                        logged_recovery_conflict = true;
                                }
                        }
@@ -1607,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod 
lockMethodTable)
                        disable_timeout(DEADLOCK_TIMEOUT, false);
        }
 
+       /*
+        * Emit the log message if recovery conflict on lock was resolved but 
the
+        * startup process waited longer than deadlock_timeout for it.
+        */
+       if (InHotStandby && logged_recovery_conflict)
+               LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+                                                       standbyWaitStart, 
GetCurrentTimestamp(),
+                                                       NULL, false);
+
        /*
         * Re-acquire the lock table's partition lock.  We have to do this to 
hold
         * off cancel/die interrupts before we can mess with lockAwaited (else 
we
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index a0f3e0bdf0..2b1f340b82 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void);
 extern void StandbyTimeoutHandler(void);
 extern void StandbyLockTimeoutHandler(void);
 extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz 
wait_start,
-                                                               TimestampTz 
cur_ts, VirtualTransactionId *wait_list);
+                                                               TimestampTz 
cur_ts, VirtualTransactionId *wait_list,
+                                                               bool 
still_waiting);
 
 /*
  * Standby Rmgr (RM_STANDBY_ID)

Re: Add Information during standby recovery conflicts

Reply via email to