On 2021/10/21 23:55, Bharath Rupireddy wrote:
Also how about adding wait events for other commands like
archive_cleanup_command, restore_command and recovery_end_command?

+1 for the wait event.

Thanks!
I added the wait events for also restore_command, etc into the patch.
I attached that updated version of the patch.


The following activitymsg that are being set to ps display in
XLogFileRead and pgarch_archiveXlog have come up for one of our
internal team discussions recently:

             snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
                      xlogfname);
             set_ps_display(activitymsg);

         snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
                  xlogfname);
         set_ps_display(activitymsg);

     snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
     set_ps_display(activitymsg);

The ps display info might be useful if we run postgres on a stand
alone box and there's someone monitoring at the ps output, but it
doesn't help debugging after an issue has occurred. How about we have
the following statements which will be useful for someone to look at
the server logs and know what was/is happening during the recovery and
archiving.

If an issue occurs while the command is executing,
the error message is logged, isn't it? Isn't that enough for your case?


IMO, we should also have the elog statement.

elog(LOG, "waiting for %s", xlogfname);
elog(LOG, "recovering %s"", xlogfname);
elog(LOG, "archiving %s", xlog);

I'm afraid that some people think that it's noisy to always log those messages.


Another idea could be to have a hook emitting the above info to
outside components, but a hook just for this purpose isn't a great
idea IMO.

Yes, this idea sounds overkill to me.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 3173ec2566..af6914872b 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1569,7 +1569,17 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   
11:34   0:00 postgres: ser
       <entry>Waiting for subplan nodes of an <literal>Append</literal> plan
        node to be ready.</entry>
      </row>
-    <row>
+     <row>
+      <entry><literal>ArchiveCleanupCommand</literal></entry>
+      <entry>Waiting for <xref linkend="guc-archive-cleanup-command"/> to
+       complete.</entry>
+     </row>
+     <row>
+      <entry><literal>ArchiveCommand</literal></entry>
+      <entry>Waiting for <xref linkend="guc-archive-command"/> to
+       complete.</entry>
+     </row>
+     <row>
       <entry><literal>BackendTermination</literal></entry>
       <entry>Waiting for the termination of another backend.</entry>
      </row>
@@ -1747,6 +1757,11 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   
11:34   0:00 postgres: ser
       <entry>Waiting for recovery conflict resolution for dropping a
        tablespace.</entry>
      </row>
+     <row>
+      <entry><literal>RecoveryEndCommand</literal></entry>
+      <entry>Waiting for <xref linkend="guc-recovery-end-command"/> to
+       complete.</entry>
+     </row>
      <row>
       <entry><literal>RecoveryPause</literal></entry>
       <entry>Waiting for recovery to be resumed.</entry>
@@ -1761,6 +1776,11 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   
11:34   0:00 postgres: ser
       <entry>Waiting for a replication slot to become inactive so it can be
        dropped.</entry>
      </row>
+     <row>
+      <entry><literal>RestoreCommand</literal></entry>
+      <entry>Waiting for <xref linkend="guc-restore-command"/> to
+       complete.</entry>
+     </row>
      <row>
       <entry><literal>SafeSnapshot</literal></entry>
       <entry>Waiting to obtain a valid snapshot for a <literal>READ ONLY
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index f547efd294..c23229a0ac 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5747,7 +5747,8 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, 
XLogRecPtr EndOfLog)
        if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
                ExecuteRecoveryCommand(recoveryEndCommand,
                                                           
"recovery_end_command",
-                                                          true);
+                                                          true,
+                                                          
WAIT_EVENT_RECOVERY_END_COMMAND);
 
        /*
         * We switched to a new timeline. Clean up segments on the old timeline.
@@ -9863,7 +9864,8 @@ CreateRestartPoint(int flags)
        if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
                ExecuteRecoveryCommand(archiveCleanupCommand,
                                                           
"archive_cleanup_command",
-                                                          false);
+                                                          false,
+                                                          
WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
 
        return true;
 }
diff --git a/src/backend/access/transam/xlogarchive.c 
b/src/backend/access/transam/xlogarchive.c
index 26b023e754..47a287dd2c 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -24,6 +24,7 @@
 #include "access/xlogarchive.h"
 #include "common/archive.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/startup.h"
 #include "postmaster/pgarch.h"
 #include "replication/walsender.h"
@@ -168,7 +169,9 @@ RestoreArchivedFile(char *path, const char *xlogfname,
        /*
         * Copy xlog from archival storage to XLOGDIR
         */
+       pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
        rc = system(xlogRestoreCmd);
+       pgstat_report_wait_end();
 
        PostRestoreCommand();
        pfree(xlogRestoreCmd);
@@ -284,7 +287,8 @@ not_available:
  * This is currently used for recovery_end_command and archive_cleanup_command.
  */
 void
-ExecuteRecoveryCommand(const char *command, const char *commandName, bool 
failOnSignal)
+ExecuteRecoveryCommand(const char *command, const char *commandName,
+                                          bool failOnSignal, uint32 
wait_event_info)
 {
        char            xlogRecoveryCmd[MAXPGPATH];
        char            lastRestartPointFname[MAXPGPATH];
@@ -354,7 +358,10 @@ ExecuteRecoveryCommand(const char *command, const char 
*commandName, bool failOn
        /*
         * execute the constructed command
         */
+       pgstat_report_wait_start(wait_event_info);
        rc = system(xlogRecoveryCmd);
+       pgstat_report_wait_end();
+
        if (rc != 0)
        {
                /*
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 74a7d7c4d0..2201f069ea 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -515,7 +515,10 @@ pgarch_archiveXlog(char *xlog)
        snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
        set_ps_display(activitymsg);
 
+       pgstat_report_wait_start(WAIT_EVENT_ARCHIVE_COMMAND);
        rc = system(xlogarchcmd);
+       pgstat_report_wait_end();
+
        if (rc != 0)
        {
                /*
diff --git a/src/backend/utils/activity/wait_event.c 
b/src/backend/utils/activity/wait_event.c
index 4a5b7502f5..4d53f040e8 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -313,6 +313,12 @@ pgstat_get_wait_ipc(WaitEventIPC w)
                case WAIT_EVENT_APPEND_READY:
                        event_name = "AppendReady";
                        break;
+               case WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND:
+                       event_name = "ArchiveCleanupCommand";
+                       break;
+               case WAIT_EVENT_ARCHIVE_COMMAND:
+                       event_name = "ArchiveCommand";
+                       break;
                case WAIT_EVENT_BACKEND_TERMINATION:
                        event_name = "BackendTermination";
                        break;
@@ -427,6 +433,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
                case WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE:
                        event_name = "RecoveryConflictTablespace";
                        break;
+               case WAIT_EVENT_RECOVERY_END_COMMAND:
+                       event_name = "RecoveryEndCommand";
+                       break;
                case WAIT_EVENT_RECOVERY_PAUSE:
                        event_name = "RecoveryPause";
                        break;
@@ -436,6 +445,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
                case WAIT_EVENT_REPLICATION_SLOT_DROP:
                        event_name = "ReplicationSlotDrop";
                        break;
+               case WAIT_EVENT_RESTORE_COMMAND:
+                       event_name = "RestoreCommand";
+                       break;
                case WAIT_EVENT_SAFE_SNAPSHOT:
                        event_name = "SafeSnapshot";
                        break;
diff --git a/src/include/access/xlogarchive.h b/src/include/access/xlogarchive.h
index 3edd1a976c..91df8641f2 100644
--- a/src/include/access/xlogarchive.h
+++ b/src/include/access/xlogarchive.h
@@ -21,7 +21,7 @@ extern bool RestoreArchivedFile(char *path, const char 
*xlogfname,
                                                                const char 
*recovername, off_t expectedSize,
                                                                bool 
cleanupEnabled);
 extern void ExecuteRecoveryCommand(const char *command, const char 
*commandName,
-                                                                  bool 
failOnSignal);
+                                                                  bool 
failOnSignal, uint32 wait_event_info);
 extern void KeepFileRestoredFromArchive(const char *path, const char 
*xlogfname);
 extern void XLogArchiveNotify(const char *xlog);
 extern void XLogArchiveNotifySeg(XLogSegNo segno);
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index c22142365f..8785a8e12c 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -80,6 +80,8 @@ typedef enum
 typedef enum
 {
        WAIT_EVENT_APPEND_READY = PG_WAIT_IPC,
+       WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND,
+       WAIT_EVENT_ARCHIVE_COMMAND,
        WAIT_EVENT_BACKEND_TERMINATION,
        WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE,
        WAIT_EVENT_BGWORKER_SHUTDOWN,
@@ -118,9 +120,11 @@ typedef enum
        WAIT_EVENT_PROMOTE,
        WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
        WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
+       WAIT_EVENT_RECOVERY_END_COMMAND,
        WAIT_EVENT_RECOVERY_PAUSE,
        WAIT_EVENT_REPLICATION_ORIGIN_DROP,
        WAIT_EVENT_REPLICATION_SLOT_DROP,
+       WAIT_EVENT_RESTORE_COMMAND,
        WAIT_EVENT_SAFE_SNAPSHOT,
        WAIT_EVENT_SYNC_REP,
        WAIT_EVENT_WAL_RECEIVER_EXIT,

Reply via email to