Fujii Masao wrote:
> +    TimestampDifference(start_time, stop_time, &secs, &microsecs);
> +    pg_usleep(interval_msec * 1000L - (1000000L * secs + 1L * microsecs));
>
> What if the large interval is set and a signal arrives during the sleep?
> I'm afraid that a signal cannot terminate the sleep on some platforms.
> This problem exists even now because pg_usleep is used, but the sleep
> time is just 5 seconds, so it's not so bad. But the patch allows a user to
> set large sleep time.

Yes, and I thought that we could live with that for this patch... Now
that you mention it something similar to what recoveryPausesHere would
be quite good to ease the shutdown of a process interrupted, even more
than now as well. So let's do that.

> Shouldn't we use WaitLatch or split the pg_usleep like recoveryPausesHere() 
> does?

I'd vote for the latter as we would still need to calculate a
timestamp difference in any cases, so it feels easier to do that in
the new single API and this patch does (routine useful for plugins as
well). Now I will not fight if people think that using
recoveryWakeupLatch is better.

An updated patch is attached. This patch contains as well a fix for
something that was mentioned upthread but not added in latest version:
wal_availability_check_interval should be used when waiting for a WAL
record from a stream, and for a segment when fetching from archives.
Last version did only the former, not the latter.
-- 
Michael
From 9c3a7fa35538993c1345a40fe0973332a76bdb81 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@otacoo.com>
Date: Mon, 19 Jan 2015 16:08:48 +0900
Subject: [PATCH] Add wal_availability_check_interval

This parameter aids to control at which timing WAL availability is checked
when a node is in recovery, particularly  when successive failures happen when
fetching WAL archives, or when fetching WAL records from a streaming source.

Default value is 5s.
---
 doc/src/sgml/recovery-config.sgml               | 21 +++++++++++
 src/backend/access/transam/recovery.conf.sample | 10 ++++++
 src/backend/access/transam/xlog.c               | 47 +++++++++++++++++--------
 src/backend/utils/adt/timestamp.c               | 38 ++++++++++++++++++++
 src/include/utils/timestamp.h                   |  2 ++
 5 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 0c64ff2..7fd51ce 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -145,6 +145,27 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
       </listitem>
      </varlistentry>
 
+     <varlistentry id="wal-availability-check-interval" xreflabel="wal_availability_check_interval">
+      <term><varname>wal_availability_check_interval</varname> (<type>integer</type>)
+      <indexterm>
+        <primary><varname>wal_availability_check_interval</> recovery parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        This parameter specifies the amount of time to wait when
+        WAL is not available for a node in recovery. Default value is
+        <literal>5s</>.
+       </para>
+       <para>
+        A node in recovery will wait for this amount of time if
+        <varname>restore_command</> returns nonzero exit status code when
+        fetching new WAL segment files from archive or when a WAL receiver
+        is not able to fetch a WAL record when using streaming replication.
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
 
   </sect1>
diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample
index b777400..70d3946 100644
--- a/src/backend/access/transam/recovery.conf.sample
+++ b/src/backend/access/transam/recovery.conf.sample
@@ -58,6 +58,16 @@
 #
 #recovery_end_command = ''
 #
+#
+# wal_availability_check_interval
+#
+# specifies an optional interval to check for availability of WAL when
+# recovering a node. This interval of time represents the frequency of
+# retries if a previous command of restore_command returned nonzero exit
+# status code or if a walreceiver did not stream completely a WAL record.
+#
+#wal_availability_check_interval = '5s'
+#
 #---------------------------------------------------------------------------
 # RECOVERY TARGET PARAMETERS
 #---------------------------------------------------------------------------
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 629a457..4f4efca 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -235,6 +235,7 @@ static TimestampTz recoveryTargetTime;
 static char *recoveryTargetName;
 static int	recovery_min_apply_delay = 0;
 static TimestampTz recoveryDelayUntilTime;
+static int	wal_availability_check_interval = 5000;
 
 /* options taken from recovery.conf for XLOG streaming */
 static bool StandbyModeRequested = false;
@@ -4881,6 +4882,26 @@ readRecoveryCommandFile(void)
 					(errmsg_internal("trigger_file = '%s'",
 									 TriggerFile)));
 		}
+		else if (strcmp(item->name, "wal_availability_check_interval") == 0)
+		{
+			const char *hintmsg;
+
+			if (!parse_int(item->value, &wal_availability_check_interval, GUC_UNIT_MS,
+						   &hintmsg))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("parameter \"%s\" requires a temporal value",
+								"wal_availability_check_interval"),
+						 hintmsg ? errhint("%s", _(hintmsg)) : 0));
+			ereport(DEBUG2,
+					(errmsg_internal("wal_availability_check_interval = '%s'", item->value)));
+
+			if (wal_availability_check_interval <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("\"%s\" must have a value strictly positive",
+								"wal_availability_check_interval")));
+		}
 		else if (strcmp(item->name, "recovery_min_apply_delay") == 0)
 		{
 			const char *hintmsg;
@@ -10340,8 +10361,8 @@ static bool
 WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 							bool fetching_ckpt, XLogRecPtr tliRecPtr)
 {
-	static pg_time_t last_fail_time = 0;
-	pg_time_t	now;
+	TimestampTz now = GetCurrentTimestamp();
+	TimestampTz	last_fail_time = now;
 
 	/*-------
 	 * Standby mode is implemented by a state machine:
@@ -10490,15 +10511,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					 * machine, so we've exhausted all the options for
 					 * obtaining the requested WAL. We're going to loop back
 					 * and retry from the archive, but if it hasn't been long
-					 * since last attempt, sleep 5 seconds to avoid
-					 * busy-waiting.
+					 * since last attempt, sleep the amount of time specified
+					 * by wal_availability_check_interval to avoid busy-waiting.
 					 */
-					now = (pg_time_t) time(NULL);
-					if ((now - last_fail_time) < 5)
-					{
-						pg_usleep(1000000L * (5 - (now - last_fail_time)));
-						now = (pg_time_t) time(NULL);
-					}
+					now = GetCurrentTimestamp();
+					if (TimestampSleepDifference(last_fail_time, now,
+											wal_availability_check_interval))
+						now = GetCurrentTimestamp();
 					last_fail_time = now;
 					currentSource = XLOG_FROM_ARCHIVE;
 					break;
@@ -10653,13 +10672,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					}
 
 					/*
-					 * Wait for more WAL to arrive. Time out after 5 seconds,
-					 * like when polling the archive, to react to a trigger
-					 * file promptly.
+					 * Wait for more WAL to arrive. Time out after the amount of
+					 * time specified by wal_availability_check_interval, like
+					 * when polling the archive, to react to a trigger file promptly.
 					 */
 					WaitLatch(&XLogCtl->recoveryWakeupLatch,
 							  WL_LATCH_SET | WL_TIMEOUT,
-							  5000L);
+							  wal_availability_check_interval * 1000L);
 					ResetLatch(&XLogCtl->recoveryWakeupLatch);
 					break;
 				}
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 67e0cf9..a4cf717 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1674,6 +1674,44 @@ TimestampDifferenceExceeds(TimestampTz start_time,
 }
 
 /*
+ * TimestampSleepDifference -- sleep for the amout of interval time
+ *		specified, reduced by the difference between two timestamps.
+ *		Returns true if sleep is done, false otherwise.
+ *
+ * Both inputs must be ordinary finite timestamps (in current usage,
+ * they'll be results from GetCurrentTimestamp()). Sleep is done
+ * per steps of 1s to be able to handle process interruptions without
+ * having to wait for a too long time.
+ */
+bool
+TimestampSleepDifference(TimestampTz start_time,
+						 TimestampTz stop_time,
+						 int interval_msec)
+{
+	long        secs, total_time;
+	int         microsecs;
+
+	if (TimestampDifferenceExceeds(start_time, stop_time, interval_msec))
+		return false;
+
+	TimestampDifference(start_time, stop_time, &secs, &microsecs);
+	total_time = interval_msec * 1000L - (1000000L * secs + 1L * microsecs);
+
+	while (total_time > 0)
+	{
+		long wait_time = 1000000L;	/* 1s */
+
+		if (total_time < wait_time)
+			wait_time = total_time;
+
+		pg_usleep(wait_time);
+		HandleStartupProcInterrupts();
+		total_time -= wait_time;
+	}
+	return true;
+}
+
+/*
  * Convert a time_t to TimestampTz.
  *
  * We do not use time_t internally in Postgres, but this is provided for use
diff --git a/src/include/utils/timestamp.h b/src/include/utils/timestamp.h
index 70118f5..e4a7673 100644
--- a/src/include/utils/timestamp.h
+++ b/src/include/utils/timestamp.h
@@ -217,6 +217,8 @@ extern void TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
 extern bool TimestampDifferenceExceeds(TimestampTz start_time,
 						   TimestampTz stop_time,
 						   int msec);
+extern bool TimestampSleepDifference(TimestampTz start_time,
+					TimestampTz stop_time, int interval_msec);
 
 /*
  * Prototypes for functions to deal with integer timestamps, when the native
-- 
2.2.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to