On Mon, Mar 12, 2012 at 10:27 PM, Fujii Masao <[email protected]> wrote:
> On Mon, Mar 12, 2012 at 7:58 PM, Heikki Linnakangas
> <[email protected]> wrote:
>> On 09.03.2012 13:40, Fujii Masao wrote:
>>>
>>> I found a bug which causes walsender to enter a busy loop
>>> when the replication connection is terminated. Walsender consumes
>>> a lot of CPU (%sys), and this situation lasts until it has
>>> detected the termination of the replication connection and exited.
>>>
>>> The cause of this bug is that the walsender loop doesn't call
>>> ResetLatch at all in the above case. Since the latch remains set,
>>> the walsender loop cannot sleep on the latch, i.e., WaitLatch
>>> always returns immediately.
>>>
>>> We can fix this bug by adding a ResetLatch call at the top of the
>>> walsender loop. Patch attached.
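>>>
>>> To illustrate, the latch protocol the loop needs to follow is roughly
>>> the following (just a sketch, not the actual walsender code; the exact
>>> WaitLatch arguments are elided and the two helpers are hypothetical):
>>>
>>>     for (;;)
>>>     {
>>>         /*
>>>          * Clear the latch first. If this step is skipped, the latch
>>>          * stays set after the first wakeup and WaitLatch below returns
>>>          * immediately on every iteration, which is exactly the busy
>>>          * loop described above.
>>>          */
>>>         ResetLatch(&MyWalSnd->latch);
>>>
>>>         /* Check for work and do it, e.g. send any pending WAL. */
>>>         if (work_available())       /* hypothetical helper */
>>>             do_work();              /* hypothetical helper */
>>>
>>>         /*
>>>          * Sleep. Any SetLatch that happened after the ResetLatch above
>>>          * makes this return immediately, so no wakeup is lost.
>>>          */
>>>         WaitLatch(&MyWalSnd->latch, ...);
>>>     }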
>>>
>>> This bug exists in 9.1 but not in 9.2dev. In 9.2dev it has
>>> already been fixed by commit
>>> cff75130b5f63e45423c2ed90d6f2e84c21ef840, which
>>> refactors and refines the walsender loop logic in addition to
>>> adding ResetLatch. So I'm tempted to backport that commit
>>> (except for the deletion of wal_sender_delay) to 9.1 rather than
>>> applying the attached patch. OTOH, the attached patch is quite simple,
>>> and its impact on 9.1 would be very small, so it's easy to backport.
>>> Thoughts?
>>
>>
>> This patch makes the code that follows bogus:
>>
>>>         /*
>>>          * If we don't have any pending data in the output buffer, try to
>>>          * send some more.
>>>          */
>>>         if (!pq_is_send_pending())
>>>         {
>>>             XLogSend(output_message, &caughtup);
>>>
>>>             /*
>>>              * Even if we wrote all the WAL that was available when we
>>>              * started sending, more might have arrived while we were
>>>              * sending this batch. We had the latch set while sending, so
>>>              * we have not received any signals from that time. Let's arm
>>>              * the latch again, and after that check that we're still
>>>              * up-to-date.
>>>              */
>>>             if (caughtup && !pq_is_send_pending())
>>>             {
>>>                 ResetLatch(&MyWalSnd->latch);
>>>
>>>                 XLogSend(output_message, &caughtup);
>>>             }
>>>         }
>>
>>
>> The comment is no longer valid, and the calls to ResetLatch and XLogSend are
>> no longer necessary, once you have the ResetLatch() call at the top of the
>> loop.
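>>
>> With the ResetLatch() at the top, that whole block reduces to roughly
>> the following (a sketch; the attached patch does essentially this, plus
>> clearing caughtup when output is still pending):
>>
>>     if (!pq_is_send_pending())
>>         XLogSend(output_message, &caughtup);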
>
> Right.
>
>> I also think we should backport commit
>> cff75130b5f63e45423c2ed90d6f2e84c21ef840 (except for the removal of
>> wal_sender_delay).
>
> Agreed. The attached patch is the same as
> cff75130b5f63e45423c2ed90d6f2e84c21ef840,
> except for the removal of wal_sender_delay. Could you apply this?

Oh, I forgot to attach the patch. It really is attached this time.

Regards,
--
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/src/backend/replication/walsender.c
--- b/src/backend/replication/walsender.c
***************
*** 476,482 **** ProcessRepliesIfAny(void)
{
unsigned char firstchar;
int r;
! int received = false;
for (;;)
{
--- 476,482 ----
{
unsigned char firstchar;
int r;
! bool received = false;
for (;;)
{
***************
*** 700,705 **** WalSndLoop(void)
--- 700,708 ----
/* Loop forever, unless we get an error */
for (;;)
{
+ /* Clear any already-pending wakeups */
+ ResetLatch(&MyWalSnd->latch);
+
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
***************
*** 718,777 **** WalSndLoop(void)
/* Normal exit from the walsender is here */
if (walsender_shutdown_requested)
{
! /* Inform the standby that XLOG streaming was done */
pq_puttextmessage('C', "COPY 0");
pq_flush();
proc_exit(0);
}
/*
* If we don't have any pending data in the output buffer, try to send
! * some more.
*/
if (!pq_is_send_pending())
- {
XLogSend(output_message, &caughtup);
/*
! * Even if we wrote all the WAL that was available when we started
! * sending, more might have arrived while we were sending this
! * batch. We had the latch set while sending, so we have not
! * received any signals from that time. Let's arm the latch again,
! * and after that check that we're still up-to-date.
*/
! if (caughtup && !pq_is_send_pending())
{
! ResetLatch(&MyWalSnd->latch);
XLogSend(output_message, &caughtup);
}
}
- /* Flush pending output to the client */
- if (pq_flush_if_writable() != 0)
- break;
-
/*
! * When SIGUSR2 arrives, we send any outstanding logs up to the
! * shutdown checkpoint record (i.e., the latest record) and exit.
*/
! if (walsender_ready_to_stop && !pq_is_send_pending())
! {
! XLogSend(output_message, &caughtup);
! ProcessRepliesIfAny();
! if (caughtup && !pq_is_send_pending())
! walsender_shutdown_requested = true;
! }
!
! if ((caughtup || pq_is_send_pending()) &&
! !got_SIGHUP &&
! !walsender_shutdown_requested)
{
TimestampTz finish_time = 0;
! long sleeptime;
! /* Reschedule replication timeout */
if (replication_timeout > 0)
{
long secs;
--- 721,801 ----
/* Normal exit from the walsender is here */
if (walsender_shutdown_requested)
{
! /* Inform the standby that XLOG streaming is done */
pq_puttextmessage('C', "COPY 0");
pq_flush();
proc_exit(0);
}
+ /* Check for input from the client */
+ ProcessRepliesIfAny();
+
/*
* If we don't have any pending data in the output buffer, try to send
! * some more. If there is some, we don't bother to call XLogSend
! * again until we've flushed it ... but we'd better assume we are not
! * caught up.
*/
if (!pq_is_send_pending())
XLogSend(output_message, &caughtup);
+ else
+ caughtup = false;
+
+ /* Try to flush pending output to the client */
+ if (pq_flush_if_writable() != 0)
+ break;
+ /* If nothing remains to be sent right now ... */
+ if (caughtup && !pq_is_send_pending())
+ {
/*
! * If we're in catchup state, move to streaming. This is an
! * important state change for users to know about, since before
! * this point data loss might occur if the primary dies and we
! * need to failover to the standby. The state change is also
! * important for synchronous replication, since commits that
! * started to wait at that point might wait for some time.
*/
! if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
{
! ereport(DEBUG1,
! (errmsg("standby \"%s\" has now caught up with primary",
! application_name)));
! WalSndSetState(WALSNDSTATE_STREAMING);
! }
+ /*
+ * When SIGUSR2 arrives, we send any outstanding logs up to the
+ * shutdown checkpoint record (i.e., the latest record) and exit.
+ * This may be a normal termination at shutdown, or a promotion,
+ * the walsender is not sure which.
+ */
+ if (walsender_ready_to_stop)
+ {
+ /* ... let's just be real sure we're caught up ... */
XLogSend(output_message, &caughtup);
+ if (caughtup && !pq_is_send_pending())
+ {
+ walsender_shutdown_requested = true;
+ continue; /* don't want to wait more */
+ }
}
}
/*
! * We don't block if not caught up, unless there is unsent data
! * pending in which case we'd better block until the socket is
! * write-ready. This test is only needed for the case where XLogSend
! * loaded a subset of the available data but then pq_flush_if_writable
! * flushed it all --- we should immediately try to send more.
*/
! if (caughtup || pq_is_send_pending())
{
TimestampTz finish_time = 0;
! long sleeptime = -1;
! /* Determine time until replication timeout */
if (replication_timeout > 0)
{
long secs;
***************
*** 795,806 **** WalSndLoop(void)
sleeptime = WalSndDelay;
}
! /* Sleep */
WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
true, pq_is_send_pending(),
sleeptime);
! /* Check for replication timeout */
if (replication_timeout > 0 &&
GetCurrentTimestamp() >= finish_time)
{
--- 819,833 ----
sleeptime = WalSndDelay;
}
! /* Sleep until something happens or replication timeout */
WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
true, pq_is_send_pending(),
sleeptime);
! /* Check for replication timeout. Note we ignore the corner case
! * possibility that the client replied just as we reached the
! * timeout ... he's supposed to reply *before* that.
! */
if (replication_timeout > 0 &&
GetCurrentTimestamp() >= finish_time)
{
***************
*** 814,837 **** WalSndLoop(void)
break;
}
}
-
- /*
- * If we're in catchup state, see if its time to move to streaming.
- * This is an important state change for users, since before this
- * point data loss might occur if the primary dies and we need to
- * failover to the standby. The state change is also important for
- * synchronous replication, since commits that started to wait at that
- * point might wait for some time.
- */
- if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup)
- {
- ereport(DEBUG1,
- (errmsg("standby \"%s\" has now caught up with primary",
- application_name)));
- WalSndSetState(WALSNDSTATE_STREAMING);
- }
-
- ProcessRepliesIfAny();
}
/*
--- 841,846 ----