Hi,

On Mon, Dec 8, 2008 at 11:39 PM, Heikki Linnakangas
<[EMAIL PROTECTED]> wrote:
> Tom Lane wrote:
>>
>> Heikki Linnakangas <[EMAIL PROTECTED]> writes:
>>>
>>> To set or clear the flag from PGPROC, to send or handle a signal, we have
>>> to acquire ProcArrayLock. Is that safe to do in a signal handler?
>>
>> No.  If it's trying to do that then it's broken.  In fact, if it's
>> trying to do much of anything beyond setting a "volatile" flag variable
>> in a signal handler, it's broken --- unless there are special provisions
>> to limit where the signal trap can occur, which would be pretty much
>> unacceptable for a multiplexed-signal implementation.
>
> Ok, I was afraid so.
>
> I think we'll need to replace the proposed bitmask with an array of
> sig_atomic_t flags then, and do without locking.

Thanks! I updated the patch so (based on signal_handling_v2-heikki-1.patch).

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
diff -rc base/src/backend/access/transam/twophase.c new/src/backend/access/transam/twophase.c
*** base/src/backend/access/transam/twophase.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/access/transam/twophase.c	2008-12-09 12:38:52.000000000 +0900
***************
*** 287,292 ****
--- 287,293 ----
  	gxact->proc.databaseId = databaseid;
  	gxact->proc.roleId = owner;
  	gxact->proc.inCommit = false;
+ 	MemSet(gxact->proc.signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));
  	gxact->proc.vacuumFlags = 0;
  	gxact->proc.lwWaiting = false;
  	gxact->proc.lwExclusive = false;
diff -rc base/src/backend/commands/async.c new/src/backend/commands/async.c
*** base/src/backend/commands/async.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/commands/async.c	2008-12-09 11:45:43.000000000 +0900
***************
*** 915,923 ****
   *		a frontend command.  Signal handler execution of inbound notifies
   *		is disabled until the next EnableNotifyInterrupt call.
   *
!  *		The SIGUSR1 signal handler also needs to call this, so as to
!  *		prevent conflicts if one signal interrupts the other.  So we
!  *		must return the previous state of the flag.
   */
  bool
  DisableNotifyInterrupt(void)
--- 915,924 ----
   *		a frontend command.  Signal handler execution of inbound notifies
   *		is disabled until the next EnableNotifyInterrupt call.
   *
!  *		This also needs to be called when SIGUSR1 with 
!  *		PROCSIG_CATCHUP_INTERRUPT is received, so as to prevent conflicts 
!  *		if one signal interrupts the other.  So we must return the previous 
!  *		state of the flag.
   */
  bool
  DisableNotifyInterrupt(void)
***************
*** 954,960 ****
  				nulls[Natts_pg_listener];
  	bool		catchup_enabled;
  
! 	/* Must prevent SIGUSR1 interrupt while I am running */
  	catchup_enabled = DisableCatchupInterrupt();
  
  	if (Trace_notify)
--- 955,961 ----
  				nulls[Natts_pg_listener];
  	bool		catchup_enabled;
  
! 	/* Must prevent catchup interrupt while I am running */
  	catchup_enabled = DisableCatchupInterrupt();
  
  	if (Trace_notify)
diff -rc base/src/backend/postmaster/autovacuum.c new/src/backend/postmaster/autovacuum.c
*** base/src/backend/postmaster/autovacuum.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/postmaster/autovacuum.c	2008-12-09 11:45:43.000000000 +0900
***************
*** 1477,1483 ****
  	pqsignal(SIGALRM, handle_sig_alarm);
  
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, CatchupInterruptHandler);
  	/* We don't listen for async notifies */
  	pqsignal(SIGUSR2, SIG_IGN);
  	pqsignal(SIGFPE, FloatExceptionHandler);
--- 1477,1483 ----
  	pqsignal(SIGALRM, handle_sig_alarm);
  
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, proc_sigusr1_handler);
  	/* We don't listen for async notifies */
  	pqsignal(SIGUSR2, SIG_IGN);
  	pqsignal(SIGFPE, FloatExceptionHandler);
diff -rc base/src/backend/storage/ipc/sinval.c new/src/backend/storage/ipc/sinval.c
*** base/src/backend/storage/ipc/sinval.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/storage/ipc/sinval.c	2008-12-09 11:45:43.000000000 +0900
***************
*** 27,33 ****
   * need a way to give an idle backend a swift kick in the rear and make
   * it catch up before the sinval queue overflows and forces it to go
   * through a cache reset exercise.	This is done by sending SIGUSR1
!  * to any backend that gets too far behind.
   *
   * State for catchup events consists of two flags: one saying whether
   * the signal handler is currently allowed to call ProcessCatchupEvent
--- 27,34 ----
   * need a way to give an idle backend a swift kick in the rear and make
   * it catch up before the sinval queue overflows and forces it to go
   * through a cache reset exercise.	This is done by sending SIGUSR1
!  * with PROCSIG_CATCHUP_INTERRUPT to any backend that gets too far 
!  * behind.
   *
   * State for catchup events consists of two flags: one saying whether
   * the signal handler is currently allowed to call ProcessCatchupEvent
***************
*** 144,152 ****
  
  
  /*
!  * CatchupInterruptHandler
   *
!  * This is the signal handler for SIGUSR1.
   *
   * If we are idle (catchupInterruptEnabled is set), we can safely
   * invoke ProcessCatchupEvent directly.  Otherwise, just set a flag
--- 145,154 ----
  
  
  /*
!  * HandleCatchupInterrupt
   *
!  * This is called when SIGUSR1 with PROCSIG_CATCHUP_INTERRUPT is
!  * received.
   *
   * If we are idle (catchupInterruptEnabled is set), we can safely
   * invoke ProcessCatchupEvent directly.  Otherwise, just set a flag
***************
*** 156,168 ****
   * since there's no longer any reason to do anything.)
   */
  void
! CatchupInterruptHandler(SIGNAL_ARGS)
  {
- 	int			save_errno = errno;
- 
  	/*
! 	 * Note: this is a SIGNAL HANDLER.	You must be very wary what you do
! 	 * here.
  	 */
  
  	/* Don't joggle the elbow of proc_exit */
--- 158,168 ----
   * since there's no longer any reason to do anything.)
   */
  void
! HandleCatchupInterrupt(void)
  {
  	/*
! 	 * Note: this is called by a SIGNAL HANDLER.	
! 	 * You must be very wary what you do here.
  	 */
  
  	/* Don't joggle the elbow of proc_exit */
***************
*** 216,223 ****
  		 */
  		catchupInterruptOccurred = 1;
  	}
- 
- 	errno = save_errno;
  }
  
  /*
--- 216,221 ----
***************
*** 289,295 ****
  /*
   * ProcessCatchupEvent
   *
!  * Respond to a catchup event (SIGUSR1) from another backend.
   *
   * This is called either directly from the SIGUSR1 signal handler,
   * or the next time control reaches the outer idle loop (assuming
--- 287,294 ----
  /*
   * ProcessCatchupEvent
   *
!  * Respond to a catchup event (SIGUSR1 with PROCSIG_CATCHUP_INTERRUPT) 
!  * from another backend.
   *
   * This is called either directly from the SIGUSR1 signal handler,
   * or the next time control reaches the outer idle loop (assuming
diff -rc base/src/backend/storage/ipc/sinvaladt.c new/src/backend/storage/ipc/sinvaladt.c
*** base/src/backend/storage/ipc/sinvaladt.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/storage/ipc/sinvaladt.c	2008-12-09 11:45:43.000000000 +0900
***************
*** 21,26 ****
--- 21,27 ----
  #include "storage/backendid.h"
  #include "storage/ipc.h"
  #include "storage/proc.h"
+ #include "storage/procarray.h"
  #include "storage/shmem.h"
  #include "storage/sinvaladt.h"
  #include "storage/spin.h"
***************
*** 136,144 ****
  /* Per-backend state in shared invalidation structure */
  typedef struct ProcState
  {
! 	/* procPid is zero in an inactive ProcState array entry. */
! 	pid_t		procPid;		/* PID of backend, for signaling */
! 	/* nextMsgNum is meaningless if procPid == 0 or resetState is true. */
  	int			nextMsgNum;		/* next message number to read */
  	bool		resetState;		/* backend needs to reset its state */
  	bool		signaled;		/* backend has been sent catchup signal */
--- 137,145 ----
  /* Per-backend state in shared invalidation structure */
  typedef struct ProcState
  {
! 	/* proc is NULL in an inactive ProcState array entry. */
! 	PGPROC	   *proc;			/* PGPROC entry of backend, for signaling */
! 	/* nextMsgNum is meaningless if proc == NULL or resetState is true. */
  	int			nextMsgNum;		/* next message number to read */
  	bool		resetState;		/* backend needs to reset its state */
  	bool		signaled;		/* backend has been sent catchup signal */
***************
*** 235,241 ****
  	/* Mark all backends inactive, and initialize nextLXID */
  	for (i = 0; i < shmInvalBuffer->maxBackends; i++)
  	{
! 		shmInvalBuffer->procState[i].procPid = 0;			/* inactive */
  		shmInvalBuffer->procState[i].nextMsgNum = 0;		/* meaningless */
  		shmInvalBuffer->procState[i].resetState = false;
  		shmInvalBuffer->procState[i].signaled = false;
--- 236,242 ----
  	/* Mark all backends inactive, and initialize nextLXID */
  	for (i = 0; i < shmInvalBuffer->maxBackends; i++)
  	{
! 		shmInvalBuffer->procState[i].proc = NULL;			/* inactive */
  		shmInvalBuffer->procState[i].nextMsgNum = 0;		/* meaningless */
  		shmInvalBuffer->procState[i].resetState = false;
  		shmInvalBuffer->procState[i].signaled = false;
***************
*** 266,272 ****
  	/* Look for a free entry in the procState array */
  	for (index = 0; index < segP->lastBackend; index++)
  	{
! 		if (segP->procState[index].procPid == 0)		/* inactive slot? */
  		{
  			stateP = &segP->procState[index];
  			break;
--- 267,273 ----
  	/* Look for a free entry in the procState array */
  	for (index = 0; index < segP->lastBackend; index++)
  	{
! 		if (segP->procState[index].proc == NULL)		/* inactive slot? */
  		{
  			stateP = &segP->procState[index];
  			break;
***************
*** 278,284 ****
  		if (segP->lastBackend < segP->maxBackends)
  		{
  			stateP = &segP->procState[segP->lastBackend];
! 			Assert(stateP->procPid == 0);
  			segP->lastBackend++;
  		}
  		else
--- 279,285 ----
  		if (segP->lastBackend < segP->maxBackends)
  		{
  			stateP = &segP->procState[segP->lastBackend];
! 			Assert(stateP->proc == NULL);
  			segP->lastBackend++;
  		}
  		else
***************
*** 303,309 ****
  	nextLocalTransactionId = stateP->nextLXID;
  
  	/* mark myself active, with all extant messages already read */
! 	stateP->procPid = MyProcPid;
  	stateP->nextMsgNum = segP->maxMsgNum;
  	stateP->resetState = false;
  	stateP->signaled = false;
--- 304,310 ----
  	nextLocalTransactionId = stateP->nextLXID;
  
  	/* mark myself active, with all extant messages already read */
! 	stateP->proc = MyProc;
  	stateP->nextMsgNum = segP->maxMsgNum;
  	stateP->resetState = false;
  	stateP->signaled = false;
***************
*** 341,347 ****
  	stateP->nextLXID = nextLocalTransactionId;
  
  	/* Mark myself inactive */
! 	stateP->procPid = 0;
  	stateP->nextMsgNum = 0;
  	stateP->resetState = false;
  	stateP->signaled = false;
--- 342,348 ----
  	stateP->nextLXID = nextLocalTransactionId;
  
  	/* Mark myself inactive */
! 	stateP->proc = NULL;
  	stateP->nextMsgNum = 0;
  	stateP->resetState = false;
  	stateP->signaled = false;
***************
*** 349,355 ****
  	/* Recompute index of last active backend */
  	for (i = segP->lastBackend; i > 0; i--)
  	{
! 		if (segP->procState[i - 1].procPid != 0)
  			break;
  	}
  	segP->lastBackend = i;
--- 350,356 ----
  	/* Recompute index of last active backend */
  	for (i = segP->lastBackend; i > 0; i--)
  	{
! 		if (segP->procState[i - 1].proc != NULL)
  			break;
  	}
  	segP->lastBackend = i;
***************
*** 374,380 ****
  	{
  		ProcState  *stateP = &segP->procState[backendID - 1];
  
! 		result = (stateP->procPid != 0);
  	}
  	else
  		result = false;
--- 375,381 ----
  	{
  		ProcState  *stateP = &segP->procState[backendID - 1];
  
! 		result = (stateP->proc != NULL);
  	}
  	else
  		result = false;
***************
*** 590,596 ****
  		int		n = stateP->nextMsgNum;
  
  		/* Ignore if inactive or already in reset state */
! 		if (stateP->procPid == 0 || stateP->resetState)
  			continue;
  
  		/*
--- 591,597 ----
  		int		n = stateP->nextMsgNum;
  
  		/* Ignore if inactive or already in reset state */
! 		if (stateP->proc == NULL || stateP->resetState)
  			continue;
  
  		/*
***************
*** 644,661 ****
  		segP->nextThreshold = (numMsgs / CLEANUP_QUANTUM + 1) * CLEANUP_QUANTUM;
  
  	/*
! 	 * Lastly, signal anyone who needs a catchup interrupt.  Since kill()
! 	 * might not be fast, we don't want to hold locks while executing it.
  	 */
  	if (needSig)
  	{
! 		pid_t	his_pid = needSig->procPid;
  
  		needSig->signaled = true;
  		LWLockRelease(SInvalReadLock);
  		LWLockRelease(SInvalWriteLock);
! 		elog(DEBUG4, "sending sinval catchup signal to PID %d", (int) his_pid);
! 		kill(his_pid, SIGUSR1);
  		if (callerHasWriteLock)
  			LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE);
  	}
--- 645,664 ----
  		segP->nextThreshold = (numMsgs / CLEANUP_QUANTUM + 1) * CLEANUP_QUANTUM;
  
  	/*
! 	 * Lastly, signal anyone who needs a catchup interrupt.  Since
! 	 * SendProcSignal() might not be fast, we don't want to hold locks while
! 	 * executing it.
  	 */
  	if (needSig)
  	{
! 		PGPROC *his_proc = needSig->proc;
  
  		needSig->signaled = true;
  		LWLockRelease(SInvalReadLock);
  		LWLockRelease(SInvalWriteLock);
! 		elog(DEBUG4, "sending sinval catchup signal to PID %d",
! 			 (int) his_proc->pid);
! 		SendProcSignal(his_proc, PROCSIG_CATCHUP_INTERRUPT);
  		if (callerHasWriteLock)
  			LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE);
  	}
diff -rc base/src/backend/storage/lmgr/proc.c new/src/backend/storage/lmgr/proc.c
*** base/src/backend/storage/lmgr/proc.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/storage/lmgr/proc.c	2008-12-09 12:37:50.000000000 +0900
***************
*** 289,294 ****
--- 289,295 ----
  	MyProc->databaseId = InvalidOid;
  	MyProc->roleId = InvalidOid;
  	MyProc->inCommit = false;
+ 	MemSet(MyProc->signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));
  	MyProc->vacuumFlags = 0;
  	if (IsAutoVacuumWorkerProcess())
  		MyProc->vacuumFlags |= PROC_IS_AUTOVACUUM;
***************
*** 428,433 ****
--- 429,435 ----
  	MyProc->databaseId = InvalidOid;
  	MyProc->roleId = InvalidOid;
  	MyProc->inCommit = false;
+ 	MemSet(MyProc->signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));
  	/* we don't set the "is autovacuum" flag in the launcher */
  	MyProc->vacuumFlags = 0;
  	MyProc->lwWaiting = false;
***************
*** 1277,1282 ****
--- 1279,1321 ----
  		PGSemaphoreUnlock(&proc->sem);
  }
  
+ /*
+  * SendProcSignal - send the signal with the reason to the process
+  * (such as backend, autovacuum worker and auxiliary process) 
+  * identified by proc.
+  */
+ void
+ SendProcSignal(PGPROC *proc, ProcSignalReason reason)
+ {
+ 	/* If the process has gone, do nothing */
+ 	if (proc == NULL || proc->pid == 0)
+ 		return;
+ 	
+ 	/* Atomically set the proper flag */
+ 	proc->signalFlags[reason] = true;
+ 	
+ 	/* Send SIGUSR1 to the process */
+ 	kill(proc->pid, SIGUSR1);
+ }
+ 
+ /*
+  * CheckProcSignal - check to see if the particular reason has been
+  * signaled, and clear the signal flag.  Should be called after 
+  * receiving SIGUSR1.
+  */
+ bool
+ CheckProcSignal(ProcSignalReason reason)
+ {
+ 	/* Careful here --- don't clear flag if we haven't seen it set */
+ 	if (MyProc->signalFlags[reason])
+ 	{
+ 		MyProc->signalFlags[reason] = false;
+ 		return true;
+ 	}
+ 
+ 	return false;
+ }
+ 
  
  /*****************************************************************************
   * SIGALRM interrupt support
diff -rc base/src/backend/tcop/postgres.c new/src/backend/tcop/postgres.c
*** base/src/backend/tcop/postgres.c	2008-12-09 11:45:21.000000000 +0900
--- new/src/backend/tcop/postgres.c	2008-12-09 12:27:08.000000000 +0900
***************
*** 2437,2442 ****
--- 2437,2464 ----
   */
  
  /*
+  * proc_sigusr1_handler - handle SIGUSR1 signal.
+  *
+  * SIGUSR1 is multiplexed to handle multiple different events. The signalFlags
+  * array in PGPROC indicates which events have been signaled.
+  */
+ void
+ proc_sigusr1_handler(SIGNAL_ARGS)
+ {
+ 	int			save_errno = errno;
+ 
+ 	if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT))
+ 	{
+ 		/*
+ 		 * Catchup interrupt has been sent.
+ 		 */
+ 		HandleCatchupInterrupt();
+ 	}
+ 
+ 	errno = save_errno;
+ }
+ 
+ /*
   * quickdie() occurs when signalled SIGQUIT by the postmaster.
   *
   * Some backend has bought the farm,
***************
*** 3180,3186 ****
  	 * of output during who-knows-what operation...
  	 */
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, CatchupInterruptHandler);
  	pqsignal(SIGUSR2, NotifyInterruptHandler);
  	pqsignal(SIGFPE, FloatExceptionHandler);
  
--- 3202,3208 ----
  	 * of output during who-knows-what operation...
  	 */
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, proc_sigusr1_handler);
  	pqsignal(SIGUSR2, NotifyInterruptHandler);
  	pqsignal(SIGFPE, FloatExceptionHandler);
  
diff -rc base/src/include/storage/proc.h new/src/include/storage/proc.h
*** base/src/include/storage/proc.h	2008-12-09 11:45:21.000000000 +0900
--- new/src/include/storage/proc.h	2008-12-09 13:01:25.000000000 +0900
***************
*** 14,19 ****
--- 14,21 ----
  #ifndef _PROC_H_
  #define _PROC_H_
  
+ #include <signal.h>
+ 
  #include "storage/lock.h"
  #include "storage/pg_sema.h"
  
***************
*** 38,43 ****
--- 40,59 ----
  	TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
  };
  
+ /*
+  * Reasons for signaling the process (such as backend, autovacuum worker
+  * and auxiliary process). We can cope with simultaneous signals for
+  * different reasons. If the same reason is signaled multiple times in
+  * quick succession, however, the postmaster is likely to observe only
+  * one notification of it.  This is okay for the present uses.
+  */
+ typedef enum
+ {
+ 	PROCSIG_CATCHUP_INTERRUPT,	/* catchup interrupt */
+ 
+ 	NUM_PROCSIGNALS				/* Must be last value of enum! */
+ } ProcSignalReason;
+ 
  /* Flags for PGPROC->vacuumFlags */
  #define		PROC_IS_AUTOVACUUM	0x01	/* is it an autovac worker? */
  #define		PROC_IN_VACUUM		0x02	/* currently running lazy vacuum */
***************
*** 91,96 ****
--- 107,126 ----
  
  	bool		inCommit;		/* true if within commit critical section */
  
+ 	/*
+ 	 * The backend and auxiliary process are sent SIGUSR1 with the specific
+ 	 * reason, which is communicated via flags in shared memory. We keep
+ 	 * a boolean flag for each possible "reason", so that different reasons
+ 	 * can be signaled by different processes at the same time.	(However,
+ 	 * if the same reason is signaled more than once simultaneously, the
+ 	 * backend and auxiliary process will observe it only once.)
+ 	 *
+ 	 * The flags are actually declared as "volatile sig_atomic_t" for maximum
+ 	 * portability.  This should ensure that loads and stores of the flag
+ 	 * values are atomic, allowing us to dispense with any explicit locking.
+ 	 */
+ 	volatile sig_atomic_t	signalFlags[NUM_PROCSIGNALS];
+ 
  	uint8		vacuumFlags;	/* vacuum-related flags, see above */
  
  	/* Info about LWLock the process is currently waiting for, if any. */
***************
*** 171,176 ****
--- 201,209 ----
  extern void ProcWaitForSignal(void);
  extern void ProcSendSignal(int pid);
  
+ extern void SendProcSignal(PGPROC *proc, ProcSignalReason reason);
+ extern bool CheckProcSignal(ProcSignalReason reason);
+ 
  extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
  extern bool disable_sig_alarm(bool is_statement_timeout);
  extern void handle_sig_alarm(SIGNAL_ARGS);
diff -rc base/src/include/storage/sinval.h new/src/include/storage/sinval.h
*** base/src/include/storage/sinval.h	2008-12-09 11:45:21.000000000 +0900
--- new/src/include/storage/sinval.h	2008-12-09 11:45:43.000000000 +0900
***************
*** 90,96 ****
  							 void (*resetFunction) (void));
  
  /* signal handler for catchup events (SIGUSR1) */
! extern void CatchupInterruptHandler(SIGNAL_ARGS);
  
  /*
   * enable/disable processing of catchup events directly from signal handler.
--- 90,96 ----
  							 void (*resetFunction) (void));
  
  /* signal handler for catchup events (SIGUSR1) */
! extern void HandleCatchupInterrupt(void);
  
  /*
   * enable/disable processing of catchup events directly from signal handler.
diff -rc base/src/include/tcop/tcopprot.h new/src/include/tcop/tcopprot.h
*** base/src/include/tcop/tcopprot.h	2008-12-09 11:45:21.000000000 +0900
--- new/src/include/tcop/tcopprot.h	2008-12-09 11:45:43.000000000 +0900
***************
*** 56,61 ****
--- 56,62 ----
  
  extern bool assign_max_stack_depth(int newval, bool doit, GucSource source);
  
+ extern void proc_sigusr1_handler(SIGNAL_ARGS);
  extern void die(SIGNAL_ARGS);
  extern void quickdie(SIGNAL_ARGS);
  extern void authdie(SIGNAL_ARGS);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to