On Thu, 2009-12-31 at 13:14 +0100, Andres Freund wrote:
> > > When cancelling a backend that behaviour could be a bit
> annoying ;-)
> > 
> > Reading comments alone doesn't show the full situation here.
> > 
> > Before we send signal we check pid also, so the chances of this
> > happening are fairly small. i.e. we would need to have a backend
> slot
> > reused by a new backend with exactly same pid as the last slot
> holder.
> Well. The problem does not occur for critical errors (i.e. session
> death) 
> only. As signal delivery is asynchronous it can very well happen for 
> transaction cancellation as well.

> > I'm proposing to use this for killing transactions and connections,
> so I
> > don't think there's too much harm done there. If the backend is
> leaving
> > anyway, that's what we wanted. If its a new guy that is wearing the
> same
> > boots then a little collateral damage doesn't leave the server in a
> bad
> > place. HS cancellations aren't yet so precise that this matters.
> Building racy infrastructure when it can be avoided with a little care
> still seems not to be the best path to me.

Doing that will add more complexity in an area that is hard to test
effectively. I think the risk of introducing further bugs while trying
to fix this rare condition is high. Right now the conflict processing
needs more work and is often much less precise than this, so improving
this aspect of it would not be a priority. I've added it to the TODO
though. Thank you for your research.

I enclose the patch I am currently testing, as a patch-on-patch on top
of Joachim's changes, recently published on this thread. POC only as
yet.

Patch implements recovery conflict signalling using SIGUSR1
multiplexing, then uses a SessionCancelPending mode similar to Joachim's
TransactionCancelPending.

-- 
 Simon Riggs           www.2ndQuadrant.com
*** a/src/backend/storage/ipc/procarray.c
--- b/src/backend/storage/ipc/procarray.c
***************
*** 1704,1710 **** GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid,
   * Returns pid of the process signaled, or 0 if not found.
   */
  pid_t
! CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode)
  {
  	ProcArrayStruct *arrayP = procArray;
  	int			index;
--- 1704,1710 ----
   * Returns pid of the process signaled, or 0 if not found.
   */
  pid_t
! CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
  {
  	ProcArrayStruct *arrayP = procArray;
  	int			index;
***************
*** 1722,1733 **** CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode)
  		if (procvxid.backendId == vxid.backendId &&
  			procvxid.localTransactionId == vxid.localTransactionId)
  		{
- 			/*
- 			 * Issue orders for the proc to read next time it receives SIGINT
- 			 */
- 			if (proc->recoveryConflictMode < cancel_mode)
- 				proc->recoveryConflictMode = cancel_mode;
- 
  			pid = proc->pid;
  			break;
  		}
--- 1722,1727 ----
***************
*** 1741,1747 **** CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode)
  		 * Kill the pid if it's still here. If not, that's what we wanted
  		 * so ignore any errors.
  		 */
! 		kill(pid, SIGINT);
  	}
  
  	return pid;
--- 1735,1741 ----
  		 * Kill the pid if it's still here. If not, that's what we wanted
  		 * so ignore any errors.
  		 */
! 		(void) SendProcSignal(pid, sigmode, vxid.backendId);
  	}
  
  	return pid;
*** a/src/backend/storage/ipc/procsignal.c
--- b/src/backend/storage/ipc/procsignal.c
***************
*** 24,29 ****
--- 24,30 ----
  #include "storage/procsignal.h"
  #include "storage/shmem.h"
  #include "storage/sinval.h"
+ #include "storage/standby.h"
  
  
  /*
***************
*** 258,262 **** procsignal_sigusr1_handler(SIGNAL_ARGS)
--- 259,269 ----
  	if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT))
  		HandleNotifyInterrupt();
  
+ 	if (CheckProcSignal(PROCSIG_CONFLICT_ERROR_INTERRUPT))
+ 		HandleConflictInterrupt(ERROR);
+ 
+ 	if (CheckProcSignal(PROCSIG_CONFLICT_FATAL_INTERRUPT))
+ 		HandleConflictInterrupt(FATAL);
+ 
  	errno = save_errno;
  }
*** a/src/backend/storage/ipc/standby.c
--- b/src/backend/storage/ipc/standby.c
***************
*** 218,229 **** ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
  			if (WaitExceedsMaxStandbyDelay())
  			{
  				pid_t pid;
  
  				/*
  				 * Now find out who to throw out of the balloon.
  				 */
  				Assert(VirtualTransactionIdIsValid(*waitlist));
! 				pid = CancelVirtualTransaction(*waitlist, cancel_mode);
  
  				if (pid != 0)
  				{
--- 218,235 ----
  			if (WaitExceedsMaxStandbyDelay())
  			{
  				pid_t pid;
+ 				ProcSignalReason	sigmode;
+ 
+ 				if (cancel_mode == CONFLICT_MODE_ERROR)
+ 					sigmode = PROCSIG_CONFLICT_ERROR_INTERRUPT;
+ 				else
+ 					sigmode = PROCSIG_CONFLICT_FATAL_INTERRUPT;
  
  				/*
  				 * Now find out who to throw out of the balloon.
  				 */
  				Assert(VirtualTransactionIdIsValid(*waitlist));
! 				pid = CancelVirtualTransaction(*waitlist, sigmode);
  
  				if (pid != 0)
  				{
***************
*** 272,277 **** ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
--- 278,305 ----
      }
  }
  
+ void
+ HandleConflictInterrupt(int conflict_mode)
+ {
+ 	switch (conflict_mode)
+ 	{
+ 		case FATAL:
+ 			SessionCancelPending = true;
+ 			RecoveryConflictPending = true;
+ 			ProcessInterrupts();				/* Should not return */
+ 			return;
+ 
+ 		case ERROR:
+ 			TransactionCancelPending = true;
+ 			RecoveryConflictPending = true;
+ 			ProcessInterrupts();
+ 			return;
+ 
+ 		default:
+ 			elog(ERROR, "Unknown conflict mode");
+ 	}
+ }
+ 
  /*
   * -----------------------------------------------------
   * Locking in Recovery Mode
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 2737,2795 **** ProcessInterrupts(void)
  			ereport(ERROR,
  					(errcode(ERRCODE_QUERY_CANCELED),
  					 errmsg("canceling autovacuum task")));
! 		else
! 		{
! 			int cancelMode = MyProc->recoveryConflictMode;
! 
! 			/*
! 			 * XXXHS: We don't yet have a clean way to cancel an
! 			 * idle-in-transaction session, so make it FATAL instead.
! 			 * This isn't as bad as it looks because we don't issue a
! 			 * CONFLICT_MODE_ERROR for a session with proc->xmin == 0
! 			 * on cleanup conflicts. There's a possibility that we
! 			 * marked somebody as a conflict and then they go idle.
! 			 */
! 			if (DoingCommandRead && IsTransactionBlock() &&
! 				cancelMode == CONFLICT_MODE_ERROR)
! 			{
! 				cancelMode = CONFLICT_MODE_FATAL;
! 			}
! 
! 			switch (cancelMode)
! 			{
! 				case CONFLICT_MODE_FATAL:
! 						Assert(RecoveryInProgress());
! 						ereport(FATAL,
! 							(errcode(ERRCODE_QUERY_CANCELED),
! 							 errmsg("canceling session due to conflict with recovery")));
! 
! 				case CONFLICT_MODE_ERROR:
! 						/*
! 						 * We are aborting because we need to release
! 						 * locks. So we need to abort out of all
! 						 * subtransactions to make sure we release
! 						 * all locks at whatever their level.
! 						 *
! 						 * XXX Should we try to examine the
! 						 * transaction tree and cancel just enough
! 						 * subxacts to remove locks? Doubt it.
! 						 */
! 						Assert(RecoveryInProgress());
! 						AbortOutOfAnyTransaction();
! 						ereport(ERROR,
! 							(errcode(ERRCODE_QUERY_CANCELED),
! 							 errmsg("canceling statement due to conflict with recovery")));
! 						return;
! 
! 				default:
! 						/* No conflict pending, so fall through */
! 						break;
! 			}
! 
  			ereport(ERROR,
  					(errcode(ERRCODE_QUERY_CANCELED),
  					 errmsg("canceling statement due to user request")));
- 		}
  	}
  	if (TransactionCancelPending)
  	{
--- 2737,2746 ----
  			ereport(ERROR,
  					(errcode(ERRCODE_QUERY_CANCELED),
  					 errmsg("canceling autovacuum task")));
! 		else 
  			ereport(ERROR,
  					(errcode(ERRCODE_QUERY_CANCELED),
  					 errmsg("canceling statement due to user request")));
  	}
  	if (TransactionCancelPending)
  	{
***************
*** 2802,2810 **** ProcessInterrupts(void)
  		DisableNotifyInterrupt();
  		DisableCatchupInterrupt();
  
! 		ereport(NOTICE,
! 				(errcode(ERRCODE_QUERY_CANCELED),
! 				 errmsg("canceling idle transaction due to administrator request")));
  
  		AbortTransactionAndAnySubtransactions();
  
--- 2753,2766 ----
  		DisableNotifyInterrupt();
  		DisableCatchupInterrupt();
  
! 		if (RecoveryConflictPending)
! 			ereport(NOTICE,
! 					(errcode(ERRCODE_QUERY_CANCELED),
! 					 errmsg("canceling transaction due to conflict with recovery")));
! 		else
! 			ereport(NOTICE,
! 					(errcode(ERRCODE_QUERY_CANCELED),
! 					 errmsg("canceling idle transaction due to administrator request")));
  
  		AbortTransactionAndAnySubtransactions();
  
***************
*** 2815,2820 **** ProcessInterrupts(void)
--- 2771,2790 ----
  		set_ps_display("idle in transaction (aborted)", false);
  		pgstat_report_activity("<IDLE> in transaction (aborted)");
  	}
+ 	if (SessionCancelPending)
+ 	{
+ 		SessionCancelPending = false;
+ 		ImmediateInterruptOK = false;	/* not idle anymore */
+ 
+ 		if (RecoveryConflictPending)
+ 			ereport(FATAL,
+ 					(errcode(ERRCODE_QUERY_CANCELED),
+ 					 errmsg("canceling session due to conflict with recovery")));
+ 		else
+ 			ereport(FATAL,
+ 					(errcode(ERRCODE_QUERY_CANCELED),
+ 					 errmsg("canceling session due to administrator request")));
+ 	}
  	/* If we get here, do nothing (probably, QueryCancelPending was reset) */
  }
  
*** a/src/backend/utils/init/globals.c
--- b/src/backend/utils/init/globals.c
***************
*** 28,33 **** ProtocolVersion FrontendProtocol;
--- 28,35 ----
  volatile bool InterruptPending = false;
  volatile bool QueryCancelPending = false;
  volatile bool TransactionCancelPending = false;
+ volatile bool SessionCancelPending = false;
+ volatile bool RecoveryConflictPending = false;
  volatile bool ProcDiePending = false;
  volatile bool ImmediateInterruptOK = false;
  volatile uint32 InterruptHoldoffCount = 0;
*** a/src/include/miscadmin.h
--- b/src/include/miscadmin.h
***************
*** 69,74 ****
--- 69,76 ----
  extern PGDLLIMPORT volatile bool InterruptPending;
  extern volatile bool QueryCancelPending;
  extern volatile bool TransactionCancelPending;
+ extern volatile bool SessionCancelPending;
+ extern volatile bool RecoveryConflictPending;
  extern volatile bool ProcDiePending;
  
  /* these are marked volatile because they are examined by signal handlers: */
*** a/src/include/storage/procarray.h
--- b/src/include/storage/procarray.h
***************
*** 15,20 ****
--- 15,21 ----
  #define PROCARRAY_H
  
  #include "storage/lock.h"
+ #include "storage/procsignal.h"
  #include "storage/standby.h"
  #include "utils/snapshot.h"
  
***************
*** 58,65 **** extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
  					  int *nvxids);
  extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin,
  					Oid dbOid, bool skipExistingConflicts);
! extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid,
! 						 int cancel_mode);
  
  extern int	CountActiveBackends(void);
  extern int	CountDBBackends(Oid databaseid);
--- 59,65 ----
  					  int *nvxids);
  extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin,
  					Oid dbOid, bool skipExistingConflicts);
! extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
  
  extern int	CountActiveBackends(void);
  extern int	CountDBBackends(Oid databaseid);
*** a/src/include/storage/procsignal.h
--- b/src/include/storage/procsignal.h
***************
*** 31,36 **** typedef enum
--- 31,38 ----
  {
  	PROCSIG_CATCHUP_INTERRUPT,	/* sinval catchup interrupt */
  	PROCSIG_NOTIFY_INTERRUPT,	/* listen/notify interrupt */
+ 	PROCSIG_CONFLICT_ERROR_INTERRUPT,	/* recovery conflict error */
+ 	PROCSIG_CONFLICT_FATAL_INTERRUPT,	/* recovery conflict fatal */
  
  	NUM_PROCSIGNALS				/* Must be last! */
  } ProcSignalReason;
*** a/src/include/storage/standby.h
--- b/src/include/storage/standby.h
***************
*** 26,31 **** extern int	vacuum_defer_cleanup_age;
--- 26,32 ----
  
  extern void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
  									   char *reason, int cancel_mode);
+ extern void HandleConflictInterrupt(int conflict_mode);
  
  extern void InitRecoveryTransactionEnvironment(void);
  extern void ShutdownRecoveryTransactionEnvironment(void);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to