Patch follows; there is a matching discussion thread on pgsql-hackers.

A test script is included; it is not suggested for inclusion in the regression tests.

2007-10-11 19:35:27 BST 2209LOG:  autovacuum launcher started
2007-10-11 19:36:42 BST 2213LOG:  automatic analyze of table
"postgres.pg_catalog.pg_attribute" system usage: CPU 0.00s/0.01u sec
elapsed 4.28 sec
2007-10-11 19:36:43 BST 2213LOG:  automatic analyze of table
"postgres.pg_catalog.pg_class" system usage: CPU 0.00s/0.00u sec elapsed
0.95 sec
2007-10-11 19:36:45 BST 2213LOG:  cancelling autoanalyze of table
"postgres.public.a"
2007-10-11 19:41:38 BST 2214LOG:  automatic analyze of table
"postgres.public.a" system usage: CPU 0.00s/0.00u sec elapsed 233.62 sec

The test output shows that the ANALYZE is cancelled with no ill effects, and
that it runs again a few minutes later, after all the DDL has completed.

-- 
  Simon Riggs
  2ndQuadrant  http://www.2ndQuadrant.com
Index: src/backend/postmaster/autovacuum.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/postmaster/autovacuum.c,v
retrieving revision 1.61
diff -c -r1.61 autovacuum.c
*** src/backend/postmaster/autovacuum.c	24 Sep 2007 04:12:01 -0000	1.61
--- src/backend/postmaster/autovacuum.c	11 Oct 2007 15:16:19 -0000
***************
*** 395,400 ****
--- 395,403 ----
  	/* Identify myself via ps */
  	init_ps_display("autovacuum launcher process", "", "", "");
  
+ 	if (PostAuthDelay)
+ 		pg_usleep(PostAuthDelay * 1000000L);
+ 
  	SetProcessingMode(InitProcessing);
  
  	/*
***************
*** 1437,1442 ****
--- 1440,1448 ----
  	/* Identify myself via ps */
  	init_ps_display("autovacuum worker process", "", "", "");
  
+ 	if (PostAuthDelay)
+ 		pg_usleep(PostAuthDelay * 1000000L);
+ 
  	SetProcessingMode(InitProcessing);
  
  	/*
***************
*** 2107,2116 ****
  			if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
  			{
  				HOLD_INTERRUPTS();
! 				elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
! 					 get_database_name(MyDatabaseId),
! 					 get_namespace_name(get_rel_namespace(tab->at_relid)),
! 					 get_rel_name(tab->at_relid));
  
  				AbortOutOfAnyTransaction();
  				FlushErrorState();
--- 2113,2128 ----
  			if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
  			{
  				HOLD_INTERRUPTS();
! 				if (tab->at_dovacuum)
! 					elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
! 						 get_database_name(MyDatabaseId),
! 						 get_namespace_name(get_rel_namespace(tab->at_relid)),
! 						 get_rel_name(tab->at_relid));
! 				else
! 					elog(LOG, "cancelling autoanalyze of table \"%s.%s.%s\"",
! 						 get_database_name(MyDatabaseId),
! 						 get_namespace_name(get_rel_namespace(tab->at_relid)),
! 						 get_rel_name(tab->at_relid));
  
  				AbortOutOfAnyTransaction();
  				FlushErrorState();
Index: src/backend/storage/lmgr/deadlock.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/lmgr/deadlock.c,v
retrieving revision 1.48
diff -c -r1.48 deadlock.c
*** src/backend/storage/lmgr/deadlock.c	19 Jun 2007 20:13:21 -0000	1.48
--- src/backend/storage/lmgr/deadlock.c	11 Oct 2007 18:28:52 -0000
***************
*** 63,68 ****
--- 63,69 ----
  	int			pid;			/* PID of blocked backend */
  } DEADLOCK_INFO;
  
+ int blocking_autovacuum_pid = 0; /* the pid of any blocking av worker found */
  
  static bool DeadLockCheckRecurse(PGPROC *proc);
  static int	TestConfiguration(PGPROC *startProc);
***************
*** 206,211 ****
--- 207,215 ----
  	nPossibleConstraints = 0;
  	nWaitOrders = 0;
  
+ 	/* Initialize to not blocked by an autovacuum worker */
+ 	blocking_autovacuum_pid = 0;
+ 
  	/* Search for deadlocks and possible fixes */
  	if (DeadLockCheckRecurse(proc))
  	{
***************
*** 255,264 ****
--- 259,276 ----
  	/* Return code tells caller if we had to escape a deadlock or not */
  	if (nWaitOrders > 0)
  		return DS_SOFT_DEADLOCK;
+ 	else if (blocking_autovacuum_pid > 0)
+ 		return DS_BLOCKED_BY_AUTOVACUUM;
  	else
  		return DS_NO_DEADLOCK;
  }
  
+ int
+ GetBlockingAutoVacuumWorkerPid(void)
+ {
+ 	return blocking_autovacuum_pid;
+ }
+ 
  /*
   * DeadLockCheckRecurse -- recursively search for valid orderings
   *
***************
*** 497,502 ****
--- 509,522 ----
  				if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
  					(conflictMask & LOCKBIT_ON(lm)))
  				{
+ 					/*
+ 					 * Look for a blocking autovacuum. There will only ever
+ 					 * be one, since the autovacuum workers are careful
+ 					 * not to operate concurrently on the same table. 
+ 					 */
+ 					if (proc->isAutovacuum)
+ 						blocking_autovacuum_pid = proc->pid;
+ 
  					/* This proc hard-blocks checkProc */
  					if (FindLockCycleRecurse(proc, depth + 1,
  											 softEdges, nSoftEdges))
Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.194
diff -c -r1.194 proc.c
*** src/backend/storage/lmgr/proc.c	8 Sep 2007 20:31:15 -0000	1.194
--- src/backend/storage/lmgr/proc.c	11 Oct 2007 19:05:20 -0000
***************
*** 733,738 ****
--- 733,739 ----
  	PROC_QUEUE *waitQueue = &(lock->waitProcs);
  	LOCKMASK	myHeldLocks = MyProc->heldLocks;
  	bool		early_deadlock = false;
+ 	bool 		allow_autovacuum_cancel = true;
  	int			myWaitStatus;
  	PGPROC	   *proc;
  	int			i;
***************
*** 892,897 ****
--- 893,926 ----
  		 */
  		myWaitStatus = MyProc->waitStatus;
  
+ 		if (allow_autovacuum_cancel && deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
+ 		{
+ 			int	blocking_autovacuum_pid = GetBlockingAutoVacuumWorkerPid();
+ 
+ 			elog(DEBUG2, "sending cancel to blocking autovacuum pid = %d", 
+ 					blocking_autovacuum_pid);
+ 
+ 			/*
+ 			 * Send the autovacuum worker Back to Old Kent Road
+ 			 *
+ 			 * If we have setsid(), signal the backend's whole process group 
+ 			 */
+ #ifdef HAVE_SETSID
+ 			if (kill(-blocking_autovacuum_pid, SIGINT))
+ #else
+ 			if (kill(blocking_autovacuum_pid, SIGINT))
+ #endif
+ 			{
+ 				/* Just a warning to allow multiple callers */
+ 				ereport(WARNING,
+ 					(errmsg("could not send signal to process %d: %m", 
+ 						blocking_autovacuum_pid)));
+ 			}
+ 
+ 			/* prevent signal from being resent more than once */
+ 			allow_autovacuum_cancel = false;
+ 		}
+ 
  		/*
  		 * If awoken after the deadlock check interrupt has run, and
  		 * log_lock_waits is on, then report about the wait.
***************
*** 930,935 ****
--- 959,967 ----
  						(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
  								MyProcPid, modename, buf.data, msecs, usecs)));
  			}
+ 			else if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
+ 				ereport(DEBUG2,
+ 						(errmsg("sent cancel to blocking autovacuum worker")));
  
  			if (myWaitStatus == STATUS_WAITING)
  				ereport(LOG,
***************
*** 1188,1194 ****
  		 * RemoveFromWaitQueue took care of waking up any such processes.
  		 */
  	}
! 	else if (log_lock_waits)
  	{
  		/*
  		 * Unlock my semaphore so that the interrupted ProcSleep() call can
--- 1220,1226 ----
  		 * RemoveFromWaitQueue took care of waking up any such processes.
  		 */
  	}
! 	else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
  	{
  		/*
  		 * Unlock my semaphore so that the interrupted ProcSleep() call can
Index: src/include/storage/lock.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/lock.h,v
retrieving revision 1.107
diff -c -r1.107 lock.h
*** src/include/storage/lock.h	5 Sep 2007 18:10:48 -0000	1.107
--- src/include/storage/lock.h	11 Oct 2007 14:35:29 -0000
***************
*** 442,448 ****
  	DS_NOT_YET_CHECKED,			/* no deadlock check has run yet */
  	DS_NO_DEADLOCK,				/* no deadlock detected */
  	DS_SOFT_DEADLOCK,			/* deadlock avoided by queue rearrangement */
! 	DS_HARD_DEADLOCK			/* deadlock, no way out but ERROR */
  } DeadLockState;
  
  
--- 442,449 ----
  	DS_NOT_YET_CHECKED,			/* no deadlock check has run yet */
  	DS_NO_DEADLOCK,				/* no deadlock detected */
  	DS_SOFT_DEADLOCK,			/* deadlock avoided by queue rearrangement */
! 	DS_HARD_DEADLOCK,			/* deadlock, no way out but ERROR */
! 	DS_BLOCKED_BY_AUTOVACUUM	/* queue blocked by autovacuum worker */
  } DeadLockState;
  
  
***************
*** 495,500 ****
--- 496,502 ----
  						void *recdata, uint32 len);
  
  extern DeadLockState DeadLockCheck(PGPROC *proc);
+ extern int GetBlockingAutoVacuumWorkerPid(void);
  extern void DeadLockReport(void);
  extern void RememberSimpleDeadLock(PGPROC *proc1,
  					   LOCKMODE lockmode,
--
-- Manual test script for autovacuum cancellation; not intended for the
-- regression tests.

-- IF EXISTS keeps the first run clean when table "a" has not been created yet.
drop table if exists a;

-- Build a one-million-row table with a single integer column.
create table a as select generate_series(1,1000000)::integer as col1;

-- Issue the same ALTER repeatedly so that DDL keeps running against the table.
-- NOTE(review): presumably this is what lets an autovacuum worker start on
-- "a" and then be cancelled by a waiting ALTER; confirm against the server log.
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
--
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
       subscribe-nomail command to [EMAIL PROTECTED] so that your
       message can get through to the mailing list cleanly

Reply via email to