Patch attached; the matching discussion thread is on pgsql-hackers.
A test script is included, but it is not suggested for inclusion in the regression tests.
2007-10-11 19:35:27 BST 2209LOG: autovacuum launcher started
2007-10-11 19:36:42 BST 2213LOG: automatic analyze of table
"postgres.pg_catalog.pg_attribute" system usage: CPU 0.00s/0.01u sec
elapsed 4.28 sec
2007-10-11 19:36:43 BST 2213LOG: automatic analyze of table
"postgres.pg_catalog.pg_class" system usage: CPU 0.00s/0.00u sec elapsed
0.95 sec
2007-10-11 19:36:45 BST 2213LOG: cancelling autoanalyze of table
"postgres.public.a"
2007-10-11 19:41:38 BST 2214LOG: automatic analyze of table
"postgres.public.a" system usage: CPU 0.00s/0.00u sec elapsed 233.62 sec
The test output shows that the ANALYZE is cancelled with no ill effects, and
that it runs again a few minutes later, after all the DDL has completed.
--
Simon Riggs
2ndQuadrant http://www.2ndQuadrant.com
Index: src/backend/postmaster/autovacuum.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/postmaster/autovacuum.c,v
retrieving revision 1.61
diff -c -r1.61 autovacuum.c
*** src/backend/postmaster/autovacuum.c 24 Sep 2007 04:12:01 -0000 1.61
--- src/backend/postmaster/autovacuum.c 11 Oct 2007 15:16:19 -0000
***************
*** 395,400 ****
--- 395,403 ----
/* Identify myself via ps */
init_ps_display("autovacuum launcher process", "", "", "");
+ if (PostAuthDelay)
+ pg_usleep(PostAuthDelay * 1000000L);
+
SetProcessingMode(InitProcessing);
/*
***************
*** 1437,1442 ****
--- 1440,1448 ----
/* Identify myself via ps */
init_ps_display("autovacuum worker process", "", "", "");
+ if (PostAuthDelay)
+ pg_usleep(PostAuthDelay * 1000000L);
+
SetProcessingMode(InitProcessing);
/*
***************
*** 2107,2116 ****
if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
{
HOLD_INTERRUPTS();
! elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
! get_database_name(MyDatabaseId),
! get_namespace_name(get_rel_namespace(tab->at_relid)),
! get_rel_name(tab->at_relid));
AbortOutOfAnyTransaction();
FlushErrorState();
--- 2113,2128 ----
if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
{
HOLD_INTERRUPTS();
! if (tab->at_dovacuum)
! elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
! get_database_name(MyDatabaseId),
! get_namespace_name(get_rel_namespace(tab->at_relid)),
! get_rel_name(tab->at_relid));
! else
! elog(LOG, "cancelling autoanalyze of table \"%s.%s.%s\"",
! get_database_name(MyDatabaseId),
! get_namespace_name(get_rel_namespace(tab->at_relid)),
! get_rel_name(tab->at_relid));
AbortOutOfAnyTransaction();
FlushErrorState();
Index: src/backend/storage/lmgr/deadlock.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/lmgr/deadlock.c,v
retrieving revision 1.48
diff -c -r1.48 deadlock.c
*** src/backend/storage/lmgr/deadlock.c 19 Jun 2007 20:13:21 -0000 1.48
--- src/backend/storage/lmgr/deadlock.c 11 Oct 2007 18:28:52 -0000
***************
*** 63,68 ****
--- 63,69 ----
int pid; /* PID of blocked backend */
} DEADLOCK_INFO;
+ int blocking_autovacuum_pid = 0; /* the pid of any blocking av worker found */
static bool DeadLockCheckRecurse(PGPROC *proc);
static int TestConfiguration(PGPROC *startProc);
***************
*** 206,211 ****
--- 207,215 ----
nPossibleConstraints = 0;
nWaitOrders = 0;
+ /* Initialize to not blocked by an autovacuum worker */
+ blocking_autovacuum_pid = 0;
+
/* Search for deadlocks and possible fixes */
if (DeadLockCheckRecurse(proc))
{
***************
*** 255,264 ****
--- 259,276 ----
/* Return code tells caller if we had to escape a deadlock or not */
if (nWaitOrders > 0)
return DS_SOFT_DEADLOCK;
+ else if (blocking_autovacuum_pid > 0)
+ return DS_BLOCKED_BY_AUTOVACUUM;
else
return DS_NO_DEADLOCK;
}
+ int
+ GetBlockingAutoVacuumWorkerPid(void)
+ {
+ return blocking_autovacuum_pid;
+ }
+
/*
* DeadLockCheckRecurse -- recursively search for valid orderings
*
***************
*** 497,502 ****
--- 509,522 ----
if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
(conflictMask & LOCKBIT_ON(lm)))
{
+ /*
+ * Look for a blocking autovacuum. There will only ever
+ * be one, since the autovacuum workers are careful
+ * not to operate concurrently on the same table.
+ */
+ if (proc->isAutovacuum)
+ blocking_autovacuum_pid = proc->pid;
+
/* This proc hard-blocks checkProc */
if (FindLockCycleRecurse(proc, depth + 1,
softEdges, nSoftEdges))
Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.194
diff -c -r1.194 proc.c
*** src/backend/storage/lmgr/proc.c 8 Sep 2007 20:31:15 -0000 1.194
--- src/backend/storage/lmgr/proc.c 11 Oct 2007 19:05:20 -0000
***************
*** 733,738 ****
--- 733,739 ----
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false;
+ bool allow_autovacuum_cancel = true;
int myWaitStatus;
PGPROC *proc;
int i;
***************
*** 892,897 ****
--- 893,926 ----
*/
myWaitStatus = MyProc->waitStatus;
+ if (allow_autovacuum_cancel && deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
+ {
+ int blocking_autovacuum_pid = GetBlockingAutoVacuumWorkerPid();
+
+ elog(DEBUG2, "sending cancel to blocking autovacuum pid = %d",
+ blocking_autovacuum_pid);
+
+ /*
+ * Send the autovacuum worker Back to Old Kent Road
+ *
+ * If we have setsid(), signal the backend's whole process group
+ */
+ #ifdef HAVE_SETSID
+ if (kill(-blocking_autovacuum_pid, SIGINT))
+ #else
+ if (kill(blocking_autovacuum_pid, SIGINT))
+ #endif
+ {
+ /* Just a warning to allow multiple callers */
+ ereport(WARNING,
+ (errmsg("could not send signal to process %d: %m",
+ blocking_autovacuum_pid)));
+ }
+
+ /* prevent signal from being resent more than once */
+ allow_autovacuum_cancel = false;
+ }
+
/*
* If awoken after the deadlock check interrupt has run, and
* log_lock_waits is on, then report about the wait.
***************
*** 930,935 ****
--- 959,967 ----
(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
}
+ else if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
+ ereport(DEBUG2,
+ (errmsg("sent cancel to blocking autovacuum worker")));
if (myWaitStatus == STATUS_WAITING)
ereport(LOG,
***************
*** 1188,1194 ****
* RemoveFromWaitQueue took care of waking up any such processes.
*/
}
! else if (log_lock_waits)
{
/*
* Unlock my semaphore so that the interrupted ProcSleep() call can
--- 1220,1226 ----
* RemoveFromWaitQueue took care of waking up any such processes.
*/
}
! else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
{
/*
* Unlock my semaphore so that the interrupted ProcSleep() call can
Index: src/include/storage/lock.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/lock.h,v
retrieving revision 1.107
diff -c -r1.107 lock.h
*** src/include/storage/lock.h 5 Sep 2007 18:10:48 -0000 1.107
--- src/include/storage/lock.h 11 Oct 2007 14:35:29 -0000
***************
*** 442,448 ****
DS_NOT_YET_CHECKED, /* no deadlock check has run yet */
DS_NO_DEADLOCK, /* no deadlock detected */
DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */
! DS_HARD_DEADLOCK /* deadlock, no way out but ERROR */
} DeadLockState;
--- 442,449 ----
DS_NOT_YET_CHECKED, /* no deadlock check has run yet */
DS_NO_DEADLOCK, /* no deadlock detected */
DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */
! DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */
! DS_BLOCKED_BY_AUTOVACUUM /* queue blocked by autovacuum worker */
} DeadLockState;
***************
*** 495,500 ****
--- 496,502 ----
void *recdata, uint32 len);
extern DeadLockState DeadLockCheck(PGPROC *proc);
+ extern int GetBlockingAutoVacuumWorkerPid(void);
extern void DeadLockReport(void);
extern void RememberSimpleDeadLock(PGPROC *proc1,
LOCKMODE lockmode,
--
drop table a;
create table a as select generate_series(1,1000000)::integer as col1;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
alter table a alter column col1 type bigint;
--
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly