Patch attached, with a matching discussion thread on the hackers list. A test script is included, but it is not suggested for inclusion in the regression tests.
2007-10-11 19:35:27 BST 2209LOG: autovacuum launcher started 2007-10-11 19:36:42 BST 2213LOG: automatic analyze of table "postgres.pg_catalog.pg_attribute" system usage: CPU 0.00s/0.01u sec elapsed 4.28 sec 2007-10-11 19:36:43 BST 2213LOG: automatic analyze of table "postgres.pg_catalog.pg_class" system usage: CPU 0.00s/0.00u sec elapsed 0.95 sec 2007-10-11 19:36:45 BST 2213LOG: cancelling autoanalyze of table "postgres.public.a" 2007-10-11 19:41:38 BST 2214LOG: automatic analyze of table "postgres.public.a" system usage: CPU 0.00s/0.00u sec elapsed 233.62 sec The test output shows that the ANALYZE is cancelled with no ill effects, and that it runs again a few minutes later, after all the DDL has completed. -- Simon Riggs 2ndQuadrant http://www.2ndQuadrant.com
Index: src/backend/postmaster/autovacuum.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/postmaster/autovacuum.c,v retrieving revision 1.61 diff -c -r1.61 autovacuum.c *** src/backend/postmaster/autovacuum.c 24 Sep 2007 04:12:01 -0000 1.61 --- src/backend/postmaster/autovacuum.c 11 Oct 2007 15:16:19 -0000 *************** *** 395,400 **** --- 395,403 ---- /* Identify myself via ps */ init_ps_display("autovacuum launcher process", "", "", ""); + if (PostAuthDelay) + pg_usleep(PostAuthDelay * 1000000L); + SetProcessingMode(InitProcessing); /* *************** *** 1437,1442 **** --- 1440,1448 ---- /* Identify myself via ps */ init_ps_display("autovacuum worker process", "", "", ""); + if (PostAuthDelay) + pg_usleep(PostAuthDelay * 1000000L); + SetProcessingMode(InitProcessing); /* *************** *** 2107,2116 **** if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED) { HOLD_INTERRUPTS(); ! elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"", ! get_database_name(MyDatabaseId), ! get_namespace_name(get_rel_namespace(tab->at_relid)), ! get_rel_name(tab->at_relid)); AbortOutOfAnyTransaction(); FlushErrorState(); --- 2113,2128 ---- if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED) { HOLD_INTERRUPTS(); ! if (tab->at_dovacuum) ! elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"", ! get_database_name(MyDatabaseId), ! get_namespace_name(get_rel_namespace(tab->at_relid)), ! get_rel_name(tab->at_relid)); ! else ! elog(LOG, "cancelling autoanalyze of table \"%s.%s.%s\"", ! get_database_name(MyDatabaseId), ! get_namespace_name(get_rel_namespace(tab->at_relid)), ! 
get_rel_name(tab->at_relid)); AbortOutOfAnyTransaction(); FlushErrorState(); Index: src/backend/storage/lmgr/deadlock.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/lmgr/deadlock.c,v retrieving revision 1.48 diff -c -r1.48 deadlock.c *** src/backend/storage/lmgr/deadlock.c 19 Jun 2007 20:13:21 -0000 1.48 --- src/backend/storage/lmgr/deadlock.c 11 Oct 2007 18:28:52 -0000 *************** *** 63,68 **** --- 63,69 ---- int pid; /* PID of blocked backend */ } DEADLOCK_INFO; + int blocking_autovacuum_pid = 0; /* the pid of any blocking av worker found */ static bool DeadLockCheckRecurse(PGPROC *proc); static int TestConfiguration(PGPROC *startProc); *************** *** 206,211 **** --- 207,215 ---- nPossibleConstraints = 0; nWaitOrders = 0; + /* Initialize to not blocked by an autovacuum worker */ + blocking_autovacuum_pid = 0; + /* Search for deadlocks and possible fixes */ if (DeadLockCheckRecurse(proc)) { *************** *** 255,264 **** --- 259,276 ---- /* Return code tells caller if we had to escape a deadlock or not */ if (nWaitOrders > 0) return DS_SOFT_DEADLOCK; + else if (blocking_autovacuum_pid > 0) + return DS_BLOCKED_BY_AUTOVACUUM; else return DS_NO_DEADLOCK; } + int + GetBlockingAutoVacuumWorkerPid(void) + { + return blocking_autovacuum_pid; + } + /* * DeadLockCheckRecurse -- recursively search for valid orderings * *************** *** 497,502 **** --- 509,522 ---- if ((proclock->holdMask & LOCKBIT_ON(lm)) && (conflictMask & LOCKBIT_ON(lm))) { + /* + * Look for a blocking autovacuum. There will only ever + * be one, since the autovacuum workers are careful + * not to operate concurrently on the same table. 
+ */ + if (proc->isAutovacuum) + blocking_autovacuum_pid = proc->pid; + /* This proc hard-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) Index: src/backend/storage/lmgr/proc.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/lmgr/proc.c,v retrieving revision 1.194 diff -c -r1.194 proc.c *** src/backend/storage/lmgr/proc.c 8 Sep 2007 20:31:15 -0000 1.194 --- src/backend/storage/lmgr/proc.c 11 Oct 2007 19:05:20 -0000 *************** *** 733,738 **** --- 733,739 ---- PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; + bool allow_autovacuum_cancel = true; int myWaitStatus; PGPROC *proc; int i; *************** *** 892,897 **** --- 893,926 ---- */ myWaitStatus = MyProc->waitStatus; + if (allow_autovacuum_cancel && deadlock_state == DS_BLOCKED_BY_AUTOVACUUM) + { + int blocking_autovacuum_pid = GetBlockingAutoVacuumWorkerPid(); + + elog(DEBUG2, "sending cancel to blocking autovacuum pid = %d", + blocking_autovacuum_pid); + + /* + * Send the autovacuum worker Back to Old Kent Road + * + * If we have setsid(), signal the backend's whole process group + */ + #ifdef HAVE_SETSID + if (kill(-blocking_autovacuum_pid, SIGINT)) + #else + if (kill(blocking_autovacuum_pid, SIGINT)) + #endif + { + /* Just a warning to allow multiple callers */ + ereport(WARNING, + (errmsg("could not send signal to process %d: %m", + blocking_autovacuum_pid))); + } + + /* prevent signal from being resent more than once */ + allow_autovacuum_cancel = false; + } + /* * If awoken after the deadlock check interrupt has run, and * log_lock_waits is on, then report about the wait. 
*************** *** 930,935 **** --- 959,967 ---- (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); } + else if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM) + ereport(DEBUG2, + (errmsg("sent cancel to blocking autovacuum worker"))); if (myWaitStatus == STATUS_WAITING) ereport(LOG, *************** *** 1188,1194 **** * RemoveFromWaitQueue took care of waking up any such processes. */ } ! else if (log_lock_waits) { /* * Unlock my semaphore so that the interrupted ProcSleep() call can --- 1220,1226 ---- * RemoveFromWaitQueue took care of waking up any such processes. */ } ! else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM) { /* * Unlock my semaphore so that the interrupted ProcSleep() call can Index: src/include/storage/lock.h =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/lock.h,v retrieving revision 1.107 diff -c -r1.107 lock.h *** src/include/storage/lock.h 5 Sep 2007 18:10:48 -0000 1.107 --- src/include/storage/lock.h 11 Oct 2007 14:35:29 -0000 *************** *** 442,448 **** DS_NOT_YET_CHECKED, /* no deadlock check has run yet */ DS_NO_DEADLOCK, /* no deadlock detected */ DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */ ! DS_HARD_DEADLOCK /* deadlock, no way out but ERROR */ } DeadLockState; --- 442,449 ---- DS_NOT_YET_CHECKED, /* no deadlock check has run yet */ DS_NO_DEADLOCK, /* no deadlock detected */ DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */ ! DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */ ! 
DS_BLOCKED_BY_AUTOVACUUM /* queue blocked by autovacuum worker */ } DeadLockState; *************** *** 495,500 **** --- 496,502 ---- void *recdata, uint32 len); extern DeadLockState DeadLockCheck(PGPROC *proc); + extern int GetBlockingAutoVacuumWorkerPid(void); extern void DeadLockReport(void); extern void RememberSimpleDeadLock(PGPROC *proc1, LOCKMODE lockmode,
-- drop table a; create table a as select generate_series(1,1000000)::integer as col1; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; alter table a alter column col1 type bigint; --
---------------------------(end of broadcast)--------------------------- TIP 1: if posting/reading through Usenet, please send an appropriate subscribe-nomail command to [EMAIL PROTECTED] so that your message can get through to the mailing list cleanly