Here is the patch for it.
Jan

--
Anyone who trades liberty for security deserves neither liberty nor security.
-- Benjamin Franklin
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index c9253a9..9f880f0 100644
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 57,62 ****
--- 57,63 ----
  #include "utils/pg_rusage.h"
  #include "utils/timestamp.h"
  #include "utils/tqual.h"
+ #include "portability/instr_time.h"
  
  
  /*
*************** typedef struct LVRelStats
*** 103,108 ****
--- 104,110 ----
  	ItemPointer dead_tuples;	/* array of ItemPointerData */
  	int			num_index_scans;
  	TransactionId latestRemovedXid;
+ 	bool		lock_waiter_detected;
  } LVRelStats;
  
  
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 193,198 ****
--- 195,202 ----
  	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
  	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
  	vacrelstats->num_index_scans = 0;
+ 	vacrelstats->pages_removed = 0;
+ 	vacrelstats->lock_waiter_detected = false;
  
  	/* Open all indexes of the relation */
  	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 259,268 ****
  						vacrelstats->hasindex,
  						new_frozen_xid);
  
! 	/* report results to the stats collector, too */
! 	pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
  						 new_rel_tuples);
  
  	/* and log the action if appropriate */
  	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
--- 263,280 ----
  						vacrelstats->hasindex,
  						new_frozen_xid);
  
! 	/*
! 	 * Report results to the stats collector, too.
! 	 * An early-terminated lazy_truncate_heap attempt
! 	 * suppresses the message and also cancels the
! 	 * execution of ANALYZE, if that was ordered.
! 	 */
! 	if (!vacrelstats->lock_waiter_detected)
! 		pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
  						 new_rel_tuples);
+ 	else
+ 		vacstmt->options &= ~VACOPT_ANALYZE;
  
  	/* and log the action if appropriate */
  	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
*************** lazy_truncate_heap(Relation onerel, LVRe
*** 1255,1334 ****
  	BlockNumber old_rel_pages = vacrelstats->rel_pages;
  	BlockNumber new_rel_pages;
  	PGRUsage	ru0;
  
  	pg_rusage_init(&ru0);
  
  	/*
! 	 * We need full exclusive lock on the relation in order to do truncation.
! 	 * If we can't get it, give up rather than waiting --- we don't want to
! 	 * block other backends, and we don't want to deadlock (which is quite
! 	 * possible considering we already hold a lower-grade lock).
! 	 */
! 	if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
! 		return;
! 
! 	/*
! 	 * Now that we have exclusive lock, look to see if the rel has grown
! 	 * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
! 	 * newly added pages presumably contain non-deletable tuples.
  	 */
! 	new_rel_pages = RelationGetNumberOfBlocks(onerel);
! 	if (new_rel_pages != old_rel_pages)
  	{
  		/*
! 		 * Note: we intentionally don't update vacrelstats->rel_pages with the
! 		 * new rel size here.  If we did, it would amount to assuming that the
! 		 * new pages are empty, which is unlikely.  Leaving the numbers alone
! 		 * amounts to assuming that the new pages have the same tuple density
! 		 * as existing ones, which is less unlikely.
  		 */
! 		UnlockRelation(onerel, AccessExclusiveLock);
! 		return;
! 	}
! 
! 	/*
! 	 * Scan backwards from the end to verify that the end pages actually
! 	 * contain no tuples.  This is *necessary*, not optional, because other
! 	 * backends could have added tuples to these pages whilst we were
! 	 * vacuuming.
! 	 */
! 	new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
! 
! 	if (new_rel_pages >= old_rel_pages)
! 	{
! 		/* can't do anything after all */
! 		UnlockRelation(onerel, AccessExclusiveLock);
! 		return;
! 	}
! 
! 	/*
! 	 * Okay to truncate.
! 	 */
! 	RelationTruncate(onerel, new_rel_pages);
! 
! 	/*
! 	 * We can release the exclusive lock as soon as we have truncated.  Other
! 	 * backends can't safely access the relation until they have processed the
! 	 * smgr invalidation that smgrtruncate sent out ... but that should happen
! 	 * as part of standard invalidation processing once they acquire lock on
! 	 * the relation.
! 	 */
! 	UnlockRelation(onerel, AccessExclusiveLock);
! 
! 	/*
! 	 * Update statistics.  Here, it *is* correct to adjust rel_pages without
! 	 * also touching reltuples, since the tuple count wasn't changed by the
! 	 * truncation.
! 	 */
! 	vacrelstats->rel_pages = new_rel_pages;
! 	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
! 
! 	ereport(elevel,
! 			(errmsg("\"%s\": truncated %u to %u pages",
! 					RelationGetRelationName(onerel),
! 					old_rel_pages, new_rel_pages),
! 			 errdetail("%s.",
! 					   pg_rusage_show(&ru0))));
  }
  
  /*
--- 1267,1388 ----
  	BlockNumber old_rel_pages = vacrelstats->rel_pages;
  	BlockNumber new_rel_pages;
  	PGRUsage	ru0;
+ 	int			lock_retry;
  
  	pg_rusage_init(&ru0);
  
  	/*
! 	 * Loop until no more truncating can be done.
  	 */
! 	do
  	{
  		/*
! 		 * We need full exclusive lock on the relation in order to do
! 		 * truncation.
! 		 * If we can't get it, give up rather than waiting --- we don't want to
! 		 * block other backends, and we don't want to deadlock (which is quite
! 		 * possible considering we already hold a lower-grade lock).
  		 */
! 		vacrelstats->lock_waiter_detected = false;
! 		lock_retry = 0;
! 		while (true)
! 		{
! 			if (ConditionalLockRelation(onerel, AccessExclusiveLock))
! 				break;
! 
! 			if (autovacuum_truncate_lock_retry == 0)
! 				return;
! 
! 			/*
! 			 * Check for interrupts while trying to (re-)acquire
! 			 * the exclusive lock.
! 			 */
! 			CHECK_FOR_INTERRUPTS();
! 
! 			if (++lock_retry > autovacuum_truncate_lock_retry)
! 			{
! 				/*
! 				 * We failed to establish the lock in the specified
! 				 * number of retries. This means we give up truncating.
! 				 * Suppress the ANALYZE step. Doing an ANALYZE at
! 				 * this point will reset the dead_tuple_count in the
! 				 * stats collector, so we will not get called by the
! 				 * autovacuum launcher again to do the truncate.
! 				 */
! 				vacrelstats->lock_waiter_detected = true;
! 				return;
! 			}
! 
! 			if (autovacuum_truncate_lock_wait > 0)
! 				pg_usleep((long) autovacuum_truncate_lock_wait * 1000L);
! 		}
! 
! 		/*
! 		 * Now that we have exclusive lock, look to see if the rel has grown
! 		 * whilst we were vacuuming with non-exclusive lock.  If so, give up;
! 		 * the newly added pages presumably contain non-deletable tuples.
! 		 */
! 		new_rel_pages = RelationGetNumberOfBlocks(onerel);
! 		if (new_rel_pages != old_rel_pages)
! 		{
! 			/*
! 			 * Note: we intentionally don't update vacrelstats->rel_pages
! 			 * with the new rel size here.  If we did, it would amount to
! 			 * assuming that the new pages are empty, which is unlikely.
! 			 * Leaving the numbers alone amounts to assuming that the new
! 			 * pages have the same tuple density as existing ones, which
! 			 * is less unlikely.
! 			 */
! 			UnlockRelation(onerel, AccessExclusiveLock);
! 			return;
! 		}
! 
! 		/*
! 		 * Scan backwards from the end to verify that the end pages actually
! 		 * contain no tuples.  This is *necessary*, not optional, because other
! 		 * backends could have added tuples to these pages whilst we were
! 		 * vacuuming.
! 		 */
! 		new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
! 
! 		if (new_rel_pages >= old_rel_pages)
! 		{
! 			/* can't do anything after all */
! 			UnlockRelation(onerel, AccessExclusiveLock);
! 			return;
! 		}
! 
! 		/*
! 		 * Okay to truncate.
! 		 */
! 		RelationTruncate(onerel, new_rel_pages);
! 
! 		/*
! 		 * We can release the exclusive lock as soon as we have truncated.
! 		 * Other backends can't safely access the relation until they have
! 		 * processed the smgr invalidation that smgrtruncate sent out ...
! 		 * but that should happen as part of standard invalidation
! 		 * processing once they acquire lock on the relation.
! 		 */
! 		UnlockRelation(onerel, AccessExclusiveLock);
! 
! 		/*
! 		 * Update statistics.  Here, it *is* correct to adjust rel_pages without
! 		 * also touching reltuples, since the tuple count wasn't changed by the
! 		 * truncation.
! 		 */
! 		vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
! 		vacrelstats->rel_pages = new_rel_pages;
! 
! 		ereport(elevel,
! 				(errmsg("\"%s\": truncated %u to %u pages",
! 						RelationGetRelationName(onerel),
! 						old_rel_pages, new_rel_pages),
! 				 errdetail("%s.",
! 						   pg_rusage_show(&ru0))));
! 		old_rel_pages = new_rel_pages;
! 	} while (new_rel_pages > vacrelstats->nonempty_pages &&
! 			 vacrelstats->lock_waiter_detected);
  }
  
  /*
*************** static BlockNumber
*** 1340,1345 ****
--- 1394,1406 ----
  count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  {
  	BlockNumber blkno;
+ 	instr_time	starttime;
+ 	instr_time	currenttime;
+ 	instr_time	elapsed;
+ 
+ 	/* Initialize the starttime if we check for conflicting lock requests */
+ 	if (autovacuum_truncate_lock_check > 0)
+ 		INSTR_TIME_SET_CURRENT(starttime);
  
  	/* Strange coding of loop control is needed because blkno is unsigned */
  	blkno = vacrelstats->rel_pages;
*************** count_nondeletable_pages(Relation onerel
*** 1352,1357 ****
--- 1413,1451 ----
  		bool		hastup;
  
  		/*
+ 		 * Check if another process requests a lock on our relation.
+ 		 * We are holding an AccessExclusiveLock here, so they will
+ 		 * be waiting.  We only do this in autovacuum_truncate_lock_check
+ 		 * millisecond intervals, and we only check if that interval
+ 		 * has elapsed once every 32 blocks to keep the number of
+ 		 * system calls and actual shared lock table lookups to a
+ 		 * minimum.
+ 		 */
+ 		if (autovacuum_truncate_lock_check > 0 && (blkno % 32) == 0)
+ 		{
+ 			INSTR_TIME_SET_CURRENT(currenttime);
+ 			INSTR_TIME_SET_ZERO(elapsed);
+ 			INSTR_TIME_ADD(elapsed, currenttime);
+ 			INSTR_TIME_SUBTRACT(elapsed, starttime);
+ 			if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
+ 				>= autovacuum_truncate_lock_check)
+ 			{
+ 				if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
+ 				{
+ 					ereport(elevel,
+ 							(errmsg("\"%s\": terminating truncate "
+ 									"due to conflicting lock request",
+ 									RelationGetRelationName(onerel))));
+ 
+ 					vacrelstats->lock_waiter_detected = true;
+ 					return blkno;
+ 				}
+ 				INSTR_TIME_SET_ZERO(starttime);
+ 				INSTR_TIME_ADD(starttime, currenttime);
+ 			}
+ 		}
+ 
+ 		/*
  		 * We don't insert a vacuum delay point here, because we have an
  		 * exclusive lock on the table which we want to hold for as short a
  		 * time as possible.  We still need to check for interrupts however.
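To make the control flow of the new lazy_truncate_heap() easier to follow, here is a minimal standalone sketch of the bounded-retry lock acquisition it performs. The names try_lock(), sleep_ms(), lock_retry_limit and lock_wait_ms are hypothetical stand-ins for illustration only; in the patch the real pieces are ConditionalLockRelation(), pg_usleep(), and the autovacuum_truncate_lock_retry / autovacuum_truncate_lock_wait settings.

/*
 * Sketch only: bounded-retry acquisition of the exclusive lock, as in
 * lazy_truncate_heap() above.  try_lock(), sleep_ms() and the two limits
 * are hypothetical stand-ins, not PostgreSQL APIs.
 */
#include <stdbool.h>

extern bool try_lock(void);		/* stands in for ConditionalLockRelation() */
extern void sleep_ms(int ms);		/* stands in for pg_usleep(ms * 1000L) */

static const int lock_retry_limit = 50;	/* cf. autovacuum_truncate_lock_retry */
static const int lock_wait_ms = 20;	/* cf. autovacuum_truncate_lock_wait */

/* Returns true if the exclusive lock was acquired, false if we gave up. */
bool
acquire_with_retries(void)
{
	int		retry = 0;

	for (;;)
	{
		if (try_lock())
			return true;	/* got the lock; truncation can proceed */
		if (++retry > lock_retry_limit)
			return false;	/* give up; skip the truncate this time */
		sleep_ms(lock_wait_ms);	/* short pause before the next attempt */
	}
}

The point of the bounded loop is that the autovacuum worker never blocks behind another backend: it either gets the AccessExclusiveLock quickly or gives up and leaves the truncation for a later run.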
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 6977bcf..b8b8466 100644
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
*************** int			autovacuum_freeze_max_age;
*** 118,123 ****
--- 118,126 ----
  int			autovacuum_vac_cost_delay;
  int			autovacuum_vac_cost_limit;
+ int			autovacuum_truncate_lock_check;
+ int			autovacuum_truncate_lock_retry;
+ int			autovacuum_truncate_lock_wait;
  
  int			Log_autovacuum_min_duration = -1;
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index a7786d0..e1fa74f 100644
*** a/src/backend/storage/lmgr/lmgr.c
--- b/src/backend/storage/lmgr/lmgr.c
*************** UnlockRelation(Relation relation, LOCKMO
*** 233,238 ****
--- 233,256 ----
  }
  
  /*
+  * LockHasWaitersRelation
+  *
+  *		This is a function to check whether someone else is waiting on a
+  *		lock that we are currently holding.
+  */
+ bool
+ LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
+ {
+ 	LOCKTAG		tag;
+ 
+ 	SET_LOCKTAG_RELATION(tag,
+ 						 relation->rd_lockInfo.lockRelId.dbId,
+ 						 relation->rd_lockInfo.lockRelId.relId);
+ 
+ 	return LockHasWaiters(&tag, lockmode, false);
+ }
+ 
+ /*
   * LockRelationIdForSession
   *
   * This routine grabs a session-level lock on the target relation.  The
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 32cc229..605df84 100644
*** a/src/backend/storage/lmgr/lock.c
--- b/src/backend/storage/lmgr/lock.c
*************** ProcLockHashCode(const PROCLOCKTAG *proc
*** 539,544 ****
--- 539,636 ----
  	return lockhash;
  }
  
+ /*
+  * LockHasWaiters -- look up 'locktag' and check if releasing this
+  *		lock would wake up other processes waiting for it.
+  */
+ bool
+ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
+ {
+ 	LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+ 	LockMethod	lockMethodTable;
+ 	LOCALLOCKTAG localtag;
+ 	LOCALLOCK  *locallock;
+ 	LOCK	   *lock;
+ 	PROCLOCK   *proclock;
+ 	LWLockId	partitionLock;
+ 	bool		hasWaiters = FALSE;
+ 
+ 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+ 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+ 	lockMethodTable = LockMethods[lockmethodid];
+ 	if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
+ 		elog(ERROR, "unrecognized lock mode: %d", lockmode);
+ 
+ #ifdef LOCK_DEBUG
+ 	if (LOCK_DEBUG_ENABLED(locktag))
+ 		elog(LOG, "LockHasWaiters: lock [%u,%u] %s",
+ 			 locktag->locktag_field1, locktag->locktag_field2,
+ 			 lockMethodTable->lockModeNames[lockmode]);
+ #endif
+ 
+ 	/*
+ 	 * Find the LOCALLOCK entry for this lock and lockmode
+ 	 */
+ 	MemSet(&localtag, 0, sizeof(localtag));		/* must clear padding */
+ 	localtag.lock = *locktag;
+ 	localtag.mode = lockmode;
+ 
+ 	locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
+ 										  (void *) &localtag,
+ 										  HASH_FIND, NULL);
+ 
+ 	/*
+ 	 * let the caller print its own error message, too.  Do not ereport(ERROR).
+ 	 */
+ 	if (!locallock || locallock->nLocks <= 0)
+ 	{
+ 		elog(WARNING, "you don't own a lock of type %s",
+ 			 lockMethodTable->lockModeNames[lockmode]);
+ 		return FALSE;
+ 	}
+ 
+ 	/*
+ 	 * Check the shared lock table.
+ 	 */
+ 	partitionLock = LockHashPartitionLock(locallock->hashcode);
+ 
+ 	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+ 
+ 	/*
+ 	 * We don't need to re-find the lock or proclock, since we kept their
+ 	 * addresses in the locallock table, and they couldn't have been removed
+ 	 * while we were holding a lock on them.
+ 	 */
+ 	lock = locallock->lock;
+ 	LOCK_PRINT("LockHasWaiters: found", lock, lockmode);
+ 	proclock = locallock->proclock;
+ 	PROCLOCK_PRINT("LockHasWaiters: found", proclock);
+ 
+ 	/*
+ 	 * Double-check that we are actually holding a lock of the type we want to
+ 	 * release.
+ 	 */
+ 	if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+ 	{
+ 		PROCLOCK_PRINT("LockHasWaiters: WRONGTYPE", proclock);
+ 		LWLockRelease(partitionLock);
+ 		elog(WARNING, "you don't own a lock of type %s",
+ 			 lockMethodTable->lockModeNames[lockmode]);
+ 		RemoveLocalLock(locallock);
+ 		return FALSE;
+ 	}
+ 
+ 	/*
+ 	 * Do the checking.
+ 	 */
+ 	if ((lockMethodTable->conflictTab[lockmode] & lock->waitMask) != 0)
+ 		hasWaiters = TRUE;
+ 
+ 	LWLockRelease(partitionLock);
+ 
+ 	return hasWaiters;
+ }
+ 
  /*
   * LockAcquire -- Check for lock conflicts, sleep if conflict found,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 745e7be..d3fd4a3 100644
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static struct config_int ConfigureNamesI
*** 1815,1820 ****
--- 1815,1852 ----
  	},
  
  	{
+ 		{"autovacuum_truncate_lock_check", PGC_SIGHUP, AUTOVACUUM,
+ 			gettext_noop("How often autovacuum checks for conflicting lock requests during truncate."),
+ 			NULL,
+ 			GUC_UNIT_MS
+ 		},
+ 		&autovacuum_truncate_lock_check,
+ 		100, 0, 500,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"autovacuum_truncate_lock_retry", PGC_SIGHUP, AUTOVACUUM,
+ 			gettext_noop("How many times autovacuum will retry acquiring an exclusive lock for truncate."),
+ 			NULL
+ 		},
+ 		&autovacuum_truncate_lock_retry,
+ 		50, 0, 100,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"autovacuum_truncate_lock_wait", PGC_SIGHUP, AUTOVACUUM,
+ 			gettext_noop("How long autovacuum waits between attempts to acquire an exclusive lock for truncate."),
+ 			NULL,
+ 			GUC_UNIT_MS
+ 		},
+ 		&autovacuum_truncate_lock_wait,
+ 		20, 0, 50,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
  		{"max_files_per_process", PGC_POSTMASTER, RESOURCES_KERNEL,
  			gettext_noop("Sets the maximum number of simultaneously open files for each server process."),
  			NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index eeb9b82..ec9e8c4 100644
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 471,476 ****
--- 471,481 ----
  #autovacuum_vacuum_cost_limit = -1	# default vacuum cost limit for
  					# autovacuum, -1 means use
  					# vacuum_cost_limit
+ #autovacuum_truncate_lock_check = 100ms	# default for conflicting lock check;
+ 					# 0 means disabled (deadlock code will kill autovacuum)
+ #autovacuum_truncate_lock_retry = 50	# default exclusive lock attempts
+ #autovacuum_truncate_lock_wait = 20ms	# default wait between exclusive
+ 					# lock attempts for truncate
  
  
  #------------------------------------------------------------------------------
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index a851758..6e0e286 100644
*** a/src/include/postmaster/autovacuum.h
--- b/src/include/postmaster/autovacuum.h
*************** extern double autovacuum_anl_scale;
*** 26,31 ****
--- 26,34 ----
  extern int	autovacuum_freeze_max_age;
  extern int	autovacuum_vac_cost_delay;
  extern int	autovacuum_vac_cost_limit;
+ extern int	autovacuum_truncate_lock_check;
+ extern int	autovacuum_truncate_lock_retry;
+ extern int	autovacuum_truncate_lock_wait;
  
  /* autovacuum launcher PID, only valid when worker is shutting down */
  extern int	AutovacuumLauncherPid;
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index de340c4..aa79eda 100644
*** a/src/include/storage/lmgr.h
--- b/src/include/storage/lmgr.h
*************** extern void UnlockRelationOid(Oid relid,
*** 31,36 ****
--- 31,37 ----
  extern void LockRelation(Relation relation, LOCKMODE lockmode);
  extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
  extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
+ extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
  
  extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
  extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index d56f0fa..f0eca35 100644
*** a/src/include/storage/lock.h
--- b/src/include/storage/lock.h
*************** extern void LockReleaseAll(LOCKMETHODID
*** 494,499 ****
--- 494,501 ----
  extern void LockReleaseSession(LOCKMETHODID lockmethodid);
  extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
  extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
+ extern bool LockHasWaiters(const LOCKTAG *locktag,
+ 			   LOCKMODE lockmode, bool sessionLock);
  extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
  						   LOCKMODE lockmode);
  extern void AtPrepare_Locks(void);
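The other half of the mechanism is the periodic waiter check inside count_nondeletable_pages(). The sketch below shows the same throttling idea in plain C: the clock is read only every 32 blocks, and the (comparatively expensive) lock-table lookup happens only once the configured interval has elapsed. has_lock_waiters(), elapsed_ms(), scan_backwards() and check_interval_ms are hypothetical stand-ins for illustration; the patch uses LockHasWaitersRelation(), the instr_time macros and the autovacuum_truncate_lock_check setting (default 100ms, per the guc.c hunk above).

/*
 * Sketch only: throttled check for waiters while scanning blocks backwards,
 * as in count_nondeletable_pages() above.  has_lock_waiters() and
 * check_interval_ms are hypothetical stand-ins, not PostgreSQL APIs.
 */
#include <stdbool.h>
#include <time.h>

extern bool has_lock_waiters(void);	/* stands in for LockHasWaitersRelation() */

static long
elapsed_ms(const struct timespec *start, const struct timespec *now)
{
	return (now->tv_sec - start->tv_sec) * 1000L +
		(now->tv_nsec - start->tv_nsec) / 1000000L;
}

/*
 * Scan nblocks backwards; return the block number at which a conflicting
 * lock request was noticed, or -1 if the scan finished undisturbed.
 */
long
scan_backwards(long nblocks, int check_interval_ms)
{
	struct timespec starttime;
	struct timespec currenttime;

	clock_gettime(CLOCK_MONOTONIC, &starttime);

	for (long blkno = nblocks; blkno > 0; blkno--)
	{
		/* cheap modulo test first: read the clock only every 32 blocks */
		if (check_interval_ms > 0 && blkno % 32 == 0)
		{
			clock_gettime(CLOCK_MONOTONIC, &currenttime);
			if (elapsed_ms(&starttime, &currenttime) >= check_interval_ms)
			{
				if (has_lock_waiters())
					return blkno;	/* stop early and let the waiter in */
				starttime = currenttime;	/* start a new interval */
			}
		}
		/* ... per-block work (checking the page for tuples) goes here ... */
	}
	return -1;
}

With the defaults registered in guc.c, a backend queued behind the AccessExclusiveLock should normally be noticed within roughly the check interval plus the time needed to scan at most 32 more blocks, after which the truncation backs off and can be resumed by a later autovacuum run.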