Here is the patch for it.
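In short: lazy_truncate_heap() now loops, acquiring the AccessExclusiveLock
only through ConditionalLockRelation() with a bounded number of retries, and
count_nondeletable_pages() periodically checks whether another backend is
queued behind our lock and terminates the truncate early if so. An early
termination suppresses the pgstat report and the ANALYZE step, so the dead
tuple count in the stats collector stays up and the autovacuum launcher will
come back later to finish the truncate. Three new GUCs control this; purely
as an illustration (the values are just the defaults from the guc.c hunk
below), a postgresql.conf setup could look like:

    autovacuum_truncate_lock_check = 100ms  # interval between checks for waiters
    autovacuum_truncate_lock_retry = 50     # conditional lock acquisition attempts
    autovacuum_truncate_lock_wait = 20ms    # sleep between those attempts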
Jan
--
Anyone who trades liberty for security deserves neither
liberty nor security. -- Benjamin Franklin
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index c9253a9..9f880f0 100644
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 57,62 ****
--- 57,63 ----
#include "utils/pg_rusage.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
+ #include "portability/instr_time.h"
/*
*************** typedef struct LVRelStats
*** 103,108 ****
--- 104,110 ----
ItemPointer dead_tuples; /* array of ItemPointerData */
int num_index_scans;
TransactionId latestRemovedXid;
+ bool lock_waiter_detected;
} LVRelStats;
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 193,198 ****
--- 195,202 ----
vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
vacrelstats->num_index_scans = 0;
+ vacrelstats->pages_removed = 0;
+ vacrelstats->lock_waiter_detected = false;
/* Open all indexes of the relation */
vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 259,268 ****
vacrelstats->hasindex,
new_frozen_xid);
! /* report results to the stats collector, too */
! pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared,
new_rel_tuples);
/* and log the action if appropriate */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
--- 263,280 ----
vacrelstats->hasindex,
new_frozen_xid);
! /*
! * Report results to the stats collector, too.
! * An early-terminated lazy_truncate_heap attempt
! * suppresses the report and also cancels the
! * execution of ANALYZE, if that was requested.
! */
! if (!vacrelstats->lock_waiter_detected)
! pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared,
new_rel_tuples);
+ else
+ vacstmt->options &= ~VACOPT_ANALYZE;
/* and log the action if appropriate */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
*************** lazy_truncate_heap(Relation onerel, LVRe
*** 1255,1334 ****
BlockNumber old_rel_pages = vacrelstats->rel_pages;
BlockNumber new_rel_pages;
PGRUsage ru0;
pg_rusage_init(&ru0);
/*
! * We need full exclusive lock on the relation in order to do truncation.
! * If we can't get it, give up rather than waiting --- we don't want to
! * block other backends, and we don't want to deadlock (which is quite
! * possible considering we already hold a lower-grade lock).
! */
! if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
! return;
!
! /*
! * Now that we have exclusive lock, look to see if the rel has grown
! * whilst we were vacuuming with non-exclusive lock. If so, give up; the
! * newly added pages presumably contain non-deletable tuples.
*/
! new_rel_pages = RelationGetNumberOfBlocks(onerel);
! if (new_rel_pages != old_rel_pages)
{
/*
! * Note: we intentionally don't update vacrelstats->rel_pages with the
! * new rel size here. If we did, it would amount to assuming that the
! * new pages are empty, which is unlikely. Leaving the numbers alone
! * amounts to assuming that the new pages have the same tuple density
! * as existing ones, which is less unlikely.
*/
! UnlockRelation(onerel, AccessExclusiveLock);
! return;
! }
! /*
! * Scan backwards from the end to verify that the end pages actually
! * contain no tuples. This is *necessary*, not optional, because other
! * backends could have added tuples to these pages whilst we were
! * vacuuming.
! */
! new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
! if (new_rel_pages >= old_rel_pages)
! {
! /* can't do anything after all */
! UnlockRelation(onerel, AccessExclusiveLock);
! return;
! }
! /*
! * Okay to truncate.
! */
! RelationTruncate(onerel, new_rel_pages);
! /*
! * We can release the exclusive lock as soon as we have truncated. Other
! * backends can't safely access the relation until they have processed the
! * smgr invalidation that smgrtruncate sent out ... but that should happen
! * as part of standard invalidation processing once they acquire lock on
! * the relation.
! */
! UnlockRelation(onerel, AccessExclusiveLock);
! /*
! * Update statistics. Here, it *is* correct to adjust rel_pages without
! * also touching reltuples, since the tuple count wasn't changed by the
! * truncation.
! */
! vacrelstats->rel_pages = new_rel_pages;
! vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
! ereport(elevel,
! (errmsg("\"%s\": truncated %u to %u pages",
! RelationGetRelationName(onerel),
! old_rel_pages, new_rel_pages),
! errdetail("%s.",
! pg_rusage_show(&ru0))));
}
/*
--- 1267,1388 ----
BlockNumber old_rel_pages = vacrelstats->rel_pages;
BlockNumber new_rel_pages;
PGRUsage ru0;
+ int lock_retry;
pg_rusage_init(&ru0);
/*
! * Loop until no more truncating can be done.
*/
! do
{
/*
! * We need full exclusive lock on the relation in order to do
! * truncation. If we can't get it right away, retry a limited
! * number of times rather than queueing for it --- we don't want
! * to block other backends, and we don't want to deadlock (which
! * is quite possible considering we already hold a lower-grade lock).
*/
! vacrelstats->lock_waiter_detected = false;
! lock_retry = 0;
! while (true)
! {
! if (ConditionalLockRelation(onerel, AccessExclusiveLock))
! break;
! if (autovacuum_truncate_lock_retry == 0)
! return;
! /*
! * Check for interrupts while trying to (re-)acquire
! * the exclusive lock.
! */
! CHECK_FOR_INTERRUPTS();
! if (++lock_retry > autovacuum_truncate_lock_retry)
! {
! /*
! * We failed to establish the lock in the specified number
! * of retries, so give up truncating. Also suppress the
! * ANALYZE step: an ANALYZE at this point would reset the
! * dead tuple count in the stats collector, and then the
! * autovacuum launcher would not call us again to finish
! * the truncate.
! */
! vacrelstats->lock_waiter_detected = true;
! return;
! }
! if (autovacuum_truncate_lock_wait > 0)
! pg_usleep((long) autovacuum_truncate_lock_wait * 1000L);  /* ms -> microseconds */
! }
! /*
! * Now that we have exclusive lock, look to see if the rel has grown
! * whilst we were vacuuming with non-exclusive lock. If so, give up;
! * the newly added pages presumably contain non-deletable tuples.
! */
! new_rel_pages = RelationGetNumberOfBlocks(onerel);
! if (new_rel_pages != old_rel_pages)
! {
! /*
! * Note: we intentionally don't update vacrelstats->rel_pages
! * with the new rel size here. If we did, it would amount to
! * assuming that the new pages are empty, which is unlikely.
! * Leaving the numbers alone amounts to assuming that the new
! * pages have the same tuple density as existing ones, which
! * is less unlikely.
! */
! UnlockRelation(onerel, AccessExclusiveLock);
! return;
! }
! /*
! * Scan backwards from the end to verify that the end pages actually
! * contain no tuples. This is *necessary*, not optional, because other
! * backends could have added tuples to these pages whilst we were
! * vacuuming.
! */
! new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
!
! if (new_rel_pages >= old_rel_pages)
! {
! /* can't do anything after all */
! UnlockRelation(onerel, AccessExclusiveLock);
! return;
! }
!
! /*
! * Okay to truncate.
! */
! RelationTruncate(onerel, new_rel_pages);
!
! /*
! * We can release the exclusive lock as soon as we have truncated.
! * Other backends can't safely access the relation until they have
! * processed the smgr invalidation that smgrtruncate sent out ...
! * but that should happen as part of standard invalidation
! * processing once they acquire lock on the relation.
! */
! UnlockRelation(onerel, AccessExclusiveLock);
!
! /*
! * Update statistics. Here, it *is* correct to adjust rel_pages without
! * also touching reltuples, since the tuple count wasn't changed by the
! * truncation.
! */
! vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
! vacrelstats->rel_pages = new_rel_pages;
!
! ereport(elevel,
! (errmsg("\"%s\": truncated %u to %u pages",
! RelationGetRelationName(onerel),
! old_rel_pages, new_rel_pages),
! errdetail("%s.",
! pg_rusage_show(&ru0))));
! old_rel_pages = new_rel_pages;
! } while (new_rel_pages > vacrelstats->nonempty_pages &&
! vacrelstats->lock_waiter_detected);
}
/*
*************** static BlockNumber
*** 1340,1345 ****
--- 1394,1406 ----
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
BlockNumber blkno;
+ instr_time starttime;
+ instr_time currenttime;
+ instr_time elapsed;
+
+ /* Initialize the starttime if we check for conflicting lock requests */
+ if (autovacuum_truncate_lock_check > 0)
+ INSTR_TIME_SET_CURRENT(starttime);
/* Strange coding of loop control is needed because blkno is unsigned */
blkno = vacrelstats->rel_pages;
*************** count_nondeletable_pages(Relation onerel
*** 1352,1357 ****
--- 1413,1451 ----
bool hastup;
/*
+ * Check if another process has requested a lock on our
+ * relation. We are holding an AccessExclusiveLock here, so
+ * they will be waiting. We only do this at
+ * autovacuum_truncate_lock_check millisecond intervals, and
+ * we only check whether that interval has elapsed once every
+ * 32 blocks to keep the number of system calls and actual
+ * shared lock table lookups to a minimum.
+ */
+ if (autovacuum_truncate_lock_check > 0 && (blkno % 32) == 0)
+ {
+ INSTR_TIME_SET_CURRENT(currenttime);
+ INSTR_TIME_SET_ZERO(elapsed);
+ INSTR_TIME_ADD(elapsed, currenttime);
+ INSTR_TIME_SUBTRACT(elapsed, starttime);
+ if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
+ >= autovacuum_truncate_lock_check)
+ {
+ if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
+ {
+ ereport(elevel,
+ (errmsg("\"%s\": terminating truncate "
+ "due to conflicting lock request",
+ RelationGetRelationName(onerel))));
+
+ vacrelstats->lock_waiter_detected = true;
+ return blkno;
+ }
+ INSTR_TIME_SET_ZERO(starttime);
+ INSTR_TIME_ADD(starttime, currenttime);
+ }
+ }
+
+ /*
* We don't insert a vacuum delay point here, because we have an
* exclusive lock on the table which we want to hold for as short a
* time as possible. We still need to check for interrupts however.
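To illustrate the timing pattern count_nondeletable_pages() uses above --
look at the clock only every 32 blocks, and only act once
autovacuum_truncate_lock_check milliseconds have elapsed -- here is a minimal
standalone sketch. It uses clock_gettime() and a dummy has_waiters() stub
instead of the backend's instr_time macros and LockHasWaitersRelation(), so
it only demonstrates the logic; it is not backend code:

    #define _POSIX_C_SOURCE 200809L     /* for clock_gettime() */
    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    #define CHECK_INTERVAL_MS 100   /* stands in for autovacuum_truncate_lock_check */

    /* Stub: the patch calls LockHasWaitersRelation(onerel, AccessExclusiveLock). */
    static bool has_waiters(void) { return false; }

    static long elapsed_ms(const struct timespec *start, const struct timespec *now)
    {
        return (now->tv_sec - start->tv_sec) * 1000L +
               (now->tv_nsec - start->tv_nsec) / 1000000L;
    }

    int main(void)
    {
        struct timespec starttime, currenttime;
        unsigned int    blkno;

        clock_gettime(CLOCK_MONOTONIC, &starttime);

        /* Walk backwards over the blocks, as count_nondeletable_pages() does. */
        for (blkno = 100000; blkno > 0;)
        {
            blkno--;

            /* Look at the clock only every 32 blocks to keep syscalls cheap. */
            if ((blkno % 32) == 0)
            {
                clock_gettime(CLOCK_MONOTONIC, &currenttime);
                if (elapsed_ms(&starttime, &currenttime) >= CHECK_INTERVAL_MS)
                {
                    if (has_waiters())
                    {
                        printf("terminating scan at block %u\n", blkno);
                        return 0;
                    }
                    starttime = currenttime;    /* restart the interval */
                }
            }
            /* ... per-block "does this page contain tuples?" work goes here ... */
        }
        printf("scanned all blocks\n");
        return 0;
    }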
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 6977bcf..b8b8466 100644
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
*************** int autovacuum_freeze_max_age;
*** 118,123 ****
--- 118,126 ----
int autovacuum_vac_cost_delay;
int autovacuum_vac_cost_limit;
+ int autovacuum_truncate_lock_check;
+ int autovacuum_truncate_lock_retry;
+ int autovacuum_truncate_lock_wait;
int Log_autovacuum_min_duration = -1;
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index a7786d0..e1fa74f 100644
*** a/src/backend/storage/lmgr/lmgr.c
--- b/src/backend/storage/lmgr/lmgr.c
*************** UnlockRelation(Relation relation, LOCKMO
*** 233,238 ****
--- 233,256 ----
}
/*
+ * LockHasWaitersRelation
+ *
+ * This is a function to check whether someone else is waiting
+ * on a lock that we are currently holding.
+ */
+ bool
+ LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
+ {
+ LOCKTAG tag;
+
+ SET_LOCKTAG_RELATION(tag,
+ relation->rd_lockInfo.lockRelId.dbId,
+ relation->rd_lockInfo.lockRelId.relId);
+
+ return LockHasWaiters(&tag, lockmode, false);
+ }
+
+ /*
* LockRelationIdForSession
*
* This routine grabs a session-level lock on the target relation. The
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 32cc229..605df84 100644
*** a/src/backend/storage/lmgr/lock.c
--- b/src/backend/storage/lmgr/lock.c
*************** ProcLockHashCode(const PROCLOCKTAG *proc
*** 539,544 ****
--- 539,636 ----
return lockhash;
}
+ /*
+ * LockHasWaiters -- look up 'locktag' and check if releasing this
+ * lock would wake up other processes waiting for it.
+ */
+ bool
+ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
+ {
+ LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+ LockMethod lockMethodTable;
+ LOCALLOCKTAG localtag;
+ LOCALLOCK *locallock;
+ LOCK *lock;
+ PROCLOCK *proclock;
+ LWLockId partitionLock;
+ bool hasWaiters = FALSE;
+
+ if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+ elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+ lockMethodTable = LockMethods[lockmethodid];
+ if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
+ elog(ERROR, "unrecognized lock mode: %d", lockmode);
+
+ #ifdef LOCK_DEBUG
+ if (LOCK_DEBUG_ENABLED(locktag))
+ elog(LOG, "LockHasWaiters: lock [%u,%u] %s",
+ locktag->locktag_field1, locktag->locktag_field2,
+ lockMethodTable->lockModeNames[lockmode]);
+ #endif
+
+ /*
+ * Find the LOCALLOCK entry for this lock and lockmode
+ */
+ MemSet(&localtag, 0, sizeof(localtag)); /* must clear padding */
+ localtag.lock = *locktag;
+ localtag.mode = lockmode;
+
+ locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
+ (void *) &localtag,
+ HASH_FIND, NULL);
+
+ /*
+ * If we don't hold this lock, just warn and return; do not ereport(ERROR).
+ */
+ if (!locallock || locallock->nLocks <= 0)
+ {
+ elog(WARNING, "you don't own a lock of type %s",
+ lockMethodTable->lockModeNames[lockmode]);
+ return FALSE;
+ }
+
+ /*
+ * Check the shared lock table.
+ */
+ partitionLock = LockHashPartitionLock(locallock->hashcode);
+
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+
+ /*
+ * We don't need to re-find the lock or proclock, since we kept their
+ * addresses in the locallock table, and they couldn't have been removed
+ * while we were holding a lock on them.
+ */
+ lock = locallock->lock;
+ LOCK_PRINT("LockHasWaiters: found", lock, lockmode);
+ proclock = locallock->proclock;
+ PROCLOCK_PRINT("LockHasWaiters: found", proclock);
+
+ /*
+ * Double-check that we are actually holding a lock of the type we are
+ * asking about.
+ */
+ if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+ {
+ PROCLOCK_PRINT("LockHasWaiters: WRONGTYPE", proclock);
+ LWLockRelease(partitionLock);
+ elog(WARNING, "you don't own a lock of type %s",
+ lockMethodTable->lockModeNames[lockmode]);
+ RemoveLocalLock(locallock);
+ return FALSE;
+ }
+
+ /*
+ * Check whether any waiting lock mode conflicts with the mode we hold.
+ */
+ if ((lockMethodTable->conflictTab[lockmode] & lock->waitMask) != 0)
+ hasWaiters = TRUE;
+
+ LWLockRelease(partitionLock);
+
+ return hasWaiters;
+ }
+
/*
* LockAcquire -- Check for lock conflicts, sleep if conflict found,
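The actual waiter test in LockHasWaiters() boils down to a single bitmask
check against the lock method's conflict table. A toy, self-contained
illustration of just that check -- with made-up mode names and a made-up
conflict table, not PostgreSQL's real ones -- compiles and runs on its own:

    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned int LOCKMASK;
    #define LOCKBIT_ON(m)   (1U << (m))

    /* Toy lock modes and conflict table; the real ones live in lock.c. */
    enum { MODE_NONE = 0, MODE_SHARE = 1, MODE_EXCLUSIVE = 2 };

    static const LOCKMASK conflictTab[] = {
        [MODE_NONE]      = 0,
        [MODE_SHARE]     = LOCKBIT_ON(MODE_EXCLUSIVE),
        [MODE_EXCLUSIVE] = LOCKBIT_ON(MODE_SHARE) | LOCKBIT_ON(MODE_EXCLUSIVE)
    };

    /*
     * Same shape as the final test in LockHasWaiters(): we have waiters if
     * any mode somebody is waiting for (waitMask) conflicts with the mode
     * we are holding.
     */
    static bool lock_has_waiters(int heldmode, LOCKMASK waitMask)
    {
        return (conflictTab[heldmode] & waitMask) != 0;
    }

    int main(void)
    {
        /* We hold MODE_EXCLUSIVE; another process is queued for MODE_SHARE. */
        LOCKMASK waitMask = LOCKBIT_ON(MODE_SHARE);

        printf("has waiters: %s\n",
               lock_has_waiters(MODE_EXCLUSIVE, waitMask) ? "yes" : "no");
        return 0;
    }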
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 745e7be..d3fd4a3 100644
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static struct config_int ConfigureNamesI
*** 1815,1820 ****
--- 1815,1852 ----
},
{
+ {"autovacuum_truncate_lock_check", PGC_SIGHUP, AUTOVACUUM,
+ gettext_noop("How often autovacuum checks for conflicting lock requests during truncate."),
+ NULL,
+ GUC_UNIT_MS
+ },
+ &autovacuum_truncate_lock_check,
+ 100, 0, 500,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"autovacuum_truncate_lock_retry", PGC_SIGHUP, AUTOVACUUM,
+ gettext_noop("How often autovacuum will (re)try to acquire an exclusive lock for truncate."),
+ NULL
+ },
+ &autovacuum_truncate_lock_retry,
+ 50, 0, 100,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"autovacuum_truncate_lock_wait", PGC_SIGHUP, AUTOVACUUM,
+ gettext_noop("How long autovacuum wait between attempts for exclusive lock for truncate."),
+ NULL,
+ GUC_UNIT_MS
+ },
+ &autovacuum_truncate_lock_wait,
+ 20, 0, 50,
+ NULL, NULL, NULL
+ },
+
+ {
{"max_files_per_process", PGC_POSTMASTER, RESOURCES_KERNEL,
gettext_noop("Sets the maximum number of simultaneously open files for each server process."),
NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index eeb9b82..ec9e8c4 100644
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 471,476 ****
--- 471,481 ----
#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for
# autovacuum, -1 means use
# vacuum_cost_limit
+ #autovacuum_truncate_lock_check = 100ms # interval between checks for
+ # conflicting lock requests; 0 means disabled
+ # (the deadlock code will kill autovacuum instead)
+ #autovacuum_truncate_lock_retry = 50 # number of exclusive lock attempts
+ #autovacuum_truncate_lock_wait = 20ms # wait between exclusive lock attempts for truncate
#------------------------------------------------------------------------------
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index a851758..6e0e286 100644
*** a/src/include/postmaster/autovacuum.h
--- b/src/include/postmaster/autovacuum.h
*************** extern double autovacuum_anl_scale;
*** 26,31 ****
--- 26,34 ----
extern int autovacuum_freeze_max_age;
extern int autovacuum_vac_cost_delay;
extern int autovacuum_vac_cost_limit;
+ extern int autovacuum_truncate_lock_check;
+ extern int autovacuum_truncate_lock_retry;
+ extern int autovacuum_truncate_lock_wait;
/* autovacuum launcher PID, only valid when worker is shutting down */
extern int AutovacuumLauncherPid;
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index de340c4..aa79eda 100644
*** a/src/include/storage/lmgr.h
--- b/src/include/storage/lmgr.h
*************** extern void UnlockRelationOid(Oid relid,
*** 31,36 ****
--- 31,37 ----
extern void LockRelation(Relation relation, LOCKMODE lockmode);
extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
+ extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index d56f0fa..f0eca35 100644
*** a/src/include/storage/lock.h
--- b/src/include/storage/lock.h
*************** extern void LockReleaseAll(LOCKMETHODID
*** 494,499 ****
--- 494,501 ----
extern void LockReleaseSession(LOCKMETHODID lockmethodid);
extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
+ extern bool LockHasWaiters(const LOCKTAG *locktag,
+ LOCKMODE lockmode, bool sessionLock);
extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
LOCKMODE lockmode);
extern void AtPrepare_Locks(void);
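For anyone who wants to try the patch: all three GUCs are PGC_SIGHUP, so they
can be changed in postgresql.conf and picked up with a plain reload. For
example (illustrative only):

    SELECT pg_reload_conf();
    SHOW autovacuum_truncate_lock_check;
    SHOW autovacuum_truncate_lock_retry;
    SHOW autovacuum_truncate_lock_wait;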