Here is the patch for it.
Jan

--
Anyone who trades liberty for security deserves neither liberty nor security.
-- Benjamin Franklin
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index c9253a9..9f880f0 100644
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 57,62 ****
--- 57,63 ----
  #include "utils/pg_rusage.h"
  #include "utils/timestamp.h"
  #include "utils/tqual.h"
+ #include "portability/instr_time.h"
  
  
  /*
*************** typedef struct LVRelStats
*** 103,108 ****
--- 104,110 ----
  	ItemPointer dead_tuples;	/* array of ItemPointerData */
  	int			num_index_scans;
  	TransactionId latestRemovedXid;
+ 	bool		lock_waiter_detected;
  } LVRelStats;
  
  
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 193,198 ****
--- 195,202 ----
  	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
  	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
  	vacrelstats->num_index_scans = 0;
+ 	vacrelstats->pages_removed = 0;
+ 	vacrelstats->lock_waiter_detected = false;
  
  	/* Open all indexes of the relation */
  	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 259,268 ****
  						vacrelstats->hasindex,
  						new_frozen_xid);
  
! 	/* report results to the stats collector, too */
! 	pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
  						 new_rel_tuples);
  
  	/* and log the action if appropriate */
  	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
--- 263,280 ----
  						vacrelstats->hasindex,
  						new_frozen_xid);
  
! 	/*
! 	 * Report results to the stats collector, too.
! 	 * An early-terminated lazy_truncate_heap attempt
! 	 * suppresses the message and also cancels the
! 	 * execution of ANALYZE, if that was ordered.
! 	 */
! 	if (!vacrelstats->lock_waiter_detected)
! 		pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
  						 new_rel_tuples);
+ 	else
+ 		vacstmt->options &= ~VACOPT_ANALYZE;
  
  	/* and log the action if appropriate */
  	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
*************** lazy_truncate_heap(Relation onerel, LVRe
*** 1255,1334 ****
  	BlockNumber old_rel_pages = vacrelstats->rel_pages;
  	BlockNumber new_rel_pages;
  	PGRUsage	ru0;
  
  	pg_rusage_init(&ru0);
  
  	/*
! 	 * We need full exclusive lock on the relation in order to do truncation.
! 	 * If we can't get it, give up rather than waiting --- we don't want to
! 	 * block other backends, and we don't want to deadlock (which is quite
! 	 * possible considering we already hold a lower-grade lock).
! 	 */
! 	if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
! 		return;
! 
! 	/*
! 	 * Now that we have exclusive lock, look to see if the rel has grown
! 	 * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
! 	 * newly added pages presumably contain non-deletable tuples.
  	 */
! 	new_rel_pages = RelationGetNumberOfBlocks(onerel);
! 	if (new_rel_pages != old_rel_pages)
  	{
  		/*
! 		 * Note: we intentionally don't update vacrelstats->rel_pages with the
! 		 * new rel size here.  If we did, it would amount to assuming that the
! 		 * new pages are empty, which is unlikely.  Leaving the numbers alone
! 		 * amounts to assuming that the new pages have the same tuple density
! 		 * as existing ones, which is less unlikely.
  		 */
! 		UnlockRelation(onerel, AccessExclusiveLock);
! 		return;
! 	}
! 
! 	/*
! 	 * Scan backwards from the end to verify that the end pages actually
! 	 * contain no tuples.  This is *necessary*, not optional, because other
! 	 * backends could have added tuples to these pages whilst we were
! 	 * vacuuming.
! 	 */
! 	new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
! 
! 	if (new_rel_pages >= old_rel_pages)
! 	{
! 		/* can't do anything after all */
! 		UnlockRelation(onerel, AccessExclusiveLock);
! 		return;
! 	}
! 
! 	/*
! 	 * Okay to truncate.
! 	 */
! 	RelationTruncate(onerel, new_rel_pages);
! 
! 	/*
! 	 * We can release the exclusive lock as soon as we have truncated.  Other
! 	 * backends can't safely access the relation until they have processed the
! 	 * smgr invalidation that smgrtruncate sent out ... but that should happen
! 	 * as part of standard invalidation processing once they acquire lock on
! 	 * the relation.
! 	 */
! 	UnlockRelation(onerel, AccessExclusiveLock);
! 
! 	/*
! 	 * Update statistics.  Here, it *is* correct to adjust rel_pages without
! 	 * also touching reltuples, since the tuple count wasn't changed by the
! 	 * truncation.
! 	 */
! 	vacrelstats->rel_pages = new_rel_pages;
! 	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
! 
! 	ereport(elevel,
! 			(errmsg("\"%s\": truncated %u to %u pages",
! 					RelationGetRelationName(onerel),
! 					old_rel_pages, new_rel_pages),
! 			 errdetail("%s.",
! 					   pg_rusage_show(&ru0))));
  }
  
  /*
--- 1267,1388 ----
  	BlockNumber old_rel_pages = vacrelstats->rel_pages;
  	BlockNumber new_rel_pages;
  	PGRUsage	ru0;
+ 	int			lock_retry;
  
  	pg_rusage_init(&ru0);
  
  	/*
! 	 * Loop until no more truncating can be done.
  	 */
! 	do
  	{
  		/*
! 		 * We need full exclusive lock on the relation in order to do
! 		 * truncation.
! 		 * If we can't get it, give up rather than waiting --- we don't want to
! 		 * block other backends, and we don't want to deadlock (which is quite
! 		 * possible considering we already hold a lower-grade lock).
  		 */
! 		vacrelstats->lock_waiter_detected = false;
! 		lock_retry = 0;
! 		while (true)
! 		{
! 			if (ConditionalLockRelation(onerel, AccessExclusiveLock))
! 				break;
! 
! 			if (autovacuum_truncate_lock_retry == 0)
! 				return;
! 
! 			/*
! 			 * Check for interrupts while trying to (re-)acquire
! 			 * the exclusive lock.
! 			 */
! 			CHECK_FOR_INTERRUPTS();
! 
! 			if (++lock_retry > autovacuum_truncate_lock_retry)
! 			{
! 				/*
! 				 * We failed to establish the lock in the specified
! 				 * number of retries. This means we give up truncating.
! 				 * Suppress the ANALYZE step. Doing an ANALYZE at
! 				 * this point will reset the dead_tuple_count in the
! 				 * stats collector, so we will not get called by the
! 				 * autovacuum launcher again to do the truncate.
! 				 */
! 				vacrelstats->lock_waiter_detected = true;
! 				return;
! 			}
! 
! 			if (autovacuum_truncate_lock_wait > 0)
! 				pg_usleep((long) autovacuum_truncate_lock_wait * 1000L);
! 		}
! 
! 		/*
! 		 * Now that we have exclusive lock, look to see if the rel has grown
! 		 * whilst we were vacuuming with non-exclusive lock.  If so, give up;
! 		 * the newly added pages presumably contain non-deletable tuples.
! 		 */
! 		new_rel_pages = RelationGetNumberOfBlocks(onerel);
! 		if (new_rel_pages != old_rel_pages)
! 		{
! 			/*
! 			 * Note: we intentionally don't update vacrelstats->rel_pages
! 			 * with the new rel size here.  If we did, it would amount to
! 			 * assuming that the new pages are empty, which is unlikely.
! 			 * Leaving the numbers alone amounts to assuming that the new
! 			 * pages have the same tuple density as existing ones, which
! 			 * is less unlikely.
! 			 */
! 			UnlockRelation(onerel, AccessExclusiveLock);
! 			return;
! 		}
! 
! 		/*
! 		 * Scan backwards from the end to verify that the end pages actually
! 		 * contain no tuples.  This is *necessary*, not optional, because other
! 		 * backends could have added tuples to these pages whilst we were
! 		 * vacuuming.
! 		 */
! 		new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
! 
! 		if (new_rel_pages >= old_rel_pages)
! 		{
! 			/* can't do anything after all */
! 			UnlockRelation(onerel, AccessExclusiveLock);
! 			return;
! 		}
! 
! 		/*
! 		 * Okay to truncate.
! 		 */
! 		RelationTruncate(onerel, new_rel_pages);
! 
! 		/*
! 		 * We can release the exclusive lock as soon as we have truncated.
! 		 * Other backends can't safely access the relation until they have
! 		 * processed the smgr invalidation that smgrtruncate sent out ...
! 		 * but that should happen as part of standard invalidation
! 		 * processing once they acquire lock on the relation.
! 		 */
! 		UnlockRelation(onerel, AccessExclusiveLock);
! 
! 		/*
! 		 * Update statistics.  Here, it *is* correct to adjust rel_pages without
! 		 * also touching reltuples, since the tuple count wasn't changed by the
! 		 * truncation.
! 		 */
! 		vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
! 		vacrelstats->rel_pages = new_rel_pages;
! 
! 		ereport(elevel,
! 				(errmsg("\"%s\": truncated %u to %u pages",
! 						RelationGetRelationName(onerel),
! 						old_rel_pages, new_rel_pages),
! 				 errdetail("%s.",
! 						   pg_rusage_show(&ru0))));
! 		old_rel_pages = new_rel_pages;
! 	} while (new_rel_pages > vacrelstats->nonempty_pages &&
! 			 vacrelstats->lock_waiter_detected);
  }
  
  /*
*************** static BlockNumber
*** 1340,1345 ****
--- 1394,1406 ----
  count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  {
  	BlockNumber blkno;
+ 	instr_time	starttime;
+ 	instr_time	currenttime;
+ 	instr_time	elapsed;
+ 
+ 	/* Initialize the starttime if we check for conflicting lock requests */
+ 	if (autovacuum_truncate_lock_check > 0)
+ 		INSTR_TIME_SET_CURRENT(starttime);
  
  	/* Strange coding of loop control is needed because blkno is unsigned */
  	blkno = vacrelstats->rel_pages;
*************** count_nondeletable_pages(Relation onerel
*** 1352,1357 ****
--- 1413,1451 ----
  		bool		hastup;
  
  		/*
+ 		 * Check if another process requests a lock on our relation.
+ 		 * We are holding an AccessExclusiveLock here, so they will
+ 		 * be waiting.  We only do this in autovacuum_truncate_lock_check
+ 		 * millisecond intervals, and we only check if that interval
+ 		 * has elapsed once every 32 blocks to keep the number of
+ 		 * system calls and actual shared lock table lookups to a
+ 		 * minimum.
+ 		 */
+ 		if (autovacuum_truncate_lock_check > 0 && (blkno % 32) == 0)
+ 		{
+ 			INSTR_TIME_SET_CURRENT(currenttime);
+ 			INSTR_TIME_SET_ZERO(elapsed);
+ 			INSTR_TIME_ADD(elapsed, currenttime);
+ 			INSTR_TIME_SUBTRACT(elapsed, starttime);
+ 			if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
+ 				>= autovacuum_truncate_lock_check)
+ 			{
+ 				if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
+ 				{
+ 					ereport(elevel,
+ 							(errmsg("\"%s\": terminating truncate "
+ 									"due to conflicting lock request",
+ 									RelationGetRelationName(onerel))));
+ 
+ 					vacrelstats->lock_waiter_detected = true;
+ 					return blkno;
+ 				}
+ 				INSTR_TIME_SET_ZERO(starttime);
+ 				INSTR_TIME_ADD(starttime, currenttime);
+ 			}
+ 		}
+ 
+ 		/*
  		 * We don't insert a vacuum delay point here, because we have an
  		 * exclusive lock on the table which we want to hold for as short a
  		 * time as possible.  We still need to check for interrupts however.
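To make the control flow of the new lazy_truncate_heap() easier to follow, here is a minimal standalone sketch of the bounded-retry lock acquisition it performs. The names try_lock(), sleep_ms(), lock_retry_limit and lock_wait_ms are hypothetical stand-ins for illustration only; in the patch the real pieces are ConditionalLockRelation(), pg_usleep(), and the autovacuum_truncate_lock_retry / autovacuum_truncate_lock_wait settings.

/*
 * Sketch only: bounded-retry acquisition of the exclusive lock, as in
 * lazy_truncate_heap() above.  try_lock(), sleep_ms() and the two limits
 * are hypothetical stand-ins, not PostgreSQL APIs.
 */
#include <stdbool.h>

extern bool try_lock(void);		/* stands in for ConditionalLockRelation() */
extern void sleep_ms(int ms);		/* stands in for pg_usleep(ms * 1000L) */

static const int lock_retry_limit = 50;	/* cf. autovacuum_truncate_lock_retry */
static const int lock_wait_ms = 20;	/* cf. autovacuum_truncate_lock_wait */

/* Returns true if the exclusive lock was acquired, false if we gave up. */
bool
acquire_with_retries(void)
{
	int		retry = 0;

	for (;;)
	{
		if (try_lock())
			return true;	/* got the lock; truncation can proceed */
		if (++retry > lock_retry_limit)
			return false;	/* give up; skip the truncate this time */
		sleep_ms(lock_wait_ms);	/* short pause before the next attempt */
	}
}

The point of the bounded loop is that the autovacuum worker never blocks behind another backend: it either gets the AccessExclusiveLock quickly or gives up and leaves the truncation for a later run.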
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 6977bcf..b8b8466 100644
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
*************** int			autovacuum_freeze_max_age;
*** 118,123 ****
--- 118,126 ----
  int			autovacuum_vac_cost_delay;
  int			autovacuum_vac_cost_limit;
+ int			autovacuum_truncate_lock_check;
+ int			autovacuum_truncate_lock_retry;
+ int			autovacuum_truncate_lock_wait;
  
  int			Log_autovacuum_min_duration = -1;
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index a7786d0..e1fa74f 100644
*** a/src/backend/storage/lmgr/lmgr.c
--- b/src/backend/storage/lmgr/lmgr.c
*************** UnlockRelation(Relation relation, LOCKMO
*** 233,238 ****
--- 233,256 ----
  }
  
  /*
+  * LockHasWaitersRelation
+  *
+  *		This is a function to check whether someone else is waiting on a
+  *		lock that we are currently holding.
+  */
+ bool
+ LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
+ {
+ 	LOCKTAG		tag;
+ 
+ 	SET_LOCKTAG_RELATION(tag,
+ 						 relation->rd_lockInfo.lockRelId.dbId,
+ 						 relation->rd_lockInfo.lockRelId.relId);
+ 
+ 	return LockHasWaiters(&tag, lockmode, false);
+ }
+ 
+ /*
   * LockRelationIdForSession
   *
   * This routine grabs a session-level lock on the target relation.  The
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 32cc229..605df84 100644
*** a/src/backend/storage/lmgr/lock.c
--- b/src/backend/storage/lmgr/lock.c
*************** ProcLockHashCode(const PROCLOCKTAG *proc
*** 539,544 ****
--- 539,636 ----
  	return lockhash;
  }
  
+ /*
+  * LockHasWaiters -- look up 'locktag' and check if releasing this
+  *		lock would wake up other processes waiting for it.
+  */
+ bool
+ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
+ {
+ 	LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+ 	LockMethod	lockMethodTable;
+ 	LOCALLOCKTAG localtag;
+ 	LOCALLOCK  *locallock;
+ 	LOCK	   *lock;
+ 	PROCLOCK   *proclock;
+ 	LWLockId	partitionLock;
+ 	bool		hasWaiters = FALSE;
+ 
+ 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+ 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+ 	lockMethodTable = LockMethods[lockmethodid];
+ 	if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
+ 		elog(ERROR, "unrecognized lock mode: %d", lockmode);
+ 
+ #ifdef LOCK_DEBUG
+ 	if (LOCK_DEBUG_ENABLED(locktag))
+ 		elog(LOG, "LockHasWaiters: lock [%u,%u] %s",
+ 			 locktag->locktag_field1, locktag->locktag_field2,
+ 			 lockMethodTable->lockModeNames[lockmode]);
+ #endif
+ 
+ 	/*
+ 	 * Find the LOCALLOCK entry for this lock and lockmode
+ 	 */
+ 	MemSet(&localtag, 0, sizeof(localtag));		/* must clear padding */
+ 	localtag.lock = *locktag;
+ 	localtag.mode = lockmode;
+ 
+ 	locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
+ 										  (void *) &localtag,
+ 										  HASH_FIND, NULL);
+ 
+ 	/*
+ 	 * let the caller print its own error message, too.  Do not ereport(ERROR).
+ 	 */
+ 	if (!locallock || locallock->nLocks <= 0)
+ 	{
+ 		elog(WARNING, "you don't own a lock of type %s",
+ 			 lockMethodTable->lockModeNames[lockmode]);
+ 		return FALSE;
+ 	}
+ 
+ 	/*
+ 	 * Check the shared lock table.
+ 	 */
+ 	partitionLock = LockHashPartitionLock(locallock->hashcode);
+ 
+ 	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+ 
+ 	/*
+ 	 * We don't need to re-find the lock or proclock, since we kept their
+ 	 * addresses in the locallock table, and they couldn't have been removed
+ 	 * while we were holding a lock on them.
+ 	 */
+ 	lock = locallock->lock;
+ 	LOCK_PRINT("LockHasWaiters: found", lock, lockmode);
+ 	proclock = locallock->proclock;
+ 	PROCLOCK_PRINT("LockHasWaiters: found", proclock);
+ 
+ 	/*
+ 	 * Double-check that we are actually holding a lock of the type we want to
+ 	 * release.
+ 	 */
+ 	if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+ 	{
+ 		PROCLOCK_PRINT("LockHasWaiters: WRONGTYPE", proclock);
+ 		LWLockRelease(partitionLock);
+ 		elog(WARNING, "you don't own a lock of type %s",
+ 			 lockMethodTable->lockModeNames[lockmode]);
+ 		RemoveLocalLock(locallock);
+ 		return FALSE;
+ 	}
+ 
+ 	/*
+ 	 * Do the checking.
+ 	 */
+ 	if ((lockMethodTable->conflictTab[lockmode] & lock->waitMask) != 0)
+ 		hasWaiters = TRUE;
+ 
+ 	LWLockRelease(partitionLock);
+ 
+ 	return hasWaiters;
+ }
+ 
  /*
   * LockAcquire -- Check for lock conflicts, sleep if conflict found,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 745e7be..d3fd4a3 100644
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static struct config_int ConfigureNamesI
*** 1815,1820 ****
--- 1815,1852 ----
  	},
  
  	{
+ 		{"autovacuum_truncate_lock_check", PGC_SIGHUP, AUTOVACUUM,
+ 			gettext_noop("How often autovacuum checks for conflicting lock requests during truncate."),
+ 			NULL,
+ 			GUC_UNIT_MS
+ 		},
+ 		&autovacuum_truncate_lock_check,
+ 		100, 0, 500,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"autovacuum_truncate_lock_retry", PGC_SIGHUP, AUTOVACUUM,
+ 			gettext_noop("How many times autovacuum will retry acquiring an exclusive lock for truncate."),
+ 			NULL
+ 		},
+ 		&autovacuum_truncate_lock_retry,
+ 		50, 0, 100,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"autovacuum_truncate_lock_wait", PGC_SIGHUP, AUTOVACUUM,
+ 			gettext_noop("How long autovacuum waits between attempts to acquire an exclusive lock for truncate."),
+ 			NULL,
+ 			GUC_UNIT_MS
+ 		},
+ 		&autovacuum_truncate_lock_wait,
+ 		20, 0, 50,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
  		{"max_files_per_process", PGC_POSTMASTER, RESOURCES_KERNEL,
  			gettext_noop("Sets the maximum number of simultaneously open files for each server process."),
  			NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index eeb9b82..ec9e8c4 100644
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 471,476 ****
--- 471,481 ----
  #autovacuum_vacuum_cost_limit = -1	# default vacuum cost limit for
  					# autovacuum, -1 means use
  					# vacuum_cost_limit
+ #autovacuum_truncate_lock_check = 100ms	# default for conflicting lock check;
+ 					# 0 means disabled (deadlock code will kill autovacuum)
+ #autovacuum_truncate_lock_retry = 50	# default exclusive lock attempts
+ #autovacuum_truncate_lock_wait = 20ms	# default wait between exclusive
+ 					# lock attempts for truncate
  
  
  #------------------------------------------------------------------------------
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index a851758..6e0e286 100644
*** a/src/include/postmaster/autovacuum.h
--- b/src/include/postmaster/autovacuum.h
*************** extern double autovacuum_anl_scale;
*** 26,31 ****
--- 26,34 ----
  extern int	autovacuum_freeze_max_age;
  extern int	autovacuum_vac_cost_delay;
  extern int	autovacuum_vac_cost_limit;
+ extern int	autovacuum_truncate_lock_check;
+ extern int	autovacuum_truncate_lock_retry;
+ extern int	autovacuum_truncate_lock_wait;
  
  /* autovacuum launcher PID, only valid when worker is shutting down */
  extern int	AutovacuumLauncherPid;
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index de340c4..aa79eda 100644
*** a/src/include/storage/lmgr.h
--- b/src/include/storage/lmgr.h
*************** extern void UnlockRelationOid(Oid relid,
*** 31,36 ****
--- 31,37 ----
  extern void LockRelation(Relation relation, LOCKMODE lockmode);
  extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
  extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
+ extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
  
  extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
  extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index d56f0fa..f0eca35 100644
*** a/src/include/storage/lock.h
--- b/src/include/storage/lock.h
*************** extern void LockReleaseAll(LOCKMETHODID
*** 494,499 ****
--- 494,501 ----
  extern void LockReleaseSession(LOCKMETHODID lockmethodid);
  extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
  extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
+ extern bool LockHasWaiters(const LOCKTAG *locktag,
+ 			   LOCKMODE lockmode, bool sessionLock);
  extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
  						   LOCKMODE lockmode);
  extern void AtPrepare_Locks(void);
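The other half of the mechanism is the periodic waiter check inside count_nondeletable_pages(). The sketch below shows the same throttling idea in plain C: the clock is read only every 32 blocks, and the (comparatively expensive) lock-table lookup happens only once the configured interval has elapsed. has_lock_waiters(), elapsed_ms(), scan_backwards() and check_interval_ms are hypothetical stand-ins for illustration; the patch uses LockHasWaitersRelation(), the instr_time macros and the autovacuum_truncate_lock_check setting (default 100ms, per the guc.c hunk above).

/*
 * Sketch only: throttled check for waiters while scanning blocks backwards,
 * as in count_nondeletable_pages() above.  has_lock_waiters() and
 * check_interval_ms are hypothetical stand-ins, not PostgreSQL APIs.
 */
#include <stdbool.h>
#include <time.h>

extern bool has_lock_waiters(void);	/* stands in for LockHasWaitersRelation() */

static long
elapsed_ms(const struct timespec *start, const struct timespec *now)
{
	return (now->tv_sec - start->tv_sec) * 1000L +
		(now->tv_nsec - start->tv_nsec) / 1000000L;
}

/*
 * Scan nblocks backwards; return the block number at which a conflicting
 * lock request was noticed, or -1 if the scan finished undisturbed.
 */
long
scan_backwards(long nblocks, int check_interval_ms)
{
	struct timespec starttime;
	struct timespec currenttime;

	clock_gettime(CLOCK_MONOTONIC, &starttime);

	for (long blkno = nblocks; blkno > 0; blkno--)
	{
		/* cheap modulo test first: read the clock only every 32 blocks */
		if (check_interval_ms > 0 && blkno % 32 == 0)
		{
			clock_gettime(CLOCK_MONOTONIC, &currenttime);
			if (elapsed_ms(&starttime, &currenttime) >= check_interval_ms)
			{
				if (has_lock_waiters())
					return blkno;	/* stop early and let the waiter in */
				starttime = currenttime;	/* start a new interval */
			}
		}
		/* ... per-block work (checking the page for tuples) goes here ... */
	}
	return -1;
}

With the defaults registered in guc.c, a backend queued behind the AccessExclusiveLock should normally be noticed within roughly the check interval plus the time needed to scan at most 32 more blocks, after which the truncation backs off and can be resumed by a later autovacuum run.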