On K, 2005-08-17 at 15:40 -0400, Tom Lane wrote:
>                            Saatja: 
> Tom Lane <[EMAIL PROTECTED]>
>                           Kellele: 
> Bruce Momjian
> <pgman@candle.pha.pa.us>, Hannu
> Krosing <[EMAIL PROTECTED]>, Neil Conway
> <[EMAIL PROTECTED]>, pgsql-
> [EMAIL PROTECTED]
>                             Teema: 
> Re: [PATCHES] PATCH to allow
> concurrent VACUUMs to not lock each
>                           KuupƤev: 
> Wed, 17 Aug 2005 15:40:53 -0400
> (22:40 EEST)
> 
> Just for the archives, attached is as far as I'd gotten with cleaning
> up
> Hannu's patch before I realized that it wasn't doing what it needed to
> do.  This fixes an end-of-transaction race condition (can't unset
> inVacuum before xact end, unless you want OldestXmin going backwards
> from the point of view of other people) and improves the documentation
> of what's going on.  But unless someone can convince me that it's safe
> to mess with GetSnapshotData, it's unlikely this'll ever get applied.
> 
> 
> 

Attached is a patch, based on you last one, which messes with
GetSnapshotData in what I think is a safe way.

It introduces another attribute to PROC , proc->nonInVacuumXmin and
computes this in addition to prox->xmin inside GetSnapshotData.

When (and only when) GetOldestXmin is called with ignoreVacuum=true,
then proc->nonInVacuumXmin is checked instead of prox->xmin.

I believe that this will make this change invisible to all other places
where GetSnapshotData or GetOldestXmin is used.

-- 
Hannu Krosing <[EMAIL PROTECTED]>





Index: src/backend/access/transam/twophase.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/twophase.c,v
retrieving revision 1.10
diff -c -r1.10 twophase.c
*** src/backend/access/transam/twophase.c	20 Aug 2005 23:26:10 -0000	1.10
--- src/backend/access/transam/twophase.c	24 Aug 2005 12:01:17 -0000
***************
*** 280,285 ****
--- 280,287 ----
  	gxact->proc.pid = 0;
  	gxact->proc.databaseId = databaseid;
  	gxact->proc.roleId = owner;
+ 	gxact->proc.inVacuum = false;
+ 	gxact->proc.nonInVacuumXmin = InvalidTransactionId;
  	gxact->proc.lwWaiting = false;
  	gxact->proc.lwExclusive = false;
  	gxact->proc.lwWaitLink = NULL;
Index: src/backend/access/transam/xact.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xact.c,v
retrieving revision 1.214
diff -c -r1.214 xact.c
*** src/backend/access/transam/xact.c	20 Aug 2005 23:45:08 -0000	1.214
--- src/backend/access/transam/xact.c	24 Aug 2005 12:01:17 -0000
***************
*** 1516,1521 ****
--- 1516,1523 ----
  		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
  		MyProc->xid = InvalidTransactionId;
  		MyProc->xmin = InvalidTransactionId;
+ 		MyProc->inVacuum = false;	/* must be cleared with xid/xmin */
+ 		MyProc->nonInVacuumXmin = InvalidTransactionId; /* this too */
  
  		/* Clear the subtransaction-XID cache too while holding the lock */
  		MyProc->subxids.nxids = 0;
***************
*** 1752,1757 ****
--- 1754,1761 ----
  	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
  	MyProc->xid = InvalidTransactionId;
  	MyProc->xmin = InvalidTransactionId;
+ 	MyProc->inVacuum = false;	/* must be cleared with xid/xmin */
+ 	MyProc->nonInVacuumXmin = InvalidTransactionId; /* this too */
  
  	/* Clear the subtransaction-XID cache too while holding the lock */
  	MyProc->subxids.nxids = 0;
***************
*** 1915,1920 ****
--- 1919,1926 ----
  		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
  		MyProc->xid = InvalidTransactionId;
  		MyProc->xmin = InvalidTransactionId;
+ 		MyProc->inVacuum = false;	/* must be cleared with xid/xmin */
+ 		MyProc->nonInVacuumXmin = InvalidTransactionId; /* this too */
  
  		/* Clear the subtransaction-XID cache too while holding the lock */
  		MyProc->subxids.nxids = 0;
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.218
diff -c -r1.218 xlog.c
*** src/backend/access/transam/xlog.c	22 Aug 2005 23:59:04 -0000	1.218
--- src/backend/access/transam/xlog.c	24 Aug 2005 12:01:18 -0000
***************
*** 5303,5309 ****
  	 * mustn't do this because StartupSUBTRANS hasn't been called yet.
  	 */
  	if (!InRecovery)
! 		TruncateSUBTRANS(GetOldestXmin(true));
  
  	if (!shutdown)
  		ereport(DEBUG2,
--- 5303,5309 ----
  	 * mustn't do this because StartupSUBTRANS hasn't been called yet.
  	 */
  	if (!InRecovery)
! 		TruncateSUBTRANS(GetOldestXmin(true, false));
  
  	if (!shutdown)
  		ereport(DEBUG2,
Index: src/backend/catalog/index.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/catalog/index.c,v
retrieving revision 1.259
diff -c -r1.259 index.c
*** src/backend/catalog/index.c	12 Aug 2005 01:35:56 -0000	1.259
--- src/backend/catalog/index.c	24 Aug 2005 12:01:18 -0000
***************
*** 1433,1439 ****
  	else
  	{
  		snapshot = SnapshotAny;
! 		OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared);
  	}
  
  	scan = heap_beginscan(heapRelation, /* relation */
--- 1433,1440 ----
  	else
  	{
  		snapshot = SnapshotAny;
! 		/* okay to ignore lazy VACUUMs here */
! 		OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
  	}
  
  	scan = heap_beginscan(heapRelation, /* relation */
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.313
diff -c -r1.313 vacuum.c
*** src/backend/commands/vacuum.c	20 Aug 2005 00:39:54 -0000	1.313
--- src/backend/commands/vacuum.c	24 Aug 2005 12:01:18 -0000
***************
*** 36,41 ****
--- 36,42 ----
  #include "executor/executor.h"
  #include "miscadmin.h"
  #include "storage/freespace.h"
+ #include "storage/proc.h"
  #include "storage/procarray.h"
  #include "storage/smgr.h"
  #include "tcop/pquery.h"
***************
*** 371,378 ****
  		 * determining the cutoff with which we vacuum shared relations,
  		 * it is not possible for that database to have a cutoff newer
  		 * than OLDXMIN recorded in pg_database.
  		 */
! 		vacuum_set_xid_limits(vacstmt, false,
  							  &initialOldestXmin,
  							  &initialFreezeLimit);
  	}
--- 372,382 ----
  		 * determining the cutoff with which we vacuum shared relations,
  		 * it is not possible for that database to have a cutoff newer
  		 * than OLDXMIN recorded in pg_database.
+ 		 *
+ 		 * We can't ignore concurrent lazy VACUUMs, because these values will
+ 		 * be used to truncate clog below.
  		 */
! 		vacuum_set_xid_limits(vacstmt, false, false,
  							  &initialOldestXmin,
  							  &initialFreezeLimit);
  	}
***************
*** 612,624 ****
   * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
   */
  void
! vacuum_set_xid_limits(VacuumStmt *vacstmt, bool sharedRel,
  					  TransactionId *oldestXmin,
  					  TransactionId *freezeLimit)
  {
  	TransactionId limit;
  
! 	*oldestXmin = GetOldestXmin(sharedRel);
  
  	Assert(TransactionIdIsNormal(*oldestXmin));
  
--- 616,629 ----
   * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
   */
  void
! vacuum_set_xid_limits(VacuumStmt *vacstmt,
! 					  bool sharedRel, bool ignoreVacuum,
  					  TransactionId *oldestXmin,
  					  TransactionId *freezeLimit)
  {
  	TransactionId limit;
  
! 	*oldestXmin = GetOldestXmin(sharedRel, ignoreVacuum);
  
  	Assert(TransactionIdIsNormal(*oldestXmin));
  
***************
*** 673,678 ****
--- 678,688 ----
   *		pg_class would've been obsoleted.  Of course, this only works for
   *		fixed-size never-null columns, but these are.
   *
+  *		Another reason for doing it this way is that when we are in a lazy
+  *		VACUUM and have inVacuum set, we mustn't do any updates --- somebody
+  *		vacuuming pg_class might think they could delete a tuple marked with
+  *		xmin = our xid.
+  *
   *		This routine is shared by full VACUUM, lazy VACUUM, and stand-alone
   *		ANALYZE.
   */
***************
*** 953,960 ****
  
  	/* Begin a transaction for vacuuming this relation */
  	StartTransactionCommand();
! 	/* functions in indexes may want a snapshot set */
! 	ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
  
  	/*
  	 * Tell the cache replacement strategy that vacuum is causing all
--- 963,997 ----
  
  	/* Begin a transaction for vacuuming this relation */
  	StartTransactionCommand();
! 
! 	if (vacstmt->full)
! 	{
! 		/* functions in indexes may want a snapshot set */
! 		ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
! 	}
! 	else
! 	{
! 		/*
! 		 * During a lazy VACUUM we do not run any user-supplied functions,
! 		 * and so it should be safe to not create a transaction snapshot.
! 		 *
! 		 * We can furthermore set the inVacuum flag, which lets other
! 		 * concurrent VACUUMs know that they can ignore this one while
! 		 * determining their OldestXmin.  (The reason we don't set inVacuum
! 		 * during a full VACUUM is exactly that we may have to run user-
! 		 * defined functions for functional indexes, and we want to make
! 		 * sure that if they use the snapshot set above, any tuples it
! 		 * requires can't get removed from other tables.  An index function
! 		 * that depends on the contents of other tables is arguably broken,
! 		 * but we won't break it here by violating transaction semantics.)
! 		 *
! 		 * Note: the inVacuum flag remains set until CommitTransaction or
! 		 * AbortTransaction.  We don't want to clear it until we reset
! 		 * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
! 		 * which is probably Not Good.
! 		 */
! 		MyProc->inVacuum = true;
! 	}
  
  	/*
  	 * Tell the cache replacement strategy that vacuum is causing all
***************
*** 1134,1140 ****
  				i;
  	VRelStats  *vacrelstats;
  
! 	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
  						  &OldestXmin, &FreezeLimit);
  
  	/*
--- 1171,1177 ----
  				i;
  	VRelStats  *vacrelstats;
  
! 	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, true,
  						  &OldestXmin, &FreezeLimit);
  
  	/*
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.57
diff -c -r1.57 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c	20 Aug 2005 23:26:13 -0000	1.57
--- src/backend/commands/vacuumlazy.c	24 Aug 2005 12:01:18 -0000
***************
*** 142,148 ****
  	else
  		elevel = DEBUG2;
  
! 	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
  						  &OldestXmin, &FreezeLimit);
  
  	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
--- 142,148 ----
  	else
  		elevel = DEBUG2;
  
! 	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, true,
  						  &OldestXmin, &FreezeLimit);
  
  	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
Index: src/backend/storage/ipc/procarray.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/ipc/procarray.c,v
retrieving revision 1.6
diff -c -r1.6 procarray.c
*** src/backend/storage/ipc/procarray.c	20 Aug 2005 23:26:20 -0000	1.6
--- src/backend/storage/ipc/procarray.c	24 Aug 2005 12:01:19 -0000
***************
*** 391,410 ****
   * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
   * then only backends running in my own database are considered.
   *
   * This is used by VACUUM to decide which deleted tuples must be preserved
   * in a table.	allDbs = TRUE is needed for shared relations, but allDbs =
   * FALSE is sufficient for non-shared relations, since only backends in my
!  * own database could ever see the tuples in them.
   *
   * This is also used to determine where to truncate pg_subtrans.  allDbs
!  * must be TRUE for that case.
   *
   * Note: we include the currently running xids in the set of considered xids.
   * This ensures that if a just-started xact has not yet set its snapshot,
   * when it does set the snapshot it cannot set xmin less than what we compute.
   */
  TransactionId
! GetOldestXmin(bool allDbs)
  {
  	ProcArrayStruct *arrayP = procArray;
  	TransactionId result;
--- 391,414 ----
   * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
   * then only backends running in my own database are considered.
   *
+  * If ignoreVacuum is TRUE then backends with inVacuum set are ignored.
+  *
   * This is used by VACUUM to decide which deleted tuples must be preserved
   * in a table.	allDbs = TRUE is needed for shared relations, but allDbs =
   * FALSE is sufficient for non-shared relations, since only backends in my
!  * own database could ever see the tuples in them.  Also, we can ignore
!  * concurrently running lazy VACUUMs because (a) they must be working on other
!  * tables, and (b) they don't need to do snapshot-based lookups.
   *
   * This is also used to determine where to truncate pg_subtrans.  allDbs
!  * must be TRUE for that case, and ignoreVacuum FALSE.
   *
   * Note: we include the currently running xids in the set of considered xids.
   * This ensures that if a just-started xact has not yet set its snapshot,
   * when it does set the snapshot it cannot set xmin less than what we compute.
   */
  TransactionId
! GetOldestXmin(bool allDbs, bool ignoreVacuum)
  {
  	ProcArrayStruct *arrayP = procArray;
  	TransactionId result;
***************
*** 428,433 ****
--- 432,440 ----
  	{
  		PGPROC	   *proc = arrayP->procs[index];
  
+ 		if (ignoreVacuum && proc->inVacuum)
+ 			continue;
+ 
  		if (allDbs || proc->databaseId == MyDatabaseId)
  		{
  			/* Fetch xid just once - see GetNewTransactionId */
***************
*** 437,443 ****
  			{
  				if (TransactionIdPrecedes(xid, result))
  					result = xid;
! 				xid = proc->xmin;
  				if (TransactionIdIsNormal(xid))
  					if (TransactionIdPrecedes(xid, result))
  						result = xid;
--- 444,453 ----
  			{
  				if (TransactionIdPrecedes(xid, result))
  					result = xid;
! 				if (ignoreVacuum)
! 					xid = proc->nonInVacuumXmin;
! 				else
! 					xid = proc->xmin;
  				if (TransactionIdIsNormal(xid))
  					if (TransactionIdPrecedes(xid, result))
  						result = xid;
***************
*** 475,481 ****
   *			older than this are known not running any more.
   *		RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
   *			running transactions).  This is the same computation done by
!  *			GetOldestXmin(TRUE).
   *----------
   */
  Snapshot
--- 485,491 ----
   *			older than this are known not running any more.
   *		RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
   *			running transactions).  This is the same computation done by
!  *			GetOldestXmin(TRUE, FALSE).
   *----------
   */
  Snapshot
***************
*** 485,490 ****
--- 495,501 ----
  	TransactionId xmin;
  	TransactionId xmax;
  	TransactionId globalxmin;
+ 	TransactionId noninvacuumxmin;
  	int			index;
  	int			count = 0;
  
***************
*** 518,524 ****
  					 errmsg("out of memory")));
  	}
  
! 	globalxmin = xmin = GetTopTransactionId();
  
  	/*
  	 * If we are going to set MyProc->xmin then we'd better get exclusive
--- 529,535 ----
  					 errmsg("out of memory")));
  	}
  
! 	globalxmin = xmin = noninvacuumxmin = GetTopTransactionId();
  
  	/*
  	 * If we are going to set MyProc->xmin then we'd better get exclusive
***************
*** 576,583 ****
  			TransactionIdFollowsOrEquals(xid, xmax))
  			continue;
  
! 		if (TransactionIdPrecedes(xid, xmin))
  			xmin = xid;
  		snapshot->xip[count] = xid;
  		count++;
  
--- 587,597 ----
  			TransactionIdFollowsOrEquals(xid, xmax))
  			continue;
  
! 		if (TransactionIdPrecedes(xid, xmin)) {
  			xmin = xid;
+ 			if (proc->inVacuum==false)
+ 				noninvacuumxmin = xid;
+ 		}
  		snapshot->xip[count] = xid;
  		count++;
  
***************
*** 588,595 ****
  				globalxmin = xid;
  	}
  
! 	if (serializable)
  		MyProc->xmin = TransactionXmin = xmin;
  
  	LWLockRelease(ProcArrayLock);
  
--- 602,611 ----
  				globalxmin = xid;
  	}
  
! 	if (serializable) {
  		MyProc->xmin = TransactionXmin = xmin;
+ 		MyProc->nonInVacuumXmin = noninvacuumxmin;
+ 	}
  
  	LWLockRelease(ProcArrayLock);
  
Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.163
diff -c -r1.163 proc.c
*** src/backend/storage/lmgr/proc.c	20 Aug 2005 23:26:24 -0000	1.163
--- src/backend/storage/lmgr/proc.c	24 Aug 2005 12:01:19 -0000
***************
*** 260,265 ****
--- 260,267 ----
  	MyProc->databaseId = MyDatabaseId;
  	/* Will be set properly after the session role id is determined */
  	MyProc->roleId = InvalidOid;
+ 	MyProc->inVacuum = false;
+ 	MyProc->nonInVacuumXmin = InvalidTransactionId;
  	MyProc->lwWaiting = false;
  	MyProc->lwExclusive = false;
  	MyProc->lwWaitLink = NULL;
***************
*** 339,344 ****
--- 341,348 ----
  	MyProc->xmin = InvalidTransactionId;
  	MyProc->databaseId = MyDatabaseId;
  	MyProc->roleId = InvalidOid;
+ 	MyProc->inVacuum = false;
+ 	MyProc->nonInVacuumXmin = InvalidTransactionId;
  	MyProc->lwWaiting = false;
  	MyProc->lwExclusive = false;
  	MyProc->lwWaitLink = NULL;
Index: src/include/commands/vacuum.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/commands/vacuum.h,v
retrieving revision 1.60
diff -c -r1.60 vacuum.h
*** src/include/commands/vacuum.h	14 Jul 2005 05:13:43 -0000	1.60
--- src/include/commands/vacuum.h	24 Aug 2005 12:01:19 -0000
***************
*** 133,139 ****
  					BlockNumber num_pages,
  					double num_tuples,
  					bool hasindex);
! extern void vacuum_set_xid_limits(VacuumStmt *vacstmt, bool sharedRel,
  					  TransactionId *oldestXmin,
  					  TransactionId *freezeLimit);
  extern bool vac_is_partial_index(Relation indrel);
--- 133,140 ----
  					BlockNumber num_pages,
  					double num_tuples,
  					bool hasindex);
! extern void vacuum_set_xid_limits(VacuumStmt *vacstmt,
! 					  bool sharedRel, bool ignoreVacuum,
  					  TransactionId *oldestXmin,
  					  TransactionId *freezeLimit);
  extern bool vac_is_partial_index(Relation indrel);
Index: src/include/storage/proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/proc.h,v
retrieving revision 1.81
diff -c -r1.81 proc.h
*** src/include/storage/proc.h	20 Aug 2005 23:26:34 -0000	1.81
--- src/include/storage/proc.h	24 Aug 2005 12:01:19 -0000
***************
*** 73,78 ****
--- 73,83 ----
  	Oid			databaseId;		/* OID of database this backend is using */
  	Oid			roleId;			/* OID of role using this backend */
  
+ 	bool		inVacuum;		/* true if current xact is a VACUUM */
+ 	
+ 	TransactionId nonInVacuumXmin;	/* same as xmin with transactions where
+ 									 * (proc->inVacuum == true) excluded */
+ 
  	/* Info about LWLock the process is currently waiting for, if any. */
  	bool		lwWaiting;		/* true if waiting for an LW lock */
  	bool		lwExclusive;	/* true if waiting for exclusive access */
Index: src/include/storage/procarray.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/procarray.h,v
retrieving revision 1.5
diff -c -r1.5 procarray.h
*** src/include/storage/procarray.h	20 Aug 2005 23:26:34 -0000	1.5
--- src/include/storage/procarray.h	24 Aug 2005 12:01:19 -0000
***************
*** 24,30 ****
  
  extern bool TransactionIdIsInProgress(TransactionId xid);
  extern bool TransactionIdIsActive(TransactionId xid);
! extern TransactionId GetOldestXmin(bool allDbs);
  
  extern PGPROC *BackendPidGetProc(int pid);
  extern int BackendXidGetPid(TransactionId xid);
--- 24,30 ----
  
  extern bool TransactionIdIsInProgress(TransactionId xid);
  extern bool TransactionIdIsActive(TransactionId xid);
! extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
  
  extern PGPROC *BackendPidGetProc(int pid);
  extern int BackendXidGetPid(TransactionId xid);
---------------------------(end of broadcast)---------------------------
TIP 9: In versions below 8.0, the planner will ignore your desire to
       choose an index scan if your joining column's datatypes do not
       match

Reply via email to