On K, 2005-08-17 at 15:40 -0400, Tom Lane wrote:
> Saatja:
> Tom Lane <[EMAIL PROTECTED]>
> Kellele:
> Bruce Momjian
> <[email protected]>, Hannu
> Krosing <[EMAIL PROTECTED]>, Neil Conway
> <[EMAIL PROTECTED]>, pgsql-
> [EMAIL PROTECTED]
> Teema:
> Re: [PATCHES] PATCH to allow
> concurrent VACUUMs to not lock each
> KuupƤev:
> Wed, 17 Aug 2005 15:40:53 -0400
> (22:40 EEST)
>
> Just for the archives, attached is as far as I'd gotten with cleaning
> up
> Hannu's patch before I realized that it wasn't doing what it needed to
> do. This fixes an end-of-transaction race condition (can't unset
> inVacuum before xact end, unless you want OldestXmin going backwards
> from the point of view of other people) and improves the documentation
> of what's going on. But unless someone can convince me that it's safe
> to mess with GetSnapshotData, it's unlikely this'll ever get applied.
>
>
>
Attached is a patch, based on you last one, which messes with
GetSnapshotData in what I think is a safe way.
It introduces another attribute to PROC , proc->nonInVacuumXmin and
computes this in addition to prox->xmin inside GetSnapshotData.
When (and only when) GetOldestXmin is called with ignoreVacuum=true,
then proc->nonInVacuumXmin is checked instead of prox->xmin.
I believe that this will make this change invisible to all other places
where GetSnapshotData or GetOldestXmin is used.
--
Hannu Krosing <[EMAIL PROTECTED]>
Index: src/backend/access/transam/twophase.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/twophase.c,v
retrieving revision 1.10
diff -c -r1.10 twophase.c
*** src/backend/access/transam/twophase.c 20 Aug 2005 23:26:10 -0000 1.10
--- src/backend/access/transam/twophase.c 24 Aug 2005 12:01:17 -0000
***************
*** 280,285 ****
--- 280,287 ----
gxact->proc.pid = 0;
gxact->proc.databaseId = databaseid;
gxact->proc.roleId = owner;
+ gxact->proc.inVacuum = false;
+ gxact->proc.nonInVacuumXmin = InvalidTransactionId;
gxact->proc.lwWaiting = false;
gxact->proc.lwExclusive = false;
gxact->proc.lwWaitLink = NULL;
Index: src/backend/access/transam/xact.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xact.c,v
retrieving revision 1.214
diff -c -r1.214 xact.c
*** src/backend/access/transam/xact.c 20 Aug 2005 23:45:08 -0000 1.214
--- src/backend/access/transam/xact.c 24 Aug 2005 12:01:17 -0000
***************
*** 1516,1521 ****
--- 1516,1523 ----
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
+ MyProc->inVacuum = false; /* must be cleared with xid/xmin */
+ MyProc->nonInVacuumXmin = InvalidTransactionId; /* this too */
/* Clear the subtransaction-XID cache too while holding the lock */
MyProc->subxids.nxids = 0;
***************
*** 1752,1757 ****
--- 1754,1761 ----
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
+ MyProc->inVacuum = false; /* must be cleared with xid/xmin */
+ MyProc->nonInVacuumXmin = InvalidTransactionId; /* this too */
/* Clear the subtransaction-XID cache too while holding the lock */
MyProc->subxids.nxids = 0;
***************
*** 1915,1920 ****
--- 1919,1926 ----
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
+ MyProc->inVacuum = false; /* must be cleared with xid/xmin */
+ MyProc->nonInVacuumXmin = InvalidTransactionId; /* this too */
/* Clear the subtransaction-XID cache too while holding the lock */
MyProc->subxids.nxids = 0;
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.218
diff -c -r1.218 xlog.c
*** src/backend/access/transam/xlog.c 22 Aug 2005 23:59:04 -0000 1.218
--- src/backend/access/transam/xlog.c 24 Aug 2005 12:01:18 -0000
***************
*** 5303,5309 ****
* mustn't do this because StartupSUBTRANS hasn't been called yet.
*/
if (!InRecovery)
! TruncateSUBTRANS(GetOldestXmin(true));
if (!shutdown)
ereport(DEBUG2,
--- 5303,5309 ----
* mustn't do this because StartupSUBTRANS hasn't been called yet.
*/
if (!InRecovery)
! TruncateSUBTRANS(GetOldestXmin(true, false));
if (!shutdown)
ereport(DEBUG2,
Index: src/backend/catalog/index.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/catalog/index.c,v
retrieving revision 1.259
diff -c -r1.259 index.c
*** src/backend/catalog/index.c 12 Aug 2005 01:35:56 -0000 1.259
--- src/backend/catalog/index.c 24 Aug 2005 12:01:18 -0000
***************
*** 1433,1439 ****
else
{
snapshot = SnapshotAny;
! OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared);
}
scan = heap_beginscan(heapRelation, /* relation */
--- 1433,1440 ----
else
{
snapshot = SnapshotAny;
! /* okay to ignore lazy VACUUMs here */
! OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
}
scan = heap_beginscan(heapRelation, /* relation */
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.313
diff -c -r1.313 vacuum.c
*** src/backend/commands/vacuum.c 20 Aug 2005 00:39:54 -0000 1.313
--- src/backend/commands/vacuum.c 24 Aug 2005 12:01:18 -0000
***************
*** 36,41 ****
--- 36,42 ----
#include "executor/executor.h"
#include "miscadmin.h"
#include "storage/freespace.h"
+ #include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "tcop/pquery.h"
***************
*** 371,378 ****
* determining the cutoff with which we vacuum shared relations,
* it is not possible for that database to have a cutoff newer
* than OLDXMIN recorded in pg_database.
*/
! vacuum_set_xid_limits(vacstmt, false,
&initialOldestXmin,
&initialFreezeLimit);
}
--- 372,382 ----
* determining the cutoff with which we vacuum shared relations,
* it is not possible for that database to have a cutoff newer
* than OLDXMIN recorded in pg_database.
+ *
+ * We can't ignore concurrent lazy VACUUMs, because these values will
+ * be used to truncate clog below.
*/
! vacuum_set_xid_limits(vacstmt, false, false,
&initialOldestXmin,
&initialFreezeLimit);
}
***************
*** 612,624 ****
* vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
*/
void
! vacuum_set_xid_limits(VacuumStmt *vacstmt, bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit)
{
TransactionId limit;
! *oldestXmin = GetOldestXmin(sharedRel);
Assert(TransactionIdIsNormal(*oldestXmin));
--- 616,629 ----
* vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
*/
void
! vacuum_set_xid_limits(VacuumStmt *vacstmt,
! bool sharedRel, bool ignoreVacuum,
TransactionId *oldestXmin,
TransactionId *freezeLimit)
{
TransactionId limit;
! *oldestXmin = GetOldestXmin(sharedRel, ignoreVacuum);
Assert(TransactionIdIsNormal(*oldestXmin));
***************
*** 673,678 ****
--- 678,688 ----
* pg_class would've been obsoleted. Of course, this only works for
* fixed-size never-null columns, but these are.
*
+ * Another reason for doing it this way is that when we are in a lazy
+ * VACUUM and have inVacuum set, we mustn't do any updates --- somebody
+ * vacuuming pg_class might think they could delete a tuple marked with
+ * xmin = our xid.
+ *
* This routine is shared by full VACUUM, lazy VACUUM, and stand-alone
* ANALYZE.
*/
***************
*** 953,960 ****
/* Begin a transaction for vacuuming this relation */
StartTransactionCommand();
! /* functions in indexes may want a snapshot set */
! ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
/*
* Tell the cache replacement strategy that vacuum is causing all
--- 963,997 ----
/* Begin a transaction for vacuuming this relation */
StartTransactionCommand();
!
! if (vacstmt->full)
! {
! /* functions in indexes may want a snapshot set */
! ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
! }
! else
! {
! /*
! * During a lazy VACUUM we do not run any user-supplied functions,
! * and so it should be safe to not create a transaction snapshot.
! *
! * We can furthermore set the inVacuum flag, which lets other
! * concurrent VACUUMs know that they can ignore this one while
! * determining their OldestXmin. (The reason we don't set inVacuum
! * during a full VACUUM is exactly that we may have to run user-
! * defined functions for functional indexes, and we want to make
! * sure that if they use the snapshot set above, any tuples it
! * requires can't get removed from other tables. An index function
! * that depends on the contents of other tables is arguably broken,
! * but we won't break it here by violating transaction semantics.)
! *
! * Note: the inVacuum flag remains set until CommitTransaction or
! * AbortTransaction. We don't want to clear it until we reset
! * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
! * which is probably Not Good.
! */
! MyProc->inVacuum = true;
! }
/*
* Tell the cache replacement strategy that vacuum is causing all
***************
*** 1134,1140 ****
i;
VRelStats *vacrelstats;
! vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
&OldestXmin, &FreezeLimit);
/*
--- 1171,1177 ----
i;
VRelStats *vacrelstats;
! vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, true,
&OldestXmin, &FreezeLimit);
/*
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.57
diff -c -r1.57 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c 20 Aug 2005 23:26:13 -0000 1.57
--- src/backend/commands/vacuumlazy.c 24 Aug 2005 12:01:18 -0000
***************
*** 142,148 ****
else
elevel = DEBUG2;
! vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
&OldestXmin, &FreezeLimit);
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
--- 142,148 ----
else
elevel = DEBUG2;
! vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, true,
&OldestXmin, &FreezeLimit);
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
Index: src/backend/storage/ipc/procarray.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/ipc/procarray.c,v
retrieving revision 1.6
diff -c -r1.6 procarray.c
*** src/backend/storage/ipc/procarray.c 20 Aug 2005 23:26:20 -0000 1.6
--- src/backend/storage/ipc/procarray.c 24 Aug 2005 12:01:19 -0000
***************
*** 391,410 ****
* If allDbs is TRUE then all backends are considered; if allDbs is FALSE
* then only backends running in my own database are considered.
*
* This is used by VACUUM to decide which deleted tuples must be preserved
* in a table. allDbs = TRUE is needed for shared relations, but allDbs =
* FALSE is sufficient for non-shared relations, since only backends in my
! * own database could ever see the tuples in them.
*
* This is also used to determine where to truncate pg_subtrans. allDbs
! * must be TRUE for that case.
*
* Note: we include the currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
* when it does set the snapshot it cannot set xmin less than what we compute.
*/
TransactionId
! GetOldestXmin(bool allDbs)
{
ProcArrayStruct *arrayP = procArray;
TransactionId result;
--- 391,414 ----
* If allDbs is TRUE then all backends are considered; if allDbs is FALSE
* then only backends running in my own database are considered.
*
+ * If ignoreVacuum is TRUE then backends with inVacuum set are ignored.
+ *
* This is used by VACUUM to decide which deleted tuples must be preserved
* in a table. allDbs = TRUE is needed for shared relations, but allDbs =
* FALSE is sufficient for non-shared relations, since only backends in my
! * own database could ever see the tuples in them. Also, we can ignore
! * concurrently running lazy VACUUMs because (a) they must be working on other
! * tables, and (b) they don't need to do snapshot-based lookups.
*
* This is also used to determine where to truncate pg_subtrans. allDbs
! * must be TRUE for that case, and ignoreVacuum FALSE.
*
* Note: we include the currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
* when it does set the snapshot it cannot set xmin less than what we compute.
*/
TransactionId
! GetOldestXmin(bool allDbs, bool ignoreVacuum)
{
ProcArrayStruct *arrayP = procArray;
TransactionId result;
***************
*** 428,433 ****
--- 432,440 ----
{
PGPROC *proc = arrayP->procs[index];
+ if (ignoreVacuum && proc->inVacuum)
+ continue;
+
if (allDbs || proc->databaseId == MyDatabaseId)
{
/* Fetch xid just once - see GetNewTransactionId */
***************
*** 437,443 ****
{
if (TransactionIdPrecedes(xid, result))
result = xid;
! xid = proc->xmin;
if (TransactionIdIsNormal(xid))
if (TransactionIdPrecedes(xid, result))
result = xid;
--- 444,453 ----
{
if (TransactionIdPrecedes(xid, result))
result = xid;
! if (ignoreVacuum)
! xid = proc->nonInVacuumXmin;
! else
! xid = proc->xmin;
if (TransactionIdIsNormal(xid))
if (TransactionIdPrecedes(xid, result))
result = xid;
***************
*** 475,481 ****
* older than this are known not running any more.
* RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
* running transactions). This is the same computation done by
! * GetOldestXmin(TRUE).
*----------
*/
Snapshot
--- 485,491 ----
* older than this are known not running any more.
* RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
* running transactions). This is the same computation done by
! * GetOldestXmin(TRUE, FALSE).
*----------
*/
Snapshot
***************
*** 485,490 ****
--- 495,501 ----
TransactionId xmin;
TransactionId xmax;
TransactionId globalxmin;
+ TransactionId noninvacuumxmin;
int index;
int count = 0;
***************
*** 518,524 ****
errmsg("out of memory")));
}
! globalxmin = xmin = GetTopTransactionId();
/*
* If we are going to set MyProc->xmin then we'd better get exclusive
--- 529,535 ----
errmsg("out of memory")));
}
! globalxmin = xmin = noninvacuumxmin = GetTopTransactionId();
/*
* If we are going to set MyProc->xmin then we'd better get exclusive
***************
*** 576,583 ****
TransactionIdFollowsOrEquals(xid, xmax))
continue;
! if (TransactionIdPrecedes(xid, xmin))
xmin = xid;
snapshot->xip[count] = xid;
count++;
--- 587,597 ----
TransactionIdFollowsOrEquals(xid, xmax))
continue;
! if (TransactionIdPrecedes(xid, xmin)) {
xmin = xid;
+ if (proc->inVacuum==false)
+ noninvacuumxmin = xid;
+ }
snapshot->xip[count] = xid;
count++;
***************
*** 588,595 ****
globalxmin = xid;
}
! if (serializable)
MyProc->xmin = TransactionXmin = xmin;
LWLockRelease(ProcArrayLock);
--- 602,611 ----
globalxmin = xid;
}
! if (serializable) {
MyProc->xmin = TransactionXmin = xmin;
+ MyProc->nonInVacuumXmin = noninvacuumxmin;
+ }
LWLockRelease(ProcArrayLock);
Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.163
diff -c -r1.163 proc.c
*** src/backend/storage/lmgr/proc.c 20 Aug 2005 23:26:24 -0000 1.163
--- src/backend/storage/lmgr/proc.c 24 Aug 2005 12:01:19 -0000
***************
*** 260,265 ****
--- 260,267 ----
MyProc->databaseId = MyDatabaseId;
/* Will be set properly after the session role id is determined */
MyProc->roleId = InvalidOid;
+ MyProc->inVacuum = false;
+ MyProc->nonInVacuumXmin = InvalidTransactionId;
MyProc->lwWaiting = false;
MyProc->lwExclusive = false;
MyProc->lwWaitLink = NULL;
***************
*** 339,344 ****
--- 341,348 ----
MyProc->xmin = InvalidTransactionId;
MyProc->databaseId = MyDatabaseId;
MyProc->roleId = InvalidOid;
+ MyProc->inVacuum = false;
+ MyProc->nonInVacuumXmin = InvalidTransactionId;
MyProc->lwWaiting = false;
MyProc->lwExclusive = false;
MyProc->lwWaitLink = NULL;
Index: src/include/commands/vacuum.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/commands/vacuum.h,v
retrieving revision 1.60
diff -c -r1.60 vacuum.h
*** src/include/commands/vacuum.h 14 Jul 2005 05:13:43 -0000 1.60
--- src/include/commands/vacuum.h 24 Aug 2005 12:01:19 -0000
***************
*** 133,139 ****
BlockNumber num_pages,
double num_tuples,
bool hasindex);
! extern void vacuum_set_xid_limits(VacuumStmt *vacstmt, bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit);
extern bool vac_is_partial_index(Relation indrel);
--- 133,140 ----
BlockNumber num_pages,
double num_tuples,
bool hasindex);
! extern void vacuum_set_xid_limits(VacuumStmt *vacstmt,
! bool sharedRel, bool ignoreVacuum,
TransactionId *oldestXmin,
TransactionId *freezeLimit);
extern bool vac_is_partial_index(Relation indrel);
Index: src/include/storage/proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/proc.h,v
retrieving revision 1.81
diff -c -r1.81 proc.h
*** src/include/storage/proc.h 20 Aug 2005 23:26:34 -0000 1.81
--- src/include/storage/proc.h 24 Aug 2005 12:01:19 -0000
***************
*** 73,78 ****
--- 73,83 ----
Oid databaseId; /* OID of database this backend is using */
Oid roleId; /* OID of role using this backend */
+ bool inVacuum; /* true if current xact is a VACUUM */
+
+ TransactionId nonInVacuumXmin; /* same as xmin with transactions where
+ * (proc->inVacuum == true) excluded */
+
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
bool lwExclusive; /* true if waiting for exclusive access */
Index: src/include/storage/procarray.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/procarray.h,v
retrieving revision 1.5
diff -c -r1.5 procarray.h
*** src/include/storage/procarray.h 20 Aug 2005 23:26:34 -0000 1.5
--- src/include/storage/procarray.h 24 Aug 2005 12:01:19 -0000
***************
*** 24,30 ****
extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
! extern TransactionId GetOldestXmin(bool allDbs);
extern PGPROC *BackendPidGetProc(int pid);
extern int BackendXidGetPid(TransactionId xid);
--- 24,30 ----
extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
! extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
extern PGPROC *BackendPidGetProc(int pid);
extern int BackendXidGetPid(TransactionId xid);
---------------------------(end of broadcast)---------------------------
TIP 9: In versions below 8.0, the planner will ignore your desire to
choose an index scan if your joining column's datatypes do not
match