Attached is a patch for the next milestone on the Serializable wiki page: changing the table-level predicate locks to SIREAD locks, without yet worrying about their lifespan. (Implementing the correct lifespan is the next step.) Because lifespan is not handled yet, the locks are never cleaned up at all, even when the transaction ends and the connection is closed. In a way, that's not all bad: neither of those events *should* remove these locks, so there's nothing to *undo* for the next step. As before, this is "for the record" and not a request for commit or official review. If anyone looks at it out of interest in this effort, any feedback is welcome. The patch applies cleanly to HEAD and passes the regression tests. ;-) -Kevin
*** a/src/backend/catalog/index.c --- b/src/backend/catalog/index.c *************** *** 2132,2138 **** IndexCheckExclusion(Relation heapRelation, * * After completing validate_index(), we wait until all transactions that * were alive at the time of the reference snapshot are gone; this is ! * necessary to be sure there are none left with a serializable snapshot * older than the reference (and hence possibly able to see tuples we did * not index). Then we mark the index "indisvalid" and commit. Subsequent * transactions will be able to use it for queries. --- 2132,2138 ---- * * After completing validate_index(), we wait until all transactions that * were alive at the time of the reference snapshot are gone; this is ! * necessary to be sure there are none left with a transaction-based snapshot * older than the reference (and hence possibly able to see tuples we did * not index). Then we mark the index "indisvalid" and commit. Subsequent * transactions will be able to use it for queries. *** a/src/backend/commands/trigger.c --- b/src/backend/commands/trigger.c *************** *** 2360,2366 **** ltrmark:; case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 2360,2366 ---- case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/execMain.c --- b/src/backend/executor/execMain.c *************** *** 1538,1544 **** EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, case HeapTupleUpdated: ReleaseBuffer(buffer); ! 
if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 1538,1544 ---- case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/nodeBitmapHeapscan.c --- b/src/backend/executor/nodeBitmapHeapscan.c *************** *** 42,47 **** --- 42,48 ---- #include "executor/nodeBitmapHeapscan.h" #include "pgstat.h" #include "storage/bufmgr.h" + #include "storage/predicate.h" #include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/tqual.h" *************** *** 114,119 **** BitmapHeapNext(BitmapHeapScanState *node) --- 115,123 ---- #endif /* USE_PREFETCH */ } + /* TODO SSI: Lock at tuple level subject to granularity promotion. */ + PredicateLockRelation(node->ss.ss_currentRelation); + for (;;) { Page dp; *** a/src/backend/executor/nodeIndexscan.c --- b/src/backend/executor/nodeIndexscan.c *************** *** 30,35 **** --- 30,36 ---- #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" #include "optimizer/clauses.h" + #include "storage/predicate.h" #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" *************** *** 72,77 **** IndexNext(IndexScanState *node) --- 73,81 ---- econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; + /* TODO SSI: Lock at tuple level subject to granularity promotion. */ + PredicateLockRelation(node->ss.ss_currentRelation); + /* * ok, now that we have what we need, fetch the next tuple. */ *** a/src/backend/executor/nodeLockRows.c --- b/src/backend/executor/nodeLockRows.c *************** *** 130,136 **** lnext: break; case HeapTupleUpdated: ! 
if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 130,136 ---- break; case HeapTupleUpdated: ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/nodeModifyTable.c --- b/src/backend/executor/nodeModifyTable.c *************** *** 326,332 **** ldelete:; break; case HeapTupleUpdated: ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 326,332 ---- break; case HeapTupleUpdated: ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *************** *** 514,520 **** lreplace:; break; case HeapTupleUpdated: ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 514,520 ---- break; case HeapTupleUpdated: ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/nodeSeqscan.c --- b/src/backend/executor/nodeSeqscan.c *************** *** 28,33 **** --- 28,34 ---- #include "access/relscan.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" + #include "storage/predicate.h" static void InitScanRelation(SeqScanState *node, EState *estate); static TupleTableSlot *SeqNext(SeqScanState *node); *************** *** 106,116 **** SeqRecheck(SeqScanState *node, TupleTableSlot *slot) --- 107,122 ---- * tuple. * We call the ExecScan() routine and pass it the appropriate * access method functions. + * For serializable transactions, we first lock the entire relation. 
+ * TODO SSI: Would it make sense to optimize cases where the plan + * includes a LIMIT, such that individual tuples are + * locked instead, subject to granularity promotion? * ---------------------------------------------------------------- */ TupleTableSlot * ExecSeqScan(SeqScanState *node) { + PredicateLockRelation(node->ss_currentRelation); return ExecScan((ScanState *) node, (ExecScanAccessMtd) SeqNext, (ExecScanRecheckMtd) SeqRecheck); *** a/src/backend/executor/nodeTidscan.c --- b/src/backend/executor/nodeTidscan.c *************** *** 31,36 **** --- 31,37 ---- #include "executor/nodeTidscan.h" #include "optimizer/clauses.h" #include "storage/bufmgr.h" + #include "storage/predicate.h" #include "utils/array.h" *************** *** 308,313 **** TidNext(TidScanState *node) --- 309,317 ---- node->tss_TidPtr++; } + /* TODO SSI: Lock at tuple level subject to granularity promotion. */ + PredicateLockRelation(heapRelation); + while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) { tuple->t_self = tidList[node->tss_TidPtr]; *** a/src/backend/storage/lmgr/Makefile --- b/src/backend/storage/lmgr/Makefile *************** *** 12,18 **** subdir = src/backend/storage/lmgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o include $(top_srcdir)/src/backend/common.mk --- 12,18 ---- top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o include $(top_srcdir)/src/backend/common.mk *** a/src/backend/storage/lmgr/lock.c --- b/src/backend/storage/lmgr/lock.c *************** *** 94,100 **** static const LOCKMASK LockConflicts[] = { (1 << AccessShareLock) | (1 << RowShareLock) | (1 << RowExclusiveLock) | (1 << ShareUpdateExclusiveLock) | (1 << ShareLock) | (1 << ShareRowExclusiveLock) | ! 
(1 << ExclusiveLock) | (1 << AccessExclusiveLock) }; --- 94,103 ---- (1 << AccessShareLock) | (1 << RowShareLock) | (1 << RowExclusiveLock) | (1 << ShareUpdateExclusiveLock) | (1 << ShareLock) | (1 << ShareRowExclusiveLock) | ! (1 << ExclusiveLock) | (1 << AccessExclusiveLock), ! ! /* SIReadLock */ ! 0 }; *************** *** 109,115 **** static const char *const lock_mode_names[] = "ShareLock", "ShareRowExclusiveLock", "ExclusiveLock", ! "AccessExclusiveLock" }; #ifndef LOCK_DEBUG --- 112,119 ---- "ShareLock", "ShareRowExclusiveLock", "ExclusiveLock", ! "AccessExclusiveLock", ! "SIReadLock" }; #ifndef LOCK_DEBUG *************** *** 140,152 **** static const LockMethodData user_lockmethod = { #endif }; /* * map from lock method id to the lock table data structures */ static const LockMethod LockMethods[] = { NULL, &default_lockmethod, ! &user_lockmethod }; --- 144,169 ---- #endif }; + static const LockMethodData predicate_lockmethod = { + SIReadLock, /* highest valid lock mode number */ + true, /* TODO SSI: this eventually needs to be false */ + LockConflicts, + lock_mode_names, + #ifdef LOCK_DEBUG + &Trace_userlocks + #else + &Dummy_trace + #endif + }; + /* * map from lock method id to the lock table data structures */ static const LockMethod LockMethods[] = { NULL, &default_lockmethod, ! &user_lockmethod, ! 
&predicate_lockmethod }; *** /dev/null --- b/src/backend/storage/lmgr/predicate.c *************** *** 0 **** --- 1,62 ---- + /*------------------------------------------------------------------------- + * + * predicate.c + * POSTGRES predicate locking + * to support full serializable transaction isolation + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + /* + * INTERFACE ROUTINES + * + * PredicateLockRelation(Relation relation) + */ + + #include "postgres.h" + + #include "access/xact.h" + #include "storage/lmgr.h" + #include "storage/predicate.h" + + + /* ---------------------------------------------------------------- + * Gets a predicate lock at the relation level. + * Skip if not in full serializable transaction isolation level. + * Skip if this is a temporary table or toast table. + * Skip this if a write lock exists for the relation; otherwise, + * clear any finer-grained predicate locks this session has on the relation. + * TODO SSI: Some of the above. Using SIREAD locks. + * TODO SSI: Only do on open, already-locked relation? + * ---------------------------------------------------------------- + */ + void + PredicateLockRelation(Relation relation) + { + if (IsXactIsoLevelFullySerializable && !(relation->rd_istemp)) + { + LOCKTAG tag; + LockAcquireResult res; + + SET_LOCKTAG_RELATION_PREDICATE(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + res = LockAcquireExtended(&tag, SIReadLock, false, false, true); + /* TODO SSI: Need to get to non-transactional. */ + /* TODO SSI: Don't throw error on full; go to coarser granularity. */ + + /* + * Now that we have the lock, check for invalidation messages; see notes + * in LockRelationOid. 
+ */ + if (res != LOCKACQUIRE_ALREADY_HELD) + AcceptInvalidationMessages(); /* TODO SSI: Needed? */ + } + } *** a/src/backend/utils/adt/ri_triggers.c --- b/src/backend/utils/adt/ri_triggers.c *************** *** 3314,3320 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan, /* * In READ COMMITTED mode, we just need to use an up-to-date regular * snapshot, and we will see all rows that could be interesting. But in ! * SERIALIZABLE mode, we can't change the transaction snapshot. If the * caller passes detectNewRows == false then it's okay to do the query * with the transaction snapshot; otherwise we use a current snapshot, and * tell the executor to error out if it finds any rows under the current --- 3314,3320 ---- /* * In READ COMMITTED mode, we just need to use an up-to-date regular * snapshot, and we will see all rows that could be interesting. But in ! * xact-snapshot-based modes, we can't change the transaction snapshot. If the * caller passes detectNewRows == false then it's okay to do the query * with the transaction snapshot; otherwise we use a current snapshot, and * tell the executor to error out if it finds any rows under the current *************** *** 3322,3328 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan, * that SPI_execute_snapshot will register the snapshots, so we don't need * to bother here. */ ! if (IsXactIsoLevelSerializable && detectNewRows) { CommandCounterIncrement(); /* be sure all my own work is visible */ test_snapshot = GetLatestSnapshot(); --- 3322,3328 ---- * that SPI_execute_snapshot will register the snapshots, so we don't need * to bother here. */ ! if (IsXactIsoLevelXactSnapshotBased && detectNewRows) { CommandCounterIncrement(); /* be sure all my own work is visible */ test_snapshot = GetLatestSnapshot(); *** a/src/backend/utils/time/snapmgr.c --- b/src/backend/utils/time/snapmgr.c *************** *** 37,44 **** /* ! * CurrentSnapshot points to the only snapshot taken in a serializable ! 
* transaction, and to the latest one taken in a read-committed transaction. * SecondarySnapshot is a snapshot that's always up-to-date as of the current * instant, even on a serializable transaction. It should only be used for * special-purpose code (say, RI checking.) --- 37,44 ---- /* ! * CurrentSnapshot points to the only snapshot taken in a xact-snapshot-based ! * transaction; otherwise to the latest one taken. * SecondarySnapshot is a snapshot that's always up-to-date as of the current * instant, even on a serializable transaction. It should only be used for * special-purpose code (say, RI checking.) *************** *** 97,107 **** static int RegisteredSnapshots = 0; bool FirstSnapshotSet = false; /* ! * Remembers whether this transaction registered a serializable snapshot at * start. We cannot trust FirstSnapshotSet in combination with ! * IsXactIsoLevelSerializable, because GUC may be reset before us. */ ! static bool registered_serializable = false; static Snapshot CopySnapshot(Snapshot snapshot); --- 97,107 ---- bool FirstSnapshotSet = false; /* ! * Remembers whether this transaction registered a transaction-based snapshot at * start. We cannot trust FirstSnapshotSet in combination with ! * IsXactIsoLevelXactSnapshotBased, because GUC may be reset before us. */ ! static bool registered_xact_snapshot = false; static Snapshot CopySnapshot(Snapshot snapshot); *************** *** 130,150 **** GetTransactionSnapshot(void) FirstSnapshotSet = true; /* ! * In serializable mode, the first snapshot must live until end of * xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ ! if (IsXactIsoLevelSerializable) { CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_serializable = true; } return CurrentSnapshot; } ! 
if (IsXactIsoLevelSerializable) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); --- 130,150 ---- FirstSnapshotSet = true; /* ! * In xact-snapshot-based isolation levels, the first snapshot must live until end of * xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ ! if (IsXactIsoLevelXactSnapshotBased) { CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_xact_snapshot = true; } return CurrentSnapshot; } ! if (IsXactIsoLevelXactSnapshotBased) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); *************** *** 155,161 **** GetTransactionSnapshot(void) /* * GetLatestSnapshot * Get a snapshot that is up-to-date as of the current instant, ! * even if we are executing in SERIALIZABLE mode. */ Snapshot GetLatestSnapshot(void) --- 155,161 ---- /* * GetLatestSnapshot * Get a snapshot that is up-to-date as of the current instant, ! * even if we are executing in xact-snapshot-based mode. */ Snapshot GetLatestSnapshot(void) *************** *** 515,527 **** void AtEarlyCommit_Snapshot(void) { /* ! * On a serializable transaction we must unregister our private refcount ! * to the serializable snapshot. */ ! if (registered_serializable) UnregisterSnapshotFromOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_serializable = false; } --- 515,527 ---- AtEarlyCommit_Snapshot(void) { /* ! * On a xact-snapshot-based transaction we must unregister our private refcount ! * to the xact snapshot. */ ! if (registered_xact_snapshot) UnregisterSnapshotFromOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_xact_snapshot = false; } *************** *** 557,561 **** AtEOXact_Snapshot(bool isCommit) SecondarySnapshot = NULL; FirstSnapshotSet = false; ! registered_serializable = false; } --- 557,561 ---- SecondarySnapshot = NULL; FirstSnapshotSet = false; ! 
registered_xact_snapshot = false; } *** a/src/include/access/xact.h --- b/src/include/access/xact.h *************** *** 32,41 **** extern int DefaultXactIsoLevel; extern int XactIsoLevel; /* ! * We only implement two isolation levels internally. This macro should ! * be used to check which one is selected. */ ! #define IsXactIsoLevelSerializable (XactIsoLevel >= XACT_REPEATABLE_READ) /* Xact read-only state */ extern bool DefaultXactReadOnly; --- 32,45 ---- extern int XactIsoLevel; /* ! * We implement three isolation levels internally. ! * The two stronger ones use one snapshot per database transaction; ! * the others use one snapshot per statement. ! * Serializable uses predicate locks. ! * These macros should be used to check which isolation level is selected. */ ! #define IsXactIsoLevelXactSnapshotBased (XactIsoLevel >= XACT_REPEATABLE_READ) ! #define IsXactIsoLevelFullySerializable (XactIsoLevel == XACT_SERIALIZABLE) /* Xact read-only state */ extern bool DefaultXactReadOnly; *** a/src/include/storage/lock.h --- b/src/include/storage/lock.h *************** *** 134,139 **** typedef uint16 LOCKMETHODID; --- 134,140 ---- /* These identify the known lock methods */ #define DEFAULT_LOCKMETHOD 1 #define USER_LOCKMETHOD 2 + #define PREDICATE_LOCKMETHOD 3 /* * These are the valid values of type LOCKMODE for all the standard lock *************** *** 155,160 **** typedef uint16 LOCKMETHODID; --- 156,162 ---- * UPDATE */ #define AccessExclusiveLock 8 /* ALTER TABLE, DROP TABLE, VACUUM * FULL, and unqualified LOCK TABLE */ + #define SIReadLock 9 /* Predicate locking for SSI */ /* *************** *** 271,276 **** typedef struct LOCKTAG --- 273,279 ---- (locktag).locktag_type = LOCKTAG_OBJECT, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + #define SET_LOCKTAG_ADVISORY(locktag,id1,id2,id3,id4) \ ((locktag).locktag_field1 = (id1), \ (locktag).locktag_field2 = (id2), \ *************** *** 280,285 **** typedef struct LOCKTAG --- 283,313 ---- 
(locktag).locktag_lockmethodid = USER_LOCKMETHOD) + #define SET_LOCKTAG_RELATION_PREDICATE(locktag,dboid,reloid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_RELATION, \ + (locktag).locktag_lockmethodid = PREDICATE_LOCKMETHOD) + + #define SET_LOCKTAG_PAGE_PREDICATE(locktag,dboid,reloid,blocknum) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = (blocknum), \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_PAGE, \ + (locktag).locktag_lockmethodid = PREDICATE_LOCKMETHOD) + + #define SET_LOCKTAG_TUPLE_PREDICATE(locktag,dboid,reloid,blocknum,offnum) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = (blocknum), \ + (locktag).locktag_field4 = (offnum), \ + (locktag).locktag_type = LOCKTAG_TUPLE, \ + (locktag).locktag_lockmethodid = PREDICATE_LOCKMETHOD) + + /* * Per-locked-object lock information: * *** /dev/null --- b/src/include/storage/predicate.h *************** *** 0 **** --- 1,22 ---- + /*------------------------------------------------------------------------- + * + * predicate.h + * POSTGRES predicate locking definitions. + * + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + #ifndef PREDICATE_H + #define PREDICATE_H + + #include "storage/lock.h" + #include "utils/relcache.h" + + extern void PredicateLockRelation(Relation relation); + + #endif /* PREDICATE_H */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers