The attached patch fixes the problem discussed here http://archives.postgresql.org/pgsql-hackers/2006-11/msg00357.php as well as a related problem that I discovered while working on it: the sequence
begin; savepoint x; select * from foo for update; release savepoint x; select * from foo for share; leaves us holding only share lock not exclusive lock on the selected tuples. That's because heap_lock_tuple() considered only the exact-XID-equality case when checking to see if we were requesting share lock while already holding exclusive lock. We should treat exclusive lock held under any of the current backend's subtransactions as not to be overridden. In addition, this formulation avoids useless buffer-dirtying and WAL reporting in all cases where the desired lock is already effectively held, whereas the old code would go through the full pushups anyway. I've only tested it against HEAD but it will need to be applied to 8.1 as well. Anyone see any problems? regards, tom lane
*** src/backend/access/heap/heapam.c.orig Sun Nov 5 17:42:07 2006 --- src/backend/access/heap/heapam.c Thu Nov 16 20:10:37 2006 *************** *** 2359,2364 **** --- 2359,2366 ---- ItemId lp; PageHeader dp; TransactionId xid; + TransactionId xmax; + uint16 old_infomask; uint16 new_infomask; LOCKMODE tuple_lock_type; bool have_tuple_lock = false; *************** *** 2396,2401 **** --- 2398,2422 ---- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); /* + * If we wish to acquire share lock, and the tuple is already + * share-locked by a multixact that includes any subtransaction of the + * current top transaction, then we effectively hold the desired lock + * already. We *must* succeed without trying to take the tuple lock, + * else we will deadlock against anyone waiting to acquire exclusive + * lock. We don't need to make any state changes in this case. + */ + if (mode == LockTupleShared && + (infomask & HEAP_XMAX_IS_MULTI) && + MultiXactIdIsCurrent((MultiXactId) xwait)) + { + Assert(infomask & HEAP_XMAX_SHARED_LOCK); + /* Probably can't hold tuple lock here, but may as well check */ + if (have_tuple_lock) + UnlockTuple(relation, tid, tuple_lock_type); + return HeapTupleMayBeUpdated; + } + + /* * Acquire tuple lock to establish our priority for the tuple. * LockTuple will release us when we are next-in-line for the tuple. * We must do this even if we are share-locking. *************** *** 2533,2557 **** } /* * Compute the new xmax and infomask to store into the tuple. Note we do * not modify the tuple just yet, because that would leave it in the wrong * state if multixact.c elogs. */ xid = GetCurrentTransactionId(); ! new_infomask = tuple->t_data->t_infomask; ! ! new_infomask &= ~(HEAP_XMAX_COMMITTED | ! HEAP_XMAX_INVALID | ! HEAP_XMAX_IS_MULTI | ! HEAP_IS_LOCKED | ! HEAP_MOVED); if (mode == LockTupleShared) { - TransactionId xmax = HeapTupleHeaderGetXmax(tuple->t_data); - uint16 old_infomask = tuple->t_data->t_infomask; - /* * If this is the first acquisition of a shared lock in the current * transaction, set my per-backend OldestMemberMXactId setting. We can --- 2554,2602 ---- } /* + * We might already hold the desired lock (or stronger), possibly under + * a different subtransaction of the current top transaction. If so, + * there is no need to change state or issue a WAL record. We already + * handled the case where this is true for xmax being a MultiXactId, + * so now check for cases where it is a plain TransactionId. + * + * Note in particular that this covers the case where we already hold + * exclusive lock on the tuple and the caller only wants shared lock. + * It would certainly not do to give up the exclusive lock. + */ + xmax = HeapTupleHeaderGetXmax(tuple->t_data); + old_infomask = tuple->t_data->t_infomask; + + if (!(old_infomask & (HEAP_XMAX_INVALID | + HEAP_XMAX_COMMITTED | + HEAP_XMAX_IS_MULTI)) && + (mode == LockTupleShared ? + (old_infomask & HEAP_IS_LOCKED) : + (old_infomask & HEAP_XMAX_EXCL_LOCK)) && + TransactionIdIsCurrentTransactionId(xmax)) + { + LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + /* Probably can't hold tuple lock here, but may as well check */ + if (have_tuple_lock) + UnlockTuple(relation, tid, tuple_lock_type); + return HeapTupleMayBeUpdated; + } + + /* * Compute the new xmax and infomask to store into the tuple. Note we do * not modify the tuple just yet, because that would leave it in the wrong * state if multixact.c elogs. */ xid = GetCurrentTransactionId(); ! new_infomask = old_infomask & ~(HEAP_XMAX_COMMITTED | ! HEAP_XMAX_INVALID | ! HEAP_XMAX_IS_MULTI | ! HEAP_IS_LOCKED | ! HEAP_MOVED); if (mode == LockTupleShared) { /* * If this is the first acquisition of a shared lock in the current * transaction, set my per-backend OldestMemberMXactId setting. We can *************** *** 2592,2623 **** } else if (TransactionIdIsInProgress(xmax)) { ! if (TransactionIdEquals(xmax, xid)) ! { ! /* ! * If the old locker is ourselves, we'll just mark the ! * tuple again with our own TransactionId. However we ! * have to consider the possibility that we had exclusive ! * rather than shared lock before --- if so, be careful to ! * preserve the exclusivity of the lock. ! */ ! if (!(old_infomask & HEAP_XMAX_SHARED_LOCK)) ! { ! new_infomask &= ~HEAP_XMAX_SHARED_LOCK; ! new_infomask |= HEAP_XMAX_EXCL_LOCK; ! mode = LockTupleExclusive; ! } ! } ! else ! { ! /* ! * If the Xmax is a valid TransactionId, then we need to ! * create a new MultiXactId that includes both the old ! * locker and our own TransactionId. ! */ ! xid = MultiXactIdCreate(xmax, xid); ! new_infomask |= HEAP_XMAX_IS_MULTI; ! } } else { --- 2637,2649 ---- } else if (TransactionIdIsInProgress(xmax)) { ! /* ! * If the XMAX is a valid TransactionId, then we need to ! * create a new MultiXactId that includes both the old ! * locker and our own TransactionId. ! */ ! xid = MultiXactIdCreate(xmax, xid); ! new_infomask |= HEAP_XMAX_IS_MULTI; } else { *** src/backend/access/transam/multixact.c.orig Tue Oct 3 23:16:15 2006 --- src/backend/access/transam/multixact.c Thu Nov 16 19:00:59 2006 *************** *** 366,372 **** MultiXactIdIsRunning(MultiXactId multi) { TransactionId *members; - TransactionId myXid; int nmembers; int i; --- 366,371 ---- *************** *** 380,391 **** return false; } ! /* checking for myself is cheap */ ! myXid = GetTopTransactionId(); ! for (i = 0; i < nmembers; i++) { ! if (TransactionIdEquals(members[i], myXid)) { debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i); pfree(members); --- 379,392 ---- return false; } ! /* ! * Checking for myself is cheap compared to looking in shared memory, ! * so first do the equivalent of MultiXactIdIsCurrent(). This is not ! * needed for correctness, it's just a fast path. ! */ for (i = 0; i < nmembers; i++) { ! if (TransactionIdIsCurrentTransactionId(members[i])) { debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i); pfree(members); *************** *** 414,419 **** --- 415,458 ---- debug_elog3(DEBUG2, "IsRunning: %u is not running", multi); return false; + } + + /* + * MultiXactIdIsCurrent + * Returns true if the current transaction is a member of the MultiXactId. + * + * We return true if any live subtransaction of the current top-level + * transaction is a member. This is appropriate for the same reason that a + * lock held by any such subtransaction is globally equivalent to a lock + * held by the current subtransaction: no such lock could be released without + * aborting this subtransaction, and hence releasing its locks. So it's not + * necessary to add the current subxact to the MultiXact separately. + */ + bool + MultiXactIdIsCurrent(MultiXactId multi) + { + bool result = false; + TransactionId *members; + int nmembers; + int i; + + nmembers = GetMultiXactIdMembers(multi, &members); + + if (nmembers < 0) + return false; + + for (i = 0; i < nmembers; i++) + { + if (TransactionIdIsCurrentTransactionId(members[i])) + { + result = true; + break; + } + } + + pfree(members); + + return result; } /* *** src/backend/utils/time/tqual.c.orig Sun Nov 5 17:42:09 2006 --- src/backend/utils/time/tqual.c Thu Nov 16 19:51:57 2006 *************** *** 511,517 **** * HeapTupleUpdated: The tuple was updated by a committed transaction. * * HeapTupleBeingUpdated: The tuple is being updated by an in-progress ! * transaction other than the current transaction. */ HTSU_Result HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid, --- 511,520 ---- * HeapTupleUpdated: The tuple was updated by a committed transaction. * * HeapTupleBeingUpdated: The tuple is being updated by an in-progress ! * transaction other than the current transaction. (Note: this includes ! * the case where the tuple is share-locked by a MultiXact, even if the ! * MultiXact includes the current transaction. Callers that want to ! * distinguish that case must test for it themselves.) */ HTSU_Result HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid, *** src/include/access/multixact.h.orig Thu Mar 23 23:32:13 2006 --- src/include/access/multixact.h Thu Nov 16 19:00:49 2006 *************** *** 45,50 **** --- 45,51 ---- extern MultiXactId MultiXactIdCreate(TransactionId xid1, TransactionId xid2); extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid); extern bool MultiXactIdIsRunning(MultiXactId multi); + extern bool MultiXactIdIsCurrent(MultiXactId multi); extern void MultiXactIdWait(MultiXactId multi); extern bool ConditionalMultiXactIdWait(MultiXactId multi); extern void MultiXactIdSetOldestMember(void);
---------------------------(end of broadcast)--------------------------- TIP 2: Don't 'kill -9' the postmaster