Heikki Linnakangas <heikki.linnakan...@enterprisedb.com> wrote:
> On 07.06.2011 21:10, Kevin Grittner wrote:
 
>> I think that leaves me with all the answers I need to get a new
>> patch out this evening (U.S. Central Time).
> 
> Great, I'll review it in my morning (in about 12h)
 
Attached.  Passes all the usual regression tests I run, plus light
ad hoc testing.  All is working fine as far as this patch itself
goes, although more testing is needed to really call it sound.
 
If anyone is interested in the differential from version 3 of the
patch, it is the result of these two commits to my local repo:
 
http://git.postgresql.org/gitweb?p=users/kgrittn/postgres.git;a=commitdiff;h=018b0fcbeba05317ba7066e552efe9a04e6890d9
http://git.postgresql.org/gitweb?p=users/kgrittn/postgres.git;a=commitdiff;h=fc651e2721a601ea806cf6e5d53d0676dfd26dca
 
During testing I found two annoying things not caused by this patch
which should probably be addressed in 9.1 if feasible, although I
don't think they rise to the level of blockers.  More on those in
separate threads.
 
-Kevin

*** a/src/backend/catalog/heap.c
--- b/src/backend/catalog/heap.c
***************
*** 63,68 ****
--- 63,69 ----
  #include "parser/parse_relation.h"
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
+ #include "storage/predicate.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
***************
*** 1658,1663 **** heap_drop_with_catalog(Oid relid)
--- 1659,1672 ----
        CheckTableNotInUse(rel, "DROP TABLE");
  
        /*
+        * This effectively deletes all rows in the table, and may be done in a
+        * serializable transaction.  In that case we must record a rw-conflict 
in
+        * to this transaction from each transaction holding a predicate lock on
+        * the table.
+        */
+       CheckTableForSerializableConflictIn(rel);
+ 
+       /*
         * Delete pg_foreign_table tuple first.
         */
        if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 54,59 ****
--- 54,60 ----
  #include "parser/parser.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
+ #include "storage/predicate.h"
  #include "storage/procarray.h"
  #include "storage/smgr.h"
  #include "utils/builtins.h"
***************
*** 1312,1317 **** index_drop(Oid indexId)
--- 1313,1324 ----
        CheckTableNotInUse(userIndexRelation, "DROP INDEX");
  
        /*
+        * All predicate locks on the index are about to be made invalid.
+        * Promote them to relation locks on the heap.
+        */
+       TransferPredicateLocksToHeapRelation(userIndexRelation);
+ 
+       /*
         * Schedule physical removal of the files
         */
        RelationDropStorage(userIndexRelation);
***************
*** 2799,2804 **** reindex_index(Oid indexId, bool skip_constraint_checks)
--- 2806,2817 ----
         */
        CheckTableNotInUse(iRel, "REINDEX INDEX");
  
+       /*
+        * All predicate locks on the index are about to be made invalid.
+        * Promote them to relation locks on the heap.
+        */
+       TransferPredicateLocksToHeapRelation(iRel);
+ 
        PG_TRY();
        {
                /* Suppress use of the target index while rebuilding it */
*** a/src/backend/commands/cluster.c
--- b/src/backend/commands/cluster.c
***************
*** 39,44 ****
--- 39,45 ----
  #include "optimizer/planner.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
+ #include "storage/predicate.h"
  #include "storage/procarray.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
***************
*** 385,390 **** cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool 
verbose,
--- 386,397 ----
        if (OidIsValid(indexOid))
                check_index_is_clusterable(OldHeap, indexOid, recheck, 
AccessExclusiveLock);
  
+       /*
+        * All predicate locks on the table and its indexes are about to be made
+        * invalid.  Promote them to relation locks on the heap.
+        */
+       TransferPredicateLocksToHeapRelation(OldHeap);
+ 
        /* rebuild_relation does all the dirty work */
        rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age,
                                         verbose);
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 70,75 ****
--- 70,76 ----
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
  #include "storage/lock.h"
+ #include "storage/predicate.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
***************
*** 1040,1045 **** ExecuteTruncate(TruncateStmt *stmt)
--- 1041,1054 ----
                        Oid                     toast_relid;
  
                        /*
+                        * This effectively deletes all rows in the table, and 
may be done
+                        * in a serializable transaction.  In that case we must 
record a
+                        * rw-conflict in to this transaction from each 
transaction
+                        * holding a predicate lock on the table.
+                        */
+                       CheckTableForSerializableConflictIn(rel);
+ 
+                       /*
                         * Need the full transaction-safe pushups.
                         *
                         * Create a new empty storage file for the relation, 
and assign it
*** a/src/backend/storage/lmgr/predicate.c
--- b/src/backend/storage/lmgr/predicate.c
***************
*** 155,160 ****
--- 155,161 ----
   *                                                       BlockNumber 
newblkno);
   *            PredicateLockPageCombine(Relation relation, BlockNumber 
oldblkno,
   *                                                             BlockNumber 
newblkno);
+  *            TransferPredicateLocksToHeapRelation(const Relation relation)
   *            ReleasePredicateLocks(bool isCommit)
   *
   * conflict detection (may also trigger rollback)
***************
*** 162,167 ****
--- 163,169 ----
   *                                                                            
HeapTupleData *tup, Buffer buffer)
   *            CheckForSerializableConflictIn(Relation relation, HeapTupleData 
*tup,
   *                                                                       
Buffer buffer)
+  *            CheckTableForSerializableConflictIn(const Relation relation)
   *
   * final rollback checking
   *            PreCommit_CheckForSerializationFailure(void)
***************
*** 257,266 ****
  #define SxactIsMarkedForDeath(sxact) (((sxact)->flags & 
SXACT_FLAG_MARKED_FOR_DEATH) != 0)
  
  /*
!  * When a public interface method is called for a split on an index relation,
!  * this is the test to see if we should do a quick return.
   */
! #define SkipSplitTracking(relation) \
        (((relation)->rd_id < FirstBootstrapObjectId) \
        || RelationUsesLocalBuffers(relation))
  
--- 259,269 ----
  #define SxactIsMarkedForDeath(sxact) (((sxact)->flags & 
SXACT_FLAG_MARKED_FOR_DEATH) != 0)
  
  /*
!  * When a public interface method is called which needs to manipulate locks on
!  * a particular relation regardless of the lock holder, do a quick check to
!  * see if this relation can be skipped.
   */
! #define SkipPredicateLocksForRelation(relation) \
        (((relation)->rd_id < FirstBootstrapObjectId) \
        || RelationUsesLocalBuffers(relation))
  
***************
*** 273,279 ****
        ((!IsolationIsSerializable()) \
        || ((MySerializableXact == InvalidSerializableXact)) \
        || ReleasePredicateLocksIfROSafe() \
!       || SkipSplitTracking(relation))
  
  
  /*
--- 276,282 ----
        ((!IsolationIsSerializable()) \
        || ((MySerializableXact == InvalidSerializableXact)) \
        || ReleasePredicateLocksIfROSafe() \
!       || SkipPredicateLocksForRelation(relation))
  
  
  /*
***************
*** 434,439 **** static bool TransferPredicateLocksToNewTarget(const 
PREDICATELOCKTARGETTAG oldta
--- 437,443 ----
                                                                  const 
PREDICATELOCKTARGETTAG newtargettag,
                                                                  bool 
removeOld);
  static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag);
+ static void DropAllPredicateLocksFromTableImpl(const Relation relation, bool 
transfer);
  static void SetNewSxactGlobalXmin(void);
  static bool ReleasePredicateLocksIfROSafe(void);
  static void ClearOldPredicateLocks(void);
***************
*** 2543,2548 **** exit:
--- 2547,2781 ----
        return !outOfShmem;
  }
  
+ /*
+  * Drop all predicate locks of any granularity from the specified relation,
+  * which can be a heap relation or an index relation.  Optionally acquire a
+  * relation lock on the heap for any transactions with any lock(s) on the
+  * specified relation.
+  *
+  * This requires grabbing a lot of LW locks and scanning the entire lock
+  * target table for matches.  That makes this more expensive than most
+  * predicate lock management functions, but it will only be called for DDL
+  * type commands and there are fast returns when no serializable transactions
+  * are active or the relation is temporary.
+  *
+  * We are not using the TransferPredicateLocksToNewTarget function because
+  * it acquires its own locks on the partitions of the two targets involved,
+  * and we'll already be holding all partition locks.
+  *
+  * We can't throw an error from here, because the call could be from a
+  * transaction which is not serializable.
+  *
+  * NOTE: This is currently only called with transfer set to true, but that may
+  * change.    If we decide to clean up the locks from a table on commit of a
+  * transaction which executed DROP TABLE, the false condition will be useful.
+  */
+ static void
+ DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer)
+ {
+       HASH_SEQ_STATUS seqstat;
+       PREDICATELOCKTARGET *oldtarget;
+       PREDICATELOCKTARGET *heaptarget;
+       PREDICATELOCKTARGETTAG heaptargettag;
+       PREDICATELOCKTAG newpredlocktag;
+       Oid                     dbId;
+       Oid                     relId;
+       Oid                     heapId;
+       int                     i;
+       bool            isIndex;
+       bool            found;
+       uint32          reservedtargettaghash;
+       uint32          heaptargettaghash;
+ 
+       /*
+        * Bail out quickly if there are no serializable transactions running.
+        * It's safe to check this without taking locks because the caller is
+        * holding an ACCESS EXCLUSIVE lock on the relation.  No new locks which
+        * would matter here can be acquired while that is held.
+        */
+       if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+               return;
+ 
+       if (SkipPredicateLocksForRelation(relation))
+               return;
+ 
+       dbId = relation->rd_node.dbNode;
+       relId = relation->rd_id;
+       if (relation->rd_index == NULL)
+       {
+               isIndex = false;
+               heapId = relId;
+       }
+       else
+       {
+               isIndex = true;
+               heapId = relation->rd_index->indrelid;
+       }
+       Assert(heapId != InvalidOid);
+       Assert(transfer || !isIndex);           /* index OID only makes sense 
with
+                                                                               
 * transfer */
+ 
+       SET_PREDICATELOCKTARGETTAG_RELATION(heaptargettag, dbId, heapId);
+       heaptargettaghash = PredicateLockTargetTagHashCode(&heaptargettag);
+       heaptarget = NULL;                      /* Retrieve first time needed, 
then keep. */
+ 
+       LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
+       for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
+               LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE);
+       LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
+ 
+       /*
+        * Remove the reserved entry to give us scratch space, so we know we'll 
be
+        * able to create the new lock target.
+        */
+       reservedtargettaghash = 0;      /* quiet compiler warning */
+       if (transfer)
+       {
+               reservedtargettaghash = 
PredicateLockTargetTagHashCode(&ReservedTargetTag);
+               hash_search_with_hash_value(PredicateLockTargetHash,
+                                                                       
&ReservedTargetTag,
+                                                                       
reservedtargettaghash,
+                                                                       
HASH_REMOVE, &found);
+               Assert(found);
+       }
+ 
+       /* Scan through target map */
+       hash_seq_init(&seqstat, PredicateLockTargetHash);
+ 
+       while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
+       {
+               PREDICATELOCK *oldpredlock;
+ 
+               /*
+                * Check whether this is a target which needs attention.
+                */
+               if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) != 
relId)
+                       continue;                       /* wrong relation id */
+               if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
+                       continue;                       /* wrong database id */
+               if (transfer && !isIndex
+                       && GET_PREDICATELOCKTARGETTAG_TYPE(oldtarget->tag) == 
PREDLOCKTAG_RELATION)
+                       continue;                       /* already the right 
lock */
+ 
+               /*
+                * If we made it here, we have work to do.      We make sure 
the heap
+                * relation lock exists, then we walk the list of predicate 
locks for
+                * the old target we found, moving all locks to the heap 
relation lock
+                * -- unless they already hold that.
+                */
+ 
+               /*
+                * First make sure we have the heap relation target.  We only 
need to
+                * do this once.
+                */
+               if (transfer && heaptarget == NULL)
+               {
+                       heaptarget = 
hash_search_with_hash_value(PredicateLockTargetHash,
+                                                                               
                         &heaptargettag,
+                                                                               
                         heaptargettaghash,
+                                                                               
                         HASH_ENTER, &found);
+                       Assert(heaptarget != NULL);
+                       if (!found)
+                               SHMQueueInit(&heaptarget->predicateLocks);
+                       newpredlocktag.myTarget = heaptarget;
+               }
+ 
+               /*
+                * Loop through moving locks from this target to the relation 
target.
+                */
+               oldpredlock = (PREDICATELOCK *)
+                       SHMQueueNext(&(oldtarget->predicateLocks),
+                                                &(oldtarget->predicateLocks),
+                                                offsetof(PREDICATELOCK, 
targetLink));
+               while (oldpredlock)
+               {
+                       PREDICATELOCK *nextpredlock;
+                       PREDICATELOCK *newpredlock;
+                       SerCommitSeqNo oldCommitSeqNo = 
oldpredlock->commitSeqNo;
+ 
+                       nextpredlock = (PREDICATELOCK *)
+                               SHMQueueNext(&(oldtarget->predicateLocks),
+                                                        
&(oldpredlock->targetLink),
+                                                        
offsetof(PREDICATELOCK, targetLink));
+                       newpredlocktag.myXact = oldpredlock->tag.myXact;
+ 
+                       /*
+                        * It's OK to remove the old lock first because of the 
ACCESS
+                        * EXCLUSIVE lock on the heap relation when this is 
called.  It is
+                        * desirable to do so because it avoids any chance of 
running out
+                        * of lock structure entries for the table.
+                        */
+                       SHMQueueDelete(&(oldpredlock->xactLink));
+                       /* No need for retail delete from oldtarget list. */
+                       hash_search(PredicateLockHash,
+                                               &oldpredlock->tag,
+                                               HASH_REMOVE, &found);
+                       Assert(found);
+ 
+                       if (transfer)
+                       {
+                               newpredlock = (PREDICATELOCK *)
+                                       hash_search_with_hash_value
+                                       (PredicateLockHash,
+                                        &newpredlocktag,
+                                        
PredicateLockHashCodeFromTargetHashCode(&newpredlocktag,
+                                                                               
                                  heaptargettaghash),
+                                        HASH_ENTER_NULL, &found);
+                               Assert(newpredlock != NULL);
+                               if (!found)
+                               {
+                                       
SHMQueueInsertBefore(&(heaptarget->predicateLocks),
+                                                                               
 &(newpredlock->targetLink));
+                                       
SHMQueueInsertBefore(&(newpredlocktag.myXact->predicateLocks),
+                                                                               
 &(newpredlock->xactLink));
+                                       newpredlock->commitSeqNo = 
oldCommitSeqNo;
+                               }
+                               else
+                               {
+                                       if (newpredlock->commitSeqNo < 
oldCommitSeqNo)
+                                               newpredlock->commitSeqNo = 
oldCommitSeqNo;
+                               }
+ 
+                               Assert(newpredlock->commitSeqNo != 0);
+                               Assert((newpredlock->commitSeqNo == 
InvalidSerCommitSeqNo)
+                                          || (newpredlock->tag.myXact == 
OldCommittedSxact));
+                       }
+ 
+                       oldpredlock = nextpredlock;
+               }
+ 
+               hash_search(PredicateLockTargetHash, &oldtarget->tag, 
HASH_REMOVE, &found);
+               Assert(found);
+       }
+ 
+       if (transfer)
+       {
+               /* Put the reserved entry back */
+               hash_search_with_hash_value(PredicateLockTargetHash,
+                                                                       
&ReservedTargetTag,
+                                                                       
reservedtargettaghash,
+                                                                       
HASH_ENTER, &found);
+               Assert(!found);
+       }
+ 
+       /* Release locks in reverse order */
+       LWLockRelease(SerializableXactHashLock);
+       for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
+               LWLockRelease(FirstPredicateLockMgrLock + i);
+       LWLockRelease(SerializablePredicateLockListLock);
+ }
+ 
+ /*
+  * TransferPredicateLocksToHeapRelation
+  *            For all transactions, transfer all predicate locks for the given
+  *            relation to a single relation lock on the heap.
+  */
+ void
+ TransferPredicateLocksToHeapRelation(const Relation relation)
+ {
+       DropAllPredicateLocksFromTableImpl(relation, true);
+ }
+ 
  
  /*
   *            PredicateLockPageSplit
***************
*** 2581,2587 **** PredicateLockPageSplit(const Relation relation, const 
BlockNumber oldblkno,
        if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
                return;
  
!       if (SkipSplitTracking(relation))
                return;
  
        Assert(oldblkno != newblkno);
--- 2814,2820 ----
        if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
                return;
  
!       if (SkipPredicateLocksForRelation(relation))
                return;
  
        Assert(oldblkno != newblkno);
***************
*** 3792,3797 **** CheckForSerializableConflictIn(const Relation relation, 
const HeapTuple tuple,
--- 4025,4129 ----
  }
  
  /*
+  * CheckTableForSerializableConflictIn
+  *            The entire table is going through a DDL-style logical mass 
delete
+  *            (like TRUNCATE TABLE or DROP TABLE).  While these operations do 
not
+  *            operate entirely within the bounds of snapshot isolation, they 
can
+  *            occur inside of a serialziable transaction, and will logically 
occur
+  *            after any reads which saw rows which were destroyed by these
+  *            operations, so we do what we can to serialize properly under 
SSI.
+  *
+  * The relation passed in must be a heap relation for a table. Any predicate
+  * lock of any granularity on the heap will cause a rw-conflict in to this
+  * transaction.  Predicate locks on indexes do not matter because they only
+  * exist to guard against conflicting inserts into the index, and this is a
+  * mass *delete*.
+  *
+  * This should be done before altering the predicate locks because the
+  * transaction could be rolled back because of a conflict, in which case the
+  * lock changes are not needed.
+  */
+ void
+ CheckTableForSerializableConflictIn(const Relation relation)
+ {
+       HASH_SEQ_STATUS seqstat;
+       PREDICATELOCKTARGET *target;
+       Oid                     dbId;
+       Oid                     heapId;
+       int                     i;
+ 
+       /*
+        * Bail out quickly if there are no serializable transactions running.
+        * It's safe to check this without taking locks because the caller is
+        * holding an ACCESS EXCLUSIVE lock on the relation.  No new locks which
+        * would matter here can be acquired while that is held.
+        */
+       if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+               return;
+ 
+       if (SkipSerialization(relation))
+               return;
+ 
+       Assert(relation->rd_index == NULL); /* not an index relation */
+ 
+       dbId = relation->rd_node.dbNode;
+       heapId = relation->rd_id;
+ 
+       LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
+       for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
+               LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED);
+       LWLockAcquire(SerializableXactHashLock, LW_SHARED);
+ 
+       /* Scan through target list */
+       hash_seq_init(&seqstat, PredicateLockTargetHash);
+ 
+       while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
+       {
+               PREDICATELOCK *predlock;
+ 
+               /*
+                * Check whether this is a target which needs attention.
+                */
+               if (GET_PREDICATELOCKTARGETTAG_RELATION(target->tag) != heapId)
+                       continue;                       /* wrong relation id */
+               if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
+                       continue;                       /* wrong database id */
+ 
+               /*
+                * Loop through locks for this target and flag conflicts.
+                */
+               predlock = (PREDICATELOCK *)
+                       SHMQueueNext(&(target->predicateLocks),
+                                                &(target->predicateLocks),
+                                                offsetof(PREDICATELOCK, 
targetLink));
+               while (predlock)
+               {
+                       PREDICATELOCK *nextpredlock;
+ 
+                       nextpredlock = (PREDICATELOCK *)
+                               SHMQueueNext(&(target->predicateLocks),
+                                                        
&(predlock->targetLink),
+                                                        
offsetof(PREDICATELOCK, targetLink));
+ 
+                       if (predlock->tag.myXact != MySerializableXact
+                               && !RWConflictExists(predlock->tag.myXact,
+                                                                        
(SERIALIZABLEXACT *) MySerializableXact))
+                               FlagRWConflict(predlock->tag.myXact,
+                                                          (SERIALIZABLEXACT *) 
MySerializableXact);
+ 
+                       predlock = nextpredlock;
+               }
+       }
+ 
+       /* Release locks in reverse order */
+       LWLockRelease(SerializableXactHashLock);
+       for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
+               LWLockRelease(FirstPredicateLockMgrLock + i);
+       LWLockRelease(SerializablePredicateLockListLock);
+ }
+ 
+ 
+ /*
   * Flag a rw-dependency between two serializable transactions.
   *
   * The caller is responsible for ensuring that we have a LW lock on
*** a/src/include/storage/predicate.h
--- b/src/include/storage/predicate.h
***************
*** 49,59 **** extern void PredicateLockPage(const Relation relation, const 
BlockNumber blkno);
--- 49,61 ----
  extern void PredicateLockTuple(const Relation relation, const HeapTuple 
tuple);
  extern void PredicateLockPageSplit(const Relation relation, const BlockNumber 
oldblkno, const BlockNumber newblkno);
  extern void PredicateLockPageCombine(const Relation relation, const 
BlockNumber oldblkno, const BlockNumber newblkno);
+ extern void TransferPredicateLocksToHeapRelation(const Relation relation);
  extern void ReleasePredicateLocks(const bool isCommit);
  
  /* conflict detection (may also trigger rollback) */
  extern void CheckForSerializableConflictOut(const bool valid, const Relation 
relation, const HeapTuple tuple, const Buffer buffer);
  extern void CheckForSerializableConflictIn(const Relation relation, const 
HeapTuple tuple, const Buffer buffer);
+ extern void CheckTableForSerializableConflictIn(const Relation relation);
  
  /* final rollback checking */
  extern void PreCommit_CheckForSerializationFailure(void);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to