Robert Haas <robertmh...@gmail.com> wrote:
 
> Nope, you're on target.  Although - if I were you - I would post
> the ACCESS EXCLUSIVE lock version of the patch for feedback.  I
> can't speak for anyone else, but I'll read it.
 
Here you go!  :-)
 
This is the milestone of having full serializable behavior, albeit
with horrible performance, using the simplest implementation
possible.  I didn't use ACCESS EXCLUSIVE locks, because on review it
seemed to me that a SHARE lock would be strong enough.  It compiles
and passes the regression tests, and I've been testing some of the
scenarios previously used to show the snapshot anomalies; I now get
correct behavior through blocking.
 
I identified the points to insert predicate locking by looking for
places where ExecStoreTuple was called with a valid heap buffer; if
there is anywhere that obtains tuples from the heap without going
through that method, I have more work to do.  If anyone knows of
such locations, I'd be grateful for a "heads up".
 
If I've done anything horribly wrong in organizing the code, that'd
be nice to hear about before I go too much farther, too.
 
I'm definitely not looking for this to be committed, but should I
add it to the CF page just for a "feedback" review?  (I'm OK with
keeping it more ad hoc, especially if it's going to hold up the
beta at all.)
 
-Kevin
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 2133,2139 **** IndexCheckExclusion(Relation heapRelation,
   *
   * After completing validate_index(), we wait until all transactions that
   * were alive at the time of the reference snapshot are gone; this is
!  * necessary to be sure there are none left with a serializable snapshot
   * older than the reference (and hence possibly able to see tuples we did
   * not index).        Then we mark the index "indisvalid" and commit.  
Subsequent
   * transactions will be able to use it for queries.
--- 2133,2139 ----
   *
   * After completing validate_index(), we wait until all transactions that
   * were alive at the time of the reference snapshot are gone; this is
!  * necessary to be sure there are none left with a transaction-based snapshot
   * older than the reference (and hence possibly able to see tuples we did
   * not index).        Then we mark the index "indisvalid" and commit.  
Subsequent
   * transactions will be able to use it for queries.
*** a/src/backend/commands/trigger.c
--- b/src/backend/commands/trigger.c
***************
*** 2319,2325 **** ltrmark:;
  
                        case HeapTupleUpdated:
                                ReleaseBuffer(buffer);
!                               if (IsXactIsoLevelSerializable)
                                        ereport(ERROR,
                                                        
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                         errmsg("could not 
serialize access due to concurrent update")));
--- 2319,2325 ----
  
                        case HeapTupleUpdated:
                                ReleaseBuffer(buffer);
!                               if (IsXactIsoLevelXactSnapshotBased)
                                        ereport(ERROR,
                                                        
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                         errmsg("could not 
serialize access due to concurrent update")));
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 1538,1544 **** EvalPlanQualFetch(EState *estate, Relation relation, int 
lockmode,
  
                                case HeapTupleUpdated:
                                        ReleaseBuffer(buffer);
!                                       if (IsXactIsoLevelSerializable)
                                                ereport(ERROR,
                                                                
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                                 errmsg("could 
not serialize access due to concurrent update")));
--- 1538,1544 ----
  
                                case HeapTupleUpdated:
                                        ReleaseBuffer(buffer);
!                                       if (IsXactIsoLevelXactSnapshotBased)
                                                ereport(ERROR,
                                                                
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                                 errmsg("could 
not serialize access due to concurrent update")));
*** a/src/backend/executor/nodeBitmapHeapscan.c
--- b/src/backend/executor/nodeBitmapHeapscan.c
***************
*** 42,47 ****
--- 42,48 ----
  #include "executor/nodeBitmapHeapscan.h"
  #include "pgstat.h"
  #include "storage/bufmgr.h"
+ #include "storage/predicate.h"
  #include "utils/memutils.h"
  #include "utils/snapmgr.h"
  #include "utils/tqual.h"
***************
*** 114,119 **** BitmapHeapNext(BitmapHeapScanState *node)
--- 115,123 ----
  #endif   /* USE_PREFETCH */
        }
  
+       /* TODO SSI: Lock at tuple level subject to granularity promotion. */
+       PredicateLockRelation(node->ss.ss_currentRelation);
+ 
        for (;;)
        {
                Page            dp;
*** a/src/backend/executor/nodeIndexscan.c
--- b/src/backend/executor/nodeIndexscan.c
***************
*** 30,35 ****
--- 30,36 ----
  #include "executor/execdebug.h"
  #include "executor/nodeIndexscan.h"
  #include "optimizer/clauses.h"
+ #include "storage/predicate.h"
  #include "utils/array.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
***************
*** 72,77 **** IndexNext(IndexScanState *node)
--- 73,81 ----
        econtext = node->ss.ps.ps_ExprContext;
        slot = node->ss.ss_ScanTupleSlot;
  
+       /* TODO SSI: Lock at tuple level subject to granularity promotion. */
+       PredicateLockRelation(node->ss.ss_currentRelation);
+ 
        /*
         * ok, now that we have what we need, fetch the next tuple.
         */
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 130,136 **** lnext:
                                break;
  
                        case HeapTupleUpdated:
!                               if (IsXactIsoLevelSerializable)
                                        ereport(ERROR,
                                                        
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                         errmsg("could not 
serialize access due to concurrent update")));
--- 130,136 ----
                                break;
  
                        case HeapTupleUpdated:
!                               if (IsXactIsoLevelXactSnapshotBased)
                                        ereport(ERROR,
                                                        
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                         errmsg("could not 
serialize access due to concurrent update")));
*** a/src/backend/executor/nodeModifyTable.c
--- b/src/backend/executor/nodeModifyTable.c
***************
*** 326,332 **** ldelete:;
                        break;
  
                case HeapTupleUpdated:
!                       if (IsXactIsoLevelSerializable)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                 errmsg("could not serialize 
access due to concurrent update")));
--- 326,332 ----
                        break;
  
                case HeapTupleUpdated:
!                       if (IsXactIsoLevelXactSnapshotBased)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                 errmsg("could not serialize 
access due to concurrent update")));
***************
*** 514,520 **** lreplace:;
                        break;
  
                case HeapTupleUpdated:
!                       if (IsXactIsoLevelSerializable)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                 errmsg("could not serialize 
access due to concurrent update")));
--- 514,520 ----
                        break;
  
                case HeapTupleUpdated:
!                       if (IsXactIsoLevelXactSnapshotBased)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                 errmsg("could not serialize 
access due to concurrent update")));
*** a/src/backend/executor/nodeSeqscan.c
--- b/src/backend/executor/nodeSeqscan.c
***************
*** 28,33 ****
--- 28,34 ----
  #include "access/relscan.h"
  #include "executor/execdebug.h"
  #include "executor/nodeSeqscan.h"
+ #include "storage/predicate.h"
  
  static void InitScanRelation(SeqScanState *node, EState *estate);
  static TupleTableSlot *SeqNext(SeqScanState *node);
***************
*** 106,116 **** SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
--- 107,122 ----
   *            tuple.
   *            We call the ExecScan() routine and pass it the appropriate
   *            access method functions.
+  *            For serializable transactions, we first lock the entire 
relation.
+  *            TODO SSI: Would it make sense to optimize cases where the plan
+  *                      includes a LIMIT, such that individual tuples are
+  *                      locked instead, subject to granularity promotion?
   * ----------------------------------------------------------------
   */
  TupleTableSlot *
  ExecSeqScan(SeqScanState *node)
  {
+       PredicateLockRelation(node->ss_currentRelation);
        return ExecScan((ScanState *) node,
                                        (ExecScanAccessMtd) SeqNext,
                                        (ExecScanRecheckMtd) SeqRecheck);
*** a/src/backend/executor/nodeTidscan.c
--- b/src/backend/executor/nodeTidscan.c
***************
*** 31,36 ****
--- 31,37 ----
  #include "executor/nodeTidscan.h"
  #include "optimizer/clauses.h"
  #include "storage/bufmgr.h"
+ #include "storage/predicate.h"
  #include "utils/array.h"
  
  
***************
*** 308,313 **** TidNext(TidScanState *node)
--- 309,317 ----
                        node->tss_TidPtr++;
        }
  
+       /* TODO SSI: Lock at tuple level subject to granularity promotion. */
+       PredicateLockRelation(heapRelation);
+ 
        while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
        {
                tuple->t_self = tidList[node->tss_TidPtr];
*** a/src/backend/storage/lmgr/Makefile
--- b/src/backend/storage/lmgr/Makefile
***************
*** 12,18 **** subdir = src/backend/storage/lmgr
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o
  
  include $(top_srcdir)/src/backend/common.mk
  
--- 12,18 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o
  
  include $(top_srcdir)/src/backend/common.mk
  
*** /dev/null
--- b/src/backend/storage/lmgr/predicate.c
***************
*** 0 ****
--- 1,43 ----
+ /*-------------------------------------------------------------------------
+  *
+  * predicate.c
+  *      POSTGRES predicate locking
+  *      to support full serializable transaction isolation
+  *
+  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *      $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+ /*
+  * INTERFACE ROUTINES
+  *
+  *            PredicateLockRelation(Relation relation)
+  */
+ 
+ #include "postgres.h"
+ 
+ #include "access/xact.h"
+ #include "storage/predicate.h"
+ #include "storage/lmgr.h"
+ 
+ 
+ /* ----------------------------------------------------------------
+  * Gets a predicate lock at the relation level.
+  * Skip if not in full serializable transaction isolation level.
+  * Skip if this is a temporary table or toast table..
+  * Skip this if a write lock exists for the relation; otherwise,
+  * clear any finer-grained predicate locks this session has on the relation.
+  * TODO SSI: Some of the above.  Using SIREAD locks.
+  * ----------------------------------------------------------------
+  */
+ void
+ PredicateLockRelation(Relation relation)
+ {
+       if (IsXactIsoLevelFullySerializable && !(relation->rd_istemp))
+               LockRelation(relation, ShareLock);
+ }
*** a/src/backend/utils/adt/ri_triggers.c
--- b/src/backend/utils/adt/ri_triggers.c
***************
*** 3314,3320 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan,
        /*
         * In READ COMMITTED mode, we just need to use an up-to-date regular
         * snapshot, and we will see all rows that could be interesting. But in
!        * SERIALIZABLE mode, we can't change the transaction snapshot. If the
         * caller passes detectNewRows == false then it's okay to do the query
         * with the transaction snapshot; otherwise we use a current snapshot, 
and
         * tell the executor to error out if it finds any rows under the current
--- 3314,3320 ----
        /*
         * In READ COMMITTED mode, we just need to use an up-to-date regular
         * snapshot, and we will see all rows that could be interesting. But in
!        * xact-snapshot-based modes, we can't change the transaction snapshot. 
If the
         * caller passes detectNewRows == false then it's okay to do the query
         * with the transaction snapshot; otherwise we use a current snapshot, 
and
         * tell the executor to error out if it finds any rows under the current
***************
*** 3322,3328 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan,
         * that SPI_execute_snapshot will register the snapshots, so we don't 
need
         * to bother here.
         */
!       if (IsXactIsoLevelSerializable && detectNewRows)
        {
                CommandCounterIncrement();              /* be sure all my own 
work is visible */
                test_snapshot = GetLatestSnapshot();
--- 3322,3328 ----
         * that SPI_execute_snapshot will register the snapshots, so we don't 
need
         * to bother here.
         */
!       if (IsXactIsoLevelXactSnapshotBased && detectNewRows)
        {
                CommandCounterIncrement();              /* be sure all my own 
work is visible */
                test_snapshot = GetLatestSnapshot();
*** a/src/backend/utils/time/snapmgr.c
--- b/src/backend/utils/time/snapmgr.c
***************
*** 37,44 ****
  
  
  /*
!  * CurrentSnapshot points to the only snapshot taken in a serializable
!  * transaction, and to the latest one taken in a read-committed transaction.
   * SecondarySnapshot is a snapshot that's always up-to-date as of the current
   * instant, even on a serializable transaction.  It should only be used for
   * special-purpose code (say, RI checking.)
--- 37,44 ----
  
  
  /*
!  * CurrentSnapshot points to the only snapshot taken in a xact-snapshot-based
!  * transaction; otherwise to the latest one taken.
   * SecondarySnapshot is a snapshot that's always up-to-date as of the current
   * instant, even on a serializable transaction.  It should only be used for
   * special-purpose code (say, RI checking.)
***************
*** 97,107 **** static int      RegisteredSnapshots = 0;
  bool          FirstSnapshotSet = false;
  
  /*
!  * Remembers whether this transaction registered a serializable snapshot at
   * start.  We cannot trust FirstSnapshotSet in combination with
!  * IsXactIsoLevelSerializable, because GUC may be reset before us.
   */
! static bool registered_serializable = false;
  
  
  static Snapshot CopySnapshot(Snapshot snapshot);
--- 97,107 ----
  bool          FirstSnapshotSet = false;
  
  /*
!  * Remembers whether this transaction registered a transaction-based snapshot 
at
   * start.  We cannot trust FirstSnapshotSet in combination with
!  * IsXactIsoLevelXactSnapshotBased, because GUC may be reset before us.
   */
! static bool registered_xact_snapshot = false;
  
  
  static Snapshot CopySnapshot(Snapshot snapshot);
***************
*** 130,150 **** GetTransactionSnapshot(void)
                FirstSnapshotSet = true;
  
                /*
!                * In serializable mode, the first snapshot must live until end 
of
                 * xact regardless of what the caller does with it, so we must
                 * register it internally here and unregister it at end of xact.
                 */
!               if (IsXactIsoLevelSerializable)
                {
                        CurrentSnapshot = 
RegisterSnapshotOnOwner(CurrentSnapshot,
                                                                                
                TopTransactionResourceOwner);
!                       registered_serializable = true;
                }
  
                return CurrentSnapshot;
        }
  
!       if (IsXactIsoLevelSerializable)
                return CurrentSnapshot;
  
        CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
--- 130,150 ----
                FirstSnapshotSet = true;
  
                /*
!                * In xact-snapshot-based isolation levels, the first snapshot 
must live until end of
                 * xact regardless of what the caller does with it, so we must
                 * register it internally here and unregister it at end of xact.
                 */
!               if (IsXactIsoLevelXactSnapshotBased)
                {
                        CurrentSnapshot = 
RegisterSnapshotOnOwner(CurrentSnapshot,
                                                                                
                TopTransactionResourceOwner);
!                       registered_xact_snapshot = true;
                }
  
                return CurrentSnapshot;
        }
  
!       if (IsXactIsoLevelXactSnapshotBased)
                return CurrentSnapshot;
  
        CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
***************
*** 155,161 **** GetTransactionSnapshot(void)
  /*
   * GetLatestSnapshot
   *            Get a snapshot that is up-to-date as of the current instant,
!  *            even if we are executing in SERIALIZABLE mode.
   */
  Snapshot
  GetLatestSnapshot(void)
--- 155,161 ----
  /*
   * GetLatestSnapshot
   *            Get a snapshot that is up-to-date as of the current instant,
!  *            even if we are executing in xact-snapshot-based mode.
   */
  Snapshot
  GetLatestSnapshot(void)
***************
*** 515,527 **** void
  AtEarlyCommit_Snapshot(void)
  {
        /*
!        * On a serializable transaction we must unregister our private refcount
!        * to the serializable snapshot.
         */
!       if (registered_serializable)
                UnregisterSnapshotFromOwner(CurrentSnapshot,
                                                                        
TopTransactionResourceOwner);
!       registered_serializable = false;
  
  }
  
--- 515,527 ----
  AtEarlyCommit_Snapshot(void)
  {
        /*
!        * On a xact-snapshot-based transaction we must unregister our private 
refcount
!        * to the xact snapshot.
         */
!       if (registered_xact_snapshot)
                UnregisterSnapshotFromOwner(CurrentSnapshot,
                                                                        
TopTransactionResourceOwner);
!       registered_xact_snapshot = false;
  
  }
  
***************
*** 557,561 **** AtEOXact_Snapshot(bool isCommit)
        SecondarySnapshot = NULL;
  
        FirstSnapshotSet = false;
!       registered_serializable = false;
  }
--- 557,561 ----
        SecondarySnapshot = NULL;
  
        FirstSnapshotSet = false;
!       registered_xact_snapshot = false;
  }
*** a/src/include/access/xact.h
--- b/src/include/access/xact.h
***************
*** 32,41 **** extern int       DefaultXactIsoLevel;
  extern int    XactIsoLevel;
  
  /*
!  * We only implement two isolation levels internally.  This macro should
!  * be used to check which one is selected.
   */
! #define IsXactIsoLevelSerializable (XactIsoLevel >= XACT_REPEATABLE_READ)
  
  /* Xact read-only state */
  extern bool DefaultXactReadOnly;
--- 32,45 ----
  extern int    XactIsoLevel;
  
  /*
!  * We implement three isolation levels internally.
!  * The two stronger ones use one snapshot per database transaction;
!  * the others use one snapshot per statement.
!  * Serializable uses predicate locks.
!  * These macros should be used to check which isolation level is selected.
   */
! #define IsXactIsoLevelXactSnapshotBased (XactIsoLevel >= XACT_REPEATABLE_READ)
! #define IsXactIsoLevelFullySerializable (XactIsoLevel == XACT_SERIALIZABLE)
  
  /* Xact read-only state */
  extern bool DefaultXactReadOnly;
*** /dev/null
--- b/src/include/storage/predicate.h
***************
*** 0 ****
--- 1,22 ----
+ /*-------------------------------------------------------------------------
+  *
+  * predicate.h
+  *      POSTGRES predicate locking definitions.
+  *
+  *
+  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef PREDICATE_H
+ #define PREDICATE_H
+ 
+ #include "storage/lock.h"
+ #include "utils/relcache.h"
+ 
+ extern void PredicateLockRelation(Relation relation);
+ 
+ #endif   /* PREDICATE_H */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to