On Fri, Sep 8, 2017 at 4:32 AM, Masahiko Sawada <[email protected]> wrote:
> On Fri, Sep 8, 2017 at 7:24 AM, Thomas Munro
> <[email protected]> wrote:
>> On Wed, Aug 16, 2017 at 2:13 PM, Masahiko Sawada <[email protected]>
>> wrote:
>>> The previous patch conflicts with current HEAD, I rebased the patch to
>>> current HEAD.
>>
>> Hi Masahiko-san,
>>
>> FYI this doesn't build anymore. I think it's just because the wait
>> event enumerators were re-alphabetised in pgstat.h:
>>
>> ../../../../src/include/pgstat.h:820:2: error: redeclaration of
>> enumerator ‘WAIT_EVENT_LOGICAL_SYNC_DATA’
>> WAIT_EVENT_LOGICAL_SYNC_DATA,
>> ^
>> ../../../../src/include/pgstat.h:806:2: note: previous definition of
>> ‘WAIT_EVENT_LOGICAL_SYNC_DATA’ was here
>> WAIT_EVENT_LOGICAL_SYNC_DATA,
>> ^
>> ../../../../src/include/pgstat.h:821:2: error: redeclaration of
>> enumerator ‘WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE’
>> WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
>> ^
>> ../../../../src/include/pgstat.h:807:2: note: previous definition of
>> ‘WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE’ was here
>> WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
>> ^
>>
>
> Thank you for the information! Attached rebased patch.
>
Since the previous patch conflicts with current HEAD, I've attached an
updated patch for the next CF.
Regards,
--
Masahiko Sawada
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 80f803e..b928c1a 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -609,8 +609,8 @@ brin_page_cleanup(Relation idxrel, Buffer buf)
*/
if (PageIsNew(page))
{
- LockRelationForExtension(idxrel, ShareLock);
- UnlockRelationForExtension(idxrel, ShareLock);
+ LockRelationForExtension(idxrel, RELEXT_SHARED);
+ UnlockRelationForExtension(idxrel, RELEXT_SHARED);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (PageIsNew(page))
@@ -702,7 +702,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
*/
if (!RELATION_IS_LOCAL(irel))
{
- LockRelationForExtension(irel, ExclusiveLock);
+ LockRelationForExtension(irel, RELEXT_EXCLUSIVE);
extensionLockHeld = true;
}
buf = ReadBuffer(irel, P_NEW);
@@ -754,7 +754,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
}
if (extensionLockHeld)
- UnlockRelationForExtension(irel, ExclusiveLock);
+ UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE);
ReleaseBuffer(buf);
return InvalidBuffer;
@@ -764,7 +764,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (extensionLockHeld)
- UnlockRelationForExtension(irel, ExclusiveLock);
+ UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE);
page = BufferGetPage(buf);
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index 22f2076..4c15b45 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -570,7 +570,7 @@ revmap_physical_extend(BrinRevmap *revmap)
else
{
if (needLock)
- LockRelationForExtension(irel, ExclusiveLock);
+ LockRelationForExtension(irel, RELEXT_EXCLUSIVE);
buf = ReadBuffer(irel, P_NEW);
if (BufferGetBlockNumber(buf) != mapBlk)
@@ -582,7 +582,7 @@ revmap_physical_extend(BrinRevmap *revmap)
* page from under whoever is using it.
*/
if (needLock)
- UnlockRelationForExtension(irel, ExclusiveLock);
+ UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE);
LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
return;
@@ -591,7 +591,7 @@ revmap_physical_extend(BrinRevmap *revmap)
page = BufferGetPage(buf);
if (needLock)
- UnlockRelationForExtension(irel, ExclusiveLock);
+ UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE);
}
/* Check that it's a regular block (or an empty page) */
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 136ea27..1690d21 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -325,13 +325,13 @@ GinNewBuffer(Relation index)
/* Must extend the file */
needLock = !RELATION_IS_LOCAL(index);
if (needLock)
- LockRelationForExtension(index, ExclusiveLock);
+ LockRelationForExtension(index, RELEXT_EXCLUSIVE);
buffer = ReadBuffer(index, P_NEW);
LockBuffer(buffer, GIN_EXCLUSIVE);
if (needLock)
- UnlockRelationForExtension(index, ExclusiveLock);
+ UnlockRelationForExtension(index, RELEXT_EXCLUSIVE);
return buffer;
}
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 31425e9..e9f84bc 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -716,10 +716,10 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
needLock = !RELATION_IS_LOCAL(index);
if (needLock)
- LockRelationForExtension(index, ExclusiveLock);
+ LockRelationForExtension(index, RELEXT_EXCLUSIVE);
npages = RelationGetNumberOfBlocks(index);
if (needLock)
- UnlockRelationForExtension(index, ExclusiveLock);
+ UnlockRelationForExtension(index, RELEXT_EXCLUSIVE);
totFreePages = 0;
@@ -766,10 +766,10 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
stats->pages_free = totFreePages;
if (needLock)
- LockRelationForExtension(index, ExclusiveLock);
+ LockRelationForExtension(index, RELEXT_EXCLUSIVE);
stats->num_pages = RelationGetNumberOfBlocks(index);
if (needLock)
- UnlockRelationForExtension(index, ExclusiveLock);
+ UnlockRelationForExtension(index, RELEXT_EXCLUSIVE);
return stats;
}
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 26d89f7..cd351d8 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -821,13 +821,13 @@ gistNewBuffer(Relation r)
needLock = !RELATION_IS_LOCAL(r);
if (needLock)
- LockRelationForExtension(r, ExclusiveLock);
+ LockRelationForExtension(r, RELEXT_EXCLUSIVE);
buffer = ReadBuffer(r, P_NEW);
LockBuffer(buffer, GIST_EXCLUSIVE);
if (needLock)
- UnlockRelationForExtension(r, ExclusiveLock);
+ UnlockRelationForExtension(r, RELEXT_EXCLUSIVE);
return buffer;
}
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 77d9d12..ca45b06 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -59,10 +59,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* try to find deleted pages */
if (needLock)
- LockRelationForExtension(rel, ExclusiveLock);
+ LockRelationForExtension(rel, RELEXT_EXCLUSIVE);
npages = RelationGetNumberOfBlocks(rel);
if (needLock)
- UnlockRelationForExtension(rel, ExclusiveLock);
+ UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE);
totFreePages = 0;
for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
@@ -91,10 +91,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* return statistics */
stats->pages_free = totFreePages;
if (needLock)
- LockRelationForExtension(rel, ExclusiveLock);
+ LockRelationForExtension(rel, RELEXT_EXCLUSIVE);
stats->num_pages = RelationGetNumberOfBlocks(rel);
if (needLock)
- UnlockRelationForExtension(rel, ExclusiveLock);
+ UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE);
return stats;
}
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 13e3bdc..a8ce6c7 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -519,11 +519,11 @@ loop:
if (needLock)
{
if (!use_fsm)
- LockRelationForExtension(relation, ExclusiveLock);
- else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
+ LockRelationForExtension(relation, RELEXT_EXCLUSIVE);
+ else if (!ConditionalLockRelationForExtension(relation, RELEXT_EXCLUSIVE))
{
/* Couldn't get the lock immediately; wait for it. */
- LockRelationForExtension(relation, ExclusiveLock);
+ LockRelationForExtension(relation, RELEXT_EXCLUSIVE);
/*
* Check if some other backend has extended a block for us while
@@ -537,7 +537,7 @@ loop:
*/
if (targetBlock != InvalidBlockNumber)
{
- UnlockRelationForExtension(relation, ExclusiveLock);
+ UnlockRelationForExtension(relation, RELEXT_EXCLUSIVE);
goto loop;
}
@@ -576,7 +576,7 @@ loop:
* against vacuumlazy.c --- see comments therein.
*/
if (needLock)
- UnlockRelationForExtension(relation, ExclusiveLock);
+ UnlockRelationForExtension(relation, RELEXT_EXCLUSIVE);
/*
* We need to initialize the empty new page. Double-check that it really
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 4c2a13a..7dc3088 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -641,7 +641,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
* Note that another backend might have extended or created the relation
* by the time we get the lock.
*/
- LockRelationForExtension(rel, ExclusiveLock);
+ LockRelationForExtension(rel, RELEXT_EXCLUSIVE);
/* Might have to re-open if a cache flush happened */
RelationOpenSmgr(rel);
@@ -679,7 +679,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
/* Update local cache with the up-to-date size */
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
- UnlockRelationForExtension(rel, ExclusiveLock);
+ UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE);
pfree(pg);
}
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 10697e9..e1407ac 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -658,7 +658,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
needLock = !RELATION_IS_LOCAL(rel);
if (needLock)
- LockRelationForExtension(rel, ExclusiveLock);
+ LockRelationForExtension(rel, RELEXT_EXCLUSIVE);
buf = ReadBuffer(rel, P_NEW);
@@ -672,7 +672,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
* condition against btvacuumscan --- see comments therein.
*/
if (needLock)
- UnlockRelationForExtension(rel, ExclusiveLock);
+ UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE);
/* Initialize the new page before returning it */
page = BufferGetPage(buf);
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 3dbafdd..394a660 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1058,10 +1058,10 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
{
/* Get the current relation length */
if (needLock)
- LockRelationForExtension(rel, ExclusiveLock);
+ LockRelationForExtension(rel, RELEXT_EXCLUSIVE);
num_pages = RelationGetNumberOfBlocks(rel);
if (needLock)
- UnlockRelationForExtension(rel, ExclusiveLock);
+ UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE);
/* Quit if we've scanned the whole relation */
if (blkno >= num_pages)
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 22f64b0..12be667 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -230,13 +230,13 @@ SpGistNewBuffer(Relation index)
/* Must extend the file */
needLock = !RELATION_IS_LOCAL(index);
if (needLock)
- LockRelationForExtension(index, ExclusiveLock);
+ LockRelationForExtension(index, RELEXT_EXCLUSIVE);
buffer = ReadBuffer(index, P_NEW);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (needLock)
- UnlockRelationForExtension(index, ExclusiveLock);
+ UnlockRelationForExtension(index, RELEXT_EXCLUSIVE);
return buffer;
}
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index d7d5e90..3888d93 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -824,10 +824,10 @@ spgvacuumscan(spgBulkDeleteState *bds)
{
/* Get the current relation length */
if (needLock)
- LockRelationForExtension(index, ExclusiveLock);
+ LockRelationForExtension(index, RELEXT_EXCLUSIVE);
num_pages = RelationGetNumberOfBlocks(index);
if (needLock)
- UnlockRelationForExtension(index, ExclusiveLock);
+ UnlockRelationForExtension(index, RELEXT_EXCLUSIVE);
/* Quit if we've scanned the whole relation */
if (blkno >= num_pages)
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 30b1c08..443e230 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -849,8 +849,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
* it's got exclusive lock on the whole relation.
*/
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- LockRelationForExtension(onerel, ExclusiveLock);
- UnlockRelationForExtension(onerel, ExclusiveLock);
+ LockRelationForExtension(onerel, RELEXT_EXCLUSIVE);
+ UnlockRelationForExtension(onerel, RELEXT_EXCLUSIVE);
LockBufferForCleanup(buf);
if (PageIsNew(page))
{
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 3a0b49c..64e26df 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3628,6 +3628,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
case WAIT_EVENT_SYNC_REP:
event_name = "SyncRep";
break;
+ case WAIT_EVENT_RELATION_EXTENSION:
+ event_name = "RelationExtension";
+ break;
/* no default case, so that compiler will warn */
}
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 4648473..498223a 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -624,7 +624,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
* Note that another backend might have extended or created the relation
* by the time we get the lock.
*/
- LockRelationForExtension(rel, ExclusiveLock);
+ LockRelationForExtension(rel, RELEXT_EXCLUSIVE);
/* Might have to re-open if a cache flush happened */
RelationOpenSmgr(rel);
@@ -652,7 +652,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
/* Update local cache with the up-to-date size */
rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
- UnlockRelationForExtension(rel, ExclusiveLock);
+ UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE);
pfree(pg);
}
diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile
index e1b787e..2334a40 100644
--- a/src/backend/storage/lmgr/Makefile
+++ b/src/backend/storage/lmgr/Makefile
@@ -13,7 +13,7 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o lwlocknames.o spin.o \
- s_lock.o predicate.o condition_variable.o
+ s_lock.o predicate.o condition_variable.o extension_lock.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/lmgr/extension_lock.c b/src/backend/storage/lmgr/extension_lock.c
new file mode 100644
index 0000000..e8bbd5a
--- /dev/null
+++ b/src/backend/storage/lmgr/extension_lock.c
@@ -0,0 +1,380 @@
+/*-------------------------------------------------------------------------
+ *
+ * extension_lock.c
+ * Relation extension lock manager
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/storage/lmgr/extension_lock.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "pg_trace.h"
+#include "postmaster/postmaster.h"
+#include "replication/slot.h"
+#include "storage/ipc.h"
+#include "storage/proc.h"
+#include "storage/proclist.h"
+#include "storage/spin.h"
+#include "storage/extension_lock.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#ifdef LWLOCK_STATS
+#include "utils/hsearch.h"
+#endif
+
+/*
+ * Compute the hash code associated with a relation extension lock.
+ *
+ * The argument is a pointer to the hash key (the relation's OID).  To avoid
+ * recomputing the hash code, callers compute it once per function and pass
+ * it around: it is handed to hash_search_with_hash_value() and is also used
+ * to derive the lock partition number.
+ */
+#define RelExtLockTargetTagHashCode(relextlocktargettag) \
+	get_hash_value(RelExtLockHash, (const void *) (relextlocktargettag))
+
+/*
+ * The shared RelExtLockHash table is partitioned to reduce contention.
+ * To determine which partition a given relid belongs to, compute its hash
+ * code with RelExtLockTargetTagHashCode(), then apply one of these macros.
+ * NB: NUM_RELEXTLOCK_PARTITIONS must be a power of 2!
+ */
+#define RelExtLockHashPartition(hashcode) \
+	((hashcode) % NUM_RELEXTLOCK_PARTITIONS)
+#define RelExtLockHashPartitionLock(hashcode) \
+	(&MainLWLockArray[RELEXTLOCK_MANAGER_LWLOCK_OFFSET + \
+		RelExtLockHashPartition(hashcode)].lock)
+#define RelExtLockHashPartitionLockByIndex(i) \
+	(&MainLWLockArray[RELEXTLOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
+
+#define RELEXT_VAL_EXCLUSIVE ((uint32) 1 << 24) /* added to state by an exclusive holder (bit 24) */
+#define RELEXT_VAL_SHARED 1 /* added to state once per shared holder */
+
+#define RELEXT_LOCKMASK ((uint32) ((1 << 25) - 1)) /* low 25 bits of state; zero means nobody holds the lock */
+
+/* Capacity of held_relextlocks[]: relation extension locks one backend can hold at once */
+#define MAX_SIMUL_EXTLOCKS 8
+
+/*
+ * One entry of held_relextlocks[], the list of relation extension locks
+ * currently held by this backend; num_held_relextlocks entries are in use.
+ */
+typedef struct relextlock_handle
+{
+	Oid			relid;		/* relation the lock belongs to */
+	RelExtLock *lock;		/* entry in the shared hash table */
+	RelExtLockMode mode;	/* mode in which we hold the lock */
+} relextlock_handle;
+static relextlock_handle held_relextlocks[MAX_SIMUL_EXTLOCKS];
+static int	num_held_relextlocks = 0;
+
+static bool RelExtLockAcquire(Oid relid, RelExtLockMode lockmode, bool conditional);
+static void RelExtLockRelease(Oid relid, RelExtLockMode lockmode);
+static bool RelExtLockAttemptLock(RelExtLock *ext_lock, RelExtLockMode lockmode);
+
+/*
+ * Pointer to the hash table containing lock state.
+ *
+ * The RelExtLockHash hash table is in shared memory.
+ */
+static HTAB *RelExtLockHash;
+
+/*
+ * InitRelExtLock
+ *		Set up the shared hash table of per-relation RelExtLock entries.
+ *
+ * max_table_size bounds the number of entries; the table is initially sized
+ * for half of that.
+ *
+ * NOTE(review): LockShmemSize() sizes this table with sizeof(LWLock) although
+ * entries are sizeof(RelExtLock) -- confirm that the two agree.
+ */
+void
+InitRelExtLock(long max_table_size)
+{
+	HASHCTL		hash_ctl;
+	long		initial_size = max_table_size / 2;
+
+	/*
+	 * Entries are RelExtLock structs keyed by relation OID, spread over
+	 * NUM_RELEXTLOCK_PARTITIONS partitions.
+	 */
+	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(Oid);
+	hash_ctl.entrysize = sizeof(RelExtLock);
+	hash_ctl.num_partitions = NUM_RELEXTLOCK_PARTITIONS;
+
+	RelExtLockHash = ShmemInitHash("RelExtLock Hash",
+								   initial_size,
+								   max_table_size,
+								   &hash_ctl,
+								   HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
+}
+
+/*
+ * LockRelationForExtension
+ *
+ * Take the relation extension lock in 'lockmode', waiting until it becomes
+ * available.  It interlocks the addition of pages to a relation, needed
+ * because the bufmgr/smgr definition of P_NEW is not race-condition-proof.
+ *
+ * The caller is assumed to hold some regular lock on the relation already,
+ * so no AcceptInvalidationMessages call is made here.
+ */
+void
+LockRelationForExtension(Relation relation, RelExtLockMode lockmode)
+{
+	(void) RelExtLockAcquire(relation->rd_id, lockmode, false);
+}
+
+/*
+ * ConditionalLockRelationForExtension
+ *		As above, but give up instead of sleeping; true iff the lock was taken.
+ */
+bool
+ConditionalLockRelationForExtension(Relation relation, RelExtLockMode lockmode)
+{
+	bool		acquired = RelExtLockAcquire(relation->rd_id, lockmode, true);
+
+	return acquired;
+}
+
+/*
+ * RelationExtensionLockWaiterCount
+ *
+ * Return the number of processes recorded as waiting for the extension lock
+ * of the given relation.
+ *
+ * Note that no partition lock is taken here: the caller must either hold the
+ * partition lock in exclusive mode or already hold this relation's extension
+ * lock, so the hash entry is expected to exist.
+ */
+int
+RelationExtensionLockWaiterCount(Relation relation)
+{
+	Oid			target = relation->rd_id;
+	uint32		hash = RelExtLockTargetTagHashCode(&target);
+	RelExtLock *entry;
+	uint32		waiting;
+	bool		exists;
+
+	entry = (RelExtLock *) hash_search_with_hash_value(RelExtLockHash,
+													   (void *) &target,
+													   hash,
+													   HASH_FIND, &exists);
+
+	/* The caller is expected to hold the lock already */
+	Assert(exists);
+
+	waiting = pg_atomic_read_u32(&(entry->nwaiters));
+
+	return waiting;
+}
+
+/*
+ * UnlockRelationForExtension -- release this backend's extension lock held in 'lockmode'
+ */
+void
+UnlockRelationForExtension(Relation relation, RelExtLockMode lockmode)
+{
+ RelExtLockRelease(relation->rd_id, lockmode);
+}
+
+/*
+ * Acquire the relation extension lock for 'relid', creating its shared hash
+ * entry if needed.  Returns false only when 'conditional' is set and the lock
+ * is unavailable.  On success the partition LWLock stays held until
+ * RelExtLockRelease(); NOTE(review): we may therefore sleep while holding it.
+ */
+static bool
+RelExtLockAcquire(Oid relid, RelExtLockMode lockmode, bool conditional)
+{
+	uint32		hashcode = RelExtLockTargetTagHashCode(&relid);
+	LWLock	   *partitionLock = RelExtLockHashPartitionLock(hashcode);
+	RelExtLock *ext_lock;
+	bool		found;
+	bool		got_lock = false;
+	bool		waited = false;
+
+	if (num_held_relextlocks >= MAX_SIMUL_EXTLOCKS)
+		elog(ERROR, "too many relation extension locks taken");
+
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+
+	ext_lock = (RelExtLock *) hash_search_with_hash_value(RelExtLockHash,
+														  (void *) &relid,
+														  hashcode, HASH_ENTER, &found);
+
+	/* A newly created entry has no valid contents yet: initialize it */
+	if (!found)
+	{
+		pg_atomic_init_u32(&(ext_lock->state), 0);
+		pg_atomic_init_u32(&(ext_lock->nwaiters), 0);
+		ConditionVariableInit(&(ext_lock->cv));
+	}
+
+	for (;;)
+	{
+		if (RelExtLockAttemptLock(ext_lock, lockmode))
+		{
+			got_lock = true;
+			if (waited)
+				pg_atomic_sub_fetch_u32(&(ext_lock->nwaiters), 1);
+			break;
+		}
+
+		/* Could not get the lock; a conditional request gives up here */
+		if (conditional)
+			break;
+
+		/* Count ourselves as a waiter once, however often we are woken */
+		if (!waited)
+		{
+			pg_atomic_add_fetch_u32(&(ext_lock->nwaiters), 1);
+			waited = true;
+		}
+		ConditionVariableSleep(&(ext_lock->cv), WAIT_EVENT_RELATION_EXTENSION);
+	}
+
+	ConditionVariableCancelSleep();
+
+	if (got_lock)
+	{
+		/* Remember the lock so that it can be found at release time */
+		held_relextlocks[num_held_relextlocks].relid = relid;
+		held_relextlocks[num_held_relextlocks].lock = ext_lock;
+		held_relextlocks[num_held_relextlocks].mode = lockmode;
+		num_held_relextlocks++;
+	}
+	else
+		LWLockRelease(partitionLock);
+
+	return got_lock;
+}
+
+/*
+ * RelationExtensionLockReleaseAll - release every lock listed in held_relextlocks[]
+ */
+void
+RelationExtensionLockReleaseAll(void)
+{
+ while (num_held_relextlocks > 0) /* each release below shrinks the list by one */
+ {
+ HOLD_INTERRUPTS(); /* presumably balanced by the LWLockRelease() in RelExtLockRelease -- TODO confirm */
+
+ RelExtLockRelease(held_relextlocks[num_held_relextlocks - 1].relid,
+ held_relextlocks[num_held_relextlocks - 1].mode);
+ }
+}
+
+/*
+ * RelExtLockRelease
+ *
+ * Release the extension lock on 'relid' held by this backend in 'lockmode'.
+ * Waiters, if any, are woken up; the shared hash entry is removed only once
+ * nobody holds or waits for the lock any more, since other backends may
+ * still be using it.
+ *
+ * The partition LWLock is assumed to still be held from RelExtLockAcquire();
+ * it is released here after the lock state has been updated.
+ */
+static void
+RelExtLockRelease(Oid relid, RelExtLockMode lockmode)
+{
+	RelExtLock *ext_lock;
+	uint32		hashcode;
+	LWLock	   *partitionLock;
+	uint32		newstate;
+	uint32		nwaiters;
+	int			i;
+
+	hashcode = RelExtLockTargetTagHashCode(&relid);
+	partitionLock = RelExtLockHashPartitionLock(hashcode);
+
+	/* Search newest-first for the matching entry in our held-locks list */
+	for (i = num_held_relextlocks; --i >= 0;)
+		if (relid == held_relextlocks[i].relid &&
+			lockmode == held_relextlocks[i].mode)
+			break;
+
+	if (i < 0)
+		elog(ERROR, "relation extension lock for %u with lock mode %d is not held",
+			 relid, lockmode);
+
+	ext_lock = held_relextlocks[i].lock;
+
+	/* Remove the entry, closing the gap it leaves in the array */
+	num_held_relextlocks--;
+	for (; i < num_held_relextlocks; i++)
+		held_relextlocks[i] = held_relextlocks[i + 1];
+
+	/* pg_atomic_sub_fetch_u32 returns the state *after* the subtraction */
+	newstate = pg_atomic_sub_fetch_u32(&(ext_lock->state),
+									   lockmode == RELEXT_EXCLUSIVE ?
+									   RELEXT_VAL_EXCLUSIVE : RELEXT_VAL_SHARED);
+	nwaiters = pg_atomic_read_u32(&(ext_lock->nwaiters));
+
+	/*
+	 * Wake up any waiters.  Otherwise drop the hash entry, but only when no
+	 * other holder remains.
+	 */
+	if (nwaiters > 0)
+		ConditionVariableBroadcast(&(ext_lock->cv));
+	else if ((newstate & RELEXT_LOCKMASK) == 0)
+		hash_search_with_hash_value(RelExtLockHash, (void *) &relid,
+									hashcode, HASH_REMOVE, NULL);
+
+	LWLockRelease(partitionLock);
+}
+
+/*
+ * RelExtLockAttemptLock
+ *
+ * Try to take 'ext_lock' in 'lockmode' without waiting, by updating its
+ * state word with an atomic compare-and-exchange.
+ *
+ * An exclusive request succeeds only while no lock bit is set at all; a
+ * shared request succeeds as long as the exclusive bit is clear.  Returns
+ * true if the lock was taken, false otherwise.
+ */
+static bool
+RelExtLockAttemptLock(RelExtLock *ext_lock, RelExtLockMode lockmode)
+{
+	uint32		conflict_mask;
+	uint32		increment;
+	uint32		seen;
+
+	/* Pick the bits that block this request and the amount a grant adds */
+	if (lockmode == RELEXT_EXCLUSIVE)
+	{
+		conflict_mask = RELEXT_LOCKMASK;
+		increment = RELEXT_VAL_EXCLUSIVE;
+	}
+	else
+	{
+		conflict_mask = RELEXT_VAL_EXCLUSIVE;
+		increment = RELEXT_VAL_SHARED;
+	}
+
+	seen = pg_atomic_read_u32(&ext_lock->state);
+
+	for (;;)
+	{
+		bool		available = (seen & conflict_mask) == 0;
+		uint32		wanted = available ? seen + increment : seen;
+
+		/* A failed exchange refreshes 'seen' with the current state; retry */
+		if (pg_atomic_compare_exchange_u32(&ext_lock->state, &seen, wanted))
+			return available;
+	}
+	pg_unreachable();
+}
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index fe98898..34095cb 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -319,78 +319,6 @@ UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode)
}
/*
- * LockRelationForExtension
- *
- * This lock tag is used to interlock addition of pages to relations.
- * We need such locking because bufmgr/smgr definition of P_NEW is not
- * race-condition-proof.
- *
- * We assume the caller is already holding some type of regular lock on
- * the relation, so no AcceptInvalidationMessages call is needed here.
- */
-void
-LockRelationForExtension(Relation relation, LOCKMODE lockmode)
-{
- LOCKTAG tag;
-
- SET_LOCKTAG_RELATION_EXTEND(tag,
- relation->rd_lockInfo.lockRelId.dbId,
- relation->rd_lockInfo.lockRelId.relId);
-
- (void) LockAcquire(&tag, lockmode, false, false);
-}
-
-/*
- * ConditionalLockRelationForExtension
- *
- * As above, but only lock if we can get the lock without blocking.
- * Returns TRUE iff the lock was acquired.
- */
-bool
-ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode)
-{
- LOCKTAG tag;
-
- SET_LOCKTAG_RELATION_EXTEND(tag,
- relation->rd_lockInfo.lockRelId.dbId,
- relation->rd_lockInfo.lockRelId.relId);
-
- return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL);
-}
-
-/*
- * RelationExtensionLockWaiterCount
- *
- * Count the number of processes waiting for the given relation extension lock.
- */
-int
-RelationExtensionLockWaiterCount(Relation relation)
-{
- LOCKTAG tag;
-
- SET_LOCKTAG_RELATION_EXTEND(tag,
- relation->rd_lockInfo.lockRelId.dbId,
- relation->rd_lockInfo.lockRelId.relId);
-
- return LockWaiterCount(&tag);
-}
-
-/*
- * UnlockRelationForExtension
- */
-void
-UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
-{
- LOCKTAG tag;
-
- SET_LOCKTAG_RELATION_EXTEND(tag,
- relation->rd_lockInfo.lockRelId.dbId,
- relation->rd_lockInfo.lockRelId.relId);
-
- LockRelease(&tag, lockmode, false);
-}
-
-/*
* LockPage
*
* Obtain a page-level lock. This is currently used by some index access
@@ -961,12 +889,6 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
tag->locktag_field2,
tag->locktag_field1);
break;
- case LOCKTAG_RELATION_EXTEND:
- appendStringInfo(buf,
- _("extension of relation %u of database %u"),
- tag->locktag_field2,
- tag->locktag_field1);
- break;
case LOCKTAG_PAGE:
appendStringInfo(buf,
_("page %u of relation %u of database %u"),
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 2b26173..bc576a7 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -45,6 +45,7 @@
#include "storage/sinvaladt.h"
#include "storage/spin.h"
#include "storage/standby.h"
+#include "storage/lmgr.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/resowner_private.h"
@@ -388,6 +389,10 @@ InitLocks(void)
max_table_size = NLOCKENTS();
init_table_size = max_table_size / 2;
+
+ /* Initialize lock structure for relation extension lock */
+ InitRelExtLock(max_table_size);
+
/*
* Allocate hash table for LOCK structs. This stores per-locked-object
* information.
@@ -3366,6 +3371,7 @@ LockShmemSize(void)
/* lock hash table */
max_table_size = NLOCKENTS();
size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK)));
+ size = add_size(size, hash_estimate_size(max_table_size, sizeof(LWLock)));
/* proclock hash table */
max_table_size *= 2;
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index f1060f9..bc25a53 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -451,6 +451,13 @@ InitializeLWLocks(void)
for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
+ /* Initialize relation extension lmgr's LWLocks in main array */
+ lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS +
+ NUM_BUFFER_PARTITIONS + NUM_LOCK_PARTITIONS +
+ NUM_PREDICATELOCK_PARTITIONS;
+ for (id = 0; id < NUM_RELEXTLOCK_PARTITIONS; id++, lock++)
+ LWLockInitialize(&lock->lock, LWTRANCHE_RELEXT_LOCK_MANAGER);
+
/* Initialize named tranches. */
if (NamedLWLockTrancheRequests > 0)
{
@@ -508,6 +515,7 @@ RegisterLWLockTranches(void)
LWLockRegisterTranche(LWTRANCHE_LOCK_MANAGER, "lock_manager");
LWLockRegisterTranche(LWTRANCHE_PREDICATE_LOCK_MANAGER,
"predicate_lock_manager");
+ LWLockRegisterTranche(LWTRANCHE_RELEXT_LOCK_MANAGER, "relext_lock_manager");
LWLockRegisterTranche(LWTRANCHE_PARALLEL_QUERY_DSA,
"parallel_query_dsa");
LWLockRegisterTranche(LWTRANCHE_SESSION_DSA,
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 9e0a8ab..6d8916c 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -25,7 +25,6 @@
/* This must match enum LockTagType! */
const char *const LockTagTypeNames[] = {
"relation",
- "extend",
"page",
"tuple",
"transactionid",
@@ -234,7 +233,6 @@ pg_lock_status(PG_FUNCTION_ARGS)
switch ((LockTagType) instance->locktag.locktag_type)
{
case LOCKTAG_RELATION:
- case LOCKTAG_RELATION_EXTEND:
values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
nulls[3] = true;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 089b7c3..958822f 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -816,7 +816,8 @@ typedef enum
WAIT_EVENT_REPLICATION_ORIGIN_DROP,
WAIT_EVENT_REPLICATION_SLOT_DROP,
WAIT_EVENT_SAFE_SNAPSHOT,
- WAIT_EVENT_SYNC_REP
+ WAIT_EVENT_SYNC_REP,
+ WAIT_EVENT_RELATION_EXTENSION
} WaitEventIPC;
/* ----------
diff --git a/src/include/storage/extension_lock.h b/src/include/storage/extension_lock.h
new file mode 100644
index 0000000..f178672
--- /dev/null
+++ b/src/include/storage/extension_lock.h
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * extension_lock.h
+ * Relation extension lock manager
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/extension_lock.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef EXTENSION_H
+#define EXTENSION_H
+
+#ifdef FRONTEND
+#error "extension_lock.h may not be included from frontend code"
+#endif
+
+#include "storage/proclist_types.h"
+#include "storage/s_lock.h"
+#include "storage/condition_variable.h"
+#include "port/atomics.h"
+
+typedef struct RelExtLock
+{
+ Oid relid; /* hash key of the RelExtLockHash entry (keysize is sizeof(Oid)) */
+ pg_atomic_uint32 state; /* exclusive flag plus count of shared holders */
+ pg_atomic_uint32 nwaiters; /* number of backends waiting for the lock */
+ ConditionVariable cv; /* waiters sleep on this until a release broadcasts */
+} RelExtLock;
+
+typedef enum RelExtLockMode
+{
+ RELEXT_EXCLUSIVE, /* granted only when there is no other holder at all */
+ RELEXT_SHARED /* granted unless an exclusive holder exists */
+} RelExtLockMode;
+
+/* Relation extension lock API, implemented in storage/lmgr/extension_lock.c */
+extern void InitRelExtLock(long max_table_size);
+extern void LockRelationForExtension(Relation relation, RelExtLockMode lockmode);
+extern void UnlockRelationForExtension(Relation relation, RelExtLockMode lockmode);
+extern bool ConditionalLockRelationForExtension(Relation relation, RelExtLockMode lockmode);
+extern int RelationExtensionLockWaiterCount(Relation relation);
+extern void RelationExtensionLockReleaseAll(void);
+
+#endif /* EXTENSION_H */
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index 0b92322..ac23354 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -15,6 +15,7 @@
#define LMGR_H
#include "lib/stringinfo.h"
+#include "storage/extension_lock.h"
#include "storage/itemptr.h"
#include "storage/lock.h"
#include "utils/rel.h"
@@ -34,6 +35,36 @@ typedef enum XLTW_Oper
XLTW_RecheckExclusionConstr
} XLTW_Oper;
+/*
+ * Key of a relation extension lock table entry.  It is embedded as the
+ * first member ("hash key") of RELEXTLOCK and as the hash key of
+ * LOCALRELEXTLOCK.
+ */
+typedef struct RELEXTLOCKTAG
+{
+ Oid relid; /* identifies the lockable object */
+ LWLockMode mode; /* lock mode for this table entry */
+} RELEXTLOCKTAG;
+
+/*
+ * This structure holds the per-relation information for a relation
+ * extension lock: the hash key and the LWLock itself.
+ *
+ * NOTE(review): storage/extension_lock.h, added by this same patch, declares
+ * RelExtLock built on atomics and a condition variable.  Confirm that this
+ * LWLock-based structure is still needed.
+ */
+typedef struct RELEXTLOCK
+{
+ RELEXTLOCKTAG tag; /* hash key -- must be first */
+ LWLock lock; /* LWLock for relation extension */
+} RELEXTLOCK;
+
+/*
+ * The LOCALRELEXTLOCK struct represents a local copy of data which is
+ * also present in the RELEXTLOCK table, organized for fast access without
+ * needing to acquire an LWLock.  It is strictly an optimization.
+ *
+ * NOTE(review): the key member is named "relid" although its type is the
+ * full RELEXTLOCKTAG (relation OID plus lock mode).
+ */
+typedef struct LOCALRELEXTLOCK
+{
+ /* hash key */
+ RELEXTLOCKTAG relid; /* unique identifier of locktable object */
+
+ /* data */
+ bool held; /* is lock held? */
+} LOCALRELEXTLOCK;
+
extern void RelationInitLockInfo(Relation relation);
/* Lock a relation */
@@ -50,13 +81,6 @@ extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
-/* Lock a relation for extension */
-extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode);
-extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode);
-extern bool ConditionalLockRelationForExtension(Relation relation,
- LOCKMODE lockmode);
-extern int RelationExtensionLockWaiterCount(Relation relation);
-
/* Lock a page (currently only used within indexes) */
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 765431e..3be18ea 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -138,8 +138,6 @@ typedef uint16 LOCKMETHODID;
typedef enum LockTagType
{
LOCKTAG_RELATION, /* whole relation */
- /* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */
- LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */
/* same ID info as RELATION */
LOCKTAG_PAGE, /* one page of a relation */
/* ID info for a page is RELATION info + BlockNumber */
@@ -198,14 +196,6 @@ typedef struct LOCKTAG
(locktag).locktag_type = LOCKTAG_RELATION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
-#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \
- ((locktag).locktag_field1 = (dboid), \
- (locktag).locktag_field2 = (reloid), \
- (locktag).locktag_field3 = 0, \
- (locktag).locktag_field4 = 0, \
- (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
- (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
-
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index f4c4aed..2e9a1ac 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -120,14 +120,21 @@ extern PGDLLIMPORT int NamedLWLockTrancheRequests;
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
+/* Number of partitions the shared relation extension lock tables are divided into */
+#define LOG2_NUM_RELEXTLOCK_PARTITIONS 4
+#define NUM_RELEXTLOCK_PARTITIONS (1 << LOG2_NUM_RELEXTLOCK_PARTITIONS)
+
/* Offsets for various chunks of preallocated lwlocks. */
#define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS
#define LOCK_MANAGER_LWLOCK_OFFSET \
(BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS)
#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \
(LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS)
-#define NUM_FIXED_LWLOCKS \
+#define RELEXTLOCK_MANAGER_LWLOCK_OFFSET \
(PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS)
+/* Total number of fixed LWLocks: the last chunk's offset plus its size. */
+#define NUM_FIXED_LWLOCKS \
+	(RELEXTLOCK_MANAGER_LWLOCK_OFFSET + NUM_RELEXTLOCK_PARTITIONS)
typedef enum LWLockMode
{
@@ -151,6 +158,8 @@ extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
extern void LWLockReleaseAll(void);
extern bool LWLockHeldByMe(LWLock *lock);
extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
+extern bool LWLockCheckForCleanup(LWLock *lock);
+extern int LWLockWaiterCount(LWLock *lock);
extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
@@ -211,6 +220,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_BUFFER_MAPPING,
LWTRANCHE_LOCK_MANAGER,
LWTRANCHE_PREDICATE_LOCK_MANAGER,
+ LWTRANCHE_RELEXT_LOCK_MANAGER,
LWTRANCHE_PARALLEL_QUERY_DSA,
LWTRANCHE_SESSION_DSA,
LWTRANCHE_SESSION_RECORD_TABLE,
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers