On Tue, Mar 29, 2016 at 1:29 PM, Dilip Kumar <dilipbal...@gmail.com> wrote:
> Attaching new version v18
>
> - Some cleanup work on v17.
> - Improved  UpdateFreeSpaceMap function.
> - Performance and space utilization are same as V17

Looks better.  Here's a v19 that I hacked on a bit.

Unfortunately, one compiler I tried this with had a pretty legitimate complaint:

hio.c: In function ‘RelationGetBufferForTuple’:
hio.c:231:20: error: ‘freespace’ may be used uninitialized in this
function [-Werror=uninitialized]
hio.c:185:7: note: ‘freespace’ was declared here
hio.c:231:20: error: ‘blockNum’ may be used uninitialized in this
function [-Werror=uninitialized]
hio.c:181:14: note: ‘blockNum’ was declared here

There's nothing whatsoever to prevent RelationExtensionLockWaiterCount
from returning 0.

It's also rather ugly that the call to UpdateFreeSpaceMap() assumes
that the last value returned by PageGetHeapFreeSpace() is as good as
any other, but maybe we can just install a comment explaining that
point; there's not an obviously better approach that I can see.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 8140418..aeed40b 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -169,6 +169,69 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
 }
 
 /*
+ * Extend a relation by multiple blocks to avoid future contention on the
+ * relation extension lock.  Our goal is to pre-extend the relation by an
+ * amount which ramps up as the degree of contention ramps up, but limiting
+ * the result to some sane overall value.
+ */
+static void
+RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
+{
+	Page		page;
+	BlockNumber	blockNum,
+				firstBlock;
+	int			extraBlocks = 0;
+	int			lockWaiters = 0;
+	Size		freespace;
+	Buffer		buffer;
+
+	/*
+	 * We use the length of the lock wait queue to judge how much to extend.
+	 * It might seem like multiplying the number of lock waiters by as much
+	 * as 20 is too aggressive, but benchmarking revealed that smaller numbers
+	 * were insufficient.  512 is just an arbitrary cap to prevent pathological
+	 * results (and excessive wasted disk space).
+	 */
+	lockWaiters = RelationExtensionLockWaiterCount(relation);
+	extraBlocks = Min(512, lockWaiters * 20);
+
+	firstBlock = InvalidBlockNumber;
+
+	while (extraBlocks-- >= 0)
+	{
+		/* Ouch - an unnecessary lseek() each time through the loop! */
+		buffer = ReadBufferBI(relation, P_NEW, bistate);
+
+		/* Extend by one page. */
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		page = BufferGetPage(buffer);
+		PageInit(page, BufferGetPageSize(buffer), 0);
+		MarkBufferDirty(buffer);
+		blockNum = BufferGetBlockNumber(buffer);
+		freespace = PageGetHeapFreeSpace(page);
+		UnlockReleaseBuffer(buffer);
+
+		if (firstBlock == InvalidBlockNumber)
+			firstBlock = blockNum;
+
+		/*
+		 * Immediately update the bottom level of the FSM.  This has a good
+		 * chance of making this page visible to other concurrently inserting
+		 * backends, and we want that to happen without delay.
+		 */
+		RecordPageWithFreeSpace(relation, blockNum, freespace);
+	}
+
+	/*
+	 * Updating the upper levels of the free space map is too expensive
+	 * to do for every block, but it's worth doing once at the end to make
+	 * sure that subsequent insertion activity sees all of those nifty free
+	 * pages we just inserted.
+	 */
+	UpdateFreeSpaceMap(relation, firstBlock, blockNum, freespace);
+}
+
+/*
  * RelationGetBufferForTuple
  *
  *	Returns pinned and exclusive-locked buffer of a page in given relation
@@ -233,8 +296,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
-	Size		pageFreeSpace,
-				saveFreeSpace;
+	Size		pageFreeSpace = 0,
+				saveFreeSpace = 0;
 	BlockNumber targetBlock,
 				otherBlock;
 	bool		needLock;
@@ -308,6 +371,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
 		}
 	}
 
+loop:
 	while (targetBlock != InvalidBlockNumber)
 	{
 		/*
@@ -440,10 +504,46 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 */
 	needLock = !RELATION_IS_LOCAL(relation);
 
+	/*
+	 * If we need the lock but are not able to acquire it immediately, we'll
+	 * consider extending the relation by multiple blocks at a time to manage
+	 * contention on the relation extension lock.  However, this only makes
+	 * sense if we're using the FSM; otherwise, there's no point.
+	 */
 	if (needLock)
-		LockRelationForExtension(relation, ExclusiveLock);
+	{
+		if (!use_fsm)
+			LockRelationForExtension(relation, ExclusiveLock);
+		else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
+		{
+			/* Couldn't get the lock immediately; wait for it. */
+			LockRelationForExtension(relation, ExclusiveLock);
+
+			/*
+			 * Check if some other backend has extended a block for us while
+			 * we were waiting on the lock.
+			 */
+			targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+
+			/*
+			 * If some other waiter has already extended the relation, we
+			 * don't need to do so; just use the existing freespace.
+			 */
+			if (targetBlock != InvalidBlockNumber)
+			{
+				UnlockRelationForExtension(relation, ExclusiveLock);
+				goto loop;
+			}
+
+			/* Time to bulk-extend. */
+			RelationAddExtraBlocks(relation, bistate);
+		}
+	}
 
 	/*
+	 * In addition to whatever extension we performed above, we always add
+	 * at least one block to satisfy our own request.
+	 *
 	 * XXX This does an lseek - rather expensive - but at the moment it is the
 	 * only way to accurately determine how many blocks are in a relation.  Is
 	 * it worth keeping an accurate file length in shared memory someplace,
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 2631080..b2361e5 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -109,6 +109,8 @@ static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 				   uint8 newValue, uint8 minValue);
 static BlockNumber fsm_search(Relation rel, uint8 min_cat);
 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
+static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr);
+static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat);
 
 
 /******** Public API ********/
@@ -189,6 +191,47 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 }
 
 /*
+ * Update the upper levels of the free space map all the way up to the root
+ * to make sure we don't lose track of new blocks we just inserted.  This is
+ * intended to be used after adding many new blocks to the relation; we judge
+ * it not worth updating the upper levels of the tree every time data for
+ * a single page changes, but for a bulk-extend it's worth it.
+ */
+void
+UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum,
+					BlockNumber endBlkNum, Size freespace)
+{
+	int			new_cat = fsm_space_avail_to_cat(freespace);
+	FSMAddress	addr;
+	uint16		slot;
+	BlockNumber	blockNum;
+	BlockNumber	lastBlkOnPage;
+
+	blockNum = startBlkNum;
+
+	while (blockNum <= endBlkNum)
+	{
+		/*
+		 * Get the FSM Address where this block resides and update the
+		 * FSM tree starting from that FSM address to all the way up till
+		 * root.
+		 */
+		addr = fsm_get_location(blockNum, &slot);
+		fsm_update_recursive(rel, addr, new_cat);
+
+		/*
+		 * Get the last block number on this FSM page.  If that's greater
+		 * than or equal to our endBlkNum, we're done.  Otherwise, advance
+		 * to the first block on the next page.
+		 */
+		lastBlkOnPage = fsm_get_lastblckno(rel, addr);
+		if (lastBlkOnPage >= endBlkNum)
+			break;
+		blockNum = lastBlkOnPage + 1;
+	}
+}
+
+/*
  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
  *		WAL replay
  */
@@ -788,3 +831,42 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
 
 	return max_avail;
 }
+
+/*
+ * This function will return the last block number stored on given
+ * FSM page address.
+ */
+static BlockNumber
+fsm_get_lastblckno(Relation rel, FSMAddress addr)
+{
+	int			slot;
+
+	/*
+	 * Get the last slot number on the given address and convert that to
+	 * block number
+	 */
+	slot = SlotsPerFSMPage - 1;
+	return fsm_get_heap_blk(addr, slot);
+}
+
+/*
+ * Recursively update the FSM tree from given address to
+ * all the way up to root.
+ */
+static void
+fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat)
+{
+	uint16		parentslot;
+	FSMAddress	parent;
+
+	if (addr.level == FSM_ROOT_LEVEL)
+		return;
+
+	/*
+	 * Get the parent page and our slot in the parent page, and
+	 * update the information in that.
+	 */
+	parent = fsm_get_parent(addr, &parentslot);
+	fsm_set_and_search(rel, parent, parentslot, new_cat, 0);
+	fsm_update_recursive(rel, parent, new_cat);
+}
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 0632fc0..7b08555 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -341,6 +341,41 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode)
 }
 
 /*
+ *		ConditionalLockRelationForExtension
+ *
+ * As above, but only lock if we can get the lock without blocking.
+ * Returns TRUE iff the lock was acquired.
+ */
+bool
+ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode)
+{
+	LOCKTAG		tag;
+
+	SET_LOCKTAG_RELATION_EXTEND(tag,
+								relation->rd_lockInfo.lockRelId.dbId,
+								relation->rd_lockInfo.lockRelId.relId);
+
+	return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ *		RelationExtensionLockWaiterCount
+ *
+ * Count the number of processes waiting for the given relation extension lock.
+ */
+int
+RelationExtensionLockWaiterCount(Relation relation)
+{
+	LOCKTAG		tag;
+
+	SET_LOCKTAG_RELATION_EXTEND(tag,
+								relation->rd_lockInfo.lockRelId.dbId,
+								relation->rd_lockInfo.lockRelId.relId);
+
+	return LockWaiterCount(&tag);
+}
+
+/*
  *		UnlockRelationForExtension
  */
 void
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index b30b7b1..41f6930 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -4380,3 +4380,40 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
 	LockRelease(&tag, ShareLock, false);
 	return true;
 }
+
+/*
+ * LockWaiterCount
+ *
+ * Find the number of lock requester on this locktag
+ */
+int
+LockWaiterCount(const LOCKTAG *locktag)
+{
+	LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+	LOCK	   *lock;
+	bool		found;
+	uint32		hashcode;
+	LWLock	   *partitionLock;
+	int			waiters = 0;
+
+	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+	hashcode = LockTagHashCode(locktag);
+	partitionLock = LockHashPartitionLock(hashcode);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+
+	lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
+												(const void *) locktag,
+												hashcode,
+												HASH_FIND,
+												&found);
+	if (found)
+	{
+		Assert(lock != NULL);
+		waiters = lock->nRequested;
+	}
+	LWLockRelease(partitionLock);
+
+	return waiters;
+}
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 19dcb8d..16c052b 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -32,5 +32,9 @@ extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 
 extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
 extern void FreeSpaceMapVacuum(Relation rel);
+extern void UpdateFreeSpaceMap(Relation rel,
+							BlockNumber firtsBlkNum,
+							BlockNumber lastBlkNum,
+							Size freespace);
 
 #endif   /* FREESPACE_H_ */
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index 975b6f8..8288e7d 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -53,6 +53,9 @@ extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
 /* Lock a relation for extension */
 extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode);
 extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode);
+extern bool ConditionalLockRelationForExtension(Relation relation,
+									LOCKMODE lockmode);
+extern int	RelationExtensionLockWaiterCount(Relation relation);
 
 /* Lock a page (currently only used within indexes) */
 extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index b26427d..efa75ec 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -574,6 +574,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1,
 					   PGPROC *proc2);
 extern void InitDeadLockChecking(void);
 
+extern int	LockWaiterCount(const LOCKTAG *locktag);
+
 #ifdef LOCK_DEBUG
 extern void DumpLocks(PGPROC *proc);
 extern void DumpAllLocks(void);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to