Good day.

I found that BufferAlloc unnecessarily goes through dynahash's freelist when
it reuses a valid buffer.

If this is avoided and the dynahash entry is moved directly, 1-2% is gained
in select-only pgbench (scale factor 100, 50 connections/50 threads
on a 4-core/8-thread notebook CPU: 185krps => 190krps).

I've changed the speculative call to BufTableInsert into a BufTableLookup to
avoid inserting too early. (This also saves a call to BufTableDelete if a
conflicting entry is already present.) Then, if the buffer is valid and there
is no conflicting entry in dynahash, I move the old dynahash entry directly
and without a duplicate check (since we already did the check).

If the old buffer was invalid, a new entry is unavoidably fetched from the
freelist and inserted (also without a check). But in steady state (if there
are no dropped/truncated tables/indices/databases) this is a rare case.

Regards,
Sokolov Yura @ Postgres Professional
y.soko...@postgrespro.ru
funny.fal...@gmail.com
From 2afbc5a59c3ccd3fec14105aff40252eeaacf40c Mon Sep 17 00:00:00 2001
From: Yura Sokolov <y.soko...@postgrespro.ru>
Date: Wed, 22 Sep 2021 13:10:37 +0300
Subject: [PATCH] More gentle dynahash usage in buffer allocation.

When BufferAlloc reuses some valid buffer, there is no need
to go through dynahash's free list.

---
 src/backend/storage/buffer/buf_table.c |  38 ++---
 src/backend/storage/buffer/bufmgr.c    |   9 +-
 src/backend/utils/hash/dynahash.c      | 190 +++++++++++++++++++++++++
 src/include/storage/buf_internals.h    |   5 +-
 src/include/utils/hsearch.h            |   8 ++
 5 files changed, 230 insertions(+), 20 deletions(-)

diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c
index caa03ae1233..382c84f5149 100644
--- a/src/backend/storage/buffer/buf_table.c
+++ b/src/backend/storage/buffer/buf_table.c
@@ -107,36 +107,42 @@ BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
 
 /*
  * BufTableInsert
- *		Insert a hashtable entry for given tag and buffer ID,
- *		unless an entry already exists for that tag
- *
- * Returns -1 on successful insertion.  If a conflicting entry exists
- * already, returns the buffer ID in that entry.
+ *		Insert a hashtable entry for given tag and buffer ID.
+ *		Caller should be sure there is no conflicting entry.
  *
  * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * and call BufTableLookup to check for conflicting entry.
  */
-int
+void
 BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
 {
 	BufferLookupEnt *result;
-	bool		found;
 
 	Assert(buf_id >= 0);		/* -1 is reserved for not-in-table */
 	Assert(tagPtr->blockNum != P_NEW);	/* invalid tag */
 
 	result = (BufferLookupEnt *)
-		hash_search_with_hash_value(SharedBufHash,
-									(void *) tagPtr,
-									hashcode,
-									HASH_ENTER,
-									&found);
-
-	if (found)					/* found something already in the table */
-		return result->id;
+		hash_insert_with_hash_nocheck(SharedBufHash,
+									  (void *) tagPtr,
+									  hashcode);
 
 	result->id = buf_id;
+}
+
+void
+BufTableMove(BufferTag *oldTagPtr, uint32 oldHash,
+			 BufferTag *newTagPtr, uint32 newHash,
+			 int buf_id)
+{
+	BufferLookupEnt *result PG_USED_FOR_ASSERTS_ONLY;
 
-	return -1;
+	result = (BufferLookupEnt *)
+		hash_update_hash_key_with_hash_nocheck(SharedBufHash,
+											   (void *) oldTagPtr,
+											   oldHash,
+											   (void *) newTagPtr,
+											   newHash);
+	Assert(result->id == buf_id);
 }
 
 /*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e88e4e918b0..8fbc676811c 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1325,7 +1325,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * Note that we have not yet removed the hashtable entry for the old
 		 * tag.
 		 */
-		buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+		buf_id = BufTableLookup(&newTag, newHash);
 
 		if (buf_id >= 0)
 		{
@@ -1391,7 +1391,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			break;
 
 		UnlockBufHdr(buf, buf_state);
-		BufTableDelete(&newTag, newHash);
 		if (oldPartitionLock != NULL &&
 			oldPartitionLock != newPartitionLock)
 			LWLockRelease(oldPartitionLock);
@@ -1425,10 +1424,14 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	if (oldPartitionLock != NULL)
 	{
-		BufTableDelete(&oldTag, oldHash);
+		BufTableMove(&oldTag, oldHash, &newTag, newHash, buf->buf_id);
 		if (oldPartitionLock != newPartitionLock)
 			LWLockRelease(oldPartitionLock);
 	}
+	else
+	{
+		BufTableInsert(&newTag, newHash, buf->buf_id);
+	}
 
 	LWLockRelease(newPartitionLock);
 
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 6546e3c7c79..c0625927131 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -1288,6 +1288,196 @@ hash_update_hash_key(HTAB *hashp,
 	return true;
 }
 
+/*
+ * hash_update_hash_key_with_hash_nocheck -- change the hash key of an
+ * existing table entry without checking for a duplicate.
+ *
+ * Caller should be sure there is no conflicting entry.
+ */
+void *
+hash_update_hash_key_with_hash_nocheck(HTAB *hashp,
+									   const void *oldKeyPtr,
+									   uint32 oldhashvalue,
+									   const void *newKeyPtr,
+									   uint32 newhashvalue)
+{
+	HASHHDR    *hctl = hashp->hctl;
+	Size		keysize;
+	uint32		bucket;
+	uint32		newbucket;
+	long		segment_num;
+	long		segment_ndx;
+	HASHSEGMENT segp;
+	HASHBUCKET	currBucket;
+	HASHBUCKET *prevBucketPtr;
+	HASHBUCKET *oldPrevPtr;
+	HashCompareFunc match;
+
+#if HASH_STATISTICS
+	hash_accesses++;
+	hctl->accesses++;
+#endif
+
+	/* disallow updates if frozen */
+	if (hashp->frozen)
+		elog(ERROR, "cannot update in frozen hashtable \"%s\"",
+			 hashp->tabname);
+
+	/*
+	 * Lookup the existing element using the caller-supplied old key and its
+	 * hash value.  We must find it to be able to unlink it from its hash
+	 * chain; it is an error if no matching entry exists.
+	 */
+	bucket = calc_bucket(hctl, oldhashvalue);
+
+	segment_num = bucket >> hashp->sshift;
+	segment_ndx = MOD(bucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	prevBucketPtr = &segp[segment_ndx];
+	currBucket = *prevBucketPtr;
+
+	match = hashp->match;		/* save one fetch in inner loop */
+	keysize = hashp->keysize;	/* ditto */
+
+	while (currBucket != NULL)
+	{
+		if (currBucket->hashvalue == oldhashvalue &&
+			match(ELEMENTKEY(currBucket), oldKeyPtr, keysize) == 0)
+			break;
+		prevBucketPtr = &(currBucket->link);
+		currBucket = *prevBucketPtr;
+	}
+
+	if (currBucket == NULL)
+		elog(ERROR, "hash_update_hash_key_with_hash_nocheck argument is not in hashtable \"%s\"",
+			 hashp->tabname);
+
+	oldPrevPtr = prevBucketPtr;
+
+	/*
+	 * Now perform a simplified insertion: locate the hash chain the entry
+	 * belongs in under the new hash value, and link it at the head of that
+	 * chain without searching for a duplicate.
+	 */
+	newbucket = calc_bucket(hctl, newhashvalue);
+
+	segment_num = newbucket >> hashp->sshift;
+	segment_ndx = MOD(newbucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	prevBucketPtr = &segp[segment_ndx];
+
+	/* copy new key into record */
+	currBucket->hashvalue = newhashvalue;
+	hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
+
+	/*
+	 * If the old and new hash values belong to the same bucket, we need not
+	 * change any chain links, and indeed should not since this simplistic
+	 * relink would corrupt the list if currBucket is the last element.
+	 * (Unlike hash_update_hash_key, this variant does no duplicate-key scan
+	 * of the new chain, so the same-bucket case needs no further work.)
+	 */
+	if (bucket != newbucket)
+	{
+		/* OK to remove record from old hash bucket's chain. */
+		*oldPrevPtr = currBucket->link;
+
+		/* link into new hashbucket chain */
+		currBucket->link = *prevBucketPtr;
+		*prevBucketPtr = currBucket;
+	}
+	/* no memory barrier in `else` part because bucket is locked */
+
+	return (void *) ELEMENTKEY(currBucket);
+}
+
+/*
+ * hash_insert_with_hash_nocheck - insert a new entry into a bucket without
+ * checking for duplicates.
+ *
+ * Caller should be sure there is no conflicting entry.
+ */
+void *
+hash_insert_with_hash_nocheck(HTAB *hashp,
+							  const void *keyPtr,
+							  uint32 hashvalue)
+{
+	HASHHDR    *hctl = hashp->hctl;
+	int			freelist_idx = FREELIST_IDX(hctl, hashvalue);
+	uint32		bucket;
+	long		segment_num;
+	long		segment_ndx;
+	HASHSEGMENT segp;
+	HASHBUCKET	currBucket;
+	HASHBUCKET *prevBucketPtr;
+
+#if HASH_STATISTICS
+	hash_accesses++;
+	hctl->accesses++;
+#endif
+
+	/* disallow insertions if frozen */
+	if (hashp->frozen)
+		elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
+			 hashp->tabname);
+
+	if (!IS_PARTITIONED(hctl) &&
+		hctl->freeList[0].nentries > (long) hctl->max_bucket &&
+		!has_seq_scans(hashp))
+		(void) expand_table(hashp);
+
+	/*
+	 * Compute the bucket the new entry belongs in from the caller-supplied
+	 * hash value, and locate the head of its hash chain.  No search for a
+	 * conflicting key is made; the caller must have done that already.
+	 */
+	bucket = calc_bucket(hctl, hashvalue);
+
+	segment_num = bucket >> hashp->sshift;
+	segment_ndx = MOD(bucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	prevBucketPtr = &segp[segment_ndx];
+
+	currBucket = get_hash_entry(hashp, freelist_idx);
+
+	if (currBucket == NULL)
+	{
+		/* report a generic message */
+		if (hashp->isshared)
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of shared memory")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory")));
+	}
+
+	/* copy key into record */
+	currBucket->hashvalue = hashvalue;
+	currBucket->link = *prevBucketPtr;
+	hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, hashp->keysize);
+
+	*prevBucketPtr = currBucket;
+
+	return (void *) ELEMENTKEY(currBucket);
+}
+
 /*
  * Allocate a new hashtable entry if possible; return NULL if out of memory.
  * (Or, if the underlying space allocator throws error for out-of-memory,
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 33fcaf5c9a8..7dd8d0cb775 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -327,7 +327,10 @@ extern Size BufTableShmemSize(int size);
 extern void InitBufTable(int size);
 extern uint32 BufTableHashCode(BufferTag *tagPtr);
 extern int	BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
-extern int	BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
+extern void BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
+extern void BufTableMove(BufferTag *oldTagPtr, uint32 oldHash,
+						 BufferTag *newTagPtr, uint32 newHash,
+						 int buf_id);
 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
 
 /* localbuf.c */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index d7af0239c8c..9b086cf1112 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -139,6 +139,14 @@ extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
 										 bool *foundPtr);
 extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
 								 const void *newKeyPtr);
+extern void *hash_update_hash_key_with_hash_nocheck(HTAB *hashp,
+													const void *oldKeyPtr,
+													uint32 oldhashvalue,
+													const void *newKeyPtr,
+													uint32 newhashvalue);
+extern void *hash_insert_with_hash_nocheck(HTAB *hashp,
+										   const void *keyPtr,
+										   uint32 hashvalue);
 extern long hash_get_num_entries(HTAB *hashp);
 extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
 extern void *hash_seq_search(HASH_SEQ_STATUS *status);
-- 
2.33.0

Reply via email to