From 3b4f2c964e40c36824ed18658a53f98568e20af7 Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Thu, 22 Apr 2021 17:03:46 +1200
Subject: [PATCH v2] Use simplehash.h hashtables in SMgr

The hash table lookups done in SMgr can quite often be a bottleneck during
crash recovery.  Traditionally these use dynahash. Here we swap dynahash
out and use simplehash instead.  This improves lookup performance.

Some changes are required from simplehash.h here to make this work.  The
reason for this is that code external to smgr.c does point to the hashed
SMgrRelation. Since simplehash does reallocate the bucket array when
increasing the size of the table and also shuffle entries around during
deletes, code pointing directly into hash entries would be a bad idea. To
overcome this issue we only store a pointer to the SMgrRelationData in the
hash table entry and maintain a separate allocation for that data. This
does mean an additional pointer dereference during lookups, but only when
the hash value matches, so the significant majority of the time that will
only be done for the item we are actually looking for.

Since the hash table key is stored in the referenced SMgrRelation, we need
to add two new macros to allow simplehash to allocate the memory for the
SMgrEntry during inserts before it tries to set the key.  A new macro has
also been added to allow simplehash implementations to perform cleanup
when items are removed from the table.
---
 src/backend/storage/smgr/smgr.c | 111 ++++++++++++++++++++++++--------
 src/include/lib/simplehash.h    |  20 +++++-
 src/include/port/pg_bitutils.h  |  13 +++-
 3 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 4dc24649df..8310f73212 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -18,14 +18,49 @@
 #include "postgres.h"
 
 #include "access/xlog.h"
+#include "common/hashfn.h"
 #include "lib/ilist.h"
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/md.h"
 #include "storage/smgr.h"
-#include "utils/hsearch.h"
 #include "utils/inval.h"
 
+/* Hash table entry type for SMgrRelationHash */
+typedef struct SMgrEntry
+{
+	int			status;			/* Hash table status */
+	uint32		hash;			/* Hash value (cached) */
+	SMgrRelation data;			/* Pointer to the SMgrRelationData */
+} SMgrEntry;
+
+static inline uint32 relfilenodebackend_hash(RelFileNodeBackend *rnode);
+
+/*
+ * Because simplehash.h does not provide a stable pointer to hash table
+ * entries, we don't make the element type a SMgrRelation directly, instead we
+ * use an SMgrEntry type which has a pointer to the data field.  simplehash can
+ * move entries around when adding or removing items from the hash table so
+ * having the SMgrRelation as a pointer inside the SMgrEntry allows external
+ * code to keep their own pointers to the SMgrRelation.  Relcache does this.
+ * We use the SH_ENTRY_INITIALIZER to allocate memory for the SMgrRelationData
+ * when a new entry is created.  We also define SH_ENTRY_CLEANUP to execute
+ * some cleanup when removing an item from the table.
+ */
+#define SH_PREFIX		smgrtable
+#define SH_ELEMENT_TYPE	SMgrEntry
+#define SH_KEY_TYPE		RelFileNodeBackend
+#define	SH_KEY			data->smgr_rnode
+#define SH_HASH_KEY(tb, key)	relfilenodebackend_hash(&key)
+#define SH_EQUAL(tb, a, b)		(memcmp(&a, &b, sizeof(RelFileNodeBackend)) == 0)
+#define	SH_SCOPE		static inline
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) a->hash
+#define SH_ENTRY_INITIALIZER(a) a->data = MemoryContextAlloc(TopMemoryContext, sizeof(SMgrRelationData))
+#define SH_ENTRY_CLEANUP(a) pfree(a->data)
+#define SH_DEFINE
+#define SH_DECLARE
+#include "lib/simplehash.h"
 
 /*
  * This struct of function pointers defines the API between smgr.c and
@@ -91,13 +126,43 @@ static const int NSmgr = lengthof(smgrsw);
  * Each backend has a hashtable that stores all extant SMgrRelation objects.
  * In addition, "unowned" SMgrRelation objects are chained together in a list.
  */
-static HTAB *SMgrRelationHash = NULL;
+static smgrtable_hash *SMgrRelationHash = NULL;
 
 static dlist_head unowned_relns;
 
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
 
+/*
+ * relfilenodebackend_hash
+ *		Custom rolled hash function for simplehash table.
+ *
+ * smgropen() is often a bottleneck in CPU bound workloads during crash
+ * recovery.  We make use of this custom hash function rather than using
+ * hash_bytes as it gives us a little bit more performance.
+ *
+ * XXX What if sizeof(Oid) is not 4?
+ */
+static inline uint32
+relfilenodebackend_hash(RelFileNodeBackend *rnode)
+{
+	uint32		hashkey;
+
+	hashkey = murmurhash32((uint32) rnode->node.spcNode);
+
+	/* rotate hashkey left 1 bit at each step */
+	hashkey = pg_rotate_left32(hashkey, 1);
+	hashkey ^= murmurhash32((uint32) rnode->node.dbNode);
+
+	hashkey = pg_rotate_left32(hashkey, 1);
+	hashkey ^= murmurhash32((uint32) rnode->node.relNode);
+
+	hashkey = pg_rotate_left32(hashkey, 1);
+	hashkey ^= murmurhash32((uint32) rnode->backend);
+
+	return hashkey;
+}
+
 
 /*
  *	smgrinit(), smgrshutdown() -- Initialize or shut down storage
@@ -147,31 +212,26 @@ smgropen(RelFileNode rnode, BackendId backend)
 {
 	RelFileNodeBackend brnode;
 	SMgrRelation reln;
+	SMgrEntry  *entry;
 	bool		found;
 
-	if (SMgrRelationHash == NULL)
+	if (unlikely(SMgrRelationHash == NULL))
 	{
 		/* First time through: initialize the hash table */
-		HASHCTL		ctl;
-
-		ctl.keysize = sizeof(RelFileNodeBackend);
-		ctl.entrysize = sizeof(SMgrRelationData);
-		SMgrRelationHash = hash_create("smgr relation table", 400,
-									   &ctl, HASH_ELEM | HASH_BLOBS);
+		SMgrRelationHash = smgrtable_create(TopMemoryContext, 400, NULL);
 		dlist_init(&unowned_relns);
 	}
 
 	/* Look up or create an entry */
 	brnode.node = rnode;
 	brnode.backend = backend;
-	reln = (SMgrRelation) hash_search(SMgrRelationHash,
-									  (void *) &brnode,
-									  HASH_ENTER, &found);
+	entry = smgrtable_insert(SMgrRelationHash, brnode, &found);
+	reln = entry->data;
 
 	/* Initialize it if not present before */
 	if (!found)
 	{
-		/* hash_search already filled in the lookup key */
+		/* smgrtable_insert already filled in the lookup key */
 		reln->smgr_owner = NULL;
 		reln->smgr_targblock = InvalidBlockNumber;
 		for (int i = 0; i <= MAX_FORKNUM; ++i)
@@ -266,9 +326,7 @@ smgrclose(SMgrRelation reln)
 	if (!owner)
 		dlist_delete(&reln->node);
 
-	if (hash_search(SMgrRelationHash,
-					(void *) &(reln->smgr_rnode),
-					HASH_REMOVE, NULL) == NULL)
+	if (!smgrtable_delete(SMgrRelationHash, reln->smgr_rnode))
 		elog(ERROR, "SMgrRelation hashtable corrupted");
 
 	/*
@@ -285,17 +343,17 @@ smgrclose(SMgrRelation reln)
 void
 smgrcloseall(void)
 {
-	HASH_SEQ_STATUS status;
-	SMgrRelation reln;
+	smgrtable_iterator iterator;
+	SMgrEntry  *entry;
 
 	/* Nothing to do if hashtable not set up */
 	if (SMgrRelationHash == NULL)
 		return;
 
-	hash_seq_init(&status, SMgrRelationHash);
+	smgrtable_start_iterate(SMgrRelationHash, &iterator);
 
-	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
-		smgrclose(reln);
+	while ((entry = smgrtable_iterate(SMgrRelationHash, &iterator)) != NULL)
+		smgrclose(entry->data);
 }
 
 /*
@@ -309,17 +367,14 @@ smgrcloseall(void)
 void
 smgrclosenode(RelFileNodeBackend rnode)
 {
-	SMgrRelation reln;
+	SMgrEntry  *entry;
 
 	/* Nothing to do if hashtable not set up */
 	if (SMgrRelationHash == NULL)
 		return;
-
-	reln = (SMgrRelation) hash_search(SMgrRelationHash,
-									  (void *) &rnode,
-									  HASH_FIND, NULL);
-	if (reln != NULL)
-		smgrclose(reln);
+	entry = smgrtable_lookup(SMgrRelationHash, rnode);
+	if (entry != NULL)
+		smgrclose(entry->data);
 }
 
 /*
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index da51781e98..4fce182de6 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -50,6 +50,11 @@
  *	  - SH_HASH_KEY(table, key) - generate hash for the key
  *	  - SH_STORE_HASH - if defined the hash is stored in the elements
  *	  - SH_GET_HASH(tb, a) - return the field to store the hash in
+ *	  - SH_ENTRY_INITIALIZER(a) - if defined, the code in this macro is called
+ *		for new entries directly before any other internal code makes any
+ *		changes to setup the new entry
+ *	  - SH_ENTRY_CLEANUP(a) - if defined, the code in this macro is called
+ *		when an entry is removed from the hash table
  *
  *	  The element type is required to contain a "status" member that can store
  *	  the range of values defined in the SH_STATUS enum.
@@ -634,6 +639,9 @@ restart:
 		if (entry->status == SH_STATUS_EMPTY)
 		{
 			tb->members++;
+#ifdef SH_ENTRY_INITIALIZER
+			SH_ENTRY_INITIALIZER(entry);
+#endif
 			entry->SH_KEY = key;
 #ifdef SH_STORE_HASH
 			SH_GET_HASH(tb, entry) = hash;
@@ -721,6 +729,9 @@ restart:
 
 			/* and fill the now empty spot */
 			tb->members++;
+#ifdef SH_ENTRY_INITIALIZER
+			SH_ENTRY_INITIALIZER(entry);
+#endif
 
 			entry->SH_KEY = key;
 #ifdef SH_STORE_HASH
@@ -856,7 +867,9 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 			SH_ELEMENT_TYPE *lastentry = entry;
 
 			tb->members--;
-
+#ifdef SH_ENTRY_CLEANUP
+			SH_ENTRY_CLEANUP(entry);
+#endif
 			/*
 			 * Backward shift following elements till either an empty element
 			 * or an element at its optimal position is encountered.
@@ -919,6 +932,9 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 	curelem = entry - &tb->data[0];
 
 	tb->members--;
+#ifdef SH_ENTRY_CLEANUP
+	SH_ENTRY_CLEANUP(entry);
+#endif
 
 	/*
 	 * Backward shift following elements till either an empty element or an
@@ -1133,6 +1149,8 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_DECLARE
 #undef SH_DEFINE
 #undef SH_GET_HASH
+#undef SH_ENTRY_INITIALIZER
+#undef SH_ENTRY_CLEANUP
 #undef SH_STORE_HASH
 #undef SH_USE_NONDEFAULT_ALLOCATOR
 #undef SH_EQUAL
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index f9b77ec278..581957fe55 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -215,7 +215,8 @@ extern int	(*pg_popcount64) (uint64 word);
 extern uint64 pg_popcount(const char *buf, int bytes);
 
 /*
- * Rotate the bits of "word" to the right by n bits.
+ * pg_rotate_right32
+ *		Rotate the bits of 'word' to the right by 'n' bits.
  */
 static inline uint32
 pg_rotate_right32(uint32 word, int n)
@@ -223,4 +224,14 @@ pg_rotate_right32(uint32 word, int n)
 	return (word >> n) | (word << (sizeof(word) * BITS_PER_BYTE - n));
 }
 
+/*
+ * pg_rotate_left32
+ *		Rotate the bits of 'word' to the left by 'n' bits.
+ */
+static inline uint32
+pg_rotate_left32(uint32 word, int n)
+{
+	return (word << n) | (word >> (sizeof(word) * BITS_PER_BYTE - n));
+}
+
 #endif							/* PG_BITUTILS_H */
-- 
2.21.0.windows.1

