From ce604dfa855e765383656baf37bbc5ce51bb6034 Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Thu, 22 Apr 2021 17:03:46 +1200
Subject: [PATCH v3] Use simplehash.h hashtables in SMgr

The hash table lookups done in SMgr can quite often be a bottleneck during
crash recovery.  Traditionally these use dynahash. Here we swap dynahash
out and use simplehash instead.  This improves lookup performance.

Some changes are required from simplehash.h here to make this work.  The
reason for this is that code external to smgr.c does point to the hashed
SMgrRelation. Since simplehash does reallocate the bucket array when
increasing the size of the table and also shuffle entries around during
deletes, code pointing directly into hash entries would be a bad idea. To
overcome this issue we only store a pointer to the SMgrRelationData in the
hash table entry and maintain a separate allocation for that data. This
does mean an additional pointer dereference during lookups, but only when
the hash value matches, so the significant majority of the time that will
only be done for the item we are actually looking for.

Since the hash table key is stored in the referenced SMgrRelation, we need
to add two new macros to allow simplehash to allocate the memory for the
SMgrEntry during inserts before it tries to set the key.  A new macro has
also been added to allow simplehash implementations to perform cleanup
when items are removed from the table.
---
 src/backend/storage/smgr/smgr.c | 111 ++++++++++++++++++++++++--------
 src/include/lib/simplehash.h    |  32 +++++++++
 2 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 4dc24649df..209218f781 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -18,14 +18,49 @@
 #include "postgres.h"
 
 #include "access/xlog.h"
+#include "common/hashfn.h"
 #include "lib/ilist.h"
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/md.h"
 #include "storage/smgr.h"
-#include "utils/hsearch.h"
 #include "utils/inval.h"
 
+/* Hash table entry type for SMgrRelationHash */
+typedef struct SMgrEntry
+{
+	int			status;			/* Hash table status */
+	uint32		hash;			/* Hash value (cached) */
+	SMgrRelation data;			/* Pointer to the SMgrRelationData */
+} SMgrEntry;
+
+static inline uint32 relfilenodebackend_hash(RelFileNodeBackend *rnode);
+
+/*
+ * Because simplehash.h does not provide a stable pointer to hash table
+ * entries, we don't make the element type a SMgrRelation directly, instead we
+ * use an SMgrEntry type which has a pointer to the data field.  simplehash can
+ * move entries around when adding or removing items from the hash table so
+ * having the SMgrRelation as a pointer inside the SMgrEntry allows external
+ * code to keep their own pointers to the SMgrRelation.  Relcache does this.
+ * We use the SH_ENTRY_INITIALIZER to allocate memory for the SMgrRelationData
+ * when a new entry is created.  We also define SH_ENTRY_CLEANUP to execute
+ * some cleanup when removing an item from the table.
+ */
+#define SH_PREFIX		smgrtable
+#define SH_ELEMENT_TYPE	SMgrEntry
+#define SH_KEY_TYPE		RelFileNodeBackend
+#define SH_KEY			data->smgr_rnode
+#define SH_HASH_KEY(tb, key)	relfilenodebackend_hash(&key)
+#define SH_EQUAL(tb, a, b)		(memcmp(&a, &b, sizeof(RelFileNodeBackend)) == 0)
+#define SH_SCOPE		static inline
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) a->hash
+#define SH_ENTRY_INITIALIZER(a) a->data = MemoryContextAlloc(TopMemoryContext, sizeof(SMgrRelationData))
+#define SH_ENTRY_CLEANUP(a) pfree(a->data)
+#define SH_DEFINE
+#define SH_DECLARE
+#include "lib/simplehash.h"
 
 /*
  * This struct of function pointers defines the API between smgr.c and
@@ -91,13 +126,43 @@ static const int NSmgr = lengthof(smgrsw);
  * Each backend has a hashtable that stores all extant SMgrRelation objects.
  * In addition, "unowned" SMgrRelation objects are chained together in a list.
  */
-static HTAB *SMgrRelationHash = NULL;
+static smgrtable_hash *SMgrRelationHash = NULL;
 
 static dlist_head unowned_relns;
 
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
 
+/*
+ * relfilenodebackend_hash
+ *		Custom rolled hash function for simplehash table.
+ *
+ * smgropen() is often a bottleneck in CPU bound workloads during crash
+ * recovery.  We make use of this custom hash function rather than using
+ * hash_bytes as it gives us a little bit more performance.
+ *
+ * XXX What if sizeof(Oid) is not 4?
+ */
+static inline uint32
+relfilenodebackend_hash(RelFileNodeBackend *rnode)
+{
+	uint32		hashkey;
+
+	hashkey = murmurhash32((uint32) rnode->node.spcNode);
+
+	/* rotate hashkey left 1 bit at each step */
+	hashkey = pg_rotate_right32(hashkey, 31);
+	hashkey ^= murmurhash32((uint32) rnode->node.dbNode);
+
+	hashkey = pg_rotate_right32(hashkey, 31);
+	hashkey ^= murmurhash32((uint32) rnode->node.relNode);
+
+	hashkey = pg_rotate_right32(hashkey, 31);
+	hashkey ^= murmurhash32((uint32) rnode->backend);
+
+	return hashkey;
+}
+
 
 /*
  *	smgrinit(), smgrshutdown() -- Initialize or shut down storage
@@ -147,31 +212,26 @@ smgropen(RelFileNode rnode, BackendId backend)
 {
 	RelFileNodeBackend brnode;
 	SMgrRelation reln;
+	SMgrEntry  *entry;
 	bool		found;
 
-	if (SMgrRelationHash == NULL)
+	if (unlikely(SMgrRelationHash == NULL))
 	{
 		/* First time through: initialize the hash table */
-		HASHCTL		ctl;
-
-		ctl.keysize = sizeof(RelFileNodeBackend);
-		ctl.entrysize = sizeof(SMgrRelationData);
-		SMgrRelationHash = hash_create("smgr relation table", 400,
-									   &ctl, HASH_ELEM | HASH_BLOBS);
+		SMgrRelationHash = smgrtable_create(TopMemoryContext, 400, NULL);
 		dlist_init(&unowned_relns);
 	}
 
 	/* Look up or create an entry */
 	brnode.node = rnode;
 	brnode.backend = backend;
-	reln = (SMgrRelation) hash_search(SMgrRelationHash,
-									  (void *) &brnode,
-									  HASH_ENTER, &found);
+	entry = smgrtable_insert(SMgrRelationHash, brnode, &found);
+	reln = entry->data;
 
 	/* Initialize it if not present before */
 	if (!found)
 	{
-		/* hash_search already filled in the lookup key */
+		/* smgrtable_insert already filled in the lookup key */
 		reln->smgr_owner = NULL;
 		reln->smgr_targblock = InvalidBlockNumber;
 		for (int i = 0; i <= MAX_FORKNUM; ++i)
@@ -266,9 +326,7 @@ smgrclose(SMgrRelation reln)
 	if (!owner)
 		dlist_delete(&reln->node);
 
-	if (hash_search(SMgrRelationHash,
-					(void *) &(reln->smgr_rnode),
-					HASH_REMOVE, NULL) == NULL)
+	if (!smgrtable_delete(SMgrRelationHash, reln->smgr_rnode))
 		elog(ERROR, "SMgrRelation hashtable corrupted");
 
 	/*
@@ -285,17 +343,17 @@ smgrclose(SMgrRelation reln)
 void
 smgrcloseall(void)
 {
-	HASH_SEQ_STATUS status;
-	SMgrRelation reln;
+	smgrtable_iterator iterator;
+	SMgrEntry  *entry;
 
 	/* Nothing to do if hashtable not set up */
 	if (SMgrRelationHash == NULL)
 		return;
 
-	hash_seq_init(&status, SMgrRelationHash);
+	smgrtable_start_iterate(SMgrRelationHash, &iterator);
 
-	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
-		smgrclose(reln);
+	while ((entry = smgrtable_iterate(SMgrRelationHash, &iterator)) != NULL)
+		smgrclose(entry->data);
 }
 
 /*
@@ -309,17 +367,14 @@ smgrcloseall(void)
 void
 smgrclosenode(RelFileNodeBackend rnode)
 {
-	SMgrRelation reln;
+	SMgrEntry  *entry;
 
 	/* Nothing to do if hashtable not set up */
 	if (SMgrRelationHash == NULL)
 		return;
-
-	reln = (SMgrRelation) hash_search(SMgrRelationHash,
-									  (void *) &rnode,
-									  HASH_FIND, NULL);
-	if (reln != NULL)
-		smgrclose(reln);
+	entry = smgrtable_lookup(SMgrRelationHash, rnode);
+	if (entry != NULL)
+		smgrclose(entry->data);
 }
 
 /*
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index da51781e98..2c4fc7e8c4 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -50,6 +50,13 @@
  *	  - SH_HASH_KEY(table, key) - generate hash for the key
  *	  - SH_STORE_HASH - if defined the hash is stored in the elements
  *	  - SH_GET_HASH(tb, a) - return the field to store the hash in
+ *	  - SH_ENTRY_INITIALIZER(a) - if defined, the code in this macro is called
+ *		for new entries directly before any other internal code makes any
+ *		changes to setup the new entry.  This could be used to do things like
+ *		initialize memory for the bucket.
+ *	  - SH_ENTRY_CLEANUP(a) - if defined, the code in this macro is called
+ *		when an entry is removed from the hash table.  This could be used to
+ *		free memory allocated in the bucket by SH_ENTRY_INITIALIZER
  *
  *	  The element type is required to contain a "status" member that can store
  *	  the range of values defined in the SH_STATUS enum.
@@ -464,6 +471,17 @@ SH_DESTROY(SH_TYPE * tb)
 SH_SCOPE void
 SH_RESET(SH_TYPE * tb)
 {
+#ifdef SH_ENTRY_CLEANUP
+	/* Execute the cleanup code when SH_ENTRY_CLEANUP has been defined */
+	for (int i = 0; i < tb->size; i++)
+	{
+		SH_ELEMENT_TYPE *entry = &tb->data[i];
+
+		if (entry->status == SH_STATUS_IN_USE)
+			SH_ENTRY_CLEANUP(entry);
+	}
+#endif
+
 	memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
 	tb->members = 0;
 }
@@ -634,6 +652,9 @@ restart:
 		if (entry->status == SH_STATUS_EMPTY)
 		{
 			tb->members++;
+#ifdef SH_ENTRY_INITIALIZER
+			SH_ENTRY_INITIALIZER(entry);
+#endif
 			entry->SH_KEY = key;
 #ifdef SH_STORE_HASH
 			SH_GET_HASH(tb, entry) = hash;
@@ -721,6 +742,9 @@ restart:
 
 			/* and fill the now empty spot */
 			tb->members++;
+#ifdef SH_ENTRY_INITIALIZER
+			SH_ENTRY_INITIALIZER(entry);
+#endif
 
 			entry->SH_KEY = key;
 #ifdef SH_STORE_HASH
@@ -856,6 +880,9 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 			SH_ELEMENT_TYPE *lastentry = entry;
 
 			tb->members--;
+#ifdef SH_ENTRY_CLEANUP
+			SH_ENTRY_CLEANUP(entry);
+#endif
 
 			/*
 			 * Backward shift following elements till either an empty element
@@ -919,6 +946,9 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 	curelem = entry - &tb->data[0];
 
 	tb->members--;
+#ifdef SH_ENTRY_CLEANUP
+	SH_ENTRY_CLEANUP(entry);
+#endif
 
 	/*
 	 * Backward shift following elements till either an empty element or an
@@ -1133,6 +1163,8 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_DECLARE
 #undef SH_DEFINE
 #undef SH_GET_HASH
+#undef SH_ENTRY_INITIALIZER
+#undef SH_ENTRY_CLEANUP
 #undef SH_STORE_HASH
 #undef SH_USE_NONDEFAULT_ALLOCATOR
 #undef SH_EQUAL
-- 
2.27.0

