On Sun, 25 Apr 2021 at 18:48, Yura Sokolov <[email protected]> wrote: > If you read comments in SH_START_ITERATE, you'll see: > > * Search for the first empty element. As deletions during iterations > are > * supported, we want to start/end at an element that cannot be > affected > * by elements being shifted. > > * Iterate backwards, that allows the current element to be deleted, > even > * if there are backward shifts > > Therefore, it is safe to delete during iteration, and it doesn't lead > nor > require loop restart.
I had only skimmed that with a pre-loaded assumption that it wouldn't be safe. I didn't do a very good job of reading it as I failed to notice the lack of guarantees were about deleting items other than the current one. I didn't consider the option of finding a free bucket then looping backwards to avoid missing entries that are moved up during a delete. With that, I changed the patch to get rid of the SH_TRUNCATE and got rid of the smgrclose_internal which skips the hash delete. The code is much more similar to how it was now. In regards to the hashing stuff. I added a new function to pg_bitutils.h to rotate left and I'm using that instead of the other expression that was taken from nodeHash.c For the hash function, I've done some further benchmarking with: 1) The attached v2 patch 2) The attached + plus use_hash_combine.patch.txt which uses hash_combine() instead of pg_rotate_left32()ing the hashkey each time. 3) The attached v2 with use_hash_bytes.patch.txt applied. 4) Master I've also included the hash stats from each version of the hash function. I hope the numbers help indicate the reason I picked the hash function that I did. 1) v2 patch. CPU: user: 108.23 s, system: 6.97 s, elapsed: 115.63 s CPU: user: 114.78 s, system: 6.88 s, elapsed: 121.71 s CPU: user: 107.53 s, system: 5.70 s, elapsed: 113.28 s CPU: user: 108.43 s, system: 5.73 s, elapsed: 114.22 s CPU: user: 106.18 s, system: 5.73 s, elapsed: 111.96 s CPU: user: 108.04 s, system: 5.29 s, elapsed: 113.39 s CPU: user: 107.64 s, system: 5.64 s, elapsed: 113.34 s CPU: user: 106.64 s, system: 5.58 s, elapsed: 112.27 s CPU: user: 107.91 s, system: 5.40 s, elapsed: 113.36 s CPU: user: 115.35 s, system: 6.60 s, elapsed: 122.01 s Median = 113.375 s LOG: size: 4096, members: 2032, filled: 0.496094, total chain: 997, max chain: 5, avg chain: 0.490650, total_collisions: 422, max_collisions: 3, avg_collisions: 0.207677 2) v2 patch + use_hash_combine.patch.txt CPU: user: 113.22 s, system: 5.52 s, elapsed: 118.80 s CPU: user: 116.63 s, system: 5.87 s, elapsed: 122.56 s CPU: user: 115.33 s, system: 5.73 s, elapsed: 121.12 s CPU: user: 113.11 s, system: 5.61 s, elapsed: 118.78 s CPU: user: 112.56 s, system: 5.51 s, elapsed: 118.13 s CPU: user: 114.55 s, system: 5.80 s, elapsed: 120.40 s CPU: user: 121.79 s, system: 6.45 s, elapsed: 128.29 s CPU: user: 113.98 s, system: 4.50 s, elapsed: 118.52 s CPU: user: 113.24 s, system: 5.63 s, elapsed: 118.93 s CPU: user: 114.11 s, system: 5.60 s, elapsed: 119.78 s Median = 119.355 s LOG: size: 4096, members: 2032, filled: 0.496094, total chain: 971, max chain: 6, avg chain: 0.477854, total_collisions: 433, max_collisions: 4, avg_collisions: 0.213091 3) v2 patch + use_hash_bytes.patch.txt CPU: user: 120.87 s, system: 6.69 s, elapsed: 127.62 s CPU: user: 112.40 s, system: 4.68 s, elapsed: 117.14 s CPU: user: 113.19 s, system: 5.44 s, elapsed: 118.69 s CPU: user: 112.15 s, system: 4.73 s, elapsed: 116.93 s CPU: user: 111.10 s, system: 5.59 s, elapsed: 116.74 s CPU: user: 112.03 s, system: 5.74 s, elapsed: 117.82 s CPU: user: 113.69 s, system: 4.33 s, elapsed: 118.07 s CPU: user: 113.30 s, system: 4.19 s, elapsed: 117.55 s CPU: user: 112.77 s, system: 5.57 s, elapsed: 118.39 s CPU: user: 112.25 s, system: 4.59 s, elapsed: 116.88 s Median = 117.685 s LOG: size: 4096, members: 2032, filled: 0.496094, total chain: 900, max chain: 4, avg chain: 0.442913, total_collisions: 415, max_collisions: 4, avg_collisions: 0.204232 4) master CPU: user: 117.89 s, system: 5.7 s, elapsed: 123.65 s CPU: user: 117.81 s, system: 5.74 s, elapsed: 123.62 s CPU: user: 119.39 s, system: 5.75 s, elapsed: 125.2 s CPU: user: 117.98 s, system: 4.39 s, elapsed: 122.41 s CPU: user: 117.92 s, system: 4.79 s, elapsed: 122.76 s CPU: user: 119.84 s, system: 4.75 s, elapsed: 124.64 s CPU: user: 120.6 s, system: 5.82 s, elapsed: 126.49 s CPU: user: 118.74 s, system: 5.71 s, elapsed: 124.51 s CPU: user: 124.29 s, system: 6.79 s, elapsed: 131.14 s CPU: user: 118.73 s, system: 5.67 s, elapsed: 124.47 s Median = 124.49 s You can see that the bare v2 patch is quite a bit faster than any of the alternatives. We'd be better of with hash_bytes than using hash_combine() both for performance and for the seemingly better job the hash function does at reducing the hash collisions. David
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 8310f73212..33e5cadd03 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -34,8 +34,6 @@ typedef struct SMgrEntry
SMgrRelation data; /* Pointer to the
SMgrRelationData */
} SMgrEntry;
-static inline uint32 relfilenodebackend_hash(RelFileNodeBackend *rnode);
-
/*
* Because simplehash.h does not provide a stable pointer to hash table
* entries, we don't make the element type a SMgrRelation directly, instead we
@@ -51,7 +49,7 @@ static inline uint32
relfilenodebackend_hash(RelFileNodeBackend *rnode);
#define SH_ELEMENT_TYPE SMgrEntry
#define SH_KEY_TYPE RelFileNodeBackend
#define SH_KEY data->smgr_rnode
-#define SH_HASH_KEY(tb, key) relfilenodebackend_hash(&key)
+#define SH_HASH_KEY(tb, key) hash_bytes((const unsigned char *) &key,
sizeof(RelFileNodeBackend))
#define SH_EQUAL(tb, a, b) (memcmp(&a, &b,
sizeof(RelFileNodeBackend)) == 0)
#define SH_SCOPE static inline
#define SH_STORE_HASH
@@ -133,37 +131,6 @@ static dlist_head unowned_relns;
/* local function prototypes */
static void smgrshutdown(int code, Datum arg);
-/*
- * relfilenodebackend_hash
- * Custom rolled hash function for simplehash table.
- *
- * smgropen() is often a bottleneck in CPU bound workloads during crash
- * recovery. We make use of this custom hash function rather than using
- * hash_bytes as it gives us a little bit more performance.
- *
- * XXX What if sizeof(Oid) is not 4?
- */
-static inline uint32
-relfilenodebackend_hash(RelFileNodeBackend *rnode)
-{
- uint32 hashkey;
-
- hashkey = murmurhash32((uint32) rnode->node.spcNode);
-
- /* rotate hashkey left 1 bit at each step */
- hashkey = pg_rotate_left32(hashkey, 1);
- hashkey ^= murmurhash32((uint32) rnode->node.dbNode);
-
- hashkey = pg_rotate_left32(hashkey, 1);
- hashkey ^= murmurhash32((uint32) rnode->node.relNode);
-
- hashkey = pg_rotate_left32(hashkey, 1);
- hashkey ^= murmurhash32((uint32) rnode->backend);
-
- return hashkey;
-}
-
-
/*
* smgrinit(), smgrshutdown() -- Initialize or shut down storage
* managers.
v2-0001-Use-simplehash.h-hashtables-in-SMgr.patch
Description: Binary data
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 8310f73212..f291ded795 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -147,18 +147,19 @@ static inline uint32
relfilenodebackend_hash(RelFileNodeBackend *rnode)
{
uint32 hashkey;
+ uint32 hashkey2;
hashkey = murmurhash32((uint32) rnode->node.spcNode);
/* rotate hashkey left 1 bit at each step */
- hashkey = pg_rotate_left32(hashkey, 1);
- hashkey ^= murmurhash32((uint32) rnode->node.dbNode);
+ hashkey2 = murmurhash32((uint32) rnode->node.dbNode);
+ hashkey = hash_combine(hashkey, hashkey2);
- hashkey = pg_rotate_left32(hashkey, 1);
- hashkey ^= murmurhash32((uint32) rnode->node.relNode);
+ hashkey2 = murmurhash32((uint32) rnode->node.relNode);
+ hashkey = hash_combine(hashkey, hashkey2);
- hashkey = pg_rotate_left32(hashkey, 1);
- hashkey ^= murmurhash32((uint32) rnode->backend);
+ hashkey2 = murmurhash32((uint32) rnode->backend);
+ hashkey = hash_combine(hashkey, hashkey2);
return hashkey;
}
diff --git a/src/backend/access/transam/xlog.c
b/src/backend/access/transam/xlog.c
index adfc6f67e2..5d34e06eab 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7615,7 +7615,8 @@ StartupXLOG(void)
ereport(LOG,
(errmsg("last completed
transaction was at log time %s",
timestamptz_to_str(xtime))));
-
+ smgrstats();
+ elog(PANIC, "recovery PANIC");
InRedo = false;
}
else
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 64a26e06c6..850b51e316 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -183,6 +183,12 @@ smgr_entry_cleanup(SMgrRelation reln)
pfree(reln);
}
+void
+smgrstats(void)
+{
+ if (SMgrRelationHash != NULL)
+ smgrtable_stat(SMgrRelationHash);
+}
/*
* smgrinit(), smgrshutdown() -- Initialize or shut down storage
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a6fbf7b6a6..ac010af74a 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -77,6 +77,7 @@ typedef SMgrRelationData *SMgrRelation;
#define SmgrIsTemp(smgr) \
RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
+extern void smgrstats(void);
extern void smgrinit(void);
extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
