On Tue, Jan 30, 2024 at 5:04 PM John Naylor <johncnaylo...@gmail.com> wrote:
>
> On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aa...@cybertec.at> wrote:
> > But given that we know the data length and we have it in a register
> > already, it's easy enough to just mask out data past the end with a
> > shift. See patch 1. Performance benefit is about 1.5x Measured on a
> > small test harness that just hashes and finalizes an array of strings,
> > with a data dependency between consecutive hashes (next address
> > depends on the previous hash output).
>
> Interesting work! I've taken this idea and (I'm guessing, haven't
> tested) improved it by re-using an intermediate step for the
> conditional, simplifying the creation of the mask, and moving the
> bitscan out of the longest dependency chain.

This needed a rebase, and is now 0001. I plan to push this soon.

I also went and looked at the simplehash instances and found a few
that would be easy to switch over. Rather than try to figure out which
could benefit from shaving cycles, I changed all the string hashes,
and one more, in 0002 so they can act as examples.

0003 uses fasthash for resowner, as suggested by Heikki upthread.  Now
murmur64 has no callers, but it (or similar *) could be used in
pg_dump/common.c for hashing CatalogId (8 bytes).

Commit 42a1de3013 added a new use for string_hash, but I can't tell
from a quick glance whether it uses the truncation, so I'm going to
take a closer look before re-attaching the proposed dynahash change
again.

* some examples here:
https://www.boost.org/doc/libs/1_84_0/boost/container_hash/detail/hash_mix.hpp
From 63d8140f146b58ea044f3516ae5472febd6d1caf Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Tue, 6 Feb 2024 13:11:33 +0700
Subject: [PATCH v19 1/3] Speed up tail processing when hashing aligned C
 strings

After encountering the NUL terminator, the word-at-a-time loop exits
and we must hash the remaining bytes. Previously we calculated the
terminator's position and re-loaded the remaining bytes from the input
string. We already have all the data we need in a register, so let's
just mask off the bytes we need and hash them immediately. The mask can
be cheaply computed without knowing the terminator's position. We still
need that position for the length calculation, but the CPU can now
do that in parallel with other work, shortening the dependency chain.

Ants Aasma and John Naylor

Discussion: https://postgr.es/m/CANwKhkP7pCiW_5fAswLhs71-JKGEz1c1%2BPC0a_w1fwY4iGMqUA%40mail.gmail.com
---
 src/include/common/hashfn_unstable.h | 44 +++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 791750d136..bd7323fe05 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -219,8 +219,9 @@ static inline size_t
 fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
 {
 	const char *const start = str;
-	size_t		remainder;
+	uint64		chunk;
 	uint64		zero_byte_low;
+	uint64		mask;
 
 	Assert(PointerIsAligned(start, uint64));
 
@@ -239,7 +240,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
 	 */
 	for (;;)
 	{
-		uint64		chunk = *(uint64 *) str;
+		chunk = *(uint64 *) str;
 
 #ifdef WORDS_BIGENDIAN
 		zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -254,14 +255,37 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
 		str += FH_SIZEOF_ACCUM;
 	}
 
-	/*
-	 * The byte corresponding to the NUL will be 0x80, so the rightmost bit
-	 * position will be in the range 7, 15, ..., 63. Turn this into byte
-	 * position by dividing by 8.
-	 */
-	remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
-	fasthash_accum(hs, str, remainder);
-	str += remainder;
+	if (zero_byte_low & 0xFF)
+	{
+		/*
+		 * The next byte in the input is the NUL terminator, so we have
+		 * nothing to do.
+		 */
+	}
+	else
+	{
+		/*
+		 * Create a mask for the remaining bytes so we can combine them into
+		 * the hash. The mask also covers the NUL terminator, but that's
+		 * harmless. The mask could contain 0x80 in bytes corresponding to the
+		 * input past the terminator, but only where the input byte is zero or
+		 * one, so also harmless.
+		 */
+		mask = zero_byte_low | (zero_byte_low - 1);
+#ifdef WORDS_BIGENDIAN
+		/* need to mask the upper bytes */
+		mask = pg_bswap64(mask);
+#endif
+		hs->accum = chunk & mask;
+		fasthash_combine(hs);
+
+		/*
+		 * The byte corresponding to the NUL will be 0x80, so the rightmost
+		 * bit position will be in the range 15, 23, ..., 63. Turn this into
+		 * byte position by dividing by 8.
+		 */
+		str += pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+	}
 
 	return str - start;
 }
-- 
2.43.0

From 5dad8c783bc5d0d5f573ea136b13e79aa22d0371 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Tue, 5 Mar 2024 17:22:16 +0700
Subject: [PATCH v19 3/3] Use fasthash for hash_resource_elem

---
 src/backend/utils/resowner/resowner.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c
index ab9343bc5c..ea47c6cade 100644
--- a/src/backend/utils/resowner/resowner.c
+++ b/src/backend/utils/resowner/resowner.c
@@ -45,7 +45,7 @@
  */
 #include "postgres.h"
 
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "common/int.h"
 #include "storage/ipc.h"
 #include "storage/predicate.h"
@@ -220,14 +220,9 @@ hash_resource_elem(Datum value, const ResourceOwnerDesc *kind)
 	 * the hash too, otherwise those resources will collide a lot.  But
 	 * because there are only a few resource kinds like that - and only a few
 	 * resource kinds to begin with - we don't need to work too hard to mix
-	 * 'kind' into the hash.  Just add it with hash_combine(), it perturbs the
-	 * result enough for our purposes.
+	 * 'kind' into the hash.  Just use it as the seed for fasthash.
 	 */
-#if SIZEOF_DATUM == 8
-	return hash_combine64(murmurhash64((uint64) value), (uint64) kind);
-#else
-	return hash_combine(murmurhash32((uint32) value), (uint32) kind);
-#endif
+	return fasthash32((char *) &value, sizeof(value), (uint64) kind);
 }
 
 /*
-- 
2.43.0

From 3f87dc06e40dab708a70fe1a96cf9d909b6652cb Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Tue, 5 Mar 2024 16:59:39 +0700
Subject: [PATCH v19 2/3] Convert simplehash hash functions on strings to
 fasthash

---
 src/bin/pg_combinebackup/load_manifest.c  | 12 +++++++++---
 src/bin/pg_dump/pg_dumpall.c              | 12 +++++++++---
 src/bin/pg_rewind/filemap.c               | 12 +++++++++---
 src/bin/pg_verifybackup/pg_verifybackup.c | 12 +++++++++---
 src/common/blkreftable.c                  |  4 ++--
 5 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/bin/pg_combinebackup/load_manifest.c b/src/bin/pg_combinebackup/load_manifest.c
index 2b8e74fcf3..514e657ddc 100644
--- a/src/bin/pg_combinebackup/load_manifest.c
+++ b/src/bin/pg_combinebackup/load_manifest.c
@@ -15,7 +15,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "common/logging.h"
 #include "common/parse_manifest.h"
 #include "load_manifest.h"
@@ -239,7 +239,13 @@ combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
 static uint32
 hash_string_pointer(char *s)
 {
-	unsigned char *ss = (unsigned char *) s;
+	fasthash_state hs;
+	size_t		s_len;
 
-	return hash_bytes(ss, strlen(s));
+	fasthash_init(&hs, 0);
+
+	/* Hash string and save the length for tweaking the final mix. */
+	s_len = fasthash_accum_cstring(&hs, s);
+
+	return fasthash_final32(&hs, s_len);
 }
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 491311fe79..00ad33d803 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -21,7 +21,7 @@
 #include "catalog/pg_authid_d.h"
 #include "common/connect.h"
 #include "common/file_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "common/logging.h"
 #include "common/string.h"
 #include "dumputils.h"
@@ -1941,9 +1941,15 @@ dumpTimestamp(const char *msg)
 static uint32
 hash_string_pointer(char *s)
 {
-	unsigned char *ss = (unsigned char *) s;
+	fasthash_state hs;
+	size_t		s_len;
 
-	return hash_bytes(ss, strlen(s));
+	fasthash_init(&hs, 0);
+
+	/* Hash string and save the length for tweaking the final mix. */
+	s_len = fasthash_accum_cstring(&hs, s);
+
+	return fasthash_final32(&hs, s_len);
 }
 
 /*
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 255ddf2ffa..fbc8df27ba 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -28,7 +28,7 @@
 
 #include "catalog/pg_tablespace_d.h"
 #include "common/file_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
@@ -829,7 +829,13 @@ decide_file_actions(void)
 static uint32
 hash_string_pointer(const char *s)
 {
-	unsigned char *ss = (unsigned char *) s;
+	fasthash_state hs;
+	size_t		s_len;
 
-	return hash_bytes(ss, strlen(s));
+	fasthash_init(&hs, 0);
+
+	/* Hash string and save the length for tweaking the final mix. */
+	s_len = fasthash_accum_cstring(&hs, s);
+
+	return fasthash_final32(&hs, s_len);
 }
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index ae8c18f373..40c48a2e54 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -18,7 +18,7 @@
 #include <sys/stat.h>
 #include <time.h>
 
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "common/logging.h"
 #include "common/parse_manifest.h"
 #include "fe_utils/simple_list.h"
@@ -923,9 +923,15 @@ should_ignore_relpath(verifier_context *context, char *relpath)
 static uint32
 hash_string_pointer(char *s)
 {
-	unsigned char *ss = (unsigned char *) s;
+	fasthash_state hs;
+	size_t		s_len;
 
-	return hash_bytes(ss, strlen(s));
+	fasthash_init(&hs, 0);
+
+	/* Hash string and save the length for tweaking the final mix. */
+	s_len = fasthash_accum_cstring(&hs, s);
+
+	return fasthash_final32(&hs, s_len);
 }
 
 /*
diff --git a/src/common/blkreftable.c b/src/common/blkreftable.c
index bfa6f7ab5d..980f44c9df 100644
--- a/src/common/blkreftable.c
+++ b/src/common/blkreftable.c
@@ -37,7 +37,7 @@
 #endif
 
 #include "common/blkreftable.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "port/pg_crc32c.h"
 
 /*
@@ -124,7 +124,7 @@ struct BlockRefTableEntry
 #define SH_KEY_TYPE BlockRefTableKey
 #define SH_KEY key
 #define SH_HASH_KEY(tb, key) \
-	hash_bytes((const unsigned char *) &key, sizeof(BlockRefTableKey))
+	fasthash32((const char *) &key, sizeof(BlockRefTableKey), 0)
 #define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(BlockRefTableKey)) == 0)
 #define SH_SCOPE static inline
 #ifdef FRONTEND
-- 
2.43.0

Reply via email to