From f0ce8884c0588a89dff01ba8a9e989f6284ba495 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Tue, 2 Mar 2021 12:24:49 -0400
Subject: [PATCH v1 2/4] Use platform-specific implementations of pg_popcount.

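Turn pg_popcount into a function pointer that is resolved to the best
available implementation on its first call. Since this function takes
a buffer that's likely much longer than the word size, the cost of the
indirect function call is amortized over many words. For the asm
version, reuse the existing pg_popcount{32,64}_asm functions, but
declare them inline.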
---
 src/include/port/pg_bitutils.h |  2 +-
 src/port/pg_bitutils.c         | 87 ++++++++++++++++++++++++++++++++--
 2 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 2c40784830..708b9a6a67 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -212,7 +212,7 @@ extern int	pg_popcount32 (uint32 word);
 extern int	pg_popcount64 (uint64 word);
 
 /* Count the number of one-bits in a byte array */
-extern uint64 pg_popcount(const char *buf, int bytes);
+extern uint64 (*pg_popcount) (const char *buf, int bytes);
 
 /*
  * Rotate the bits of "word" to the right by n bits.
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 5dab793e49..9be8b78cff 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -141,7 +141,7 @@ pg_popcount_available(void)
  * pg_popcount32_asm
  *		Return the number of 1 bits set in word
  */
-static int
+static inline int
 pg_popcount32_asm(uint32 word)
 {
 	uint32		res;
@@ -154,7 +154,7 @@ __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
  * pg_popcount64_asm
  *		Return the number of 1 bits set in word
  */
-static int
+static inline int
 pg_popcount64_asm(uint64 word)
 {
 	uint64		res;
@@ -165,6 +165,18 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
 
 #endif							/* USE_POPCNT_ASM */
 
+static uint64	pg_popcount_slow(const char *buf, int bytes);
+
+#ifdef USE_POPCNT_ASM
+static bool pg_popcount_available(void);
+static uint64	pg_popcount_choose(const char *buf, int bytes);
+static uint64	pg_popcount_asm(const char *buf, int bytes);
+/* Start at the chooser; it repoints pg_popcount on the first call. */
+uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
+#else							/* no asm version compiled in */
+uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_slow;
+#endif							/* USE_POPCNT_ASM */
+
 
 /*
  * pg_popcount32
@@ -216,13 +228,30 @@ pg_popcount64(uint64 word)
 #endif							/* HAVE__BUILTIN_POPCOUNT */
 }
 
+#ifdef USE_POPCNT_ASM
+/*
+ * pg_popcount_choose
+ *		Initial target of the pg_popcount function pointer in asm builds.
+ *
+ * Asks pg_popcount_available() whether the asm implementation can be used,
+ * repoints pg_popcount accordingly, and forwards this call to the new target.
+ */
+static uint64
+pg_popcount_choose(const char *buf, int bytes)
+{
+	pg_popcount = pg_popcount_available() ? pg_popcount_asm : pg_popcount_slow;
+
+	return pg_popcount(buf, bytes);
+}
+#endif							/* USE_POPCNT_ASM */
+
 
 /*
- * pg_popcount
+ * pg_popcount_slow
  *		Returns the number of 1-bits in buf
  */
 uint64
-pg_popcount(const char *buf, int bytes)
+pg_popcount_slow(const char *buf, int bytes)
 {
 	uint64		popcnt = 0;
 
@@ -262,3 +291,53 @@ pg_popcount(const char *buf, int bytes)
 
 	return popcnt;
 }
+
+#ifdef USE_POPCNT_ASM
+
+/*
+ * pg_popcount_asm
+ *		Returns the number of 1-bits in buf, using POPCNT on aligned words
+ */
+static uint64
+pg_popcount_asm(const char *buf, int bytes)
+{
+	uint64		popcnt = 0;
+
+#if SIZEOF_VOID_P >= 8
+	/* Consume whole 64-bit words, but only if buf starts 8-byte aligned. */
+	if (buf == (const char *) TYPEALIGN(8, buf))
+	{
+		const uint64 *words = (const uint64 *) buf;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64_asm(*words++);
+			bytes -= 8;
+		}
+
+		buf = (const char *) words;
+	}
+#else
+	/* Consume whole 32-bit words, but only if buf starts 4-byte aligned. */
+	if (buf == (const char *) TYPEALIGN(4, buf))
+	{
+		const uint32 *words = (const uint32 *) buf;
+
+		while (bytes >= 4)
+		{
+			popcnt += pg_popcount32_asm(*words++);
+			bytes -= 4;
+		}
+
+		buf = (const char *) words;
+	}
+#endif
+
+	/* Count what is left (all of buf, if unaligned) one byte at a time. */
+	while (bytes--)
+		popcnt += pg_number_of_ones[(unsigned char) *buf++];
+
+	return popcnt;
+}
+
+#endif							/* USE_POPCNT_ASM */
-- 
2.22.0

