Hi,

There are plenty of workloads where SearchSysCache()/SearchCatCache() shows up as a major runtime factor, primarily ones with very fast queries.
A fair while ago, before I had my commit bit, I'd posted [1]. Looking at the profiles/benchmarks I was easily able to confirm that it still helps, but also that there's still a lot left on the table.

Attached is a patch that tries to improve sys/catcache performance, going further than the patch referenced earlier. This primarily includes four pieces:

1) Avoidance of FunctionCallInfo-based function calls, replaced by more
   efficient functions with a native C argument interface.

2) Only initializing the ScanKey when necessary, i.e. on catcache misses,
   avoiding unnecessary CPU cache misses.

3) Allowing the compiler to specialize critical SearchCatCache paths for a
   specific number of attributes, which allows it to unroll loops and avoid
   other nkeys-dependent initialization.

4) Splitting the heap lookup from the hash lookup, reducing stack
   allocations etc. in the common case.

There's further potential:

- replace the open-coded hash table with simplehash - the list walk right
  now shows up in profiles.
- as oid is the only system column supported, avoid the use of
  heap_getsysattr() by adding an explicit branch for
  ObjectIdAttributeNumber - this shows up in profiles.
- move cache initialization out of the search path.
- add proper functions, rather than macros, for SearchSysCacheCopyN etc.,
  but right now they don't show up in profiles.

The reason the macro wrappers in syscache.c/h have to be changed, rather than just catcache, is that doing otherwise would require exposing the SysCache array to the outside. That might be a good idea anyway, but it's for another day.

This patch gives me roughly an 8% speedup in a workload that consists of a fast query returning a lot of columns. If I apply a few other performance patches, this patch itself starts to make a bigger difference, of around 11%.

Greetings,

Andres Freund

[1] https://www.postgresql.org/message-id/20130905191323.gc490...@alap2.anarazel.de
>From 2b3e06380d5a339efc94e748aa57985d3bb80223 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Wed, 13 Sep 2017 18:43:46 -0700 Subject: [PATCH 4/8] Add inline murmurhash32(int32) function. The function already existed in tidbitmap.c but more users requiring fast hashing of 32bit ints are coming up. --- src/backend/nodes/tidbitmap.c | 20 ++------------------ src/include/utils/hashutils.h | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index c4e53adb0c..01d6bc5c11 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -45,6 +45,7 @@ #include "nodes/tidbitmap.h" #include "storage/lwlock.h" #include "utils/dsa.h" +#include "utils/hashutils.h" /* * The maximum number of tuples per page is not large (typically 256 with @@ -237,30 +238,13 @@ static int tbm_comparator(const void *left, const void *right); static int tbm_shared_comparator(const void *left, const void *right, void *arg); -/* - * Simple inline murmur hash implementation for the exact width required, for - * performance. 
- */ -static inline uint32 -hash_blockno(BlockNumber b) -{ - uint32 h = b; - - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; -} - /* define hashtable mapping block numbers to PagetableEntry's */ #define SH_USE_NONDEFAULT_ALLOCATOR #define SH_PREFIX pagetable #define SH_ELEMENT_TYPE PagetableEntry #define SH_KEY_TYPE BlockNumber #define SH_KEY blockno -#define SH_HASH_KEY(tb, key) hash_blockno(key) +#define SH_HASH_KEY(tb, key) murmurhash32(key) #define SH_EQUAL(tb, a, b) a == b #define SH_SCOPE static inline #define SH_DEFINE diff --git a/src/include/utils/hashutils.h b/src/include/utils/hashutils.h index 56b7bfc9cb..35281689e8 100644 --- a/src/include/utils/hashutils.h +++ b/src/include/utils/hashutils.h @@ -20,4 +20,22 @@ hash_combine(uint32 a, uint32 b) return a; } + +/* + * Simple inline murmur hash implementation hashing a 32 bit integer, for + * performance. + */ +static inline uint32 +murmurhash32(uint32 data) +{ + uint32 h = data; + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + #endif /* HASHUTILS_H */ -- 2.14.1.536.g6867272d5b.dirty
>From 0feaa0c4a39b3e0e995cd5897cfd3ebba6a92c48 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Wed, 13 Sep 2017 19:58:43 -0700 Subject: [PATCH 6/8] Add pg_noinline macro to c.h. Forcing a function not to be inlined can be useful if it's the slow-path of a performance critical function, or should be visible in profiles to allow for proper cost attribution. Author: Andres Freund Discussion: https://postgr.es/m/ --- src/include/c.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/include/c.h b/src/include/c.h index 630dfbfc41..087b1eac5b 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -644,6 +644,22 @@ typedef NameData *Name; #define pg_attribute_noreturn() #endif + +/* + * Forcing a function not to be inlined can be useful if it's the slow-path of + * a performance critical function, or should be visible in profiles to allow + * for proper cost attribution. + */ +/* GCC, Sunpro and XLC support noinline via __attribute */ +#if defined(__GNUC__) || defined(__SUNPRO_C) || defined(__IBMC__) +#define pg_noinline __attribute__((noinline)) +/* msvc via declspec */ +#elif defined(_MSC_VER) +#define pg_noinline __declspec(noinline) +#else +#define pg_noinline +#endif + /* ---------------------------------------------------------------- * Section 6: assertions * ---------------------------------------------------------------- -- 2.14.1.536.g6867272d5b.dirty
>From 334fd03974d01b389c6d615345522df207c60c74 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Mon, 11 Sep 2017 18:25:39 -0700
Subject: [PATCH 7/8] Improve sys/catcache performance.

This primarily includes four pieces:

1) Avoidance of FunctionCallInfo-based function calls, replaced by
   more efficient functions with a native C argument interface.

2) Only initializing the ScanKey when necessary, i.e. on catcache
   misses, avoiding unnecessary CPU cache misses.

3) Allowing the compiler to specialize critical SearchCatCache paths
   for a specific number of attributes, which allows it to unroll
   loops and avoid other nkeys-dependent initialization.

4) Splitting the heap lookup from the hash lookup, reducing stack
   allocations etc. in the common case.

There's further potential:

- replace the open-coded hash table with simplehash - the list walk
  right now shows up in profiles.
- as oid is the only system column supported, avoid the use of
  heap_getsysattr() by adding an explicit branch for
  ObjectIdAttributeNumber - this shows up in profiles.
- move cache initialization out of the search path.
- add proper functions, rather than macros, for SearchSysCacheCopyN
  etc., but right now they don't show up in profiles.

The reason the macro wrappers in syscache.c/h have to be changed,
rather than just catcache, is that doing otherwise would require
exposing the SysCache array to the outside. That might be a good idea
anyway, but it's for another day.
Author: Andres Freund --- src/backend/utils/cache/catcache.c | 438 ++++++++++++++++++++++++++----------- src/backend/utils/cache/syscache.c | 49 ++++- src/include/utils/catcache.h | 18 +- src/include/utils/syscache.h | 23 +- 4 files changed, 390 insertions(+), 138 deletions(-) diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index e092801025..67c596d29b 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -31,6 +31,7 @@ #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/hashutils.h" #include "utils/inval.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -72,11 +73,25 @@ /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; +static inline HeapTuple SearchCatCacheInternal(CatCache *cache, + int nkeys, + Datum v1, Datum v2, + Datum v3, Datum v4); + +static pg_noinline HeapTuple SearchCatCacheMiss(CatCache *cache, + int nkeys, + uint32 hashValue, + Index hashIndex, + Datum v1, Datum v2, + Datum v3, Datum v4); static uint32 CatalogCacheComputeHashValue(CatCache *cache, int nkeys, - ScanKey cur_skey); -static uint32 CatalogCacheComputeTupleHashValue(CatCache *cache, + Datum v1, Datum v2, Datum v3, Datum v4); +static uint32 CatalogCacheComputeTupleHashValue(CatCache *cache, int nkeys, HeapTuple tuple); +static inline bool CatalogCacheCompareTuple(const CatCache *cache, int nkeys, + const HeapTuple tuple, + const Datum *arguments); #ifdef CATCACHE_STATS static void CatCachePrintStats(int code, Datum arg); @@ -95,45 +110,127 @@ static HeapTuple build_dummy_tuple(CatCache *cache, int nkeys, ScanKey skeys); */ /* - * Look up the hash and equality functions for system types that are used - * as cache key fields. - * - * XXX this should be replaced by catalog lookups, - * but that seems to pose considerable risk of circularity... 
+ * Hash and equality functions for system types that are used as cache key + * fields. To compute hashes, and to check for hash collisions, use functions + * hardcoded for that purpose. This is sufficiently performance critical that + * the overhead of SQL style function calls is noticeable. */ + +static bool +chareqfast(Datum a, Datum b) +{ + return DatumGetChar(a) == DatumGetChar(b); +} + +static uint32 +charhashfast(Datum datum) +{ + return murmurhash32((int32) DatumGetChar(datum)); +} + +static bool +nameeqfast(Datum a, Datum b) +{ + char *ca = NameStr(*DatumGetName(a)); + char *cb = NameStr(*DatumGetName(b)); + + return strncmp(ca, cb, NAMEDATALEN) == 0; +} + +static uint32 +namehashfast(Datum datum) +{ + char *key = NameStr(*DatumGetName(datum)); + + return hash_any((unsigned char *) key, strlen(key)); +} + +static bool +int2eqfast(Datum a, Datum b) +{ + return DatumGetInt16(a) == DatumGetInt16(b); +} + +static uint32 +int2hashfast(Datum datum) +{ + return murmurhash32((int32) DatumGetInt16(datum)); +} + +static bool +int4eqfast(Datum a, Datum b) +{ + return DatumGetInt32(a) == DatumGetInt32(b); +} + +static uint32 +int4hashfast(Datum datum) +{ + return murmurhash32((int32) DatumGetInt32(datum)); +} + +static bool +texteqfast(Datum a, Datum b) +{ + /* not as performance critical & "complicated" */ + return DatumGetBool(DirectFunctionCall2(texteq, a, b)); +} + +static uint32 +texthashfast(Datum datum) +{ + /* not as performance critical & "complicated" */ + return DatumGetInt32(DirectFunctionCall1(hashtext, datum)); +} + +static bool +oidvectoreqfast(Datum a, Datum b) +{ + /* not as performance critical & "complicated" */ + return DatumGetBool(DirectFunctionCall2(oidvectoreq, a, b)); +} + +static uint32 +oidvectorhashfast(Datum datum) +{ + /* not as performance critical & "complicated" */ + return DatumGetInt32(DirectFunctionCall1(hashoidvector, datum)); +} + +/* Lookup support functions for a type. 
*/ static void -GetCCHashEqFuncs(Oid keytype, PGFunction *hashfunc, RegProcedure *eqfunc) +GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEqualFN *fasteqfunc) { switch (keytype) { case BOOLOID: - *hashfunc = hashchar; - + *hashfunc = charhashfast; + *fasteqfunc = chareqfast; *eqfunc = F_BOOLEQ; break; case CHAROID: - *hashfunc = hashchar; - + *hashfunc = charhashfast; + *fasteqfunc = chareqfast; *eqfunc = F_CHAREQ; break; case NAMEOID: - *hashfunc = hashname; - + *hashfunc = namehashfast; + *fasteqfunc = nameeqfast; *eqfunc = F_NAMEEQ; break; case INT2OID: - *hashfunc = hashint2; - + *hashfunc = int2hashfast; + *fasteqfunc = int2eqfast; *eqfunc = F_INT2EQ; break; case INT4OID: - *hashfunc = hashint4; - + *hashfunc = int4hashfast; + *fasteqfunc = int4eqfast; *eqfunc = F_INT4EQ; break; case TEXTOID: - *hashfunc = hashtext; - + *hashfunc = texthashfast; + *fasteqfunc = texteqfast; *eqfunc = F_TEXTEQ; break; case OIDOID: @@ -147,13 +244,13 @@ GetCCHashEqFuncs(Oid keytype, PGFunction *hashfunc, RegProcedure *eqfunc) case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: - *hashfunc = hashoid; - + *hashfunc = int4hashfast; + *fasteqfunc = int4eqfast; *eqfunc = F_OIDEQ; break; case OIDVECTOROID: - *hashfunc = hashoidvector; - + *hashfunc = oidvectorhashfast; + *fasteqfunc = oidvectoreqfast; *eqfunc = F_OIDVECTOREQ; break; default: @@ -171,10 +268,12 @@ GetCCHashEqFuncs(Oid keytype, PGFunction *hashfunc, RegProcedure *eqfunc) * Compute the hash value associated with a given set of lookup keys */ static uint32 -CatalogCacheComputeHashValue(CatCache *cache, int nkeys, ScanKey cur_skey) +CatalogCacheComputeHashValue(CatCache *cache, int nkeys, + Datum v1, Datum v2, Datum v3, Datum v4) { uint32 hashValue = 0; uint32 oneHash; + CCHashFN *cc_hashfunc = cache->cc_hashfunc; CACHE4_elog(DEBUG2, "CatalogCacheComputeHashValue %s %d %p", cache->cc_relname, @@ -184,30 +283,26 @@ CatalogCacheComputeHashValue(CatCache *cache, int nkeys, ScanKey 
cur_skey) switch (nkeys) { case 4: - oneHash = - DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[3], - cur_skey[3].sk_argument)); + oneHash = (cc_hashfunc[3])(v4); + hashValue ^= oneHash << 24; hashValue ^= oneHash >> 8; /* FALLTHROUGH */ case 3: - oneHash = - DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[2], - cur_skey[2].sk_argument)); + oneHash = (cc_hashfunc[2])(v3); + hashValue ^= oneHash << 16; hashValue ^= oneHash >> 16; /* FALLTHROUGH */ case 2: - oneHash = - DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[1], - cur_skey[1].sk_argument)); + oneHash = (cc_hashfunc[1])(v2); + hashValue ^= oneHash << 8; hashValue ^= oneHash >> 24; /* FALLTHROUGH */ case 1: - oneHash = - DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[0], - cur_skey[0].sk_argument)); + oneHash = (cc_hashfunc[0])(v1); + hashValue ^= oneHash; break; default: @@ -224,63 +319,96 @@ CatalogCacheComputeHashValue(CatCache *cache, int nkeys, ScanKey cur_skey) * Compute the hash value associated with a given tuple to be cached */ static uint32 -CatalogCacheComputeTupleHashValue(CatCache *cache, HeapTuple tuple) +CatalogCacheComputeTupleHashValue(CatCache *cache, int nkeys, HeapTuple tuple) { - ScanKeyData cur_skey[CATCACHE_MAXKEYS]; + Datum v1 = 0, v2 = 0, v3 = 0, v4 = 0; bool isNull = false; - - /* Copy pre-initialized overhead data for scankey */ - memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); + int *cc_key = cache->cc_key; + TupleDesc cc_tupdesc = cache->cc_tupdesc; /* Now extract key fields from tuple, insert into scankey */ - switch (cache->cc_nkeys) + switch (nkeys) { case 4: - cur_skey[3].sk_argument = - (cache->cc_key[3] == ObjectIdAttributeNumber) + v4 = (cc_key[3] == ObjectIdAttributeNumber) ? 
ObjectIdGetDatum(HeapTupleGetOid(tuple)) : fastgetattr(tuple, - cache->cc_key[3], - cache->cc_tupdesc, + cc_key[3], + cc_tupdesc, &isNull); Assert(!isNull); /* FALLTHROUGH */ case 3: - cur_skey[2].sk_argument = - (cache->cc_key[2] == ObjectIdAttributeNumber) + v3 = (cc_key[2] == ObjectIdAttributeNumber) ? ObjectIdGetDatum(HeapTupleGetOid(tuple)) : fastgetattr(tuple, - cache->cc_key[2], - cache->cc_tupdesc, + cc_key[2], + cc_tupdesc, &isNull); Assert(!isNull); /* FALLTHROUGH */ case 2: - cur_skey[1].sk_argument = - (cache->cc_key[1] == ObjectIdAttributeNumber) + v2 = (cc_key[1] == ObjectIdAttributeNumber) ? ObjectIdGetDatum(HeapTupleGetOid(tuple)) : fastgetattr(tuple, - cache->cc_key[1], - cache->cc_tupdesc, + cc_key[1], + cc_tupdesc, &isNull); Assert(!isNull); /* FALLTHROUGH */ case 1: - cur_skey[0].sk_argument = - (cache->cc_key[0] == ObjectIdAttributeNumber) + v1 = (cc_key[0] == ObjectIdAttributeNumber) ? ObjectIdGetDatum(HeapTupleGetOid(tuple)) : fastgetattr(tuple, - cache->cc_key[0], - cache->cc_tupdesc, + cc_key[0], + cc_tupdesc, &isNull); Assert(!isNull); break; default: - elog(FATAL, "wrong number of hash keys: %d", cache->cc_nkeys); + elog(FATAL, "wrong number of hash keys: %d", nkeys); break; } - return CatalogCacheComputeHashValue(cache, cache->cc_nkeys, cur_skey); + return CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); +} + +/* + * CatalogCacheCompareTuple + * + * Compare a tuple to the passed arguments. + */ +static inline bool +CatalogCacheCompareTuple(const CatCache *cache, int nkeys, + const HeapTuple tuple, + const Datum *arguments) +{ + TupleDesc tupdesc = cache->cc_tupdesc; + const int *cc_key = cache->cc_key; + const CCFastEqualFN *cc_fastequal = cache->cc_fastequal; + int i; + + for (i = 0; i < nkeys; i++) + { + Datum atp; + bool isnull; + + /* + * XXX: might be worthwhile to only handle oid sysattr, to reduce + * overhead - it's the most common key. 
+ */ + atp = heap_getattr(tuple, + cc_key[i], + tupdesc, + &isnull); + Assert(!isnull); + + if (!(cc_fastequal[i])(atp, arguments[i])) + { + return false; + } + } + return true; } @@ -878,7 +1006,8 @@ CatalogCacheInitializeCache(CatCache *cache) GetCCHashEqFuncs(keytype, &cache->cc_hashfunc[i], - &eqfunc); + &eqfunc, + &cache->cc_fastequal[i]); cache->cc_isname[i] = (keytype == NAMEOID); @@ -1020,7 +1149,7 @@ IndexScanOK(CatCache *cache, ScanKey cur_skey) } /* - * SearchCatCache + * SearchCatCacheInternal * * This call searches a system cache for a tuple, opening the relation * if necessary (on the first access to a particular cache). @@ -1042,15 +1171,64 @@ SearchCatCache(CatCache *cache, Datum v3, Datum v4) { - ScanKeyData cur_skey[CATCACHE_MAXKEYS]; + return SearchCatCacheInternal(cache, cache->cc_nkeys, v1, v2, v3, v4); +} + + +/* + * SearchCatCacheN() are SearchCatCache() versions for a specific number of + * arguments. The compiler can inline the body and unroll the loop, making + * them a bit faster than SearchCatCache(). + */ + +HeapTuple +SearchCatCache1(CatCache *cache, + Datum v1) +{ + return SearchCatCacheInternal(cache, 1, v1, 0, 0, 0); +} + + +HeapTuple +SearchCatCache2(CatCache *cache, + Datum v1, Datum v2) +{ + return SearchCatCacheInternal(cache, 2, v1, v2, 0, 0); +} + + +HeapTuple +SearchCatCache3(CatCache *cache, + Datum v1, Datum v2, Datum v3) +{ + return SearchCatCacheInternal(cache, 3, v1, v2, v3, 0); +} + + +HeapTuple +SearchCatCache4(CatCache *cache, + Datum v1, Datum v2, Datum v3, Datum v4) +{ + return SearchCatCacheInternal(cache, 4, v1, v2, v3, v4); +} + +/* + * Work-horse for SearchCatCache/SearchCatCacheN. 
+ */ +static inline HeapTuple +SearchCatCacheInternal(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + Datum arguments[CATCACHE_MAXKEYS]; uint32 hashValue; Index hashIndex; dlist_iter iter; dlist_head *bucket; CatCTup *ct; - Relation relation; - SysScanDesc scandesc; - HeapTuple ntp; /* Make sure we're in an xact, even if this ends up being a cache hit */ Assert(IsTransactionState()); @@ -1058,26 +1236,23 @@ SearchCatCache(CatCache *cache, /* * one-time startup overhead for each cache */ - if (cache->cc_tupdesc == NULL) + if (unlikely(cache->cc_tupdesc == NULL)) CatalogCacheInitializeCache(cache); #ifdef CATCACHE_STATS cache->cc_searches++; #endif - /* - * initialize the search key information - */ - memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); - cur_skey[0].sk_argument = v1; - cur_skey[1].sk_argument = v2; - cur_skey[2].sk_argument = v3; - cur_skey[3].sk_argument = v4; + /* Initialize local parameter array */ + arguments[0] = v1; + arguments[1] = v2; + arguments[2] = v3; + arguments[3] = v4; /* * find the hash bucket in which to look for the tuple */ - hashValue = CatalogCacheComputeHashValue(cache, cache->cc_nkeys, cur_skey); + hashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); /* @@ -1089,8 +1264,6 @@ SearchCatCache(CatCache *cache, bucket = &cache->cc_bucket[hashIndex]; dlist_foreach(iter, bucket) { - bool res; - ct = dlist_container(CatCTup, cache_elem, iter.cur); if (ct->dead) @@ -1099,15 +1272,7 @@ SearchCatCache(CatCache *cache, if (ct->hash_value != hashValue) continue; /* quickly skip entry if wrong hash val */ - /* - * see if the cached tuple matches our key. 
- */ - HeapKeyTest(&ct->tuple, - cache->cc_tupdesc, - cache->cc_nkeys, - cur_skey, - res); - if (!res) + if (!CatalogCacheCompareTuple(cache, nkeys, &ct->tuple, arguments)) continue; /* @@ -1150,6 +1315,42 @@ SearchCatCache(CatCache *cache, } } + return SearchCatCacheMiss(cache, nkeys, hashValue, hashIndex, v1, v2, v3, v4); +} + +/* + * Search the actual catalogs, rather than the cache. + * + * This is kept separate from SearchCatCacheInternal() to keep the fast-path + * as small as possible. To avoid that effort being undone, try to explicitly + * forbid inlining. + */ +static pg_noinline HeapTuple +SearchCatCacheMiss(CatCache *cache, + int nkeys, + uint32 hashValue, + Index hashIndex, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + ScanKeyData cur_skey[CATCACHE_MAXKEYS]; + Relation relation; + SysScanDesc scandesc; + HeapTuple ntp; + CatCTup *ct; + + /* + * Ok, need to make a lookup in the relation, copy the scankey and fill out + * any per-call fields. + */ + memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * nkeys); + cur_skey[0].sk_argument = v1; + cur_skey[1].sk_argument = v2; + cur_skey[2].sk_argument = v3; + cur_skey[3].sk_argument = v4; + /* * Tuple was not found in cache, so we have to try to retrieve it directly * from the relation. 
If found, we will add it to the cache; if not @@ -1171,7 +1372,7 @@ SearchCatCache(CatCache *cache, cache->cc_indexoid, IndexScanOK(cache, cur_skey), NULL, - cache->cc_nkeys, + nkeys, cur_skey); ct = NULL; @@ -1207,7 +1408,7 @@ SearchCatCache(CatCache *cache, if (IsBootstrapProcessingMode()) return NULL; - ntp = build_dummy_tuple(cache, cache->cc_nkeys, cur_skey); + ntp = build_dummy_tuple(cache, nkeys, cur_skey); ct = CatalogCacheCreateEntry(cache, ntp, hashValue, hashIndex, true); @@ -1288,27 +1489,16 @@ GetCatCacheHashValue(CatCache *cache, Datum v3, Datum v4) { - ScanKeyData cur_skey[CATCACHE_MAXKEYS]; - /* * one-time startup overhead for each cache */ if (cache->cc_tupdesc == NULL) CatalogCacheInitializeCache(cache); - /* - * initialize the search key information - */ - memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); - cur_skey[0].sk_argument = v1; - cur_skey[1].sk_argument = v2; - cur_skey[2].sk_argument = v3; - cur_skey[3].sk_argument = v4; - /* * calculate the hash value */ - return CatalogCacheComputeHashValue(cache, cache->cc_nkeys, cur_skey); + return CatalogCacheComputeHashValue(cache, cache->cc_nkeys, v1, v2, v3, v4); } @@ -1329,7 +1519,7 @@ SearchCatCacheList(CatCache *cache, Datum v3, Datum v4) { - ScanKeyData cur_skey[CATCACHE_MAXKEYS]; + Datum arguments[CATCACHE_MAXKEYS]; uint32 lHashValue; dlist_iter iter; CatCList *cl; @@ -1354,21 +1544,18 @@ SearchCatCacheList(CatCache *cache, cache->cc_lsearches++; #endif - /* - * initialize the search key information - */ - memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); - cur_skey[0].sk_argument = v1; - cur_skey[1].sk_argument = v2; - cur_skey[2].sk_argument = v3; - cur_skey[3].sk_argument = v4; + /* Initialize local parameter array */ + arguments[0] = v1; + arguments[1] = v2; + arguments[2] = v3; + arguments[3] = v4; /* * compute a hash value of the given keys for faster search. 
We don't * presently divide the CatCList items into buckets, but this still lets * us skip non-matching items quickly most of the time. */ - lHashValue = CatalogCacheComputeHashValue(cache, nkeys, cur_skey); + lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); /* * scan the items until we find a match or exhaust our list @@ -1378,8 +1565,6 @@ SearchCatCacheList(CatCache *cache, */ dlist_foreach(iter, &cache->cc_lists) { - bool res; - cl = dlist_container(CatCList, cache_elem, iter.cur); if (cl->dead) @@ -1393,12 +1578,8 @@ SearchCatCacheList(CatCache *cache, */ if (cl->nkeys != nkeys) continue; - HeapKeyTest(&cl->tuple, - cache->cc_tupdesc, - nkeys, - cur_skey, - res); - if (!res) + + if (!CatalogCacheCompareTuple(cache, nkeys, &cl->tuple, arguments)) continue; /* @@ -1441,9 +1622,20 @@ SearchCatCacheList(CatCache *cache, PG_TRY(); { + ScanKeyData cur_skey[CATCACHE_MAXKEYS]; Relation relation; SysScanDesc scandesc; + /* + * Ok, need to make a lookup in the relation, copy the scankey and fill out + * any per-call fields. + */ + memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * cache->cc_nkeys); + cur_skey[0].sk_argument = v1; + cur_skey[1].sk_argument = v2; + cur_skey[2].sk_argument = v3; + cur_skey[3].sk_argument = v4; + relation = heap_open(cache->cc_reloid, AccessShareLock); scandesc = systable_beginscan(relation, @@ -1467,7 +1659,7 @@ SearchCatCacheList(CatCache *cache, * See if there's an entry for this tuple already. 
*/ ct = NULL; - hashValue = CatalogCacheComputeTupleHashValue(cache, ntp); + hashValue = CatalogCacheComputeTupleHashValue(cache, cache->cc_nkeys, ntp); hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); bucket = &cache->cc_bucket[hashIndex]; @@ -1820,7 +2012,7 @@ PrepareToInvalidateCacheTuple(Relation relation, if (ccp->cc_tupdesc == NULL) CatalogCacheInitializeCache(ccp); - hashvalue = CatalogCacheComputeTupleHashValue(ccp, tuple); + hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple); dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId; (*function) (ccp->id, hashvalue, dbid); @@ -1829,7 +2021,7 @@ PrepareToInvalidateCacheTuple(Relation relation, { uint32 newhashvalue; - newhashvalue = CatalogCacheComputeTupleHashValue(ccp, newtuple); + newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple); if (newhashvalue != hashvalue) (*function) (ccp->id, newhashvalue, dbid); diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index fcbb683a99..888edbb325 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -1102,13 +1102,56 @@ SearchSysCache(int cacheId, Datum key3, Datum key4) { - if (cacheId < 0 || cacheId >= SysCacheSize || - !PointerIsValid(SysCache[cacheId])) - elog(ERROR, "invalid cache ID: %d", cacheId); + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); return SearchCatCache(SysCache[cacheId], key1, key2, key3, key4); } +HeapTuple +SearchSysCache1(int cacheId, + Datum key1) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 1); + + return SearchCatCache1(SysCache[cacheId], key1); +} + +HeapTuple +SearchSysCache2(int cacheId, + Datum key1, Datum key2) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 2); + + return SearchCatCache2(SysCache[cacheId], 
key1, key2); +} + +HeapTuple +SearchSysCache3(int cacheId, + Datum key1, Datum key2, Datum key3) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 3); + + return SearchCatCache3(SysCache[cacheId], key1, key2, key3); +} + +HeapTuple +SearchSysCache4(int cacheId, + Datum key1, Datum key2, Datum key3, Datum key4) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 4); + + return SearchCatCache4(SysCache[cacheId], key1, key2, key3, key4); +} + /* * ReleaseSysCache * Release previously grabbed reference count on a tuple diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 200a3022e7..360f0c5dd5 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -34,6 +34,10 @@ #define CATCACHE_MAXKEYS 4 + +typedef uint32 (*CCHashFN) (Datum datum); +typedef bool (*CCFastEqualFN) (Datum a, Datum b); + typedef struct catcache { int id; /* cache identifier --- see syscache.h */ @@ -47,7 +51,8 @@ typedef struct catcache int cc_nbuckets; /* # of hash buckets in this cache */ int cc_nkeys; /* # of keys (1..CATCACHE_MAXKEYS) */ int cc_key[CATCACHE_MAXKEYS]; /* AttrNumber of each key */ - PGFunction cc_hashfunc[CATCACHE_MAXKEYS]; /* hash function for each key */ + CCHashFN cc_hashfunc[CATCACHE_MAXKEYS]; /* hash function for each key */ + CCFastEqualFN cc_fastequal[CATCACHE_MAXKEYS]; /* fast equal function for each key */ ScanKeyData cc_skey[CATCACHE_MAXKEYS]; /* precomputed key info for heap * scans */ bool cc_isname[CATCACHE_MAXKEYS]; /* flag "name" key columns */ @@ -174,8 +179,15 @@ extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid, extern void InitCatCachePhase2(CatCache *cache, bool touch_index); extern HeapTuple SearchCatCache(CatCache *cache, - Datum v1, Datum v2, - Datum v3, Datum v4); + Datum v1, Datum v2, Datum v3, Datum v4); +extern HeapTuple 
SearchCatCache1(CatCache *cache, + Datum v1); +extern HeapTuple SearchCatCache2(CatCache *cache, + Datum v1, Datum v2); +extern HeapTuple SearchCatCache3(CatCache *cache, + Datum v1, Datum v2, Datum v3); +extern HeapTuple SearchCatCache4(CatCache *cache, + Datum v1, Datum v2, Datum v3, Datum v4); extern void ReleaseCatCache(HeapTuple tuple); extern uint32 GetCatCacheHashValue(CatCache *cache, diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 8a92ea27ac..12bda02cd7 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -117,6 +117,20 @@ extern void InitCatalogCachePhase2(void); extern HeapTuple SearchSysCache(int cacheId, Datum key1, Datum key2, Datum key3, Datum key4); + +/* + * The use of argument specific numbers is encouraged, they're faster, and + * insulates the caller from changes in the maximum number of keys. + */ +extern HeapTuple SearchSysCache1(int cacheId, + Datum key1); +extern HeapTuple SearchSysCache2(int cacheId, + Datum key1, Datum key2); +extern HeapTuple SearchSysCache3(int cacheId, + Datum key1, Datum key2, Datum key3); +extern HeapTuple SearchSysCache4(int cacheId, + Datum key1, Datum key2, Datum key3, Datum key4); + extern void ReleaseSysCache(HeapTuple tuple); /* convenience routines */ @@ -156,15 +170,6 @@ extern bool RelationSupportsSysCache(Oid relid); * functions is encouraged, as it insulates the caller from changes in the * maximum number of keys. 
*/ -#define SearchSysCache1(cacheId, key1) \ - SearchSysCache(cacheId, key1, 0, 0, 0) -#define SearchSysCache2(cacheId, key1, key2) \ - SearchSysCache(cacheId, key1, key2, 0, 0) -#define SearchSysCache3(cacheId, key1, key2, key3) \ - SearchSysCache(cacheId, key1, key2, key3, 0) -#define SearchSysCache4(cacheId, key1, key2, key3, key4) \ - SearchSysCache(cacheId, key1, key2, key3, key4) - #define SearchSysCacheCopy1(cacheId, key1) \ SearchSysCacheCopy(cacheId, key1, 0, 0, 0) #define SearchSysCacheCopy2(cacheId, key1, key2) \ -- 2.14.1.536.g6867272d5b.dirty
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers