At Fri, 6 Nov 2020 10:42:15 +0200, Heikki Linnakangas <[email protected]> wrote
in
> Do you need the "ntaccess == 2" test? You could always increment the
> counter, and in the code that uses ntaccess to decide what to evict,
> treat all values >= 2 the same.
>
> Need to handle integer overflow somehow. Or maybe not: integer
> overflow is so infrequent that even if a hot syscache entry gets
> evicted prematurely because its ntaccess count wrapped around to 0, it
> will happen so rarely that it won't make any difference in practice.
That relaxing simplifies the code significantly, but a significant
degradation by about 5% still exists.
(SearchCatCacheInternal())
+ ct->naccess++;
!+ ct->lastaccess = catcacheclock;
If I removed the second line above, the degradation disappears
(-0.7%). However, I don't find the corresponding numbers in the output
of perf. The sum of the numbers for the removed instructions is (0.02
+ 0.28 = 0.3%). I don't think the degradation as the whole doesn't
always reflect to the instruction level profiling, but I'm stuck here,
anyway.
% samples
master p2 patched (p2 = patched - "ct->lastaccess = catcacheclock)
=============================================================================
0.47 | 0.27 | 0.17 | mov %rbx,0x8(%rbp)
| | | SearchCatCacheInternal():
| | | ct->naccess++;
| | | ct->lastaccess = catcacheclock;
----- |----- | 0.02 |10f: mov catcacheclock,%rax
| | | ct->naccess++;
----- | 0.96 | 1.00 | addl $0x1,0x14(%rbx)
| | | return NULL;
----- | 0.11 | 0.16 | xor %ebp,%ebp
| | | if (!ct->negative)
0.27 | 0.30 | 0.03 | cmpb $0x0,0x21(%rbx)
| | | ct->lastaccess = catcacheclock;
----- | ---- | 0.28 | mov %rax,0x18(%rbx)
| | | if (!ct->negative)
0.34 | 0.08 | 0.59 | ↓ jne 149
For your information, the same table for a bit wider range follows.
% samples
master p2 patched (p2 = patched - "ct->lastaccess = catcacheclock)
=============================================================================
| | | dlist_foreach(iter, bucket)
6.91 | 7.06 | 5.89 | mov 0x8(%rbp),%rbx
0.78 | 0.73 | 0.81 | test %rbx,%rbx
| | | ↓ je 160
| | | cmp %rbx,%rbp
0.46 | 0.52 | 0.39 | ↓ jne 9d
| | | ↓ jmpq 160
| | | nop
5.68 | 5.54 | 6.03 | 90: mov 0x8(%rbx),%rbx
1.44 | 1.42 | 1.43 | cmp %rbx,%rbp
| | | ↓ je 160
| | | {
| | | ct = dlist_container(CatCTup, cache_elem, iter.cur);
| | |
| | | if (ct->dead)
30.36 |30.97 | 31.48 | 9d: cmpb $0x0,0x20(%rbx)
2.63 | 2.60 | 2.69 | ↑ jne 90
| | | continue; /* ignore dead
entries */
| | |
| | | if (ct->hash_value != hashValue)
1.41 | 1.37 | 1.35 | cmp -0x24(%rbx),%edx
3.19 | 2.97 | 2.87 | ↑ jne 90
7.17 | 5.53 | 6.89 | mov %r13,%rsi
0.02 | 0.04 | 0.04 | xor %r12d,%r12d
3.00 | 2.98 | 2.95 | ↓ jmp b5
0.15 | 0.61 | 0.20 | b0: mov 0x10(%rsp,%r12,1),%rsi
6.58 | 5.04 | 5.95 | b5: mov %ecx,0xc(%rsp)
| | | CatalogCacheCompareTuple():
| | | if (!(cc_fastequal[i]) (cachekeys[i], searchkeys[i]))
1.51 | 0.92 | 1.66 | mov -0x20(%rbx,%r12,1),%rdi
0.54 | 1.64 | 0.58 | mov %edx,0x8(%rsp)
3.78 | 3.11 | 3.86 | → callq *0x38(%r14,%r12,1)
0.43 | 2.30 | 0.34 | mov 0x8(%rsp),%edx
0.20 | 0.94 | 0.25 | mov 0xc(%rsp),%ecx
0.44 | 0.41 | 0.44 | test %al,%al
| | | ↑ je 90
| | | for (i = 0; i < nkeys; i++)
2.28 | 1.07 | 2.26 | add $0x8,%r12
0.08 | 0.23 | 0.07 | cmp $0x18,%r12
0.11 | 0.64 | 0.10 | ↑ jne b0
| | | dlist_move_head():
| | | */
| | | static inline void
| | | dlist_move_head(dlist_head *head, dlist_node *node)
| | | {
| | | /* fast path if it's already at the head */
| | | if (head->head.next == node)
0.08 | 0.61 | 0.04 | cmp 0x8(%rbp),%rbx
0.02 | 0.10 | 0.00 | ↓ je 10f
| | | return;
| | |
| | | dlist_delete(node);
0.01 | 0.20 | 0.06 | mov 0x8(%rbx),%rax
| | | dlist_delete():
| | | node->prev->next = node->next;
0.75 | 0.13 | 0.72 | mov (%rbx),%rdx
2.89 | 3.42 | 2.22 | mov %rax,0x8(%rdx)
| | | node->next->prev = node->prev;
0.01 | 0.09 | 0.00 | mov (%rbx),%rdx
0.04 | 0.62 | 0.58 | mov %rdx,(%rax)
| | | dlist_push_head():
| | | if (head->head.next == NULL) /* convert NULL
header to circular */
0.31 | 0.08 | 0.28 | mov 0x8(%rbp),%rax
0.55 | 0.44 | 0.28 | test %rax,%rax
| | | ↓ je 180
| | | node->next = head->head.next;
0.00 | 0.08 | 0.06 |101: mov %rax,0x8(%rbx)
| | | node->prev = &head->head;
0.17 | 0.73 | 0.37 | mov %rbp,(%rbx)
| | | node->next->prev = node;
0.34 | 0.08 | 1.13 | mov %rbx,(%rax)
| | | head->head.next = node;
0.47 | 0.27 | 0.17 | mov %rbx,0x8(%rbp)
| | | SearchCatCacheInternal():
| | | ct->naccess++;
| | | ct->lastaccess = catcacheclock;
----- |----- | 0.02 |10f: mov catcacheclock,%rax
| | | ct->naccess++;
----- | 0.96 | 1.00 | addl $0x1,0x14(%rbx)
| | | return NULL;
----- | 0.11 | 0.16 | xor %ebp,%ebp
| | | if (!ct->negative)
0.27 | 0.30 | 0.03 | cmpb $0x0,0x21(%rbx)
| | | ct->lastaccess = catcacheclock;
----- | ---- | 0.28 | mov %rax,0x18(%rbx)
| | | if (!ct->negative)
0.34 | 0.08 | 0.59 | ↓ jne 149
--
Kyotaro Horiguchi
NTT Open Source Software Center
>From 498a55ff07f19646ca09034dfdc4c68459a74855 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <[email protected]>
Date: Fri, 6 Nov 2020 17:27:18 +0900
Subject: [PATCH v5] CatCache expiration feature
---
src/backend/access/transam/xact.c | 3 +
src/backend/utils/cache/catcache.c | 118 +++++++++++++++++++++++++++++
src/backend/utils/misc/guc.c | 12 +++
src/include/utils/catcache.h | 20 +++++
4 files changed, 153 insertions(+)
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index af6afcebb1..a246fcc4c0 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1086,6 +1086,9 @@ static void
AtStart_Cache(void)
{
AcceptInvalidationMessages();
+
+ if (xactStartTimestamp != 0)
+ SetCatCacheClock(xactStartTimestamp);
}
/*
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 3613ae5f44..b457fed7ab 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -38,6 +38,7 @@
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/syscache.h"
+#include "utils/timestamp.h"
/* #define CACHEDEBUG */ /* turns DEBUG elogs on */
@@ -60,9 +61,18 @@
#define CACHE_elog(...)
#endif
+/*
+ * GUC variable to define the minimum age of entries that will be considered
+ * to be evicted in seconds. -1 to disable the feature.
+ */
+int catalog_cache_prune_min_age = -1;
+
/* Cache management header --- pointer is NULL until created */
static CatCacheHeader *CacheHdr = NULL;
+/* Clock for the last accessed time of a catcache entry. */
+TimestampTz catcacheclock = 0;
+
static inline HeapTuple SearchCatCacheInternal(CatCache *cache,
int nkeys,
Datum v1, Datum v2,
@@ -74,6 +84,7 @@ static pg_noinline HeapTuple SearchCatCacheMiss(CatCache *cache,
Index hashIndex,
Datum v1, Datum v2,
Datum v3, Datum v4);
+static bool CatCacheCleanupOldEntries(CatCache *cp);
static uint32 CatalogCacheComputeHashValue(CatCache *cache, int nkeys,
Datum v1, Datum v2, Datum v3, Datum v4);
@@ -99,6 +110,12 @@ static void CatCacheFreeKeys(TupleDesc tupdesc, int nkeys, int *attnos,
static void CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos,
Datum *srckeys, Datum *dstkeys);
+/* GUC assign function */
+void
+assign_catalog_cache_prune_min_age(int newval, void *extra)
+{
+ catalog_cache_prune_min_age = newval;
+}
/*
* internal support functions
@@ -863,6 +880,10 @@ RehashCatCache(CatCache *cp)
int newnbuckets;
int i;
+ /* try removing old entries before expanding hash */
+ if (CatCacheCleanupOldEntries(cp))
+ return;
+
elog(DEBUG1, "rehashing catalog cache id %d for %s; %d tups, %d buckets",
cp->id, cp->cc_relname, cp->cc_ntup, cp->cc_nbuckets);
@@ -1264,6 +1285,16 @@ SearchCatCacheInternal(CatCache *cache,
*/
dlist_move_head(bucket, &ct->cache_elem);
+ /*
+ * Prolong life of this entry. Since we want run as less instructions
+ * as possible and want the branch be stable for performance reasons,
+ * we don't care of wrap-around and possible false-negative for old
+ * entries. The window is quite narrow and the counter doesn't gets so
+ * large while expiration is active.
+ */
+ ct->naccess++;
+ ct->lastaccess = catcacheclock;
+
/*
* If it's a positive entry, bump its refcount and return it. If it's
* negative, we can report failure to the caller.
@@ -1425,6 +1456,91 @@ SearchCatCacheMiss(CatCache *cache,
return &ct->tuple;
}
+/*
+ * CatCacheCleanupOldEntries - Remove infrequently-used entries
+ *
+ * Catcache entries happen to be left unused for a long time for several
+ * reasons. Remove such entries to prevent catcache from bloating. It is based
+ * on the similar algorithm with buffer eviction. Entries that are accessed
+ * several times in a certain period live longer than those that have had less
+ * access in the same duration.
+ */
+static bool
+CatCacheCleanupOldEntries(CatCache *cp)
+{
+ int nremoved = 0;
+ int i;
+ long oldest_ts = catcacheclock;
+ long age;
+ int us;
+
+ /* Return immediately if disabled */
+ if (likely(catalog_cache_prune_min_age < 0))
+ return false;
+
+ /* Don't scan the hash when we know we don't have prunable entries */
+ TimestampDifference(cp->cc_oldest_ts, catcacheclock, &age, &us);
+ if (age < catalog_cache_prune_min_age)
+ return false;
+
+ /* Scan over the whole hash to find entries to remove */
+ for (i = 0 ; i < cp->cc_nbuckets ; i++)
+ {
+ dlist_mutable_iter iter;
+
+ dlist_foreach_modify(iter, &cp->cc_bucket[i])
+ {
+ CatCTup *ct = dlist_container(CatCTup, cache_elem, iter.cur);
+
+ /* Don't remove referenced entries */
+ if (ct->refcount == 0 &&
+ (ct->c_list == NULL || ct->c_list->refcount == 0))
+ {
+ /*
+ * Calculate the duration from the time from the last access
+ * to the "current" time. catcacheclock is updated
+ * per-statement basis and additionaly udpated periodically
+ * during a long running query.
+ */
+ TimestampDifference(ct->lastaccess, catcacheclock, &age, &us);
+
+ if (age > catalog_cache_prune_min_age)
+ {
+ /*
+ * Entries that are not accessed after the last pruning
+ * are removed in that seconds, and their lives are
+ * prolonged according to how many times they are accessed
+ * up to three times of the duration. We don't try shrink
+ * buckets since pruning effectively caps catcache
+ * expansion in the long term.
+ */
+ ct->naccess = Min(2, ct->naccess);
+ if (--ct->naccess == 0)
+ {
+ CatCacheRemoveCTup(cp, ct);
+ nremoved++;
+
+ /* don't update oldest_ts by removed entry */
+ continue;
+ }
+ }
+ }
+
+ /* update oldest timestamp if the entry remains alive */
+ if (ct->lastaccess < oldest_ts)
+ oldest_ts = ct->lastaccess;
+ }
+ }
+
+ cp->cc_oldest_ts = oldest_ts;
+
+ if (nremoved > 0)
+ elog(DEBUG1, "pruning catalog cache id=%d for %s: removed %d / %d",
+ cp->id, cp->cc_relname, nremoved, cp->cc_ntup + nremoved);
+
+ return nremoved > 0;
+}
+
/*
* ReleaseCatCache
*
@@ -1888,6 +2004,8 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
ct->dead = false;
ct->negative = negative;
ct->hash_value = hashValue;
+ ct->naccess = 1;
+ ct->lastaccess = catcacheclock;
dlist_push_head(&cache->cc_bucket[hashIndex], &ct->cache_elem);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index bb34630e8e..95213853aa 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -88,6 +88,7 @@
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
+#include "utils/catcache.h"
#include "utils/float.h"
#include "utils/guc_tables.h"
#include "utils/memutils.h"
@@ -3399,6 +3400,17 @@ static struct config_int ConfigureNamesInt[] =
check_huge_page_size, NULL, NULL
},
+ {
+ {"catalog_cache_prune_min_age", PGC_USERSET, RESOURCES_MEM,
+ gettext_noop("System catalog cache entries that are living unused more than this seconds are considered for removal."),
+ gettext_noop("The value of -1 turns off pruning."),
+ GUC_UNIT_S
+ },
+ &catalog_cache_prune_min_age,
+ -1, -1, INT_MAX,
+ NULL, assign_catalog_cache_prune_min_age, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index f4aa316604..a11736f767 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -22,6 +22,7 @@
#include "access/htup.h"
#include "access/skey.h"
+#include "datatype/timestamp.h"
#include "lib/ilist.h"
#include "utils/relcache.h"
@@ -61,6 +62,7 @@ typedef struct catcache
slist_node cc_next; /* list link */
ScanKeyData cc_skey[CATCACHE_MAXKEYS]; /* precomputed key info for heap
* scans */
+ TimestampTz cc_oldest_ts; /* timestamp of the oldest tuple in the hash */
/*
* Keep these at the end, so that compiling catcache.c with CATCACHE_STATS
@@ -119,6 +121,8 @@ typedef struct catctup
bool dead; /* dead but not yet removed? */
bool negative; /* negative cache entry? */
HeapTupleData tuple; /* tuple management header */
+ unsigned int naccess; /* # of access to this entry */
+ TimestampTz lastaccess; /* timestamp of the last usage */
/*
* The tuple may also be a member of at most one CatCList. (If a single
@@ -189,6 +193,22 @@ typedef struct catcacheheader
/* this extern duplicates utils/memutils.h... */
extern PGDLLIMPORT MemoryContext CacheMemoryContext;
+
+/* for guc.c, not PGDLLPMPORT'ed */
+extern int catalog_cache_prune_min_age;
+
+/* source clock for access timestamp of catcache entries */
+extern TimestampTz catcacheclock;
+
+/* SetCatCacheClock - set catcache timestamp source clodk */
+static inline void
+SetCatCacheClock(TimestampTz ts)
+{
+ catcacheclock = ts;
+}
+
+extern void assign_catalog_cache_prune_min_age(int newval, void *extra);
+
extern void CreateCacheMemoryContext(void);
extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid,
--
2.18.4