On 24/12/2024 09:38, Michael Paquier wrote:
On Tue, Dec 24, 2024 at 12:18:09AM +0200, Heikki Linnakangas wrote:
My first attempt was to insert the CatCTup or CatCList entry to the catcache
before starting to build it, marked with a flag to indicate that the entry
isn't fully built yet. But when I started to write that it got pretty
invasive, and it felt easier to add another structure to hold the
in-progress entries instead.

 From ec248c69cb42a0747ecc6a63ac4e4682cce2ee93 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Fri, 20 Dec 2024 18:37:44 +0200
Subject: [PATCH 1/1] Don't allow GetTransactionSnapshot() in logical decoding

It seems to me that this is not what you intended to attach for the
catcache inconsistency fix?

Right, sorry, here are the correct patches.

--
Heikki Linnakangas
Neon (https://neon.tech)
From 9fb473ba6d252998967f6d69a566c2be1d8d7f79 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Tue, 24 Dec 2024 00:13:36 +0200
Subject: [PATCH v2 1/2] Demonstrate catcache list invalidation bug

---
 src/backend/utils/cache/catcache.c       |  3 +
 src/test/modules/test_misc/meson.build   |  1 +
 src/test/modules/test_misc/t/007_bugs.pl | 96 ++++++++++++++++++++++++
 3 files changed, 100 insertions(+)
 create mode 100644 src/test/modules/test_misc/t/007_bugs.pl

diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index ee303dc501d..4f06b45239c 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -34,6 +34,7 @@
 #include "utils/catcache.h"
 #include "utils/datum.h"
 #include "utils/fmgroids.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
@@ -1811,6 +1812,8 @@ SearchCatCacheList(CatCache *cache,
 
 			stale = false;
 
+			INJECTION_POINT("catcache-list-miss-systable-scan-started");
+
 			while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
 			{
 				uint32		hashValue;
diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build
index 283ffa751aa..3abcd471c03 100644
--- a/src/test/modules/test_misc/meson.build
+++ b/src/test/modules/test_misc/meson.build
@@ -15,6 +15,7 @@ tests += {
       't/004_io_direct.pl',
       't/005_timeouts.pl',
       't/006_signal_autovacuum.pl',
+      't/007_bugs.pl',
     ],
   },
 }
diff --git a/src/test/modules/test_misc/t/007_bugs.pl b/src/test/modules/test_misc/t/007_bugs.pl
new file mode 100644
index 00000000000..48adf2f7b65
--- /dev/null
+++ b/src/test/modules/test_misc/t/007_bugs.pl
@@ -0,0 +1,96 @@
+
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+use locale;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use Test::More;
+
+# Test for a catcache list invalidation bug: if a cache invalidation
+# arrives while a catcache list entry is being built, a concurrently
+# created catalog entry can be missed permanently.  This test uses an
+# injection point to pause the backend at the critical moment.
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# Node initialization
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init();
+$node->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node->check_extension('injection_points'))
+{
+	plan skip_all => 'Extension injection_points not installed';
+}
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+
+sub randStr {
+	my $len = shift;
+	my @chars = ("A".."Z", "a".."z", "0".."9");
+	return join '', map { @chars[rand @chars] } 1 .. $len;
+}
+
+# XXX: If you make this shorter, the pg_proc tuple is not toasted, and the test passes
+my $longtext = randStr(10000);    # test fails
+#my $longtext = randStr(10);       # test passes
+
+$node->safe_psql('postgres', qq[
+    CREATE FUNCTION foofunc(dummy integer) RETURNS integer AS \$\$ SELECT 1; /* $longtext */ \$\$ LANGUAGE SQL
+]);
+
+my $psql_session = $node->background_psql('postgres');
+my $psql_session2 = $node->background_psql('postgres');
+
+# Set injection point in the session, to pause while populating the catcache list
+$psql_session->query_safe(
+	qq[
+    SELECT injection_points_set_local();
+    SELECT injection_points_attach('catcache-list-miss-systable-scan-started', 'wait');
+]);
+
+# This pauses on the injection point while populating catcache list for
+# functions with name "foofunc"
+$psql_session->query_until(
+	qr/starting_bg_psql/, q(
+   \echo starting_bg_psql
+   SELECT foofunc(1);
+));
+
+# While the first session is building the catcache list, create a new
+# function that overloads the same name. This sends a catcache invalidation.
+$node->safe_psql('postgres', qq[
+    CREATE FUNCTION foofunc() RETURNS integer AS \$\$ SELECT 123 \$\$ LANGUAGE SQL
+]);
+
+# Continue the paused session. It will continue to construct the catcache
+# list, and will accept invalidations while doing that, but it misses the
+# new function we just created. The "SELECT foofunc(1)" query will now
+# finish.
+$psql_session2->query_safe(qq[
+    SELECT injection_points_wakeup('catcache-list-miss-systable-scan-started');
+    SELECT injection_points_detach('catcache-list-miss-systable-scan-started');
+]);
+
+# Demonstrate that we can run other queries in the session. (Not necessary, but
+# makes it more clear that the session is permanently missing the catcache entry)
+$psql_session->query_safe("SELECT now();");
+
+# This should find the new function, but it does not.
+$psql_session->query_safe("SELECT foofunc();");
+
+ok($psql_session->quit);
+ok($psql_session2->quit);
+
+done_testing();
-- 
2.39.5

From 9ed223c36591b9ab777de48e8e3f28e321d3f9d4 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Tue, 24 Dec 2024 00:07:38 +0200
Subject: [PATCH v2 2/2] Fix bug in catcache invalidation

If a new entry is added that belongs to a catcache list entry, and cache
invalidation happens while the list entry is being built, the new entry
can be missed.

To fix, change the way we detect concurrent invalidations while a
catcache entry is being built. Keep a stack of entries that are being
built, and apply cache invalidation to those entries in addition to the
real catcache entries.

Discussion: XXX
---
 src/backend/utils/cache/catcache.c | 185 +++++++++++++++++------------
 src/tools/pgindent/typedefs.list   |   1 +
 2 files changed, 112 insertions(+), 74 deletions(-)

diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 4f06b45239c..2332c52c2bd 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -41,6 +41,23 @@
 #include "utils/resowner.h"
 #include "utils/syscache.h"
 
+/*
+ * If a catcache invalidation happens while we are in the middle of creating a
+ * catcache entry (or list), it might apply to the entry we're creating,
+ * making it invalid before it's inserted to the catcache.  To catch such
+ * cases, we have a stack of "create-in-progress" entries. Cache invalidation
+ * marks any matching entries in the stack as dead, in addition to the actual
+ * CatCTup and CatCList entries.
+ */
+typedef struct CatCCreating
+{
+	CatCache   *cache;
+	uint32		hash_value;
+	bool		list;
+	bool		dead;
+	struct CatCCreating *next;
+} CatCCreating;
+static CatCCreating *catcache_creating_stack = NULL;
 
  /* #define CACHEDEBUG */	/* turns DEBUG elogs on */
 
@@ -93,8 +110,7 @@ static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
 static void RehashCatCache(CatCache *cp);
 static void RehashCatCacheLists(CatCache *cp);
 static void CatalogCacheInitializeCache(CatCache *cache);
-static CatCTup *CatalogCacheCreateEntry(CatCache *cache,
-										HeapTuple ntp, SysScanDesc scandesc,
+static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
 										Datum *arguments,
 										uint32 hashValue, Index hashIndex);
 
@@ -662,6 +678,16 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue)
 			/* could be multiple matches, so keep looking! */
 		}
 	}
+
+	/* Also invalidate any entries that are being built */
+	for (CatCCreating *e = catcache_creating_stack; e != NULL; e = e->next)
+	{
+		if (e->cache == cache)
+		{
+			if (e->list || e->hash_value == hashValue)
+				e->dead = true;
+		}
+	}
 }
 
 /* ----------------------------------------------------------------
@@ -744,6 +770,13 @@ ResetCatalogCache(CatCache *cache)
 #endif
 		}
 	}
+
+	/* Also invalidate any entries that are being built */
+	for (CatCCreating *e = catcache_creating_stack; e != NULL; e = e->next)
+	{
+		if (e->cache == cache)
+			e->dead = true;
+	}
 }
 
 /*
@@ -1493,7 +1526,7 @@ SearchCatCacheMiss(CatCache *cache,
 
 		while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
 		{
-			ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
+			ct = CatalogCacheCreateEntry(cache, ntp, NULL,
 										 hashValue, hashIndex);
 			/* upon failure, we must start the scan over */
 			if (ct == NULL)
@@ -1528,7 +1561,7 @@ SearchCatCacheMiss(CatCache *cache,
 		if (IsBootstrapProcessingMode())
 			return NULL;
 
-		ct = CatalogCacheCreateEntry(cache, NULL, NULL, arguments,
+		ct = CatalogCacheCreateEntry(cache, NULL, arguments,
 									 hashValue, hashIndex);
 
 		/* Creating a negative cache entry shouldn't fail */
@@ -1665,6 +1698,8 @@ SearchCatCacheList(CatCache *cache,
 	HeapTuple	ntp;
 	MemoryContext oldcxt;
 	int			i;
+	volatile CatCCreating creating_list;
+	bool		first_iter = true;
 
 	/*
 	 * one-time startup overhead for each cache
@@ -1779,12 +1814,25 @@ SearchCatCacheList(CatCache *cache,
 	 */
 	ctlist = NIL;
 
+	/*
+	 * Cache invalidation can happen while we're building the list.
+	 * CatalogCacheCreateEntry() handles concurrent invalidation of individual
+	 * tuples, but it's also possible that a new entry is concurrently added
+	 * that we might miss. Push an "in-progress" entry that will receive the
+	 * invalidation, until we have built the final list entry.
+	 */
+	creating_list.cache = cache;
+	creating_list.hash_value = lHashValue;
+	creating_list.list = true;
+	creating_list.dead = false;
+	creating_list.next = catcache_creating_stack;
+	catcache_creating_stack = (CatCCreating *) &creating_list;
+
 	PG_TRY();
 	{
 		ScanKeyData cur_skey[CATCACHE_MAXKEYS];
 		Relation	relation;
 		SysScanDesc scandesc;
-		bool		stale;
 
 		relation = table_open(cache->cc_reloid, AccessShareLock);
 
@@ -1810,9 +1858,10 @@ SearchCatCacheList(CatCache *cache,
 			/* The list will be ordered iff we are doing an index scan */
 			ordered = (scandesc->irel != NULL);
 
-			stale = false;
+			creating_list.dead = false;
 
-			INJECTION_POINT("catcache-list-miss-systable-scan-started");
+			if (first_iter)
+				INJECTION_POINT("catcache-list-miss-systable-scan-started");
 
 			while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
 			{
@@ -1856,30 +1905,13 @@ SearchCatCacheList(CatCache *cache,
 				if (!found)
 				{
 					/* We didn't find a usable entry, so make a new one */
-					ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
+					ct = CatalogCacheCreateEntry(cache, ntp, NULL,
 												 hashValue, hashIndex);
+
 					/* upon failure, we must start the scan over */
 					if (ct == NULL)
 					{
-						/*
-						 * Release refcounts on any items we already had.  We
-						 * dare not try to free them if they're now
-						 * unreferenced, since an error while doing that would
-						 * result in the PG_CATCH below doing extra refcount
-						 * decrements.  Besides, we'll likely re-adopt those
-						 * items in the next iteration, so it's not worth
-						 * complicating matters to try to get rid of them.
-						 */
-						foreach(ctlist_item, ctlist)
-						{
-							ct = (CatCTup *) lfirst(ctlist_item);
-							Assert(ct->c_list == NULL);
-							Assert(ct->refcount > 0);
-							ct->refcount--;
-						}
-						/* Reset ctlist in preparation for new try */
-						ctlist = NIL;
-						stale = true;
+						creating_list.dead = true;
 						break;
 					}
 				}
@@ -1891,7 +1923,32 @@ SearchCatCacheList(CatCache *cache,
 			}
 
 			systable_endscan(scandesc);
-		} while (stale);
+
+			/* upon failure, we must start the scan over */
+			if (creating_list.dead)
+			{
+				/*
+				 * Release refcounts on any items we already had.  We dare not
+				 * try to free them if they're now unreferenced, since an
+				 * error while doing that would result in the PG_CATCH below
+				 * doing extra refcount decrements.  Besides, we'll likely
+				 * re-adopt those items in the next iteration, so it's not
+				 * worth complicating matters to try to get rid of them.
+				 */
+				foreach(ctlist_item, ctlist)
+				{
+					ct = (CatCTup *) lfirst(ctlist_item);
+					Assert(ct->c_list == NULL);
+					Assert(ct->refcount > 0);
+					ct->refcount--;
+				}
+				/* Reset ctlist in preparation for new try */
+				ctlist = NIL;
+				first_iter = false;
+				continue;
+			}
+			break;
+		} while (true);
 
 		table_close(relation, AccessShareLock);
 
@@ -1919,6 +1976,7 @@ SearchCatCacheList(CatCache *cache,
 	}
 	PG_CATCH();
 	{
+		catcache_creating_stack = creating_list.next;
 		foreach(ctlist_item, ctlist)
 		{
 			ct = (CatCTup *) lfirst(ctlist_item);
@@ -1937,6 +1995,8 @@ SearchCatCacheList(CatCache *cache,
 		PG_RE_THROW();
 	}
 	PG_END_TRY();
+	catcache_creating_stack = creating_list.next;
+	Assert(!creating_list.dead);
 
 	cl->cl_magic = CL_MAGIC;
 	cl->my_cache = cache;
@@ -2009,23 +2069,6 @@ ReleaseCatCacheListWithOwner(CatCList *list, ResourceOwner resowner)
 }
 
 
-/*
- * equalTuple
- *		Are these tuples memcmp()-equal?
- */
-static bool
-equalTuple(HeapTuple a, HeapTuple b)
-{
-	uint32		alen;
-	uint32		blen;
-
-	alen = a->t_len;
-	blen = b->t_len;
-	return (alen == blen &&
-			memcmp((char *) a->t_data,
-				   (char *) b->t_data, blen) == 0);
-}
-
 /*
  * CatalogCacheCreateEntry
  *		Create a new CatCTup entry, copying the given HeapTuple and other
@@ -2033,17 +2076,16 @@ equalTuple(HeapTuple a, HeapTuple b)
  *
  * To create a normal cache entry, ntp must be the HeapTuple just fetched
  * from scandesc, and "arguments" is not used.  To create a negative cache
- * entry, pass NULL for ntp and scandesc; then "arguments" is the cache
- * keys to use.  In either case, hashValue/hashIndex are the hash values
- * computed from the cache keys.
+ * entry, pass NULL for ntp; then "arguments" is the cache keys to use.
+ * In either case, hashValue/hashIndex are the hash values computed from
+ * the cache keys.
  *
  * Returns NULL if we attempt to detoast the tuple and observe that it
  * became stale.  (This cannot happen for a negative entry.)  Caller must
  * retry the tuple lookup in that case.
  */
 static CatCTup *
-CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
-						Datum *arguments,
+CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
 						uint32 hashValue, Index hashIndex)
 {
 	CatCTup    *ct;
@@ -2076,38 +2118,33 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
 		 */
 		if (HeapTupleHasExternal(ntp))
 		{
-			bool		need_cmp = IsInplaceUpdateOid(cache->cc_reloid);
-			HeapTuple	before = NULL;
-			bool		matches = true;
-
-			if (need_cmp)
-				before = heap_copytuple(ntp);
-			dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
+			volatile CatCCreating creating;
 
 			/*
 			 * The tuple could become stale while we are doing toast table
-			 * access (since AcceptInvalidationMessages can run then).
-			 * equalTuple() detects staleness from inplace updates, while
-			 * systable_recheck_tuple() detects staleness from normal updates.
-			 *
-			 * While this equalTuple() follows the usual rule of reading with
-			 * a pin and no buffer lock, it warrants suspicion since an
-			 * inplace update could appear at any moment.  It's safe because
-			 * the inplace update sends an invalidation that can't reorder
-			 * before the inplace heap change.  If the heap change reaches
-			 * this process just after equalTuple() looks, we've not missed
-			 * its inval.
+			 * access (since AcceptInvalidationMessages can run then). The
+			 * invalidation will mark our 'creating' entry as dead.
 			 */
-			if (need_cmp)
+			creating.cache = cache;
+			creating.hash_value = hashValue;
+			creating.list = false;
+			creating.dead = false;
+			creating.next = catcache_creating_stack;
+			catcache_creating_stack = (CatCCreating *) &creating;
+
+			PG_TRY();
 			{
-				matches = equalTuple(before, ntp);
-				heap_freetuple(before);
+				dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
 			}
-			if (!matches || !systable_recheck_tuple(scandesc, ntp))
+			PG_FINALLY();
 			{
-				heap_freetuple(dtp);
-				return NULL;
+				Assert(catcache_creating_stack == &creating);
+				catcache_creating_stack = creating.next;
 			}
+			PG_END_TRY();
+
+			if (creating.dead)
+				return NULL;
 		}
 		else
 			dtp = ntp;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e1c4f913f84..e1d63776581 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -383,6 +383,7 @@ CaseTestExpr
 CaseWhen
 Cash
 CastInfo
+CatCCreating
 CatCList
 CatCTup
 CatCache
-- 
2.39.5

Reply via email to