On 13/12/2024 17:30, Tom Lane wrote:
Heikki Linnakangas <hlinn...@iki.fi> writes:
CatalogCacheCreateEntry() can accept catcache invalidations when it
opens the toast table, and it now has recheck logic to detect the case
that the tuple it's processing (ntp) is invalidated. However, isn't it
also possible that it accepts an invalidation message for a tuple that
we had processed in an earlier iteration of the loop? Or that a new
catalog tuple was inserted that should be part of the list we're building?
The expectation is that the list will be built and returned to the
caller, but it's already marked as stale so it will be rebuilt
on next request.
Ah, you mean this at the end:
/* mark list dead if any members already dead */
if (ct->dead)
cl->dead = true;
Ok, I missed that. It does not handle the 2nd scenario though: If a new
catalog tuple is concurrently inserted that should be part of the list,
it is missed.
I was able to reproduce that, by pausing a process with gdb while it's
building the list in SearchCatCacheList():
1. Create a function called foofunc(integer). It must be large so that
its pg_proc tuple is toasted.
2. In one backend, run "SELECT foofunc(1)". It calls
FuncnameGetCandidates() which calls
"SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname));". Put
a break point in SearchCatCacheList() just after the systable_beginscan().
3. In another backend, create function foofunc() with no args.
4. continue execution from the breakpoint.
5. Run "SELECT foofunc()" in the first session. It fails to find the
function. The error persists, it will fail to find that function if you
try again, until the syscache is invalidated again for some reason.
Attached is an injection point test case to reproduce that. If you
change the test so that the function's body is shorter, so that it's not
toasted, the test passes.
--
Heikki Linnakangas
Neon (https://neon.tech)
From 0a3bd2805f332c849333c071d39a6595726fa4c9 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Sat, 14 Dec 2024 02:05:55 +0200
Subject: [PATCH 1/1] Demonstrate catcache list invalidation bug
---
src/backend/utils/cache/catcache.c | 3 +
src/test/modules/test_misc/meson.build | 1 +
src/test/modules/test_misc/t/007_bugs.pl | 96 ++++++++++++++++++++++++
3 files changed, 100 insertions(+)
create mode 100644 src/test/modules/test_misc/t/007_bugs.pl
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index ee303dc501d..4f06b45239c 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -34,6 +34,7 @@
#include "utils/catcache.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
+#include "utils/injection_point.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -1811,6 +1812,8 @@ SearchCatCacheList(CatCache *cache,
stale = false;
+ INJECTION_POINT("catcache-list-miss-systable-scan-started");
+
while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
{
uint32 hashValue;
diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build
index 283ffa751aa..3abcd471c03 100644
--- a/src/test/modules/test_misc/meson.build
+++ b/src/test/modules/test_misc/meson.build
@@ -15,6 +15,7 @@ tests += {
't/004_io_direct.pl',
't/005_timeouts.pl',
't/006_signal_autovacuum.pl',
+ 't/007_bugs.pl',
],
},
}
diff --git a/src/test/modules/test_misc/t/007_bugs.pl b/src/test/modules/test_misc/t/007_bugs.pl
new file mode 100644
index 00000000000..48adf2f7b65
--- /dev/null
+++ b/src/test/modules/test_misc/t/007_bugs.pl
@@ -0,0 +1,96 @@
+
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+use locale;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use Test::More;
+
+# Test timeouts that will cause FATAL errors. This test relies on injection
+# points to await a timeout occurrence. Relying on sleep proved to be unstable
+# on buildfarm. It's difficult to rely on the NOTICE injection point because
+# the backend under FATAL error can behave differently.
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+ plan skip_all => 'Injection points not supported by this build';
+}
+
+# Node initialization
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init();
+$node->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node->check_extension('injection_points'))
+{
+ plan skip_all => 'Extension injection_points not installed';
+}
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+
+sub randStr {
+ my $len = shift;
+ my @chars = ("A".."Z", "a".."z", "0".."9");
+ return join '', map { @chars[rand @chars] } 1 .. $len;
+}
+
+# XXX: If you make this shorter, the pg_proc tuple is not toasted, and the test passes
+my $longtext = randStr(10000); # test fails
+#my $longtext = randStr(10); # test passes
+
+$node->safe_psql('postgres', qq[
+ CREATE FUNCTION foofunc(dummy integer) RETURNS integer AS \$\$ SELECT 1; /* $longtext */ \$\$ LANGUAGE SQL
+]);
+
+my $psql_session = $node->background_psql('postgres');
+my $psql_session2 = $node->background_psql('postgres');
+
+# Set injection point in the session, to pause while populating the catcache list
+$psql_session->query_safe(
+ qq[
+ SELECT injection_points_set_local();
+ SELECT injection_points_attach('catcache-list-miss-systable-scan-started', 'wait');
+]);
+
+# This pauses on the injection point while populating catcache list for
+# functions with name "foofunc"
+$psql_session->query_until(
+ qr/starting_bg_psql/, q(
+ \echo starting_bg_psql
+ SELECT foofunc(1);
+));
+
+# While the first session is building the catcache list, create a new
+# function that overloads the same name. This sends a catcache invalidation.
+$node->safe_psql('postgres', qq[
+ CREATE FUNCTION foofunc() RETURNS integer AS \$\$ SELECT 123 \$\$ LANGUAGE SQL
+]);
+
+# Continue the paused session. It will continue to construct the catcache
+# list, and will accept invalidations while doing that, but it misses the
+# new function we just created. The "SELECT foofunc(1)" query will now
+# finish.
+$psql_session2->query_safe(qq[
+ SELECT injection_points_wakeup('catcache-list-miss-systable-scan-started');
+ SELECT injection_points_detach('catcache-list-miss-systable-scan-started');
+]);
+
+# Demonstrate that we can run other queries in the session. (Not necessary, but
+# makes it more clear that the session is permanently missing the catcache entry)
+$psql_session->query_safe("SELECT now();");
+
+# This should find the new function, but it does not.
+$psql_session->query_safe("SELECT foofunc();");
+
+ok($psql_session->quit);
+ok($psql_session2->quit);
+
+done_testing();
--
2.39.5