Hello.
> But I was unable to reproduce that using some random usleep(), however -
maybe it is a wrong assumption.
It seems like the assumption is correct - we may use an invalid index as
arbiter due to race condition.
The attached patch adds a check for that case, and now the test fails like
this:
# pgbench: error: client 16 script 1 aborted in command 1 query 0:
ERROR: duplicate key value violates unique constraint "tbl_pkey_ccold"
# DETAIL: Key (i)=(42) already exists.
# pgbench: error: client 9 script 1 aborted in command 1 query 0:
ERROR: ON CONFLICT does not support invalid indexes as arbiters
# pgbench: error: client 0 script 2 aborted in command 1 query 0:
ERROR: duplicate key value violates unique constraint "tbl_pkey"
# DETAIL: Key (i)=(69) already exists.
# pgbench: error: client 7 script 0 aborted in command 1 query 0:
ERROR: ON CONFLICT does not support invalid indexes as arbiters
# pgbench: error: client 10 script 0 aborted in command 1 query 0:
ERROR: ON CONFLICT does not support invalid indexes as arbiters
# pgbench: error: client 11 script 0 aborted in command 1 query 0:
ERROR: ON CONFLICT does not support invalid indexes as arbiters
I think It is even possible to see !alive index in the same situation (it
is worse case), but I was unable to reproduce it so far.
Best regards,
Mikhail.
Subject: [PATCH] error report in case of invalid index used as arbiter
test for issue with upsert fail
---
Index: src/test/modules/test_misc/t/006_concurrently_unique_fail.pl
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl b/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl
new file mode 100644
--- /dev/null (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f)
+++ b/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f)
@@ -0,0 +1,158 @@
+
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+# Test REINDEX CONCURRENTLY with concurrent modifications and HOT updates
+use strict;
+use warnings;
+
+use Config;
+use Errno;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use IPC::SysV;
+use threads;
+use Time::HiRes qw( time );
+use Test::More;
+use Test::Builder;
+
+if ($@ || $windows_os)
+{
+ plan skip_all => 'Fork and shared memory are not supported by this platform';
+}
+
+my ($pid, $shmem_id, $shmem_key, $shmem_size);
+eval 'sub IPC_CREAT {0001000}' unless defined &IPC_CREAT;
+$shmem_size = 4;
+$shmem_key = rand(1000000);
+$shmem_id = shmget($shmem_key, $shmem_size, &IPC_CREAT | 0777) or die "Can't shmget: $!";
+shmwrite($shmem_id, "wait", 0, $shmem_size) or die "Can't shmwrite: $!";
+
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+#
+# Test set-up
+#
+my ($node, $result);
+$node = PostgreSQL::Test::Cluster->new('RC_test');
+$node->init;
+$node->append_conf('postgresql.conf',
+ 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
+$node->append_conf('postgresql.conf', 'fsync = off');
+$node->append_conf('postgresql.conf', 'autovacuum = off');
+$node->start;
+$node->safe_psql('postgres', q(CREATE UNLOGGED TABLE tbl(i int primary key, updated_at timestamp)));
+
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+# IT IS NOT REQUIRED TO REPRODUCE THE ISSUE BUT MAKES IT TO HAPPEN FASTER
+$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i, updated_at)));
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+my $builder = Test::More->builder;
+$builder->use_numbers(0);
+$builder->no_plan();
+
+my $child = $builder->child("pg_bench");
+
+if(!defined($pid = fork())) {
+ # fork returned undef, so unsuccessful
+ die "Cannot fork a child: $!";
+} elsif ($pid == 0) {
+
+ $node->pgbench(
+ '--no-vacuum --client=20 -j 2 --transactions=100000',
+ 0,
+ [qr{actually processed}],
+ [qr{^$}],
+ 'concurrent INSERTs, UPDATES and RC',
+ {
+ # Ensure some HOT updates happen
+ '002_pgbench_concurrent_transaction_updates' => q(
+ BEGIN;
+ INSERT INTO tbl VALUES(13,now()) on conflict(i) do update set updated_at = now();
+ COMMIT;
+ ),
+ '003_pgbench_concurrent_transaction_updates' => q(
+ BEGIN;
+ INSERT INTO tbl VALUES(42,now()) on conflict(i) do update set updated_at = now();
+ COMMIT;
+ ),
+ '004_pgbench_concurrent_transaction_updates' => q(
+ BEGIN;
+ INSERT INTO tbl VALUES(69,now()) on conflict(i) do update set updated_at = now();
+ COMMIT;
+ ),
+ });
+
+ if ($child->is_passing()) {
+ shmwrite($shmem_id, "done", 0, $shmem_size) or die "Can't shmwrite: $!";
+ } else {
+ shmwrite($shmem_id, "fail", 0, $shmem_size) or die "Can't shmwrite: $!";
+ }
+
+ my $pg_bench_fork_flag;
+ while (1) {
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+ sleep(0.1);
+ last if $pg_bench_fork_flag eq "stop";
+ }
+} else {
+ my $pg_bench_fork_flag;
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+
+ subtest 'reindex run subtest' => sub {
+ is($pg_bench_fork_flag, "wait", "pg_bench_fork_flag is correct");
+
+ my %psql = (stdin => '', stdout => '', stderr => '');
+ $psql{run} = IPC::Run::start(
+ [ 'psql', '-XA', '-f', '-', '-d', $node->connstr('postgres') ],
+ '<',
+ \$psql{stdin},
+ '>',
+ \$psql{stdout},
+ '2>',
+ \$psql{stderr},
+ $psql_timeout);
+
+ my ($result, $stdout, $stderr, $n, $begin_time, $end_time);
+
+ # IT IS NOT REQUIRED, JUST FOR CONSISTENCY
+ ($result, $stdout, $stderr) = $node->psql('postgres', q(ALTER TABLE tbl SET (parallel_workers=0);));
+ is($result, '0', 'ALTER TABLE is correct');
+
+ $begin_time = time();
+ while (1)
+ {
+ my $sql = q(REINDEX INDEX CONCURRENTLY tbl_pkey;);
+
+ ($result, $stdout, $stderr) = $node->psql('postgres', $sql);
+ is($result, '0', 'REINDEX INDEX CONCURRENTLY is correct');
+
+ $end_time = time();
+ diag('waiting for an about 3000, now is ' . $n++ . ', seconds passed : ' . int($end_time - $begin_time));
+
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+ last if $pg_bench_fork_flag ne "wait";
+ }
+
+ # explicitly shut down psql instances gracefully
+ $psql{stdin} .= "\\q\n";
+ $psql{run}->finish;
+
+ is($pg_bench_fork_flag, "done", "pg_bench_fork_flag is correct");
+ };
+
+ $child->finalize();
+ $child->summary();
+ $node->stop;
+ done_testing();
+
+ shmwrite($shmem_id, "stop", 0, $shmem_size) or die "Can't shmwrite: $!";
+}
Index: src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl b/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl
new file mode 100644
--- /dev/null (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f)
+++ b/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f)
@@ -0,0 +1,165 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+# Test REINDEX CONCURRENTLY with concurrent modifications and HOT updates
+use strict;
+use warnings;
+
+use Config;
+use Errno;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use IPC::SysV;
+use threads;
+use Time::HiRes qw( time );
+use Test::More;
+use Test::Builder;
+
+if ($@ || $windows_os)
+{
+ plan skip_all => 'Fork and shared memory are not supported by this platform';
+}
+
+my ($pid, $shmem_id, $shmem_key, $shmem_size);
+eval 'sub IPC_CREAT {0001000}' unless defined &IPC_CREAT;
+$shmem_size = 4;
+$shmem_key = rand(1000000);
+$shmem_id = shmget($shmem_key, $shmem_size, &IPC_CREAT | 0777) or die "Can't shmget: $!";
+shmwrite($shmem_id, "wait", 0, $shmem_size) or die "Can't shmwrite: $!";
+
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+#
+# Test set-up
+#
+my ($node, $result);
+$node = PostgreSQL::Test::Cluster->new('RC_test');
+$node->init;
+$node->append_conf('postgresql.conf',
+ 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
+$node->append_conf('postgresql.conf', 'fsync = off');
+$node->append_conf('postgresql.conf', 'autovacuum = off');
+$node->start;
+$node->safe_psql('postgres', q(CREATE UNLOGGED TABLE tbl(i int primary key, n int)));
+
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+# IT IS **REQUIRED** TO REPRODUCE THE ISSUE
+$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i, n)));
+$node->safe_psql('postgres', q(INSERT INTO tbl VALUES(13,1)));
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+my $builder = Test::More->builder;
+$builder->use_numbers(0);
+$builder->no_plan();
+
+my $child = $builder->child("pg_bench");
+
+if(!defined($pid = fork())) {
+ # fork returned undef, so unsuccessful
+ die "Cannot fork a child: $!";
+} elsif ($pid == 0) {
+
+ $pid = fork();
+ if ($pid == 0) {
+ $node->pgbench(
+ '--no-vacuum --client=30 -j 2 --transactions=1000000',
+ 0,
+ [qr{actually processed}],
+ [qr{^$}],
+ 'concurrent INSERTs, UPDATES and RC',
+ {
+ '002_pgbench_concurrent_transaction_updates' => q(
+ BEGIN;
+ INSERT INTO tbl VALUES(13,1) on conflict(i) do update set n = tbl.n + EXCLUDED.n;
+ COMMIT;
+ ),
+ });
+
+ if ($child->is_passing()) {
+ shmwrite($shmem_id, "done", 0, $shmem_size) or die "Can't shmwrite: $!";
+ } else {
+ shmwrite($shmem_id, "fail", 0, $shmem_size) or die "Can't shmwrite: $!";
+ }
+
+ my $pg_bench_fork_flag;
+ while (1) {
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+ sleep(0.1);
+ last if $pg_bench_fork_flag eq "stop";
+ }
+ }
+ else {
+ my ($result, $stdout, $stderr, $n, $prev_n, $pg_bench_fork_flag);
+ while (1) {
+ sleep(1);
+ $prev_n = $n;
+ ($result, $n, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;));
+ diag(" current n is " . $n . ', ' . ($n - $prev_n) . ' per one second');
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+ last if $pg_bench_fork_flag eq "stop";
+ }
+ }
+} else {
+ my $pg_bench_fork_flag;
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+
+ subtest 'reindex run subtest' => sub {
+ is($pg_bench_fork_flag, "wait", "pg_bench_fork_flag is correct");
+
+ my %psql = (stdin => '', stdout => '', stderr => '');
+ $psql{run} = IPC::Run::start(
+ [ 'psql', '-XA', '-f', '-', '-d', $node->connstr('postgres') ],
+ '<',
+ \$psql{stdin},
+ '>',
+ \$psql{stdout},
+ '2>',
+ \$psql{stderr},
+ $psql_timeout);
+
+ my ($result, $stdout, $stderr, $n, $begin_time, $end_time, $before_reindex, $after_reindex);
+
+ # IT IS NOT REQUIRED, JUST FOR CONSISTENCY
+ ($result, $stdout, $stderr) = $node->psql('postgres', q(ALTER TABLE tbl SET (parallel_workers=0);));
+ is($result, '0', 'ALTER TABLE is correct');
+
+ while (1)
+ {
+ my $sql = q(REINDEX INDEX CONCURRENTLY tbl_pkey;);
+
+ ($result, $before_reindex, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;));
+
+ diag('going to start reindex, num tuples in table is ' . $before_reindex);
+ $begin_time = time();
+ ($result, $stdout, $stderr) = $node->psql('postgres', $sql);
+ is($result, '0', 'REINDEX INDEX CONCURRENTLY is correct');
+
+ $end_time = time();
+ ($result, $after_reindex, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;));
+ diag('reindex ' . $n++ . ' done in ' . ($end_time - $begin_time) . ' seconds, num inserted during reindex tuples is ' . (int($after_reindex) - int($before_reindex)) . ' speed is ' . ((int($after_reindex) - int($before_reindex)) / ($end_time - $begin_time)) . ' per second');
+
+ shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+ last if $pg_bench_fork_flag ne "wait";
+ }
+
+ # explicitly shut down psql instances gracefully
+ $psql{stdin} .= "\\q\n";
+ $psql{run}->finish;
+
+ is($pg_bench_fork_flag, "done", "pg_bench_fork_flag is correct");
+ };
+
+ $child->finalize();
+ $child->summary();
+ $node->stop;
+ done_testing();
+
+ shmwrite($shmem_id, "stop", 0, $shmem_size) or die "Can't shmwrite: $!";
+}
Index: src/backend/executor/execIndexing.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
--- a/src/backend/executor/execIndexing.c (revision cb460d43c12db970056b2e994e023de06eabc494)
+++ b/src/backend/executor/execIndexing.c (revision dd72fa154aed98254b47637fed62a1d584131dbf)
@@ -587,6 +587,13 @@
indexRelation->rd_index->indexrelid))
continue;
+ if (!indexRelation->rd_index->indisvalid)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("ON CONFLICT does not support invalid indexes as arbiters"),
+ errtableconstraint(heapRelation,
+ RelationGetRelationName(indexRelation))));
+
if (!indexRelation->rd_index->indimmediate)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),