Hi,
Here's a reproducer which enabled me to reach this stuck state:
pid | wait_event | query
-------+---------------+-----------------------------------------------------------------------------
64617 | | select pid, wait_event, query from
pg_stat_activity where state = 'active';
64619 | BufferPin | VACUUM jobs
64620 | ExecuteGather | SELECT COUNT(*) FROM jobs
64621 | ExecuteGather | SELECT COUNT(*) FROM jobs
64622 | ExecuteGather | SELECT COUNT(*) FROM jobs
64623 | ExecuteGather | SELECT COUNT(*) FROM jobs
84167 | BtreePage | SELECT COUNT(*) FROM jobs
84168 | BtreePage | SELECT COUNT(*) FROM jobs
96440 | | SELECT COUNT(*) FROM jobs
96438 | | SELECT COUNT(*) FROM jobs
96439 | | SELECT COUNT(*) FROM jobs
(11 rows)
The main thread deletes stuff in the middle of the key range (not sure
if this is important) and vacuums in a loop, and meanwhile 4 threads
(probably not important, might as well be 1) run Parallel Index Scans
over the whole range, in the hope of hitting the interesting case. In
the locked-up case I just saw now opaque->btpo_flags had the
BTP_DELETED bit set, not BTP_HALF_DEAD (I could tell because I added
logging). Clearly pages are periodically being marked half-dead but I
haven't yet managed to get an index scan to hit one of those.
--
Thomas Munro
http://www.enterprisedb.com
/*
Set up the schema like this:
drop table jobs;
create table jobs (id int, blah text, status text);
create index on jobs(status);
insert into jobs select generate_series(1, 100000), 'hello world';
update jobs set status = 'A' where id % 100 = 0;
analyze jobs;
alter table jobs set (parallel_workers = 2);
You should get a Parallel Index Only Scan like this:
set enable_seqscan = off;
set max_parallel_workers_per_gather = 2;
set parallel_leader_participation = off;
set min_parallel_index_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
EXPLAIN SELECT COUNT(*) FROM jobs;
*/
#include <libpq-fe.h>
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define CONNINFO "dbname=postgres"
#define NTHREADS 4
static void *
thread_main(void *data)
{
int i;
PGconn *conn;
PGresult *res;
conn = PQconnectdb(CONNINFO);
assert(PQstatus(conn) == CONNECTION_OK);
res = PQexec(conn, "SET enable_seqscan = off");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "SET max_parallel_workers_per_gather = 2");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "SET parallel_leader_participation = off");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "SET min_parallel_index_scan_size = 0");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "SET parallel_setup_cost = 0");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "SET parallel_tuple_cost = 0");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
for (i = 0; i < 1000000; ++i)
{
//res = PQexec(conn, "SELECT COUNT(*) FROM jobs WHERE status = 'B'");
res = PQexec(conn, "SELECT COUNT(*) FROM jobs");
assert(PQresultStatus(res) == PGRES_TUPLES_OK);
assert(PQnfields(res) == 1);
assert(PQntuples(res) == 1);
PQclear(res);
}
PQfinish(conn);
return 0;
}
int
main(int argc, char **argv)
{
pthread_t threads[NTHREADS];
pthread_attr_t attr;
int i;
void *r;
PGconn *conn;
PGresult *res;
conn = PQconnectdb(CONNINFO);
assert(PQstatus(conn) == CONNECTION_OK);
pthread_attr_init(&attr);
for (i = 0; i < NTHREADS; ++i)
{
if (pthread_create(&threads[i], &attr, &thread_main, NULL) != 0)
{
fprintf(stderr, "failed to create thread\n");
exit(1);
}
}
for (i = 50000; i < 100000; ++i)
{
char buf[1024];
printf("updating %d\n", i);
res = PQexec(conn, "BEGIN");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
snprintf(buf, sizeof(buf), "DELETE FROM jobs WHERE id = %d", i);
res = PQexec(conn, buf);
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
snprintf(buf, sizeof(buf), "INSERT INTO jobs VALUES (%d, 'xxx', %s)", i + 50000, (i % 100 == 0) ? "'A'" : (i % 100 == 1) ? "'B'" : (i % 100 == 2) ? "'C'" : "NULL");
res = PQexec(conn, buf);
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "COMMIT");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
res = PQexec(conn, "VACUUM jobs");
assert(PQresultStatus(res) == PGRES_COMMAND_OK);
PQclear(res);
}
PQfinish(conn);
for (i = 0; i < NTHREADS; ++i)
pthread_cancel(threads[i]);
for (i = 0; i < NTHREADS; ++i)
pthread_join(threads[i], &r);
}