Hello,
02.06.2025 09:00, Alexander Lakhin wrote:
With additional logging (the patch is attached), I can see the following:
...
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6,
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 18, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0
2025-06-02 00:45:11.391 EDT [63817:14] pg_regress/brin LOG: !!!pgaio_io_before_start| ioh: 0x1046b5660, ioh->op: 1,
ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 2
TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line:
164, PID: 63817
I've added logging of the ioh->generation field at the beginning and at
the end of pgaio_io_reclaim() (the complete patch is attached) and got
even more interesting output:
...
!!!pgaio_io_reclaim [25175]| ioh: 0x104c3e1a0, ioh->op: 1, ioh->state: 6, ioh->result: 8192, ioh->num_callbacks: 2,
ioh->generation: 21692
!!!pgaio_io_reclaim [25175]| ioh: 0x104c3e1a0, ioh->op: 0, ioh->generation:
21693
!!!AsyncReadBuffers [25175] (1)| blocknum: 40, ioh: 0x104c3e1a0, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0, ioh->generation: 21693
!!!pgaio_io_start_readv [25175]| ioh: 0x104c3e1a0, ioh->op: 1, ioh->generation:
21693
!!!pgaio_io_reclaim [25176]| ioh: 0x104c3e080, ioh->op: 1, ioh->state: 6, ioh->result: 8192, ioh->num_callbacks: 2,
ioh->generation: 21408
!!!pgaio_io_reclaim [25176]| ioh: 0x104c3e080, ioh->op: 0, ioh->generation:
21409
!!!AsyncReadBuffers [25176] (1)| blocknum: 49, ioh: 0x104c3e080, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0, ioh->generation: 21409
!!!pgaio_io_start_readv [25176]| ioh: 0x104c3e080, ioh->op: 1, ioh->generation:
21409
!!!pgaio_io_reclaim [25175]| ioh: 0x104c3e1a0, ioh->op: 1, ioh->state: 6, ioh->result: 8192, ioh->num_callbacks: 2,
ioh->generation: 21693
!!!pgaio_io_reclaim [25175]| ioh: 0x104c3e1a0, ioh->op: 0, ioh->generation:
21694
!!!AsyncReadBuffers [25175] (1)| blocknum: 41, ioh: 0x104c3e1a0, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0, ioh->generation: 21694
2025-06-03 00:19:09.282 EDT [25175:1] LOG: !!!pgaio_io_before_start| ioh: 0x104c3e1a0, ioh->op: 1, ioh->state: 1,
ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 21694
2025-06-03 00:19:09.282 EDT [25175:2] STATEMENT: explain (analyze, format
'json')
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2
using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line:
164, PID: 25175
0 postgres 0x0000000103563550 ExceptionalCondition
+ 236
1 postgres 0x000000010326a260
pgaio_io_before_start + 516
2 postgres 0x0000000103269fb4 pgaio_io_start_readv
+ 36
3 postgres 0x0000000103289144 FileStartReadV + 252
4 postgres 0x00000001032dc424 mdstartreadv + 668
5 postgres 0x00000001032dfa0c smgrstartreadv + 116
6 postgres 0x00000001032723b8 AsyncReadBuffers +
2028
7 postgres 0x0000000103271354 StartReadBuffersImpl
+ 1196
8 postgres 0x0000000103270e98 StartReadBuffers + 64
9 postgres 0x000000010326e4a0
read_stream_start_pending_read + 1204
10 postgres 0x000000010326db84
read_stream_look_ahead + 812
11 postgres 0x000000010326d71c
read_stream_next_buffer + 2356
...
Best regards,
Alexander Lakhin
Neon (https://neon.tech)
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index c64d815ebd1..24fffa6fa76 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -668,6 +668,7 @@ pgaio_io_reclaim(PgAioHandle *ioh)
Assert(ioh->owner_procno == MyProcNumber);
Assert(ioh->state != PGAIO_HS_IDLE);
+fprintf(stderr, "!!!pgaio_io_reclaim [%d]| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), ioh, ioh->op, ioh->state, ioh->result, ioh->num_callbacks, ioh->generation);
/* see comment in function header */
HOLD_INTERRUPTS();
@@ -740,6 +741,7 @@ pgaio_io_reclaim(PgAioHandle *ioh)
dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);
RESUME_INTERRUPTS();
+fprintf(stderr, "!!!pgaio_io_reclaim [%d]| ioh: %p, ioh->op: %d, ioh->generation: %llu\n", getpid(), ioh, ioh->op, ioh->generation);
}
/*
diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c
index 00e176135a6..f802af81963 100644
--- a/src/backend/storage/aio/aio_io.c
+++ b/src/backend/storage/aio/aio_io.c
@@ -85,6 +85,7 @@ pgaio_io_start_readv(PgAioHandle *ioh,
ioh->op_data.read.iov_length = iovcnt;
pgaio_io_stage(ioh, PGAIO_OP_READV);
+fprintf(stderr, "!!!pgaio_io_start_readv [%d]| ioh: %p, ioh->op: %d, ioh->generation: %llu\n", getpid(), ioh, ioh->op, ioh->generation);
}
void
@@ -158,6 +159,8 @@ pgaio_io_before_start(PgAioHandle *ioh)
Assert(ioh->state == PGAIO_HS_HANDED_OUT);
Assert(pgaio_my_backend->handed_out_io == ioh);
Assert(pgaio_io_has_target(ioh));
+if (ioh->op != PGAIO_OP_INVALID) elog(LOG, "!!!pgaio_io_before_start| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu", ioh, ioh->op, ioh->state, ioh->result, ioh->num_callbacks, ioh->generation);
+
Assert(ioh->op == PGAIO_OP_INVALID);
/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..e48975452f8 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -67,6 +67,7 @@
#include "utils/resowner.h"
#include "utils/timestamp.h"
+#include "storage/aio_internal.h"
/* Note: these two macros only work on shared buffers, not local ones! */
#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
@@ -1855,7 +1856,10 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
pgaio_submit_staged();
ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (2)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks, ((PgAioHandle *)ioh)->generation);
}
+else
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (1)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks, ((PgAioHandle *)ioh)->generation);
/*
* Check if we can start IO on the first to-be-read buffer.
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 1c11750ac1d..1135d6dc399 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -712,7 +712,7 @@ sub init
print $conf "wal_log_hints = on\n";
print $conf "hot_standby = on\n";
# conservative settings to ensure we can run multiple postmasters:
- print $conf "shared_buffers = 1MB\n";
+ print $conf "shared_buffers = 512kB\n";
print $conf "max_connections = 10\n";
# limit disk space consumption, too:
print $conf "max_wal_size = 128MB\n";