Hello hackers,

A recent buildfarm failure [1]:
 38/273 recovery - postgresql:recovery/035_standby_logical_decoding ERROR       
     74.36s   exit status 1

[14:34:25.522](0.001s) ok 65 - pg_recvlogical exited non-zero
[14:34:25.522](0.000s) not ok 66 - slot has been invalidated
[14:34:25.523](0.000s) #   Failed test 'slot has been invalidated'
#   at 
/home/bf/bf-build/francolin/REL_16_STABLE/pgsql/src/test/recovery/t/035_standby_logical_decoding.pl
 line 118.
[14:34:25.523](0.001s) #                   'pg_recvlogical: error: could not send replication command "START_REPLICATION SLOT "drop_db_activeslot" LOGICAL 0/0 ("include-xids" '0', "skip-empty-xacts" '1')": server closed the connection unexpectedly
#     This probably means the server terminated abnormally
#     before or while processing the request.
# pg_recvlogical: error: disconnected
# '
#     doesn't match '(?^:conflict with recovery)'
[14:34:25.675](0.152s) ok 67 - otherslot on standby not dropped

duplicated one that happened the last year [2] and had no explanation so
far. This time I've managed to reproduce it with the check_slots_dropped()
repeated in the loop (the patch is attached).

With extra logging added, I could see:
[08:47:52.385](0.000s) not ok 114 - slot has been invalidated
[08:47:52.385](0.000s) #   Failed test 'slot has been invalidated'
#   at 
/home/user/postgresql/src/test/recovery_9/t/035_standby_logical_decoding.pl 
line 120.
[08:47:52.385](0.000s) #                   '!!!pqsecure_raw_read| len=16384 n=554 errno=0 buf=760000002b00030002000000015f70715f2e746573745f70726f746f636f6c5f6e65676f74696174696f6e00520000000800000000530000001b496e74657276616c5374796c6500706f7374677265730053000000207365617263685f7061746800222475736572222c207075626c696300530000001469735f737570657275736572006f6e0053000000237374616e646172645f636f6e666f726d696e675f737472696e6773006f6e00530000001f73657373696f6e5f617574686f72697a6174696f6e0075736572005300000019636c69656e745f656e636f64696e67005554463800530000001b7365727665725f76657273696f6e003139646576656c # !!!pqsecure_raw_read| len=16384 n=85 errno=0 buf=540000002300017365745f636f6e6669670000000000000000000019ffffffffffff0000440000000a000100000000430000000d53454c45435420310053000000117365617263685f7061746800005a0000000549 # !!!pqsecure_raw_read| len=16384 n=76 errno=0 buf=540000002c0001646174615f6469726563746f72795f6d6f64650000000000000000000019ffffffffffff0000440000000e00010000000430373030430000000953484f57005a0000000549 # !!!pqsecure_raw_read| len=16384 n=198 errno=0 buf=540000006f000473797374656d69640000000000000000000019ffffffffffff000074696d656c696e6500000000000000000000140008ffffffff0000786c6f67706f730000000000000000000019ffffffffffff000064626e616d650000000000000000000019ffffffffffff0000440000003a0004000000133736343038333834313938393731313334343000000001310000000a302f30373138433739380000000674657374646243000000144944454e544946595f53595354454d005a0000000549 # !!!pqsecure_raw_read| len=16384 n=181 errno=0 buf=45000000b453464154414c0056464154414c00433537503034004d7465726d696e6174696e6720636f6e6e656374696f6e2064756520746f20636f6e666c6963742077697468207265636f766572790044557365722077617320636f6e6e656374656420746f20612064617461626173652074686174206d7573742062652064726f707065642e0046706f7374677265732e63004c3332313300527265706f72745f7265636f766572795f636f6e666c6963740000
### FATAL...57P04...terminating connection due to conflict with recovery
# !!!pqsecure_raw_read| len=16384 n=0 errno=0 buf=
# !!!pqsecure_raw_read| len=16384 n=0 errno=0 buf=
# pg_recvlogical: error: could not send replication command "START_REPLICATION SLOT "drop_db_activeslot" LOGICAL 0/00000000 ("include-xids" '0', "skip-empty-xacts" '1')": server closed the connection unexpectedly
#       This probably means the server terminated abnormally
#       before or while processing the request.
# pg_recvlogical: error: disconnected
# '
#     doesn't match '(?^:conflict with recovery)'

That is, the FATAL message was received from the server, but libpq lost it 
inside.

A successful run shows this:
!!!check_pg_recvlogical_stderr:
!!!pqsecure_raw_read| len=16384 n=554 errno=0 buf=760000002b00030002000000015f70715f2e746573745f70726f746f636f6c5f6e65676f74696174696f6e00520000000800000000530000001b496e74657276616c5374796c6500706f7374677265730053000000207365617263685f7061746800222475736572222c207075626c696300530000001469735f737570657275736572006f6e0053000000237374616e646172645f636f6e666f726d696e675f737472696e6773006f6e00530000001f73657373696f6e5f617574686f72697a6174696f6e0075736572005300000019636c69656e745f656e636f64696e67005554463800530000001b7365727665725f76657273696f6e003139646576656c !!!pqsecure_raw_read| len=16384 n=85 errno=0 buf=540000002300017365745f636f6e6669670000000000000000000019ffffffffffff0000440000000a000100000000430000000d53454c45435420310053000000117365617263685f7061746800005a0000000549 !!!pqsecure_raw_read| len=16384 n=76 errno=0 buf=540000002c0001646174615f6469726563746f72795f6d6f64650000000000000000000019ffffffffffff0000440000000e00010000000430373030430000000953484f57005a0000000549 !!!pqsecure_raw_read| len=16384 n=198 errno=0 buf=540000006f000473797374656d69640000000000000000000019ffffffffffff000074696d656c696e6500000000000000000000140008ffffffff0000786c6f67706f730000000000000000000019ffffffffffff000064626e616d650000000000000000000019ffffffffffff0000440000003a0004000000133736343038333834313938393731313334343000000001310000000a302f30364432393539380000000674657374646243000000144944454e544946595f53595354454d005a0000000549
!!!pqsecure_raw_read| len=16384 n=8 errno=0 buf=5700000007000000
!!!pqsecure_raw_read| len=16384 n=46 errno=0 buf=64000000166b0000000006d295980002f5019f6590590064000000166b0000000006d295980002f5019f6590c100 !!!pqsecure_raw_read| len=16384 n=46 errno=0 buf=64000000166b0000000006d295980002f5019f66df5b0064000000166b0000000006d295d00002f5019f66e01d00 !!!pqsecure_raw_read| len=16384 n=46 errno=0 buf=64000000166b0000000006d295d00002f5019f6719390064000000166b0000000006d296a00002f5019f67197c00
!!!pqsecure_raw_read| len=16384 n=23 errno=0 
buf=64000000166b0000000006d296a00002f5019f671b3300
!!!pqsecure_raw_read| len=16384 n=23 errno=0 
buf=64000000166b0000000006d297800002f5019f671bc900
!!!pqsecure_raw_read| len=16384 n=46 errno=0 buf=64000000166b0000000006d297800002f5019f671c970064000000166b0000000006d298000002f5019f671cbf00 !!!pqsecure_raw_read| len=16384 n=181 errno=0 buf=45000000b453464154414c0056464154414c00433537503034004d7465726d696e6174696e6720636f6e6e656374696f6e2064756520746f20636f6e666c6963742077697468207265636f766572790044557365722077617320636f6e6e656374656420746f20612064617461626173652074686174206d7573742062652064726f707065642e0046706f7374677265732e63004c3332313300527265706f72745f7265636f766572795f636f6e666c6963740000
### FATAL...57P04...terminating connection due to conflict with recovery
pg_recvlogical: error: unexpected termination of replication stream: FATAL:  terminating connection due to conflict with recovery
DETAIL:  User was connected to a database that must be dropped.
pg_recvlogical: error: disconnected

Thus, FATAL is received by pg_recvlogical as expected, if it follows
CopyBothResponse in the stream.

Having this knowledge, I could reproduce such behavior simply with:
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1501,6 +1501,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)

     ReplicationSlotAcquire(cmd->slotname, true, true);
 +pg_usleep(1000000);

     /*
      * Force a disconnect, so that the decoding code doesn't need to care
      * about an eventual switch from running in recovery, to running in a

# +++ tap check in src/test/recovery +++
t/035_standby_logical_decoding.pl .. 69/?
#   Failed test 'slot has been invalidated'
#   at t/035_standby_logical_decoding.pl line 119.
#                   'pg_recvlogical: error: could not send replication command "START_REPLICATION SLOT "drop_db_activeslot" LOGICAL 0/00000000 ("include-xids" '0', "skip-empty-xacts" '1')": server closed the connection unexpectedly
#       This probably means the server terminated abnormally
#       before or while processing the request.
# pg_recvlogical: error: disconnected
# '
#     doesn't match '(?^:conflict with recovery)'
t/035_standby_logical_decoding.pl .. 88/? # Looks like you failed 1 test of 89.

Reproduced on REL_16_STABLE (where this test was landed) .. master.

[1] 
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=francolin&dt=2026-05-11%2012%3A25%3A04
[2] 
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=kestrel&dt=2025-08-04%2005%3A45%3A46

Best regards,
Alexander
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index a33a685dcc6..0c37ebdf9d1 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -603,6 +603,7 @@ CreateDecodingContext(XLogRecPtr start_lsn,
 
 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
 
+pg_usleep(100000);
 	ereport(LogicalDecodingLogLevel(),
 			(errmsg("starting logical decoding for slot \"%s\"",
 					NameStr(slot->data.name)),
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 31d5b48d3f9..c08c2987c46 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -200,6 +200,16 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
 
 	n = recv(conn->sock, ptr, len, 0);
 
+if (conn->replication && conn->replication[0])
+{
+int	save_errno = SOCK_ERRNO;
+char	hb[1024];
+hb[0] = 0;
+for (size_t i = 0; i < n && i < 256; i++) sprintf(hb + (i * 2), "%02x", ((char *)ptr)[i]);
+fprintf(stderr, "!!!pqsecure_raw_read| len=%zu n=%zd errno=%d buf=%s\n", len, n, (n < 0) ? save_errno : 0, hb);
+SOCK_ERRNO_SET(save_errno);
+}
+
 	if (n < 0)
 	{
 		result_errno = SOCK_ERRNO;
diff --git a/src/test/meson.build b/src/test/meson.build
index cd45cbf57fb..cbb6d70a439 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -5,6 +5,26 @@ subdir('isolation')
 
 subdir('authentication')
 subdir('postmaster')
+subdir('recovery_1')
+subdir('recovery_2')
+subdir('recovery_3')
+subdir('recovery_4')
+subdir('recovery_5')
+subdir('recovery_6')
+subdir('recovery_7')
+subdir('recovery_8')
+subdir('recovery_9')
+subdir('recovery_10')
+subdir('recovery_11')
+subdir('recovery_12')
+subdir('recovery_13')
+subdir('recovery_14')
+subdir('recovery_15')
+subdir('recovery_16')
+subdir('recovery_17')
+subdir('recovery_18')
+subdir('recovery_19')
+subdir('recovery_20')
 subdir('recovery')
 subdir('subscription')
 subdir('modules')
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index 4421059f100..44727a5d9f7 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -114,6 +114,7 @@ sub check_pg_recvlogical_stderr
 	$slot_user_handle->finish;
 	$return = $?;
 	cmp_ok($return, "!=", 0, "pg_recvlogical exited non-zero");
+print("!!!check_pg_recvlogical_stderr:\n$stderr\n===\n");
 	if ($return)
 	{
 		like($stderr, qr/$check_stderr/, 'slot has been invalidated');
@@ -907,6 +908,10 @@ check_pg_recvlogical_stderr($handle,
 # drop the logical slots
 drop_logical_slots('wal_level_');
 
+for my $i (1 .. 100)
+{
+print("DROP DATABASE scenario: iteration $i\n");
+
 # create the logical slots
 create_logical_slots($node_standby, 'drop_db_');
 
@@ -933,10 +938,17 @@ check_slots_dropped('drop_db', $handle);
 is($node_standby->slot('otherslot')->{'slot_type'},
 	'logical', 'otherslot on standby not dropped');
 
+$node_standby->safe_psql('postgres', q[SELECT pg_drop_replication_slot('otherslot')]);
+$node_primary->safe_psql('postgres', q[CREATE DATABASE testdb]);
+$node_primary->wait_for_replay_catchup($node_standby);
+}
+
 # Cleanup : manually drop the slot that was not dropped.
 $node_standby->psql('postgres',
 	q[SELECT pg_drop_replication_slot('otherslot')]);
 
+if (0)
+{
 ##################################################
 # Test standby promotion and logical decoding behavior
 # after the standby gets promoted.
@@ -1059,5 +1071,6 @@ chomp($cascading_stdout);
 is($cascading_stdout, $expected,
 	'got same expected output from pg_recvlogical decoding session on cascading standby'
 );
+}
 
 done_testing();

Reply via email to