Hi I found that "CREATE DATABASE ... TEMPLATE" with WAL_LOG strategy can cause sequence values to rollback after a streaming replication failover.
When creating a new database with "CREATE DATABASE ... TEMPLATE" from
a template database that has sequences, calling nextval() on those
sequences in the new database causes the primary to have a higher
last_value than the standby temporarily.
When a failover occurs and the standby is promoted in this state, the
sequence value appears to have rollbacked on the new primary.
I found this issue in PostgreSQL version 15, but I've confirmed that it also
happens on the master branch.
I show the example as follows.
After creating a database with "CREATE DATABASE ... TEMPLATE",
the sequence initially has last_value = 5:
-- Initial state on both Primary and Standby:
=====
test_db=# SELECT last_value, log_cnt, is_called FROM test_seq;
last_value | log_cnt | is_called
------------+---------+-----------
5 | 28 | t
(1 row)
=====
When I run nextval() 5 times, the values of primary and standby are
as follows. The values in the standby are not changed.
-- Primary:
=====
test_db=# SELECT last_value, log_cnt, is_called FROM test_seq;
last_value | log_cnt | is_called
------------+---------+-----------
10 | 23 | t
(1 row)
=====
-- Standby:
=====
test_db=# SELECT last_value, log_cnt, is_called FROM test_seq;
last_value | log_cnt | is_called
------------+---------+-----------
5 | 28 | t
(1 row)
=====
When I run nextval() in this state, it returns 6 even though 6 - 10
were already returned on the previous primary. It means that the
sequence rollback after a failover.
I understand that sequences cannot be used to obtain "gapless", but I
think it is an unintended behavior that sequence values rollback.
I attached the script to reproduce this issue and the patch to fix.
In the attached patch, during "CREATE DATABASE ... TEMPLATE" with
WAL_LOG strategy, the sequence's log_cnt is reset to 0 so that
SEQ_LOG_VALS (32) values are fetched in advance on the next nextval()
call. This ensures that the standby's last_value is greater than or
equal to the primary's last_value.
And, this patch opens sequence page using readBufferWithoutRelcache()
without relying on relcache entries and resets log_cnt. This follows
the same approach as ScanSourceDatabasePGClass().
Note that this issue does not occur with FILE_COPY strategy because
this performs a checkpoint internally and a WAL record to fetch
SEQ_LOG_VALS in advance is always emitted on the next nextval() call.
However, I could not apply the same approach because I understand the
WAL_LOG strategy is designed to avoid checkpoints.
Regards,
Daisuke, Higuchi
reproduce_script_sequence_rollback_issue.sh
Description: Bourne shell script
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 9eb8ed11425..611af3470f4 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -62,6 +62,7 @@ tests += {
't/051_effective_wal_level.pl',
't/052_checkpoint_segment_missing.pl',
't/053_standby_login_event_trigger.pl',
+ 't/054_sequence_createdb_template.pl',
],
},
}
diff --git a/src/test/recovery/t/054_sequence_createdb_template.pl b/src/test/recovery/t/054_sequence_createdb_template.pl
new file mode 100644
index 00000000000..f9054690f27
--- /dev/null
+++ b/src/test/recovery/t/054_sequence_createdb_template.pl
@@ -0,0 +1,81 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+#
+# Test: Verify that sequences in databases created with CREATE DATABASE ...
+# TEMPLATE properly generate WAL records on first nextval(),
+# ensuring that failover does not cause sequence value rollback.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Set up primary and a streaming standby
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->start;
+
+# Create and advance a sequence on template database
+$node_primary->safe_psql('postgres', 'CREATE DATABASE template_db');
+$node_primary->safe_psql('template_db', 'CREATE SEQUENCE test_seq');
+
+for (my $i = 0; $i < 5; $i++) {
+ $node_primary->safe_psql('template_db',
+ "SELECT nextval('test_seq')");
+}
+
+my $tmpl_last_value = $node_primary->safe_psql('template_db',
+ 'SELECT last_value FROM test_seq');
+is($tmpl_last_value, '5', 'the sequence last_value is 5');
+
+my $tmpl_log_cnt = $node_primary->safe_psql('template_db',
+ 'SELECT log_cnt FROM test_seq');
+ok($tmpl_log_cnt > 0, "the sequence has pre-logged values (log_cnt=$tmpl_log_cnt)");
+
+# Create new database test_db by CREATE DATABASE ... TEMPLATE and verify initial state
+$node_primary->safe_psql('postgres',
+ 'CREATE DATABASE test_db TEMPLATE template_db');
+my $new_last_value_before = $node_primary->safe_psql('test_db',
+ 'SELECT last_value FROM test_seq');
+is($new_last_value_before, '5',
+ 'test_db sequence last_value starts at 5 (same as template)');
+
+# Call nextval() on primary and verify if standby's last_value >= primary's value
+for (my $i = 0; $i < 5; $i++) {
+ $node_primary->safe_psql('test_db', "SELECT nextval('test_seq')");
+}
+my $primary_last_value = $node_primary->safe_psql('test_db',
+ 'SELECT last_value FROM test_seq');
+is($primary_last_value, '10',
+ 'primary test_db sequence advanced to 10 after 5 nextval calls');
+
+$node_primary->wait_for_catchup($node_standby);
+
+my $standby_last_value = $node_standby->safe_psql('test_db',
+ 'SELECT last_value FROM test_seq');
+ok($standby_last_value >= $primary_last_value,
+ "standby sequence last_value ($standby_last_value) >= primary ($primary_last_value) - WAL was generated");
+ok($standby_last_value > 5,
+ "standby sequence was updated after nextval on primary (value=$standby_last_value)");
+
+# Failover and verify if nextval() should return a value larger than previous primary's value
+$node_primary->stop;
+$node_standby->promote;
+
+my $promoted_nextval = $node_standby->safe_psql('test_db',
+ "SELECT nextval('test_seq')");
+ok($promoted_nextval > 10,
+ "after failover, nextval returns $promoted_nextval (> 10, no rollback)");
+
+$node_standby->safe_psql('postgres', 'DROP DATABASE test_db');
+$node_standby->safe_psql('postgres', 'DROP DATABASE template_db');
+
+done_testing();
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index f0819d15ab7..c587278e0f6 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -44,6 +44,8 @@
#include "catalog/pg_tablespace.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
+#include "commands/sequence.h"
+#include "commands/sequence_xlog.h"
#include "commands/dbcommands_xlog.h"
#include "commands/defrem.h"
#include "commands/seclabel.h"
@@ -109,6 +111,7 @@ typedef struct CreateDBRelInfo
RelFileLocator rlocator; /* physical relation identifier */
Oid reloid; /* relation oid */
bool permanent; /* relation is permanent or unlogged */
+ char relkind; /* relation kind */
} CreateDBRelInfo;
@@ -129,6 +132,7 @@ static bool check_db_file_conflict(Oid db_id);
static int errdetail_busy_db(int notherbackends, int npreparedxacts);
static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
Oid dst_tsid);
+static void ResetSequenceLogCnt(RelFileLocator rlocator, bool permanent);
static List *ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath);
static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid,
Oid dbid, char *srcpath,
@@ -218,6 +222,10 @@ CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid,
/* Copy relation storage from source to the destination. */
CreateAndCopyRelationData(srcrlocator, dstrlocator, relinfo->permanent);
+ /* Reset log_cnt for sequences to ensure WAL on first nextval() */
+ if (relinfo->relkind == RELKIND_SEQUENCE)
+ ResetSequenceLogCnt(dstrlocator, relinfo->permanent);
+
/* Release the relation locks. */
UnlockRelationId(&srcrelid, AccessShareLock);
UnlockRelationId(&dstrelid, AccessShareLock);
@@ -228,6 +236,58 @@ CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid,
list_free_deep(rlocatorlist);
}
+/*
+ * Reset log_cnt of a sequence to zero, ensuring the next nextval() call
+ * will emit a WAL record. This is needed after copying a sequence via
+ * the WAL_LOG strategy for CREATE DATABASE.
+ */
+static void
+ResetSequenceLogCnt(RelFileLocator rlocator, bool permanent)
+{
+ Buffer buf;
+ Page page;
+ ItemId lp;
+ HeapTupleData seqtuple;
+ Form_pg_sequence_data seq;
+
+ buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, 0,
+ RBM_NORMAL, NULL, permanent);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buf);
+ lp = PageGetItemId(page, FirstOffsetNumber);
+ Assert(ItemIdIsNormal(lp));
+
+ seqtuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ seqtuple.t_len = ItemIdGetLength(lp);
+ seq = (Form_pg_sequence_data) GETSTRUCT(&seqtuple);
+
+ if (seq->log_cnt != 0)
+ {
+ START_CRIT_SECTION();
+ seq->log_cnt = 0;
+ MarkBufferDirty(buf);
+
+ if (permanent)
+ {
+ xl_seq_rec xlrec;
+ XLogRecPtr recptr;
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ xlrec.locator = rlocator;
+ XLogRegisterData(&xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
+
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+ END_CRIT_SECTION();
+ }
+ UnlockReleaseBuffer(buf);
+}
+
/*
* Scan the pg_class table in the source database to identify the relations
* that need to be copied to the destination database.
@@ -442,6 +502,7 @@ ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid,
relinfo->rlocator.dbOid = dbid;
relinfo->rlocator.relNumber = relfilenumber;
relinfo->reloid = classForm->oid;
+ relinfo->relkind = classForm->relkind;
/* Temporary relations were rejected above. */
Assert(classForm->relpersistence != RELPERSISTENCE_TEMP);
