Hi,
The patch has been significantly updated.
1. Added the --pg-commit-ts command line key for pg_upgrade.
Migration of the pg_commit_ts directory will be performed only if the user explicitly wishes.
2. The roident values are being transferred from pg_replication_origin to a new cluster to resolve conflict messages {UPDATE|DELETE}_ORIGIN_DIFFERS.
During testing, it turned out that the replication sources created by the pg_replication_origin_create('no_link_sub_4') functions are not migrated to the new cluster. This is probably a bug.
The principle of operation: A query to the old cluster generates SQL commands for roident adjustments, which are then executed in the new cluster.
From 91d216b1b840e4af1ba0ded3842803d7b1806ae9 Mon Sep 17 00:00:00 2001 From: Sergey Levin <[email protected]> Date: Sat, 21 Mar 2026 13:45:40 +0500 Subject: [PATCH v12] pg_upgrade: transfer commit timestamps to the new cluster
This commit preserves commit timestamps during an upgrade.
Migration of the pg_commit_ts directory will be performed when
the --pg-commit-ts command line key is specified.
The roident values are being transferred from pg_replication_origin
to a new cluster to resolve conflict messages {UPDATE|DELETE}_ORIGIN_DIFFERS.
Files in the pg_commit_ts directory are copied when track_commit_timestamp=on.
Also, pg_resetwal specifies the oldest and newest transaction IDs to the new
cluster.
If the old cluster enables tracking commit timestamps but the new cluster does
not, the pg_upgrade fails to avoid missing them.
Author: Sergey Levin <[email protected]>
Author: Hayato Kuroda <[email protected]>
Reviewed-by: Maxim Orlov <orlovmg.gmail.com>
---
src/bin/pg_upgrade/check.c | 79 +++++++
src/bin/pg_upgrade/controldata.c | 20 ++
src/bin/pg_upgrade/meson.build | 1 +
src/bin/pg_upgrade/option.c | 5 +
src/bin/pg_upgrade/pg_upgrade.c | 49 ++++-
src/bin/pg_upgrade/pg_upgrade.h | 3 +
.../pg_upgrade/t/009_transfer_commit_ts.pl | 199 ++++++++++++++++++
7 files changed, 353 insertions(+), 3 deletions(-)
create mode 100644 src/bin/pg_upgrade/t/009_transfer_commit_ts.pl
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index eb35c68d450..9fef02843ff 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -34,8 +34,11 @@ static void check_new_cluster_replication_slots(void);
static void check_new_cluster_subscription_configuration(void);
static void check_old_cluster_for_valid_slots(void);
static void check_old_cluster_subscription_state(void);
+static void check_new_cluster_pg_commit_ts(void);
static void check_old_cluster_global_names(ClusterInfo *cluster);
+extern PQExpBuffer sql_roident_correction;
+
/*
* DataTypesUsageChecks - definitions of data type checks for the old cluster
* in order to determine if an upgrade can be performed. See the comment on
@@ -791,9 +794,35 @@ check_new_cluster(void)
check_new_cluster_replication_slots();
check_new_cluster_subscription_configuration();
+
+ if (user_opts.do_copy_pg_commit_ts)
+ check_new_cluster_pg_commit_ts();
+
}
+void
+check_new_cluster_pg_commit_ts(void)
+{
+ PGconn *conn;
+ PGresult *res;
+ bool commit_ts_is_enabled;
+
+
+ prep_status("Checking for new cluster configuration for commit timestamp");
+
+ conn = connectToServer(&new_cluster, "template1");
+ res = executeQueryOrDie(conn, "SELECT setting FROM pg_settings "
+ "WHERE name = 'track_commit_timestamp'");
+ commit_ts_is_enabled = strcmp(PQgetvalue(res, 0, 0), "on") == 0;
+ PQclear(res);
+ PQfinish(conn);
+
+ if (!commit_ts_is_enabled &&
+ old_cluster.controldata.chkpnt_newstCommitTsxid > 0)
+ pg_fatal("\"track_commit_timestamp\" must be \"on\" but is set to \"off\"");
+ check_ok();
+}
void
report_clusters_compatible(void)
{
@@ -2452,6 +2481,56 @@ check_old_cluster_subscription_state(void)
PQclear(res);
PQfinish(conn);
+ if (user_opts.do_copy_pg_commit_ts)
+ {
+ /*
+ * Save pg_replication_origin.roident and
+ * pg_replication_origin_status.remote_lsn old cluster.
+ */
+ conn = connectToServer(&old_cluster, old_cluster.dbarr.dbs[0].db_name);
+ res = executeQueryOrDie(conn,
+ "SELECT string_agg(CASE WHEN os.remote_lsn is not null THEN format('"
+ " SELECT pg_catalog.pg_replication_origin_advance("
+ " (SELECT roname FROM pg_catalog.pg_replication_origin "
+ " WHERE roident=%%s), %%L)', o.roident, os.remote_lsn) END, ';'),"
+ " format('UPDATE pg_catalog.pg_replication_origin r SET roident=roident::int+10000'), "
+ " string_agg(CASE WHEN s.subname is not null THEN "
+ " format('UPDATE pg_catalog.pg_replication_origin r "
+ " SET roident=%%s from pg_catalog.pg_subscription s WHERE r.roname=''pg_''||s.oid "
+ " and s.subname=%%L'"
+ " ,o.roident, s.subname) "
+ " ELSE"
+ " format('UPDATE pg_catalog.pg_replication_origin r "
+ " SET roident=%%s WHERE r.roname=%%L'"
+ " ,o.roident, o.roname) "
+ " END"
+ " ,';' ORDER BY o.roident DESC), "
+ " max(o.roident)>9999 "
+ "FROM pg_catalog.pg_replication_origin o "
+ "LEFT JOIN pg_catalog.pg_subscription s "
+ " ON o.roname = 'pg_' || s.oid "
+ "LEFT JOIN pg_catalog.pg_replication_origin_status os "
+ " ON os.external_id = o.roname;");
+ ntup = PQntuples(res);
+ if (ntup > 0)
+ {
+ if (strcmp(PQgetvalue(res, 0, 3), "t") == 0)
+ pg_fatal("The origin ID exceeds 9999");
+ sql_roident_correction = createPQExpBuffer();
+ /* Prepare roident in new cluster for execute update */
+ appendPQExpBufferStr(sql_roident_correction, PQgetvalue(res, 0, 1));
+ appendPQExpBufferStr(sql_roident_correction, ";\n");
+ /* Restore roident */
+ appendPQExpBufferStr(sql_roident_correction, PQgetvalue(res, 0, 2));
+ appendPQExpBufferStr(sql_roident_correction, ";\n");
+ /* Restore remote_lsn if exists */
+ appendPQExpBufferStr(sql_roident_correction, PQgetvalue(res, 0, 0));
+ appendPQExpBufferStr(sql_roident_correction, ";\n");
+ PQclear(res);
+ PQfinish(conn);
+ }
+ }
+
/*
* We don't allow upgrade if there is a risk of dangling slot or origin
* corresponding to initial sync after upgrade.
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index aa6e8b4de5d..fa8b28adf43 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -321,6 +321,26 @@ get_control_data(ClusterInfo *cluster)
cluster->controldata.chkpnt_nxtmulti = str2uint(p);
got_multi = true;
}
+ else if ((p = strstr(bufin, "Latest checkpoint's oldestCommitTsXid:")) != NULL)
+ {
+ p = strchr(p, ':');
+
+ if (p == NULL || strlen(p) <= 1)
+ pg_fatal("%d: controldata retrieval problem", __LINE__);
+
+ p++; /* remove ':' char */
+ cluster->controldata.chkpnt_oldstCommitTsxid = str2uint(p);
+ }
+ else if ((p = strstr(bufin, "Latest checkpoint's newestCommitTsXid:")) != NULL)
+ {
+ p = strchr(p, ':');
+
+ if (p == NULL || strlen(p) <= 1)
+ pg_fatal("%d: controldata retrieval problem", __LINE__);
+
+ p++; /* remove ':' char */
+ cluster->controldata.chkpnt_newstCommitTsxid = str2uint(p);
+ }
else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL)
{
p = strchr(p, ':');
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index ffbf6ae8d75..5077805b8cf 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -69,6 +69,7 @@ tests += {
't/006_transfer_modes.pl',
't/007_multixact_conversion.pl',
't/008_extension_control_path.pl',
+ 't/009_transfer_commit_ts.pl',
],
'deps': [test_ext],
'test_kwargs': {'priority': 40}, # pg_upgrade tests are slow
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index f01d2f92d95..5714e77ff4b 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -63,6 +63,7 @@ parseCommandLine(int argc, char *argv[])
{"no-statistics", no_argument, NULL, 5},
{"set-char-signedness", required_argument, NULL, 6},
{"swap", no_argument, NULL, 7},
+ {"pg-commit-ts", no_argument, NULL, 8},
{NULL, 0, NULL, 0}
};
@@ -233,6 +234,9 @@ parseCommandLine(int argc, char *argv[])
case 7:
user_opts.transfer_mode = TRANSFER_MODE_SWAP;
break;
+ case 8:
+ user_opts.do_copy_pg_commit_ts = true;
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
@@ -325,6 +329,7 @@ usage(void)
printf(_(" -U, --username=NAME cluster superuser (default \"%s\")\n"), os_info.user);
printf(_(" -v, --verbose enable verbose internal logging\n"));
printf(_(" -V, --version display version information, then exit\n"));
+ printf(_(" --pg-commit-ts copy pg_commit_ts directory\n"));
printf(_(" --clone clone instead of copying files to new cluster\n"));
printf(_(" --copy copy files to new cluster (default)\n"));
printf(_(" --copy-file-range copy files to new cluster with copy_file_range\n"));
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 2127d297bfe..f1baf5ba88c 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -85,6 +85,8 @@ char *output_files[] = {
NULL
};
+/* list SQL commands for new cluster */
+PQExpBuffer sql_roident_correction;
int
main(int argc, char **argv)
@@ -254,6 +256,23 @@ main(int argc, char **argv)
issue_warnings_and_set_wal_level();
+ if (user_opts.do_copy_pg_commit_ts && sql_roident_correction)
+ {
+ /*
+ * Correction pg_replication_origin.roident and
+ * pg_replication_origin_status.remote_lsn for new cluster.
+ */
+ start_postmaster(&new_cluster, true);
+
+ PGconn *conn_new_template1;
+
+ conn_new_template1 = connectToServer(&new_cluster, "template1");
+ PQclear(executeQueryOrDie(conn_new_template1, "%s", sql_roident_correction->data));
+ PQfinish(conn_new_template1);
+
+ stop_postmaster(false);
+ }
+
pg_log(PG_REPORT,
"\n"
"Upgrade Complete\n"
@@ -773,6 +792,10 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
static void
copy_xact_xlog_xid(void)
{
+ bool is_copy_commit_ts;
+ uint32 oldest_xid,
+ newest_xid;
+
/*
* Copy old commit logs to new data dir. pg_clog has been renamed to
* pg_xact in post-10 clusters.
@@ -782,6 +805,23 @@ copy_xact_xlog_xid(void)
GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
"pg_clog" : "pg_xact");
+ /*
+ * Copy old commit_timestamp data to new, if available.
+ */
+ is_copy_commit_ts =
+ (old_cluster.controldata.chkpnt_oldstCommitTsxid > 0 &&
+ old_cluster.controldata.chkpnt_newstCommitTsxid > 0 &&
+ user_opts.do_copy_pg_commit_ts);
+
+ if (is_copy_commit_ts)
+ {
+ copy_subdir_files("pg_commit_ts", "pg_commit_ts");
+ oldest_xid = old_cluster.controldata.chkpnt_oldstCommitTsxid;
+ newest_xid = old_cluster.controldata.chkpnt_newstCommitTsxid;
+ }
+ else
+ oldest_xid = newest_xid = old_cluster.controldata.chkpnt_nxtxid;
+
prep_status("Setting oldest XID for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -u %u \"%s\"",
@@ -799,12 +839,15 @@ copy_xact_xlog_xid(void)
"\"%s/pg_resetwal\" -f -e %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
new_cluster.pgdata);
- /* must reset commit timestamp limits also */
+
+ /*
+ * must reset commit timestamp limits also or copy from the old cluster
+ */
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -c %u,%u \"%s\"",
new_cluster.bindir,
- old_cluster.controldata.chkpnt_nxtxid,
- old_cluster.controldata.chkpnt_nxtxid,
+ oldest_xid,
+ newest_xid,
new_cluster.pgdata);
check_ok();
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 1d767bbda2d..76a2e067a53 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -245,6 +245,8 @@ typedef struct
uint64 chkpnt_nxtmxoff;
uint32 chkpnt_oldstMulti;
uint32 chkpnt_oldstxid;
+ uint32 chkpnt_oldstCommitTsxid;
+ uint32 chkpnt_newstCommitTsxid;
uint32 align;
uint32 blocksz;
uint32 largesz;
@@ -341,6 +343,7 @@ typedef struct
bool check; /* check clusters only, don't change any data */
bool live_check; /* check clusters only, old server is running */
bool do_sync; /* flush changes to disk */
+ bool do_copy_pg_commit_ts; /* copy pg_commit_ts directory */
transferMode transfer_mode; /* copy files or link them? */
int jobs; /* number of processes/threads to use */
char *socketdir; /* directory to use for Unix sockets */
diff --git a/src/bin/pg_upgrade/t/009_transfer_commit_ts.pl b/src/bin/pg_upgrade/t/009_transfer_commit_ts.pl
new file mode 100644
index 00000000000..6cb623beb32
--- /dev/null
+++ b/src/bin/pg_upgrade/t/009_transfer_commit_ts.pl
@@ -0,0 +1,199 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+
+# Tests for transfer pg_commit_ts directory.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Can be changed to test the other modes
+my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';
+
+# Initialize old cluster
+my $old = PostgreSQL::Test::Cluster->new('old');
+$old->init;
+$old->append_conf('postgresql.conf', 'track_commit_timestamp = on');
+$old->start;
+my $resold = $old->safe_psql(
+ 'postgres', qq{
+ create table a(a int);
+ select xid,timestamp from pg_last_committed_xact();
+});
+
+my ($xid) = $resold =~ /\s*(\d+)\s*\|.*/;
+$old->stop;
+
+# Initialize new cluster
+my $new = PostgreSQL::Test::Cluster->new('new');
+$new->init;
+
+# Setup a common pg_upgrade command to be used by all the test cases
+my @pg_upgrade_cmd = (
+ 'pg_upgrade', '--no-sync','--pg-commit-ts',
+ '--old-datadir' => $old->data_dir,
+ '--new-datadir' => $new->data_dir,
+ '--old-bindir' => $old->config_data('--bindir'),
+ '--new-bindir' => $new->config_data('--bindir'),
+ '--socketdir' => $new->host,
+ '--old-port' => $old->port,
+ '--new-port' => $new->port,
+ $mode);
+
+# In a VPATH build, we'll be started in the source directory, but we want
+# to run pg_upgrade in the build directory so that any files generated finish
+# in it, like delete_old_cluster.{sh,bat}.
+chdir ${PostgreSQL::Test::Utils::tmp_check};
+
+command_checks_all(
+ [@pg_upgrade_cmd], 1,
+ [qr{"track_commit_timestamp" must be "on" but is set to "off"}], [],
+ 'run of pg_upgrade for mismatch parameter track_commit_timestamp');
+
+$new->append_conf('postgresql.conf', 'track_commit_timestamp = on');
+
+command_ok([@pg_upgrade_cmd], 'run of pg_upgrade ok');
+
+$new->start;
+my $resnew = $new->safe_psql(
+ 'postgres', qq{
+ select $xid,pg_xact_commit_timestamp(${xid}::text::xid);
+});
+
+$new->stop;
+ok($resold eq $resnew, "timestamp transferred successfully");
+
+# Check migrate with subscriptions and restore origin and remote_lsn
+
+my $publisher = PostgreSQL::Test::Cluster->new('publisher');
+$publisher->init(allows_streaming => 'logical');
+$publisher->start;
+
+# Initialize the old subscriber node
+my $old_sub = PostgreSQL::Test::Cluster->new('old_sub');
+$old_sub->init;
+$old_sub->append_conf('postgresql.conf', 'track_commit_timestamp = on');
+$old_sub->start;
+my $oldbindir = $old_sub->config_data('--bindir');
+
+# Initialize the new subscriber
+my $new_sub = PostgreSQL::Test::Cluster->new('new_sub');
+$new_sub->init;
+$new_sub->append_conf('postgresql.conf', 'track_commit_timestamp = on');
+my $newbindir = $new_sub->config_data('--bindir');
+
+# In a VPATH build, we'll be started in the source directory, but we want
+# to run pg_upgrade in the build directory so that any files generated finish
+# in it, like delete_old_cluster.{sh,bat}.
+chdir ${PostgreSQL::Test::Utils::tmp_check};
+
+# Remember a connection string for the publisher node. It would be used
+# several times.
+my $appname='tap_sub';
+my $connstr = $publisher->connstr . ' dbname=postgres ';
+
+$publisher->safe_psql('postgres', "CREATE TABLE tab (a int PRIMARY KEY)");
+$old_sub->safe_psql('postgres', "CREATE TABLE tab (a int PRIMARY KEY)");
+$publisher->safe_psql('postgres', "CREATE PUBLICATION regress_pub1 FOR TABLE tab");
+#Create 1 origin
+$old_sub->safe_psql(
+ 'postgres', "
+ CREATE SUBSCRIPTION a_dummy
+ CONNECTION '$connstr'
+ PUBLICATION regress_pub1
+ WITH (connect = false, enabled = false,create_slot = false)");
+#Create 2 origin
+$old_sub->safe_psql('postgres',
+ "CREATE SUBSCRIPTION regress_sub2 CONNECTION '$connstr application_name=$appname' PUBLICATION regress_pub1 WITH(copy_data = false)"
+);
+#Create 3 origin
+$old_sub->safe_psql(
+ 'postgres', "
+ CREATE SUBSCRIPTION z_dummy
+ CONNECTION '$connstr'
+ PUBLICATION regress_pub1
+ WITH (connect = false, enabled = false,create_slot = false)");
+#Create 4,5 origin no link subscription
+$old_sub->safe_psql('postgres',
+ "SELECT pg_replication_origin_create('no_link_sub_4'),pg_replication_origin_create('no_link_sub_5')"
+);
+
+# Wait for initial table sync to finish
+$old_sub->wait_for_subscription_sync($publisher, $appname);
+$publisher->safe_psql('postgres', "INSERT INTO tab VALUES (11);");
+$publisher->wait_for_catchup($appname);
+
+my $result = $old_sub->safe_psql('postgres',
+ "SELECT count(1) = 1 FROM tab");
+is($result, qq(t), "Check that the table is 1 row");
+
+my $remote_lsn = $old_sub->safe_psql('postgres',
+ "SELECT remote_lsn FROM pg_replication_origin_status os, pg_subscription s WHERE os.external_id = 'pg_' || s.oid AND s.subname = 'regress_sub2'"
+);
+
+#Delete 1 origin
+$old_sub->safe_psql('postgres', "ALTER SUBSCRIPTION a_dummy DISABLE");
+$old_sub->safe_psql('postgres', "ALTER SUBSCRIPTION a_dummy SET (slot_name = NONE)");
+$old_sub->safe_psql('postgres', "DROP SUBSCRIPTION a_dummy");
+
+my $origin_others= $old_sub->safe_psql('postgres',
+ "SELECT roident,roname FROM pg_replication_origin o LEFT JOIN pg_subscription s ON o.roname = 'pg_' || s.oid WHERE s.subname is null ORDER BY o.roident"
+);
+
+$old_sub->stop;
+
+command_ok(
+ [
+ 'pg_upgrade',
+ '--no-sync','--pg-commit-ts',
+ '--old-datadir' => $old_sub->data_dir,
+ '--new-datadir' => $new_sub->data_dir,
+ '--old-bindir' => $oldbindir,
+ '--new-bindir' => $newbindir,
+ '--socketdir' => $new_sub->host,
+ '--old-port' => $old_sub->port,
+ '--new-port' => $new_sub->port,
+ $mode
+ ],
+ 'run of pg_upgrade for old instance when the subscription tables not empty'
+);
+ok( !-d $new_sub->data_dir . "/pg_upgrade_output.d",
+ "pg_upgrade_output.d/ removed after successful pg_upgrade");
+
+$new_sub->start;
+
+$result = $new_sub->safe_psql('postgres',
+ "SELECT roident,s.subname FROM pg_replication_origin o LEFT JOIN pg_subscription s ON o.roname = 'pg_' || s.oid WHERE s.subname is not null ORDER BY o.roident");
+is($result, qq(2|regress_sub2
+3|z_dummy), "Check that the roident this restore old cluster (subscribtions)");
+
+
+$result = $new_sub->safe_psql('postgres',
+ "SELECT roident,roname FROM pg_replication_origin o LEFT JOIN pg_subscription s ON o.roname = 'pg_' || s.oid WHERE s.subname is null ORDER BY o.roident");
+# No migrate origin create finction pg_replication_origin_create
+# Comment next line if fix this bug
+$origin_others="";
+is($result, $origin_others, "Check that the roident this restore old cluster (origin id without subscribtions)");
+
+my $remote_lsn_new_sub = $new_sub->safe_psql('postgres',
+ "SELECT remote_lsn FROM pg_replication_origin_status os, pg_subscription s WHERE os.external_id = 'pg_' || s.oid AND s.subname = 'regress_sub2'"
+);
+is($remote_lsn_new_sub, qq($remote_lsn), "remote_lsn should have been preserved");
+
+
+my $log_offset = -s $new_sub->logfile;
+
+#Check replication new cluster
+$publisher->safe_psql('postgres', "UPDATE tab set a=32 where a=11;");
+
+$publisher->wait_for_catchup($appname);
+
+$result = $new_sub->safe_psql('postgres',
+ "SELECT a FROM tab WHERE a=32");
+is($result,32, "update row ok");
+
+$new_sub->log_check("no conflict",$log_offset,log_unlike => [ qr/conflict detected on relation \"public.tab\": conflict=/, ]);
+
+done_testing();
--
2.50.1
