From 0385f6d006e189edff93ac434f0b46daa28413a5 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 2 Oct 2024 15:12:27 -0700
Subject: [PATCH v4 4/5] pg_upgrade: Add --set-char-signedness to set the
 default char signedness of new cluster.

This change adds a new option --set-char-signedness to pg_upgrade. It
enables users to set the char signedness explicitly during pg_upgrade.
This helps users who know they copied the v17 source cluster from
x86 (signedness=true) to ARM (signedness=false) run pg_upgrade
properly without the prerequisite of acquiring an x86 VM.

Reviewed-by: Noah Misch
Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
---
 doc/src/sgml/ref/pgupgrade.sgml             | 53 +++++++++++++++++++++
 src/bin/pg_upgrade/check.c                  | 12 +++++
 src/bin/pg_upgrade/option.c                 | 12 +++++
 src/bin/pg_upgrade/pg_upgrade.c             | 10 +++-
 src/bin/pg_upgrade/pg_upgrade.h             |  3 ++
 src/bin/pg_upgrade/t/005_char_signedness.pl | 17 +++++++
 6 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 4777381dac2..cb9cc838b27 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -276,6 +276,59 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--set-char-signedness=</option><replaceable>option</replaceable></term>
+      <listitem>
+       <para>
+        Manually set the default char signedness of the new cluster. Possible values
+        are <literal>signed</literal> and <literal>unsigned</literal>.
+       </para>
+       <para>
+        In the C language, the default signedness of the <type>char</type> type
+        (when not explicitly specified) varies across platforms. For example,
+        <type>char</type> defaults to <type>signed char</type> on x86 CPUs but
+        to <type>unsigned char</type> on ARM CPUs.
+       </para>
+       <para>
+        Starting from <productname>PostgreSQL</productname> 18, database clusters
+        maintain their own default char signedness setting, which can be used to
+        ensure consistent behavior across platforms with different default char
+        signedness. By default, <application>pg_upgrade</application> preserves
+        the char signedness setting when upgrading from an existing cluster.
+        However, when upgrading from <productname>PostgreSQL</productname> 17 or
+        earlier, <application>pg_upgrade</application> adopts the char signedness
+        of the platform on which it was built.
+       </para>
+       <para>
+        This option allows you to explicitly set the default char signedness for
+        the new cluster, overriding any inherited values. There are two specific
+        scenarios where this option is relevant:
+        <itemizedlist>
+         <listitem>
+          <para>
+           If you are planning to migrate to a different platform after the upgrade,
+           you should not use this option; the default behavior is correct in this
+           case. Instead, perform the upgrade on the original platform without this
+           option, and then migrate the cluster afterward. This is the recommended
+           and safest approach.
+          </para>
+         </listitem>
+         <listitem>
+          <para>
+           If you have already migrated the cluster to a platform with different
+           char signedness (for example, from an x86-based system to an ARM-based
+           system), you should use this option to specify the signedness matching
+           the original platform's default char signedness. Additionally, it's
+           essential not to modify any data files between migrating data files and
+           running <command>pg_upgrade</command>. <command>pg_upgrade</command>
+           should be the first operation that starts the cluster on the new platform.
+          </para>
+         </listitem>
+        </itemizedlist>
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-?</option></term>
       <term><option>--help</option></term>
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 7ca1d8fffc9..d6f629dd3a2 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -838,6 +838,18 @@ check_cluster_versions(void)
 		GET_MAJOR_VERSION(new_cluster.bin_version))
 		pg_fatal("New cluster data and binary directories are from different major versions.");
 
+	/*
+	 * Since version 18, newly created database clusters always have the
+	 * 'signed' default char signedness, so it makes little sense to use the
+	 * --set-char-signedness option when upgrading from version 18 or later.
+	 * Users who want to change the default char signedness of the new
+	 * cluster can use pg_resetwal manually before the upgrade.
+	 */
+	if (GET_MAJOR_VERSION(old_cluster.major_version) >= 1800 &&
+		user_opts.char_signedness != -1)
+		pg_fatal("%s option cannot be used to upgrade from PostgreSQL %s and later.",
+				 "--set-char-signedness", "18");
+
 	check_ok();
 }
 
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 108eb7a1ba4..1a580d656bb 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -60,6 +60,7 @@ parseCommandLine(int argc, char *argv[])
 		{"copy", no_argument, NULL, 2},
 		{"copy-file-range", no_argument, NULL, 3},
 		{"sync-method", required_argument, NULL, 4},
+		{"set-char-signedness", required_argument, NULL, 5},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -70,6 +71,7 @@ parseCommandLine(int argc, char *argv[])
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
+	user_opts.char_signedness = -1;
 
 	os_info.progname = get_progname(argv[0]);
 
@@ -212,6 +214,14 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.sync_method = pg_strdup(optarg);
 				break;
 
+			case 5:
+				if (pg_strcasecmp(optarg, "signed") == 0)
+					user_opts.char_signedness = 1;
+				else if (pg_strcasecmp(optarg, "unsigned") == 0)
+					user_opts.char_signedness = 0;
+				else
+					pg_fatal("invalid argument for option %s", "--set-char-signedness");
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -306,6 +316,8 @@ usage(void)
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
 	printf(_("  --copy-file-range             copy files to new cluster with copy_file_range\n"));
+	printf(_("  --set-char-signedness=OPTION  set new cluster char signedness to \"signed\" or\n"));
+	printf(_("                                \"unsigned\"\n"));
 	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index cc7357b5599..e95be8b459d 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -399,8 +399,14 @@ set_new_cluster_char_signedness(void)
 {
 	bool		new_char_signedness;
 
-	/* Inherit the source database's signedness */
-	new_char_signedness = old_cluster.controldata.default_char_signedness;
+	/*
+	 * Use the char signedness given by the user, if any. Otherwise we
+	 * inherit the source database's signedness.
+	 */
+	if (user_opts.char_signedness != -1)
+		new_char_signedness = (user_opts.char_signedness == 1);
+	else
+		new_char_signedness = old_cluster.controldata.default_char_signedness;
 
 	/* Change the char signedness of the new cluster, if necessary */
 	if (new_cluster.controldata.default_char_signedness != new_char_signedness)
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 26991e71009..7d50c83d0bf 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -334,6 +334,9 @@ typedef struct
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
 	char	   *sync_method;
+	int			char_signedness;	/* default char signedness: -1 for initial
+									 * value, 1 for "signed" and 0 for
+									 * "unsigned" */
 } UserOpts;
 
 typedef struct
diff --git a/src/bin/pg_upgrade/t/005_char_signedness.pl b/src/bin/pg_upgrade/t/005_char_signedness.pl
index 05c3014a27d..c024106863e 100644
--- a/src/bin/pg_upgrade/t/005_char_signedness.pl
+++ b/src/bin/pg_upgrade/t/005_char_signedness.pl
@@ -40,6 +40,23 @@ command_like(
 	qr/Default char data signedness:\s+unsigned/,
 	'updated default char signedness is unsigned in control file');
 
+# --set-char-signedness (long option, two dashes) must be rejected for v18+
+command_fails(
+	[
+		'pg_upgrade', '--no-sync',
+		'-d', $old->data_dir,
+		'-D', $new->data_dir,
+		'-b', $old->config_data('--bindir'),
+		'-B', $new->config_data('--bindir'),
+		'-s', $new->host,
+		'-p', $old->port,
+		'-P', $new->port,
+		'--set-char-signedness', 'signed',
+		$mode
+	],
+	'--set-char-signedness option cannot be used for upgrading from v18 or later'
+);
+
 # pg_upgrade should be successful.
 command_ok(
 	[
-- 
2.43.5

