On Fri, 2025-10-10 at 11:32 +0200, Peter Eisentraut wrote:
> > * Use environment variable name PG_LOCALE_PROVIDER, which seems
> > more consistent.
> 
> Is this not something that could already be done using
> PG_TEST_INITDB_EXTRA_OPTS ?

1. PG_LOCALE_PROVIDER is a documented user-facing option, which will
make it easier for users to set their preferred provider in scripts,
etc.

2. This change also creates default locales for the builtin and ICU
providers, so that initdb without any other locale options will succeed
regardless of the provider.

I broke these up into two patches as v3 to make it easier to
understand.

These patches are independently useful, but also important if we ever
want to change the initdb default to builtin or ICU.

Regards,
        Jeff Davis

From 7ce735b1e85b9f3f9ab6d48588de5824667323d2 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Fri, 10 Oct 2025 10:49:05 -0700
Subject: [PATCH v3 1/2] initdb: add default locales for builtin and ICU
 providers.

Allows initdb to succeed with any provider even if no other locale
options are specified. Will be useful if the provider comes from
another source, like an environment variable, or if we ever change the
initdb default provider.

Discussion: https://postgr.es/m/[email protected]
---
 src/bin/initdb/initdb.c           | 84 +++++++++++++++++++++++++++----
 src/bin/initdb/t/001_initdb.pl    | 11 ++--
 src/bin/scripts/t/020_createdb.pl | 69 ++++++++++++++-----------
 3 files changed, 120 insertions(+), 44 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 92fe2f531f7..e2960e5f17c 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -82,6 +82,8 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 
+#define DEFAULT_BUILTIN_LOCALE		"C.UTF-8"
+#define DEFAULT_ICU_LOCALE			"und"
 
 /* Ideally this would be in a .h file, but it hardly seems worth the trouble */
 extern const char *select_default_timezone(const char *share_path);
@@ -2412,6 +2414,25 @@ icu_validate_locale(const char *loc_str)
 #endif
 }
 
+/*
+ * Is the given locale name UTF-8 compatible?
+ */
+static bool
+utf8_compatible(const char *localename)
+{
+#ifndef WIN32
+	int			ctype_enc;
+
+	Assert(localename != NULL);
+	ctype_enc = pg_get_encoding_from_locale(localename, false);
+
+	return (ctype_enc == PG_UTF8 || ctype_enc == PG_SQL_ASCII);
+#else
+	/* on windows, all locales are compatible with UTF-8 */
+	return true;
+#endif
+}
+
 /*
  * set up the locale variables
  *
@@ -2420,6 +2441,8 @@ icu_validate_locale(const char *loc_str)
 static void
 setlocales(void)
 {
+	bool		ctype_from_env;
+	bool		collate_from_env;
 	char	   *canonname;
 
 	/* set empty lc_* and datlocale values to locale config if set */
@@ -2442,6 +2465,9 @@ setlocales(void)
 			datlocale = locale;
 	}
 
+	ctype_from_env = (lc_ctype == NULL);
+	collate_from_env = (lc_collate == NULL);
+
 	/*
 	 * canonicalize locale names, and obtain any missing values from our
 	 * current environment
@@ -2465,12 +2491,11 @@ setlocales(void)
 	lc_messages = canonname;
 #endif
 
-	if (locale_provider != COLLPROVIDER_LIBC && datlocale == NULL)
-		pg_fatal("locale must be specified if provider is %s",
-				 collprovider_name(locale_provider));
-
 	if (locale_provider == COLLPROVIDER_BUILTIN)
 	{
+		if (!datlocale)
+			datlocale = DEFAULT_BUILTIN_LOCALE;
+
 		if (strcmp(datlocale, "C") == 0)
 			canonname = "C";
 		else if (strcmp(datlocale, "C.UTF-8") == 0 ||
@@ -2488,11 +2513,13 @@ setlocales(void)
 	{
 		char	   *langtag;
 
+		if (!datlocale)
+			datlocale = DEFAULT_ICU_LOCALE;
+
 		/* canonicalize to a language tag */
 		langtag = icu_language_tag(datlocale);
 		printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"),
 			   langtag, datlocale);
-		pg_free(datlocale);
 		datlocale = langtag;
 
 		icu_validate_locale(datlocale);
@@ -2505,6 +2532,46 @@ setlocales(void)
 		pg_fatal("ICU is not supported in this build");
 #endif
 	}
+
+	/*
+	 * If using the builtin provider with a locale requiring UTF-8, avoid
+	 * taking incompatible settings from the environment.
+	 */
+	if (locale_provider == COLLPROVIDER_BUILTIN &&
+		strcmp(datlocale, "C") != 0)
+	{
+		if (!encoding)
+			encoding = "UTF-8";
+
+		/*
+		 * LC_CTYPE has little effect unless using the libc provider, but does
+		 * still affect some places, such as translation of error messages
+		 * from the OS. Overriding it here may be an inconvenience, but in the
+		 * absence of specified locale options, it's the best choice.
+		 *
+		 * XXX: minimize the effects of LC_CTYPE when not using libc.
+		 */
+		if (ctype_from_env && !utf8_compatible(lc_ctype))
+		{
+			pg_log_warning("setting LC_CTYPE to \"C\"");
+			pg_log_warning_detail("Encoding of LC_CTYPE locale \"%s\" does not match encoding required by builtin locale \"%s\".",
+								  lc_ctype, datlocale);
+			pg_log_warning_hint("Specify a UTF-8 compatible locale with --lc-ctype, or choose a different locale provider.");
+			lc_ctype = "C";
+		}
+
+		/*
+		 * LC_COLLATE has no effect unless using the libc provider.
+		 */
+		if (collate_from_env && !utf8_compatible(lc_collate))
+		{
+			pg_log_warning("setting LC_COLLATE to \"C\"");
+			pg_log_warning_detail("Encoding of LC_COLLATE locale \"%s\" does not match encoding required by builtin locale \"%s\".",
+								  lc_collate, datlocale);
+			pg_log_warning_hint("Specify a UTF-8 compatible locale with --lc-collate, or choose a different locale provider.");
+			lc_collate = "C";
+		}
+	}
 }
 
 /*
@@ -2770,11 +2837,10 @@ setup_locale_encoding(void)
 		!check_locale_encoding(lc_collate, encodingid))
 		exit(1);				/* check_locale_encoding printed the error */
 
-	if (locale_provider == COLLPROVIDER_BUILTIN)
+	if (locale_provider == COLLPROVIDER_BUILTIN &&
+		strcmp(datlocale, "C") != 0)
 	{
-		if ((strcmp(datlocale, "C.UTF-8") == 0 ||
-			 strcmp(datlocale, "PG_UNICODE_FAST") == 0) &&
-			encodingid != PG_UTF8)
+		if (encodingid != PG_UTF8)
 			pg_fatal("builtin provider locale \"%s\" requires encoding \"%s\"",
 					 datlocale, "UTF-8");
 	}
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index b7ef7ed8d06..ba3211a4aa6 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -113,14 +113,13 @@ SKIP:
 
 if ($ENV{with_icu} eq 'yes')
 {
-	command_fails_like(
+	command_ok(
 		[
 			'initdb', '--no-sync',
 			'--locale-provider' => 'icu',
 			"$tempdir/data2"
 		],
-		qr/initdb: error: locale must be specified if provider is icu/,
-		'locale provider ICU requires --icu-locale');
+		'locale provider ICU default locale');
 
 	command_ok(
 		[
@@ -200,13 +199,15 @@ else
 		'locale provider ICU fails since no ICU support');
 }
 
-command_fails(
+command_like(
 	[
 		'initdb', '--no-sync',
+		'--auth' => 'trust',
 		'--locale-provider' => 'builtin',
 		"$tempdir/data6"
 	],
-	'locale provider builtin fails without --locale');
+	qr/^\s+default collation:\s+C.UTF-8\n/ms,
+	'locale provider builtin defaults to C.UTF-8');
 
 command_ok(
 	[
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index a8293390ede..6003d213e89 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -16,6 +16,9 @@ my $node = PostgreSQL::Test::Cluster->new('main');
 $node->init;
 $node->start;
 
+my $datlocprovider = $node->safe_psql('postgres',
+	"SELECT datlocprovider FROM pg_database WHERE datname='template1'");
+
 $node->issues_sql_like(
 	[ 'createdb', 'foobar1' ],
 	qr/statement: CREATE DATABASE foobar1/,
@@ -33,19 +36,6 @@ $node->issues_sql_like(
 
 if ($ENV{with_icu} eq 'yes')
 {
-	# This fails because template0 uses libc provider and has no ICU
-	# locale set.  It would succeed if template0 used the icu
-	# provider.  XXX Maybe split into multiple tests?
-	$node->command_fails(
-		[
-			'createdb',
-			'--template' => 'template0',
-			'--encoding' => 'UTF8',
-			'--locale-provider' => 'icu',
-			'foobar4',
-		],
-		'create database with ICU fails without ICU locale specified');
-
 	$node->issues_sql_like(
 		[
 			'createdb',
@@ -130,14 +120,18 @@ else
 		'create database with ICU fails since no ICU support');
 }
 
-$node->command_fails(
-	[
-		'createdb',
-		'--template' => 'template0',
-		'--locale-provider' => 'builtin',
-		'tbuiltin1',
-	],
-	'create database with provider "builtin" fails without --locale');
+if ($datlocprovider eq 'c')
+{
+	$node->command_fails(
+		[
+			'createdb',
+			'--template' => 'template0',
+			'--encoding' => 'UTF8',
+			'--locale-provider' => 'builtin',
+			'foobar4',
+		],
+		'create database with builtin provider fails without locale specified');
+}
 
 $node->command_ok(
 	[
@@ -219,15 +213,30 @@ $node->command_fails(
 	],
 	'create database with provider "builtin" and ICU_RULES=""');
 
-$node->command_fails(
-	[
-		'createdb',
-		'--template' => 'template1',
-		'--locale-provider' => 'builtin',
-		'--locale' => 'C',
-		'tbuiltin9',
-	],
-	'create database with provider "builtin" not matching template');
+if ($datlocprovider eq 'b')
+{
+	$node->command_fails(
+		[
+			'createdb',
+			'--template' => 'template1',
+			'--locale-provider' => 'libc',
+			'--locale' => 'C',
+			'tbuiltin9',
+		],
+		'create database with provider "libc" not matching template');
+}
+else
+{
+	$node->command_fails(
+		[
+			'createdb',
+			'--template' => 'template1',
+			'--locale-provider' => 'builtin',
+			'--locale' => 'C',
+			'tbuiltin9',
+		],
+		'create database with provider "builtin" not matching template');
+}
 
 $node->command_fails([ 'createdb', 'foobar1' ],
 	'fails if database already exists');
-- 
2.43.0

From ee66fa7b75dafa87e134a820bbd55efda82a5b2d Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Fri, 10 Oct 2025 11:03:59 -0700
Subject: [PATCH v3 2/2] initdb: add PG_LOCALE_PROVIDER environment variable.

Discussion: https://postgr.es/m/[email protected]
---
 doc/src/sgml/ref/initdb.sgml                  | 11 +++++++++++
 src/bin/initdb/initdb.c                       | 19 ++++++++++++++++++-
 .../modules/test_escape/t/001_test_escape.pl  |  2 +-
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 7613174c18b..e0437357d27 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -662,6 +662,17 @@ PostgreSQL documentation
     </listitem>
    </varlistentry>
 
+   <varlistentry id="app-initdb-environment-pg-locale-provider">
+    <term><envar>PG_LOCALE_PROVIDER</envar></term>
+
+    <listitem>
+     <para>
+      Sets the locale provider; can be overridden using the
+      <option>--locale-provider</option> option.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry id="app-initdb-environment-pg-color">
     <term><envar>PG_COLOR</envar></term>
     <listitem>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index e2960e5f17c..3b3ca9c377f 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -82,6 +82,7 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 
+#define DEFAULT_LOCALE_PROVIDER		COLLPROVIDER_LIBC
 #define DEFAULT_BUILTIN_LOCALE		"C.UTF-8"
 #define DEFAULT_ICU_LOCALE			"und"
 
@@ -146,7 +147,7 @@ static char *lc_monetary = NULL;
 static char *lc_numeric = NULL;
 static char *lc_time = NULL;
 static char *lc_messages = NULL;
-static char locale_provider = COLLPROVIDER_LIBC;
+static char locale_provider = '\0';
 static bool builtin_locale_specified = false;
 static char *datlocale = NULL;
 static bool icu_locale_specified = false;
@@ -3484,6 +3485,22 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
+	if (locale_provider == '\0')
+	{
+		char	   *provider_name = getenv("PG_LOCALE_PROVIDER");
+
+		if (!provider_name)
+			locale_provider = DEFAULT_LOCALE_PROVIDER;
+		else if (strcmp(provider_name, "builtin") == 0)
+			locale_provider = COLLPROVIDER_BUILTIN;
+		else if (strcmp(provider_name, "icu") == 0)
+			locale_provider = COLLPROVIDER_ICU;
+		else if (strcmp(provider_name, "libc") == 0)
+			locale_provider = COLLPROVIDER_LIBC;
+		else
+			pg_fatal("unrecognized locale provider: %s", provider_name);
+	}
+
 	if (builtin_locale_specified && locale_provider != COLLPROVIDER_BUILTIN)
 		pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
 				 "--builtin-locale", "builtin");
diff --git a/src/test/modules/test_escape/t/001_test_escape.pl b/src/test/modules/test_escape/t/001_test_escape.pl
index 0d5aec3ed74..b29f093db28 100644
--- a/src/test/modules/test_escape/t/001_test_escape.pl
+++ b/src/test/modules/test_escape/t/001_test_escape.pl
@@ -12,7 +12,7 @@ $node->init();
 $node->start();
 
 $node->safe_psql('postgres',
-	q(CREATE DATABASE db_sql_ascii ENCODING "sql_ascii" TEMPLATE template0;));
+	q(CREATE DATABASE db_sql_ascii LOCALE_PROVIDER "builtin" LOCALE "C" ENCODING "sql_ascii" TEMPLATE template0;));
 
 my $cmd =
   [ 'test_escape', '--conninfo', $node->connstr . " dbname=db_sql_ascii" ];
-- 
2.43.0

Reply via email to