On Thu, 2023-02-02 at 18:10 -0500, Tom Lane wrote:
> Yeah.  I would be resistant to making ICU a required dependency,
> but it doesn't seem unreasonable to start moving towards it being
> our default collation support.

Patch attached.

To get the default locale, the patch initializes a UCollator with NULL
for the locale name, and then queries it for the locale name. That name
is then converted to a language tag (when building against ICU 54 or
later), which is consistent with the initial collation import. I'm not
sure that's the best way, but it seems reasonable.

If it's a user-provided locale (--icu-locale=), then the patch leaves
it as-is, and does not convert it to a language tag (consistent with
CREATE COLLATION and CREATE DATABASE).

I opened another discussion about whether we want to try harder to
validate or canonicalize the locale name:

https://www.postgresql.org/message-id/11b1eeb7e7667fdd4178497aeb796c48d26e69b9.ca...@j-davis.com

-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From 1b7d940c0f12062185b8b42bf8d3c0a6f05a74d4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 8 Feb 2023 12:06:26 -0800
Subject: [PATCH v1] Use ICU by default at initdb time.

If the ICU locale is not specified, initialize the default collator
and retrieve the locale name from that.

Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.ca...@j-davis.com
---
 src/bin/initdb/initdb.c | 74 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 7a58c33ace..7321652db3 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -53,6 +53,9 @@
 #include <netdb.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
 #include <unistd.h>
 #include <signal.h>
 #include <time.h>
@@ -133,7 +134,11 @@ static char *lc_monetary = NULL;
 static char *lc_numeric = NULL;
 static char *lc_time = NULL;
 static char *lc_messages = NULL;
+#ifndef USE_ICU
 static char locale_provider = COLLPROVIDER_LIBC;
+#else							/* ICU available: make it the default */
+static char locale_provider = COLLPROVIDER_ICU;
+#endif
 static char *icu_locale = NULL;
 static const char *default_text_search_config = NULL;
 static char *username = NULL;
@@ -2024,6 +2029,72 @@ check_icu_locale_encoding(int user_enc)
 	return true;
 }
 
+/*
+ * Check that ICU can open a collator for icu_locale.  If icu_locale is NULL,
+ * open the default collator and set icu_locale to a newly allocated copy of
+ * its locale name (a language tag on ICU >= 54).  Errors go to pg_fatal().
+ */
+static void
+check_icu_locale(void)
+{
+#ifdef USE_ICU
+	UCollator  *collator;
+	UErrorCode	status;
+
+	status = U_ZERO_ERROR;
+	collator = ucol_open(icu_locale, &status);
+	if (U_FAILURE(status))
+	{
+		if (icu_locale)
+			pg_fatal("ICU locale \"%s\" could not be opened: %s",
+					 icu_locale, u_errorName(status));
+		else
+			pg_fatal("default ICU locale could not be opened: %s",
+					 u_errorName(status));
+	}
+
+	/* if not specified, get locale from default collator */
+	if (icu_locale == NULL)
+	{
+		const char *default_locale;
+
+		status = U_ZERO_ERROR;
+		default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
+											  &status);
+		if (U_FAILURE(status))
+		{
+			ucol_close(collator);
+			pg_fatal("could not determine default ICU locale: %s",
+					 u_errorName(status));
+		}
+
+		if (U_ICU_VERSION_MAJOR_NUM >= 54)
+		{
+			char	   *langtag;
+			int			len;
+
+			/* preflight with no buffer; the result sizes the allocation */
+			len = uloc_toLanguageTag(default_locale, NULL, 0, true, &status);
+			langtag = pg_malloc(len + 1);
+			status = U_ZERO_ERROR;	/* preflight leaves an overflow status */
+			uloc_toLanguageTag(default_locale, langtag, len + 1, true,
+							   &status);
+			if (U_FAILURE(status))
+			{
+				ucol_close(collator);
+				pg_fatal("could not determine language tag for default locale \"%s\": %s",
+						 default_locale, u_errorName(status));
+			}
+			icu_locale = langtag;
+		}
+		else
+			icu_locale = pg_strdup(default_locale);
+	}
+
+	ucol_close(collator);
+#endif
+}
+
 /*
  * set up the locale variables
  *
@@ -2077,8 +2148,7 @@ setlocales(void)
 
 	if (locale_provider == COLLPROVIDER_ICU)
 	{
-		if (!icu_locale)
-			pg_fatal("ICU locale must be specified");
+		check_icu_locale();
 
 		/*
 		 * In supported builds, the ICU locale ID will be checked by the
-- 
2.34.1

Reply via email to