On Thu, 2023-02-02 at 18:10 -0500, Tom Lane wrote:
> Yeah. I would be resistant to making ICU a required dependency,
> but it doesn't seem unreasonable to start moving towards it being
> our default collation support.
Patch attached.

To get the default locale, the patch initializes a UCollator with NULL for the locale name, and then queries that collator for its locale name. The name is then converted to a language tag, which is consistent with the initial collation import. I'm not sure that's the best way, but it seems reasonable.

If it's a user-provided locale (--icu-locale=), then the patch leaves it as-is, and does not convert it to a language tag (consistent with CREATE COLLATION and CREATE DATABASE).

I opened another discussion about whether we want to try harder to validate or canonicalize the locale name:

https://www.postgresql.org/message-id/11b1eeb7e7667fdd4178497aeb796c48d26e69b9.ca...@j-davis.com

-- 
Jeff Davis
PostgreSQL Contributor Team - AWS
From 1b7d940c0f12062185b8b42bf8d3c0a6f05a74d4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 8 Feb 2023 12:06:26 -0800
Subject: [PATCH v1] Use ICU by default at initdb time.

If the ICU locale is not specified, initialize the default collator
and retrieve the locale name from that.

Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.ca...@j-davis.com
---
 src/bin/initdb/initdb.c | 87 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 7a58c33ace..7321652db3 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -53,6 +53,9 @@
 #include <netdb.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
 #include <unistd.h>
 #include <signal.h>
 #include <time.h>
@@ -133,7 +136,11 @@ static char *lc_monetary = NULL;
 static char *lc_numeric = NULL;
 static char *lc_time = NULL;
 static char *lc_messages = NULL;
+#ifdef USE_ICU
+static char locale_provider = COLLPROVIDER_ICU;
+#else
 static char locale_provider = COLLPROVIDER_LIBC;
+#endif
 static char *icu_locale = NULL;
 static const char *default_text_search_config = NULL;
 static char *username = NULL;
@@ -2024,6 +2031,83 @@ check_icu_locale_encoding(int user_enc)
 	return true;
 }
 
+/*
+ * Check that ICU accepts the locale name; or if not specified, retrieve the
+ * default ICU locale.
+ *
+ * A user-provided icu_locale is only test-opened and otherwise left as-is.
+ * When icu_locale is NULL, the default collator is opened, its valid locale
+ * is looked up, and the result is stored in icu_locale (converted to a
+ * language tag when built against ICU 54 or later).  Failures are reported
+ * with pg_fatal().  In builds without USE_ICU this function does nothing.
+ */
+static void
+check_icu_locale(void)
+{
+#ifdef USE_ICU
+	UCollator  *collator;
+	UErrorCode	status;
+
+	status = U_ZERO_ERROR;
+	collator = ucol_open(icu_locale, &status);
+	if (U_FAILURE(status))
+	{
+		if (icu_locale)
+			pg_fatal("ICU locale \"%s\" could not be opened: %s",
+					 icu_locale, u_errorName(status));
+		else
+			pg_fatal("default ICU locale could not be opened: %s",
+					 u_errorName(status));
+	}
+
+	/* if not specified, get locale from default collator */
+	if (icu_locale == NULL)
+	{
+		const char *default_locale;
+
+		status = U_ZERO_ERROR;
+		default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
+											  &status);
+		if (U_FAILURE(status))
+		{
+			ucol_close(collator);
+			pg_fatal("could not determine default ICU locale");
+		}
+
+		if (U_ICU_VERSION_MAJOR_NUM >= 54)
+		{
+			const bool	strict = true;
+			char	   *langtag;
+			int			len;
+
+			/*
+			 * Size the buffer with a zero-capacity call first.  status is
+			 * reset afterwards because that call is not expected to leave
+			 * it at U_ZERO_ERROR (see ICU's preflighting convention).
+			 */
+			len = uloc_toLanguageTag(default_locale, NULL, 0, strict, &status);
+			langtag = pg_malloc(len + 1);
+			status = U_ZERO_ERROR;
+			uloc_toLanguageTag(default_locale, langtag, len + 1, strict,
+							   &status);
+
+			if (U_FAILURE(status))
+			{
+				ucol_close(collator);
+				pg_fatal("could not determine language tag for default locale \"%s\": %s",
+						 default_locale, u_errorName(status));
+			}
+
+			icu_locale = langtag;
+		}
+		else
+			icu_locale = pg_strdup(default_locale);
+	}
+
+	ucol_close(collator);
+#endif
+}
+
 /*
  * set up the locale variables
  *
@@ -2077,8 +2161,7 @@ setlocales(void)
 
 	if (locale_provider == COLLPROVIDER_ICU)
 	{
-		if (!icu_locale)
-			pg_fatal("ICU locale must be specified");
+		check_icu_locale();
 
 		/*
 		 * In supported builds, the ICU locale ID will be checked by the
-- 
2.34.1