On Fri, Jun 10, 2022 at 4:30 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> I'm not sold on any particular plan, but working through some examples
> helped me see your idea better...  I may try to code that up in a
> minimal way so we can kick the tyres...

I did a bit of hacking on that idea.  The goal was to stamp each index
with an ICU major version (not sure where, not done in the attached),
and if that doesn't match the library we're linked against, we'd try
to dlopen() libraries via symlinks with known name formats under
PGDATA/pg_icu_lib, which an administrator would have to create.  That
seemed a bit simpler than dealing with new catalogs for now...

See attached unfinished patch, which implements some of that.  It has
a single collation for en-US-x-icu, and routes calls to different
libraries depending on dynamic scope (which in cold hard reality
translates into a nasty global variable "current_icu_library").  The
idea was that it would normally point to the library we're linked
against, but whenever computing anything related to an index stamped
with ICU 63, we'd do pg_icu_activate_major_version(63), and afterwards
undo that.  Performance concerns aside, that now seems a bit too ugly
and fragile to me, and I gave up.  How could we convince ourselves
that we'd set the active ICU library correctly in all the required
dynamic scopes, but not leaked it into any other scopes?  Does that
even make sense?  But if not done like that, how else could we do it?

Better ideas/code welcome.

Executive summary of experiments so far: the "distinct collations"
concept is quite simple and robust, but exposes all the versions to
users and probably makes it really hard to upgrade (details not worked
out), while the "time travelling collations" concept is nice for users
but hard to pin down and prove correctness for since it seems to
require dynamic scoping/global state changes affecting code in far
away places.
From e842402fd6eeca413915c3808191d7928dfc0889 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Wed, 8 Jun 2022 17:43:53 +1200
Subject: [PATCH v2] WIP: allow multiple ICU libraries

XXX This is highly experimental code
---
 src/backend/access/hash/hashfunc.c |  18 +-
 src/backend/utils/adt/pg_locale.c  | 280 +++++++++++++++++++++++++++--
 src/backend/utils/adt/varchar.c    |  16 +-
 src/backend/utils/adt/varlena.c    |  57 +++---
 src/backend/utils/init/miscinit.c  |   9 +
 src/bin/initdb/initdb.c            |   1 +
 src/include/utils/pg_locale.h      |  94 +++++++++-
 7 files changed, 420 insertions(+), 55 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index b57ed946c4..81571658ed 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -298,11 +298,13 @@ hashtext(PG_FUNCTION_ARGS)
 
 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			bsize = current_icu_library->getSortKey(pg_icu_collator(current_icu_library,
+																	mylocale),
+													uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			current_icu_library->getSortKey(pg_icu_collator(current_icu_library,
+															mylocale),
+											uchar, ulen, buf, bsize);
 
 			result = hash_any(buf, bsize);
 
@@ -352,14 +354,14 @@ hashtextextended(PG_FUNCTION_ARGS)
 			UChar	   *uchar = NULL;
 			Size		bsize;
 			uint8_t    *buf;
+			UCollator  *ucol;
 
 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			ucol = pg_icu_collator(current_icu_library, mylocale);
+			bsize = current_icu_library->getSortKey(ucol, uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			current_icu_library->getSortKey(ucol, uchar, ulen, buf, bsize);
 
 			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a0490a7522..d9d17f1b8c 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_control.h"
 #include "mb/pg_wchar.h"
+#include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/hsearch.h"
@@ -78,6 +79,10 @@
 #include <shlwapi.h>
 #endif
 
+#ifdef HAVE_DLOPEN
+#include <dlfcn.h>
+#endif
+
 #define		MAX_L10N_DATA		80
 
 
@@ -1435,29 +1440,265 @@ lc_ctype_is_c(Oid collation)
 	return (lookup_collation_cache(collation, true))->ctype_is_c;
 }
 
+/* Defined in all builds: pg_locale.h declares it outside USE_ICU. */
 struct pg_locale_struct default_locale;
 
+#ifdef USE_ICU
+
+/* Loaded ICU libraries, indexed by PG_ICU_SLOT(); NULL until loaded. */
+static pg_icu_library *icu_libraries[PG_NUM_ICU_MAJOR_VERSIONS];
+
+/*
+ * Currently active ICU library: normally the one we're linked against, but
+ * changed while interacting with indexes built with an older ICU library.
+ */
+pg_icu_library *current_icu_library;
+
+/*
+ * Clean up a pg_icu_library whose initialization failed part-way through.
+ */
+static void
+free_icu_library(pg_icu_library *l)
+{
+	void	   *dl_handle = l->handle;
+
+	if (dl_handle != NULL)
+		dlclose(dl_handle);
+	pfree(l);
+}
+
+/* Look up the symbol "<function>_<version>" in a dlopen'd library. */
+static void *
+get_icu_function(void *handle, const char *function, int version)
+{
+	char		symbol[80];
+
+	snprintf(symbol, sizeof(symbol), "%s_%d", function, version);
+	return dlsym(handle, symbol);
+}
+
+/*
+ * Work out which ICU major version a dlopen'd library provides by looking
+ * for a version-suffixed ucol_open symbol.  Returns -1 if none is found.
+ */
+static int
+get_icu_library_major_version(void *handle)
+{
+	int			version = PG_MIN_ICU_MAJOR_VERSION;
+
+	while (version <= PG_MAX_ICU_MAJOR_VERSION)
+	{
+		if (get_icu_function(handle, "ucol_open", version) != NULL)
+			return version;
+		version++;
+	}
+	return -1;		/* too new, too old, unsuffixed symbols, or not ICU */
+}
+
+/*
+ * Return the pg_icu_library for an ICU major version, loading and caching it
+ * on first use.  Raises an error if the version can't be used or loaded.
+ */
+static pg_icu_library *
+get_icu_library(int major_version)
+{
+	pg_icu_library *l;
+
+	/*
+	 * Check the range at run time, not just with an assertion: it is used as
+	 * an array index below.  We also refuse versions newer than the one we're
+	 * linked against, to reduce the risk of an API change biting us.
+	 */
+	if (major_version < PG_MIN_ICU_MAJOR_VERSION ||
+		major_version > PG_MAX_ICU_MAJOR_VERSION)
+		elog(ERROR, "ICU major version %d is outside supported range %d to %d",
+			 major_version, PG_MIN_ICU_MAJOR_VERSION, PG_MAX_ICU_MAJOR_VERSION);
+
+	/* See if it's already loaded. */
+	l = icu_libraries[PG_ICU_SLOT(major_version)];
+	if (l)
+		return l;
+
+	/* Make a new entry. */
+	l = MemoryContextAllocZero(TopMemoryContext, sizeof(*l));
+	if (major_version == U_ICU_VERSION_MAJOR_NUM)
+	{
+		/*
+		 * This is the version we were compiled and linked against.  Simply
+		 * assign the function pointers.
+		 *
+		 * These assignments will fail to compile if an incompatible API
+		 * change is made to some future version of ICU, at which point we
+		 * might need to consider special treatment for different major
+		 * version ranges, with intermediate trampoline functions.
+		 */
+		l->major_version = major_version;
+		l->open = ucol_open;
+		l->close = ucol_close;
+		l->getVersion = ucol_getVersion;
+		l->versionToString = u_versionToString;
+		l->strcoll = ucol_strcoll;
+		l->strcollUTF8 = ucol_strcollUTF8;
+		l->getSortKey = ucol_getSortKey;
+		l->nextSortKeyPart = ucol_nextSortKeyPart;
+		l->errorName = u_errorName;
+
+		/*
+		 * Also assert the size of a couple of types used as output buffers,
+		 * as a canary to tell us to add extra padding in the (unlikely) event
+		 * that a later release makes these values smaller.
+		 */
+		StaticAssertStmt(U_MAX_VERSION_STRING_LENGTH == 20,
+						 "u_versionToString output buffer size changed incompatibly");
+		StaticAssertStmt(U_MAX_VERSION_LENGTH == 4,
+						 "ucol_getVersion output buffer size changed incompatibly");
+	}
+	else
+	{
+		/* This is an older version, so we'll need to use dlopen(). */
+#ifdef HAVE_DLOPEN
+		char		libicui18n_name[MAXPGPATH];
+		char		libicuuc_name[MAXPGPATH];
+		int			found_major_version;
+
+		/*
+		 * On many distributions, multiple ICU libraries can be installed
+		 * concurrently, but we don't want to guess how to find them.  The
+		 * administrator will need to put libraries or symlinks under
+		 * pg_icu_lib.
+		 */
+		snprintf(libicui18n_name,
+				 sizeof(libicui18n_name),
+				 "%s/pg_icu_lib/libicui18n.so.%d",
+				 DataDir,
+				 major_version);
+		snprintf(libicuuc_name,
+				 sizeof(libicuuc_name),
+				 "%s/pg_icu_lib/libicuuc.so.%d",
+				 DataDir,
+				 major_version);
+
+		l->handle = dlopen(libicui18n_name, RTLD_NOW | RTLD_GLOBAL);
+		if (l->handle == NULL)
+		{
+			/* dlopen() reports failures through dlerror(), not errno. */
+			const char *load_error = dlerror();
+			free_icu_library(l);
+			ereport(ERROR,
+					(errmsg("could not load library \"%s\": %s",
+							libicui18n_name, load_error)));
+		}
+
+		/* Sanity check the version. */
+		found_major_version = get_icu_library_major_version(l->handle);
+		if (found_major_version < 0)
+		{
+			free_icu_library(l);
+			ereport(ERROR,
+					(errmsg("could not find compatible ICU major version in library \"%s\"",
+							libicui18n_name)));
+		}
+		if (found_major_version != major_version)
+		{
+			free_icu_library(l);
+			ereport(ERROR,
+					(errmsg("expected to find ICU major version %d in library \"%s\", but found %d",
+							major_version, libicui18n_name, found_major_version)));
+		}
+		l->major_version = major_version;
+
+		/* Look up all the functions we need. */
+		l->open = get_icu_function(l->handle, "ucol_open", major_version);
+		l->close = get_icu_function(l->handle, "ucol_close", major_version);
+		l->getVersion = get_icu_function(l->handle, "ucol_getVersion",
+										 major_version);
+		l->versionToString = get_icu_function(l->handle, "u_versionToString",
+											  major_version);
+		l->strcoll = get_icu_function(l->handle, "ucol_strcoll",
+									  major_version);
+		l->strcollUTF8 = get_icu_function(l->handle, "ucol_strcollUTF8",
+										  major_version);
+		l->getSortKey = get_icu_function(l->handle, "ucol_getSortKey",
+										 major_version);
+		l->nextSortKeyPart = get_icu_function(l->handle, "ucol_nextSortKeyPart",
+											  major_version);
+		l->errorName = get_icu_function(l->handle, "u_errorName",
+										major_version);
+		if (!l->open ||
+			!l->close ||
+			!l->getVersion ||
+			!l->versionToString ||
+			!l->strcoll ||
+			!l->strcollUTF8 ||
+			!l->getSortKey ||
+			!l->nextSortKeyPart ||
+			!l->errorName)
+		{
+			free_icu_library(l);
+			ereport(ERROR,
+					(errmsg("could not find expected symbols in library \"%s\"",
+							libicui18n_name)));
+		}
+#else
+		free_icu_library(l);	/* don't leak the entry when we error out */
+		ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("dynamically loaded ICU libraries are not supported in this build")));
+#endif
+	}
+
+	icu_libraries[PG_ICU_SLOT(major_version)] = l;
+
+	return l;
+}
+
+/*
+ * Make the given ICU major version the active one, loading its library if
+ * necessary.  Pass -1 for the ICU library we're linked against, which saves
+ * callers from needing the ICU headers just to name that version.
+ *
+ * The linked version is activated when a process initializes (see
+ * miscinit.c).  The intent is to switch temporarily to an older dlopen'd
+ * library while working with objects built using an older ICU.
+ */
+void
+pg_icu_activate_major_version(int major_version)
+{
+	if (major_version == -1)
+		major_version = U_ICU_VERSION_MAJOR_NUM;
+	current_icu_library = get_icu_library(major_version);
+}
+
+#endif
+
 void
 make_icu_collator(const char *iculocstr,
 				  struct pg_locale_struct *resultp)
 {
 #ifdef USE_ICU
+	pg_icu_library *l;
 	UCollator  *collator;
 	UErrorCode	status;
 
+	/*
+	 * Initially we will open the collator with the linked ICU library only.
+	 * Collators for any other versions we need later will be opened on demand
+	 * with the appropriate library.
+	 */
+	l = get_icu_library(U_ICU_VERSION_MAJOR_NUM);
 	status = U_ZERO_ERROR;
-	collator = ucol_open(iculocstr, &status);
+	collator = l->open(iculocstr, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open collator for locale \"%s\": %s",
-						iculocstr, u_errorName(status))));
+						iculocstr, l->errorName(status))));
 
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
+	if (l->major_version < 54)
 		icu_set_collation_attributes(collator, iculocstr);
 
 	/* We will leak this string if the caller errors later :-( */
 	resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
-	resultp->info.icu.ucol = collator;
+	resultp->info.icu.ucol[PG_ICU_SLOT(U_ICU_VERSION_MAJOR_NUM)] = collator;
 #else							/* not USE_ICU */
 	/* could get here if a collation was created by a build with ICU */
 	ereport(ERROR,
@@ -1688,21 +1929,29 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 #ifdef USE_ICU
 	if (collprovider == COLLPROVIDER_ICU)
 	{
+		pg_icu_library *l;
 		UCollator  *collator;
 		UErrorCode	status;
 		UVersionInfo versioninfo;
 		char		buf[U_MAX_VERSION_STRING_LENGTH];
 
+		/*
+		 * XXX Here we're only reporting the version from the linked ICU
+		 * library!  The catalog arrangement for collversion doesn't make any
+		 * sense, in a world with multiple ICU libraries accessible through
+		 * one collation OID.
+		 */
+		l = get_icu_library(U_ICU_VERSION_MAJOR_NUM);
 		status = U_ZERO_ERROR;
-		collator = ucol_open(collcollate, &status);
+		collator = l->open(collcollate, &status);
 		if (U_FAILURE(status))
 			ereport(ERROR,
 					(errmsg("could not open collator for locale \"%s\": %s",
-							collcollate, u_errorName(status))));
-		ucol_getVersion(collator, versioninfo);
-		ucol_close(collator);
+							collcollate, l->errorName(status))));
+		l->getVersion(collator, versioninfo);
+		l->close(collator);
 
-		u_versionToString(versioninfo, buf);
+		l->versionToString(versioninfo, buf);
 		collversion = pstrdup(buf);
 	}
 	else
@@ -1770,6 +2019,8 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 
 
 #ifdef USE_ICU
+
+
 /*
  * Converter object for converting between ICU's UChar strings and C strings
  * in database encoding.  Since the database encoding doesn't change, we only
@@ -1991,19 +2242,22 @@ void
 check_icu_locale(const char *icu_locale)
 {
 #ifdef USE_ICU
+	pg_icu_library *l;
 	UCollator  *collator;
 	UErrorCode	status;
 
+	/* We'll use the linked ICU library to check for validity. */
+	l = get_icu_library(U_ICU_VERSION_MAJOR_NUM);
 	status = U_ZERO_ERROR;
-	collator = ucol_open(icu_locale, &status);
+	collator = l->open(icu_locale, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open collator for locale \"%s\": %s",
-						icu_locale, u_errorName(status))));
+						icu_locale, l->errorName(status))));
 
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
+	if (l->major_version < 54)
 		icu_set_collation_attributes(collator, icu_locale);
-	ucol_close(collator);
+	l->close(collator);
 #else
 	ereport(ERROR,
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index bbeb0a2653..dfd2b1c575 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -1022,14 +1022,14 @@ hashbpchar(PG_FUNCTION_ARGS)
 			UChar	   *uchar = NULL;
 			Size		bsize;
 			uint8_t    *buf;
+			UCollator  *ucol;
 
 			ulen = icu_to_uchar(&uchar, keydata, keylen);
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			ucol = pg_icu_collator(current_icu_library, mylocale);
+			bsize = current_icu_library->getSortKey(ucol, uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			current_icu_library->getSortKey(ucol, uchar, ulen, buf, bsize);
 
 			result = hash_any(buf, bsize);
 
@@ -1083,14 +1083,14 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 			UChar	   *uchar = NULL;
 			Size		bsize;
 			uint8_t    *buf;
+			UCollator  *ucol;
 
 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			ucol = pg_icu_collator(current_icu_library, mylocale);
+			bsize = current_icu_library->getSortKey(ucol, uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			current_icu_library->getSortKey(ucol, uchar, ulen, buf, bsize);
 
 			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
 
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 919138eaf3..abcc623f5a 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1666,10 +1666,11 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 					UErrorCode	status;
 
 					status = U_ZERO_ERROR;
-					result = ucol_strcollUTF8(mylocale->info.icu.ucol,
-											  arg1, len1,
-											  arg2, len2,
-											  &status);
+					result = current_icu_library->strcollUTF8(pg_icu_collator(current_icu_library,
+																			  mylocale),
+															  arg1, len1,
+															  arg2, len2,
+															  &status);
 					if (U_FAILURE(status))
 						ereport(ERROR,
 								(errmsg("collation failed: %s", u_errorName(status))));
@@ -1685,9 +1686,10 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 					ulen1 = icu_to_uchar(&uchar1, arg1, len1);
 					ulen2 = icu_to_uchar(&uchar2, arg2, len2);
 
-					result = ucol_strcoll(mylocale->info.icu.ucol,
-										  uchar1, ulen1,
-										  uchar2, ulen2);
+					result = current_icu_library->strcoll(pg_icu_collator(current_icu_library,
+																		  mylocale),
+														  uchar1, ulen1,
+														  uchar2, ulen2);
 
 					pfree(uchar1);
 					pfree(uchar2);
@@ -2389,13 +2391,15 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 				UErrorCode	status;
 
 				status = U_ZERO_ERROR;
-				result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
-										  a1p, len1,
-										  a2p, len2,
-										  &status);
+				result = current_icu_library->strcollUTF8(pg_icu_collator(current_icu_library,
+																		  sss->locale),
+														  a1p, len1,
+														  a2p, len2,
+														  &status);
 				if (U_FAILURE(status))
 					ereport(ERROR,
-							(errmsg("collation failed: %s", u_errorName(status))));
+							(errmsg("collation failed: %s",
+									current_icu_library->errorName(status))));
 			}
 			else
 #endif
@@ -2408,9 +2412,10 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 				ulen1 = icu_to_uchar(&uchar1, a1p, len1);
 				ulen2 = icu_to_uchar(&uchar2, a2p, len2);
 
-				result = ucol_strcoll(sss->locale->info.icu.ucol,
-									  uchar1, ulen1,
-									  uchar2, ulen2);
+				result = current_icu_library->strcoll(pg_icu_collator(current_icu_library,
+																	  sss->locale),
+													  uchar1, ulen1,
+													  uchar2, ulen2);
 
 				pfree(uchar1);
 				pfree(uchar2);
@@ -2574,21 +2579,23 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 					uiter_setUTF8(&iter, sss->buf1, len);
 					state[0] = state[1] = 0;	/* won't need that again */
 					status = U_ZERO_ERROR;
-					bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
-												 &iter,
-												 state,
-												 (uint8_t *) sss->buf2,
-												 Min(sizeof(Datum), sss->buflen2),
-												 &status);
+					bsize = current_icu_library->nextSortKeyPart(pg_icu_collator(current_icu_library,
+																				 sss->locale),
+																 &iter,
+																 state,
+																 (uint8_t *) sss->buf2,
+																 Min(sizeof(Datum), sss->buflen2),
+																 &status);
 					if (U_FAILURE(status))
 						ereport(ERROR,
 								(errmsg("sort key generation failed: %s",
-										u_errorName(status))));
+										current_icu_library->errorName(status))));
 				}
 				else
-					bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
-											uchar, ulen,
-											(uint8_t *) sss->buf2, sss->buflen2);
+					bsize = current_icu_library->getSortKey(pg_icu_collator(current_icu_library,
+																			sss->locale),
+															uchar, ulen,
+															(uint8_t *) sss->buf2, sss->buflen2);
 			}
 			else
 #endif
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index b25bd0e583..bd257e9b4b 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -51,6 +51,7 @@
 #include "utils/guc.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
+#include "utils/pg_locale.h"
 #include "utils/pidfile.h"
 #include "utils/syscache.h"
 #include "utils/varlena.h"
@@ -164,6 +165,10 @@ InitPostmasterChild(void)
 
 	/* Request a signal if the postmaster dies, if possible. */
 	PostmasterDeathSignalInit();
+
+#ifdef USE_ICU
+	pg_icu_activate_major_version(-1);
+#endif
 }
 
 /*
@@ -208,6 +213,10 @@ InitStandaloneProcess(const char *argv0)
 
 	if (pkglib_path[0] == '\0')
 		get_pkglib_path(my_exec_path, pkglib_path);
+
+#ifdef USE_ICU
+	pg_icu_activate_major_version(-1);
+#endif
 }
 
 void
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index ed6de7ca94..68bd181a02 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -214,6 +214,7 @@ static const char *const subdirs[] = {
 	"pg_wal/archive_status",
 	"pg_commit_ts",
 	"pg_dynshmem",
+	"pg_icu_lib",
 	"pg_notify",
 	"pg_serial",
 	"pg_snapshots",
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index e7385faef8..67a3dab0b1 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -74,6 +74,68 @@ extern struct lconv *PGLC_localeconv(void);
 
 extern void cache_locale_time(void);
 
+#ifdef USE_ICU
+
+/*
+ * Newest ICU major version we're willing to use: the one we were compiled and
+ * linked against.  We don't call into newer dlopen'd libraries, just in case
+ * there is an incompatible API change.
+ */
+#define PG_MAX_ICU_MAJOR_VERSION U_ICU_VERSION_MAJOR_NUM
+
+/* Oldest ICU major version we support; known to have the right API. */
+#define PG_MIN_ICU_MAJOR_VERSION 54
+
+/*
+ * Arrays with one element per supported major version serve as small, fast
+ * lookup tables; PG_ICU_SLOT() maps a major version to its array index.
+ */
+#define PG_NUM_ICU_MAJOR_VERSIONS								\
+	(PG_MAX_ICU_MAJOR_VERSION - PG_MIN_ICU_MAJOR_VERSION + 1)
+#define PG_ICU_SLOT(major_version)					\
+	((major_version) - PG_MIN_ICU_MAJOR_VERSION)
+
+/*
+ * One ICU library, either the one we're linked against or one loaded at
+ * runtime, with pointers to the ICU functions we call through it.
+ */
+typedef struct pg_icu_library
+{
+	void	   *handle;			/* dlopen() handle, or NULL if linked */
+	int			major_version;	/* major version of ICU */
+	UCollator *(*open)(const char *loc, UErrorCode *status);
+	void (*close)(UCollator *coll);
+	void (*getVersion)(const UCollator *coll, UVersionInfo info);
+	void (*versionToString)(const UVersionInfo versionArray,
+							char *versionString);
+	UCollationResult (*strcoll)(const UCollator *coll,
+								const UChar *source,
+								int32_t sourceLength,
+								const UChar *target,
+								int32_t targetLength);
+	UCollationResult (*strcollUTF8)(const UCollator *coll,
+									const char *source,
+									int32_t sourceLength,
+									const char *target,
+									int32_t targetLength,
+									UErrorCode *status);
+	int32_t (*getSortKey)(const UCollator *coll,
+						  const UChar *source,
+						  int32_t sourceLength,
+						  uint8_t *result,
+						  int32_t resultLength);
+	int32_t (*nextSortKeyPart)(const UCollator *coll,
+							   UCharIterator *iter,
+							   uint32_t state[2],
+							   uint8_t *dest,
+							   int32_t count,
+							   UErrorCode *status);
+	const char *(*errorName)(UErrorCode code);
+} pg_icu_library;
+
+extern pg_icu_library *current_icu_library;
+
+#endif
 
 /*
  * We define our own wrapper around locale_t so we can keep the same
@@ -94,7 +156,7 @@ struct pg_locale_struct
 		struct
 		{
 			const char *locale;
-			UCollator  *ucol;
+			UCollator  *ucol[PG_NUM_ICU_MAJOR_VERSIONS];
 		}			icu;
 #endif
 		int			dummy;		/* in case we have neither LOCALE_T nor ICU */
@@ -103,6 +165,36 @@ struct pg_locale_struct
 
 typedef struct pg_locale_struct *pg_locale_t;
 
+#ifdef USE_ICU
+/*
+ * Return the collator for 'loc' that belongs to ICU library 'lib', opening
+ * and caching it in 'loc' on first use.  Raises an error if it can't open.
+ */
+static inline UCollator *
+pg_icu_collator(pg_icu_library *lib, pg_locale_t loc)
+{
+	int			major_version = lib->major_version;
+	UCollator  *collator = loc->info.icu.ucol[PG_ICU_SLOT(major_version)];
+
+	if (unlikely(!collator))
+	{
+		UErrorCode	status = U_ZERO_ERROR;	/* ICU requires this preset */
+
+		collator = lib->open(loc->info.icu.locale, &status);
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errmsg("could not open collator for locale \"%s\", ICU major version %d: %s",
+							loc->info.icu.locale, major_version,
+							lib->errorName(status))));
+		loc->info.icu.ucol[PG_ICU_SLOT(major_version)] = collator;
+	}
+
+	return collator;
+}
+
+extern void pg_icu_activate_major_version(int major_version);
+#endif
+
 extern PGDLLIMPORT struct pg_locale_struct default_locale;
 
 extern void make_icu_collator(const char *iculocstr,
-- 
2.36.0

Reply via email to