On Wed, Jun 4, 2025 at 9:17 PM Peter Eisentraut <pe...@eisentraut.org> wrote:
> I wonder why you want to capture LOCPATH early in main.c.  It seems
> sufficient to look it up when needed?

Right, it is setenv() that we're trying to avoid.  Updated.
From 5482ccf5a61061411f9a996da84f14471b791d83 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Wed, 4 Jun 2025 12:19:53 +1200
Subject: [PATCH v2] Load optional collation version from glibc LOCPATH.

One technique for dealing with glibc locale definition changes across
Linux distribution upgrades or migrations is to compile the locale
definitions from the source system with the target system's localedef
tool, and then point to the newly compiled locales with the LOCPATH
environment variable, with certain caveats.

Unfortunately this breaks the rather simple-minded approach in commit
d5ac14f9, which reports the new system's glibc library version for lack
of anything better.  Spurious warnings about mismatched collation are
reported, defeating the goal of the LOCPATH-based upgrade/migration
technique.

Since neither POSIX nor glibc defines a way for locales to report their
version (cf FreeBSD querylocale(), ICU ucol_getVersion(), Windows
GetNLSVersionEx()), invent a way for a user of the LOCPATH technique to
supply the version.  This can be used to store the version string of the
source system, or to invent a new convention for labeling collation
versions.  The version is read from the first text file found in this
list:

  * $LOCPATH/<collcollate>/LC_COLLATE.version
  * $LOCPATH/<collcollate>/version
  * $LOCPATH/LC_COLLATE.version
  * $LOCPATH/version

Only if none of these files are found will the glibc library version be
used, as before.  There is no change in behavior for most users, since
LOCPATH is not normally defined.  Non-glibc builds are not affected.
glibc itself has no knowledge of these files, which are a PostgreSQL
invention that relies only on knowledge of how glibc's compiled locales
are laid out, including the "normalization" of the codeset part of the
<collcollate> name (for example, .UTF-8 becomes .utf8).

Back-patch to 13, where d5ac14f9 shipped.

Reviewed-by: Peter Eisentraut <pe...@eisentraut.org>
Discussion: https://postgr.es/m/CA%2BhUKG%2BUngA4H%3DYtsz6iiz_xAzqG3JX9eC9CBSzpubfRz9gYeQ%40mail.gmail.com
---
 src/backend/utils/adt/pg_locale_libc.c | 106 ++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 199857e22db..a09492c05e7 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -19,6 +19,7 @@
 #include "catalog/pg_collation.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "storage/fd.h"
 #include "utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/memutils.h"
@@ -655,6 +656,43 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result;
 }
 
+#if defined(__GLIBC__)
+/*
+ * Read version string from user-supplied file into a palloc'd string with any
+ * trailing whitespace removed (at most TEXTBUFLEN - 1 bytes are read).  Return
+ * NULL if the file doesn't exist; any other open or read failure is an ERROR.
+ */
+static char *
+read_collversion_from_file(const char *path)
+{
+	char		buffer[TEXTBUFLEN];
+	ssize_t		size;
+	int			save_errno;
+	int			fd;
+
+	fd = OpenTransientFile(path, O_RDONLY);
+	if (fd < 0 && errno == ENOENT)
+		return NULL;			/* the file is optional, so not an error */
+	if (fd < 0)
+		ereport(ERROR, errcode_for_file_access(),
+				errmsg("could not open file \"%s\": %m", path));
+	size = read(fd, buffer, sizeof(buffer) - 1);	/* keep room for NUL */
+	save_errno = errno;			/* in case CloseTransientFile() clobbers it */
+	CloseTransientFile(fd);
+	if (size < 0)
+	{
+		errno = save_errno;		/* make %m describe the read() failure */
+		ereport(ERROR, errcode_for_file_access(),
+				errmsg("could not read from file \"%s\": %m", path));
+	}
+	while (size > 0 && isspace((unsigned char) buffer[size - 1]))
+		size--;
+	buffer[size] = 0;
+
+	return pstrdup(buffer);
+}
+#endif
+
 char *
 get_collation_actual_version_libc(const char *collcollate)
 {
@@ -665,8 +703,72 @@ get_collation_actual_version_libc(const char *collcollate)
 		pg_strcasecmp("POSIX", collcollate) != 0)
 	{
 #if defined(__GLIBC__)
-		/* Use the glibc version because we don't have anything better. */
-		collversion = pstrdup(gnu_get_libc_version());
+		char	   *locpath;
+
+		/*
+		 * If the user defined the environment variable LOCPATH (a glibc
+		 * extension) to override the search location for locale definitions,
+		 * perhaps pointing to definitions compiled from another distribution
+		 * or version of glibc as part of an upgrade strategy, provide a way
+		 * for the reported version string to be loaded from LC_COLLATE.version
+		 * or version in $LOCPATH/{collcollate}, or failing that from the same
+		 * names at top level in $LOCPATH.
+		 *
+		 * This convention is a PostgreSQL invention not known to glibc.
+		 * Neither glibc nor POSIX provides a way to store or query a version
+		 * string inside locale components themselves.
+		 */
+		if ((locpath = getenv("LOCPATH")))
+		{
+			char		collcollate_dir[LOCALE_NAME_BUFLEN];
+			char		pathname[MAXPGPATH];
+			char	   *p;
+
+			/* lower-case and digits only in codeset part, .UTF-8 -> .utf8 */
+			snprintf(collcollate_dir, sizeof(collcollate_dir), "%s",
+					 collcollate);
+			p = strchr(collcollate_dir, '.');
+			if (p)
+			{
+				++p;
+				while (*p)
+				{
+					if (!isalnum(*p))
+					{
+						memmove(p, p + 1, strlen(p));	/* counts terminator */
+						continue;
+					}
+					*p = tolower((unsigned char) *p);
+					++p;
+				}
+			}
+
+			snprintf(pathname, sizeof(pathname), "%s/%s/LC_COLLATE.version",
+					 locpath, collcollate_dir);
+			collversion = read_collversion_from_file(pathname);
+			if (collversion == NULL)
+			{
+				snprintf(pathname, sizeof(pathname), "%s/%s/version",
+						 locpath, collcollate_dir);
+				collversion = read_collversion_from_file(pathname);
+			}
+			if (collversion == NULL)
+			{
+				snprintf(pathname, sizeof(pathname), "%s/LC_COLLATE.version",
+						 locpath);
+				collversion = read_collversion_from_file(pathname);
+			}
+			if (collversion == NULL)
+			{
+				snprintf(pathname, sizeof(pathname), "%s/version",
+						 locpath);
+				collversion = read_collversion_from_file(pathname);
+			}
+		}
+
+		/* Use the glibc version if we don't have anything better. */
+		if (collversion == NULL)
+			collversion = pstrdup(gnu_get_libc_version());
 #elif defined(LC_VERSION_MASK)
 		locale_t	loc;
 
-- 
2.39.5

Reply via email to