On Wed, 2025-07-23 at 19:11 -0700, Jeff Davis wrote:
> On Fri, 2025-07-11 at 11:48 +1200, Thomas Munro wrote:
> > On Fri, Jul 11, 2025 at 6:22 AM Jeff Davis <[email protected]>
> > wrote:
> > > I don't have a great Windows development environment, and it
> > > appears CI and the buildfarm don't offer great coverage either.
> > > Can I ask for a volunteer to do the Windows side of this work?
> > 
> > Me neither but I'm willing to help with that, and have done lots of
> > closely related things through trial-by-CI...

Attached a new patch series, v6.

Rather than creating new global locale_t objects, this series (along
with a separate patch for NLS[1]) removes the dependency on the global
LC_CTYPE entirely. It's a bunch of small patches that replace direct
calls to tolower()/toupper() with calls into the provider.

An assumption of these patches is that, in the UTF-8 encoding, the
logic in pg_tolower()/pg_toupper() is equivalent to
pg_ascii_tolower()/pg_ascii_toupper(); that is, libc's
isupper()/islower() never report a lone high-bit byte as cased.

Generally these preserve existing behavior, but there are a couple of
differences:

  * If using the builtin C locale (not C.UTF-8) along with a datctype
that's a non-C locale with a single-byte encoding, the results of
downcase_identifier(), ltree, and fuzzystrmatch could change for
characters > 127. For ICU, I went to a bit of extra effort to preserve
the existing behavior here, because ICU is more likely to be used with
single-byte encodings.

  * When using ICU or builtin C.UTF-8 along with a datctype of
"tr_TR.UTF-8", the patches will affect ltree's and fuzzystrmatch's
treatment of i/I.

If these are a concern we can fix them with some hacks, but those
behaviors seem fairly obscure to me.

Regards,
        Jeff Davis

[1]
https://www.postgresql.org/message-id/[email protected]

From 78fbb9220930918221dc0a6aa48b1d0023860707 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 14:58:02 -0700
Subject: [PATCH v6 1/9] Avoid global LC_CTYPE dependency in pg_locale_libc.c.

Call tolower_l()/toupper_l() directly instead of going through
pg_tolower()/pg_toupper(), because the latter depend on the global
LC_CTYPE.
---
 src/backend/utils/adt/pg_locale_libc.c | 28 ++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 9c7fcd1fc7a..716f005066a 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -450,7 +450,12 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 		for (p = dest; *p; p++)
 		{
 			if (locale->is_default)
-				*p = pg_tolower((unsigned char) *p);
+			{
+				if (*p >= 'A' && *p <= 'Z')
+					*p += 'a' - 'A';
+				else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+					*p = tolower_l((unsigned char) *p, loc);
+			}
 			else
 				*p = tolower_l((unsigned char) *p, loc);
 		}
@@ -535,9 +540,19 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			if (locale->is_default)
 			{
 				if (wasalnum)
-					*p = pg_tolower((unsigned char) *p);
+				{
+					if (*p >= 'A' && *p <= 'Z')
+						*p += 'a' - 'A';
+					else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+						*p = tolower_l((unsigned char) *p, loc);
+				}
 				else
-					*p = pg_toupper((unsigned char) *p);
+				{
+					if (*p >= 'a' && *p <= 'z')
+						*p -= 'a' - 'A';
+					else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
+						*p = toupper_l((unsigned char) *p, loc);
+				}
 			}
 			else
 			{
@@ -633,7 +648,12 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 		for (p = dest; *p; p++)
 		{
 			if (locale->is_default)
-				*p = pg_toupper((unsigned char) *p);
+			{
+				if (*p >= 'a' && *p <= 'z')
+					*p -= 'a' - 'A';
+				else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
+					*p = toupper_l((unsigned char) *p, loc);
+			}
 			else
 				*p = toupper_l((unsigned char) *p, loc);
 		}
-- 
2.43.0

From 631daededebd9649169951764c72d8a372897b5c Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 14:51:47 -0700
Subject: [PATCH v6 2/9] Define char_tolower()/char_toupper() for all locale
 providers.

The behavior is defined for each locale provider rather than
unconditionally depending on the global LC_CTYPE setting. Needed as an
alternative to tolower()/toupper() for some callers.
---
 src/backend/utils/adt/like.c              |  4 +--
 src/backend/utils/adt/pg_locale.c         | 32 ++++++++++++++++-------
 src/backend/utils/adt/pg_locale_builtin.c | 18 +++++++++++++
 src/backend/utils/adt/pg_locale_icu.c     | 23 ++++++++++++++++
 src/backend/utils/adt/pg_locale_libc.c    | 21 +++++++++++++--
 src/include/utils/pg_locale.h             | 10 +++----
 6 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 4216ac17f43..37c1c86aee8 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -209,9 +209,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 	 * way.
 	 */
 
-	if (locale->ctype_is_c ||
-		(char_tolower_enabled(locale) &&
-		 pg_database_encoding_max_length() == 1))
+	if (locale->ctype_is_c || locale->ctype->pattern_casefold_char)
 	{
 		p = VARDATA_ANY(pat);
 		plen = VARSIZE_ANY_EXHDR(pat);
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 67299c55ed8..26a7244c3db 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1551,25 +1551,39 @@ char_is_cased(char ch, pg_locale_t locale)
 }
 
 /*
- * char_tolower_enabled()
+ * char_tolower()
  *
- * Does the provider support char_tolower()?
+ * Convert single-byte char to lowercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
  */
-bool
-char_tolower_enabled(pg_locale_t locale)
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
 {
-	return (locale->ctype->char_tolower != NULL);
+	if (locale->ctype == NULL)
+	{
+		if (ch >= 'A' && ch <= 'Z')
+			return ch + ('a' - 'A');
+		return ch;
+	}
+	return locale->ctype->char_tolower(ch, locale);
 }
 
 /*
- * char_tolower()
+ * char_toupper()
  *
- * Convert char (single-byte encoding) to lowercase.
+ * Convert single-byte char to uppercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
  */
 char
-char_tolower(unsigned char ch, pg_locale_t locale)
+char_toupper(unsigned char ch, pg_locale_t locale)
 {
-	return locale->ctype->char_tolower(ch, locale);
+	if (locale->ctype == NULL)
+	{
+		if (ch >= 'a' && ch <= 'z')
+			return ch - ('a' - 'A');
+		return ch;
+	}
+	return locale->ctype->char_toupper(ch, locale);
 }
 
 /*
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 3dc611b50e1..cfef6a86377 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -169,6 +169,22 @@ wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
 	return pg_u_isxdigit(wc, !locale->builtin.casemap_full);
 }
 
+static char
+char_tolower_builtin(unsigned char ch, pg_locale_t locale)
+{
+	if (ch >= 'A' && ch <= 'Z')
+		return ch + ('a' - 'A');
+	return ch;
+}
+
+static char
+char_toupper_builtin(unsigned char ch, pg_locale_t locale)
+{
+	if (ch >= 'a' && ch <= 'z')
+		return ch - ('a' - 'A');
+	return ch;
+}
+
 static bool
 char_is_cased_builtin(char ch, pg_locale_t locale)
 {
@@ -203,6 +219,8 @@ static const struct ctype_methods ctype_methods_builtin = {
 	.wc_ispunct = wc_ispunct_builtin,
 	.wc_isspace = wc_isspace_builtin,
 	.wc_isxdigit = wc_isxdigit_builtin,
+	.char_tolower = char_tolower_builtin,
+	.char_toupper = char_toupper_builtin,
 	.char_is_cased = char_is_cased_builtin,
 	.wc_tolower = wc_tolower_builtin,
 	.wc_toupper = wc_toupper_builtin,
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index f5a0cc8fe41..449e3bbb7a6 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,6 +121,27 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
 
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings.  XXX: consider fixing by decoding the
+ * single byte into a code point, and using u_tolower().
+ */
+static char
+char_tolower_icu(unsigned char ch, pg_locale_t locale)
+{
+	if (isupper(ch))
+		return tolower(ch);
+	return ch;
+}
+
+static char
+char_toupper_icu(unsigned char ch, pg_locale_t locale)
+{
+	if (islower(ch))
+		return toupper(ch);
+	return ch;
+}
+
 static bool
 char_is_cased_icu(char ch, pg_locale_t locale)
 {
@@ -238,6 +259,8 @@ static const struct ctype_methods ctype_methods_icu = {
 	.wc_ispunct = wc_ispunct_icu,
 	.wc_isspace = wc_isspace_icu,
 	.wc_isxdigit = wc_isxdigit_icu,
+	.char_tolower = char_tolower_icu,
+	.char_toupper = char_toupper_icu,
 	.char_is_cased = char_is_cased_icu,
 	.wc_toupper = toupper_icu,
 	.wc_tolower = tolower_icu,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..b0428ad288e 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -251,8 +251,21 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
 static char
 char_tolower_libc(unsigned char ch, pg_locale_t locale)
 {
-	Assert(pg_database_encoding_max_length() == 1);
-	return tolower_l(ch, locale->lt);
+	locale_t	loc = locale->lt;
+
+	if (isupper_l(ch, loc))
+		return tolower_l(ch, loc);
+	return ch;
+}
+
+static char
+char_toupper_libc(unsigned char ch, pg_locale_t locale)
+{
+	locale_t	loc = locale->lt;
+
+	if (islower_l(ch, loc))
+		return toupper_l(ch, loc);
+	return ch;
 }
 
 static bool
@@ -338,9 +351,11 @@ static const struct ctype_methods ctype_methods_libc_sb = {
 	.wc_isxdigit = wc_isxdigit_libc_sb,
 	.char_is_cased = char_is_cased_libc,
 	.char_tolower = char_tolower_libc,
+	.char_toupper = char_toupper_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
 	.max_chr = UCHAR_MAX,
+	.pattern_casefold_char = true,
 };
 
 /*
@@ -363,6 +378,7 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
 	.wc_isxdigit = wc_isxdigit_libc_sb,
 	.char_is_cased = char_is_cased_libc,
 	.char_tolower = char_tolower_libc,
+	.char_toupper = char_toupper_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
 	.max_chr = UCHAR_MAX,
@@ -384,6 +400,7 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
 	.wc_isxdigit = wc_isxdigit_libc_mb,
 	.char_is_cased = char_is_cased_libc,
 	.char_tolower = char_tolower_libc,
+	.char_toupper = char_toupper_libc,
 	.wc_toupper = toupper_libc_mb,
 	.wc_tolower = tolower_libc_mb,
 };
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 683e1a0eef8..790db566e91 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -113,13 +113,13 @@ struct ctype_methods
 
 	/* required */
 	bool		(*char_is_cased) (char ch, pg_locale_t locale);
+	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
+	char		(*char_toupper) (unsigned char ch, pg_locale_t locale);
 
 	/*
-	 * Optional. If defined, will only be called for single-byte encodings. If
-	 * not defined, or if the encoding is multibyte, will fall back to
-	 * pg_strlower().
+	 * Use byte-at-a-time case folding for case-insensitive patterns.
 	 */
-	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
+	bool		pattern_casefold_char;
 
 	/*
 	 * For regex and pattern matching efficiency, the maximum char value
@@ -177,8 +177,8 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
 
 extern bool char_is_cased(char ch, pg_locale_t locale);
-extern bool char_tolower_enabled(pg_locale_t locale);
 extern char char_tolower(unsigned char ch, pg_locale_t locale);
+extern char char_toupper(unsigned char ch, pg_locale_t locale);
 extern size_t pg_strlower(char *dst, size_t dstsize,
 						  const char *src, ssize_t srclen,
 						  pg_locale_t locale);
-- 
2.43.0

From a9f365b0ebd0c71ad2fec3bba8dbf7a21b502e3a Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 14:59:40 -0700
Subject: [PATCH v6 3/9] Avoid global LC_CTYPE dependency in like.c.

Call char_tolower() instead of pg_tolower().
---
 src/backend/utils/adt/like.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 37c1c86aee8..364c39cf4fb 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -96,7 +96,14 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
 	if (locale->ctype_is_c)
 		return pg_ascii_tolower(c);
 	else if (locale->is_default)
-		return pg_tolower(c);
+	{
+		if (c >= 'A' && c <= 'Z')
+			return c + ('a' - 'A');
+		else if (IS_HIGHBIT_SET(c))
+			return char_tolower(c, locale);
+		else
+			return c;
+	}
 	else
 		return char_tolower(c, locale);
 }
-- 
2.43.0

From 0dad412eb555550dd8a5d4ef3581695328fb8f12 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 15:04:58 -0700
Subject: [PATCH v6 4/9] Avoid global LC_CTYPE dependency in scansup.c.

Call char_tolower() instead of tolower() in downcase_identifier().

The function downcase_identifier() may be called before locale support
is initialized -- e.g. during GUC processing in the postmaster -- so
if the locale is unavailable, char_tolower() uses plain ASCII
semantics.

That can result in a difference in behavior during that early stage of
processing, but previously the behavior would have depended on the
postmaster's LC_CTYPE environment variable, which was fragile anyway.
---
 src/backend/parser/scansup.c      |  5 +++--
 src/backend/utils/adt/pg_locale.c | 16 ++++++++++++++--
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 2feb2b6cf5a..872075ba220 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -18,6 +18,7 @@
 
 #include "mb/pg_wchar.h"
 #include "parser/scansup.h"
+#include "utils/pg_locale.h"
 
 
 /*
@@ -67,8 +68,8 @@ downcase_identifier(const char *ident, int len, bool warn, bool truncate)
 
 		if (ch >= 'A' && ch <= 'Z')
 			ch += 'a' - 'A';
-		else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
-			ch = tolower(ch);
+		else if (enc_is_single_byte && IS_HIGHBIT_SET(ch))
+			ch = char_tolower(ch, NULL);
 		result[i] = (char) ch;
 	}
 	result[i] = '\0';
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 26a7244c3db..363215edb80 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1555,11 +1555,17 @@ char_is_cased(char ch, pg_locale_t locale)
  *
  * Convert single-byte char to lowercase. Not correct for multibyte encodings,
  * but needed for historical compatibility purposes.
+ *
+ * If locale is NULL, use the default database locale. This function may be
+ * called before the database locale is initialized, in which case it uses
+ * plain ASCII semantics.
  */
 char
 char_tolower(unsigned char ch, pg_locale_t locale)
 {
-	if (locale->ctype == NULL)
+	if (locale == NULL)
+		locale = default_locale;
+	if (locale == NULL || locale->ctype == NULL)
 	{
 		if (ch >= 'A' && ch <= 'Z')
 			return ch + ('a' - 'A');
@@ -1573,11 +1579,17 @@ char_tolower(unsigned char ch, pg_locale_t locale)
  *
  * Convert single-byte char to uppercase. Not correct for multibyte encodings,
  * but needed for historical compatibility purposes.
+ *
+ * If locale is NULL, use the default database locale. This function may be
+ * called before the database locale is initialized, in which case it uses
+ * plain ASCII semantics.
  */
 char
 char_toupper(unsigned char ch, pg_locale_t locale)
 {
-	if (locale->ctype == NULL)
+	if (locale == NULL)
+		locale = default_locale;
+	if (locale == NULL || locale->ctype == NULL)
 	{
 		if (ch >= 'a' && ch <= 'z')
 			return ch - ('a' - 'A');
-- 
2.43.0

From af958c9318ade598d74ea1e7ae720c287c83dee0 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 15:12:38 -0700
Subject: [PATCH v6 5/9] Avoid global LC_CTYPE dependency in pg_locale_icu.c.

ICU still depends on libc for compatibility with certain historical
behavior for single-byte encodings. Make the dependency explicit by
holding a locale_t object in the pg_locale_t object, so that at least
it does not depend on the global LC_CTYPE setting.
---
 src/backend/utils/adt/pg_locale_icu.c | 66 ++++++++++++++++++++++-----
 src/include/utils/pg_locale.h         |  1 +
 2 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 449e3bbb7a6..da250a23630 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,25 +121,34 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
 
-/*
- * ICU still depends on libc for compatibility with certain historical
- * behavior for single-byte encodings.  XXX: consider fixing by decoding the
- * single byte into a code point, and using u_tolower().
- */
 static char
 char_tolower_icu(unsigned char ch, pg_locale_t locale)
 {
-	if (isupper(ch))
-		return tolower(ch);
-	return ch;
+	locale_t	loc = locale->icu.lt;
+
+	if (loc)
+	{
+		if (isupper_l(ch, loc))
+			return tolower_l(ch, loc);
+		return ch;
+	}
+	else
+		return pg_ascii_tolower(ch);
 }
 
 static char
 char_toupper_icu(unsigned char ch, pg_locale_t locale)
 {
-	if (islower(ch))
-		return toupper(ch);
-	return ch;
+	locale_t	loc = locale->icu.lt;
+
+	if (loc)
+	{
+		if (islower_l(ch, loc))
+			return toupper_l(ch, loc);
+		return ch;
+	}
+	else
+		return pg_ascii_toupper(ch);
 }
 
 static bool
@@ -265,6 +274,29 @@ static const struct ctype_methods ctype_methods_icu = {
 	.wc_toupper = toupper_icu,
 	.wc_tolower = tolower_icu,
 };
+
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings.  See char_tolower_libc().
+ *
+ * XXX: consider fixing by decoding the single byte into a code point, and
+ * using u_tolower().
+ */
+static locale_t
+make_libc_ctype_locale(const char *ctype)
+{
+	locale_t	loc;
+
+#ifndef WIN32
+	loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
+#else
+	loc = _create_locale(LC_ALL, ctype);
+#endif
+	if (!loc)
+		report_newlocale_failure(ctype);
+
+	return loc;
+}
 #endif
 
 pg_locale_t
@@ -275,11 +307,13 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	const char *iculocstr;
 	const char *icurules = NULL;
 	UCollator  *collator;
+	locale_t	loc = (locale_t) 0;
 	pg_locale_t result;
 
 	if (collid == DEFAULT_COLLATION_OID)
 	{
 		HeapTuple	tp;
+		const char *ctype;
 		Datum		datum;
 		bool		isnull;
 
@@ -297,6 +331,15 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 		if (!isnull)
 			icurules = TextDatumGetCString(datum);
 
+		if (pg_database_encoding_max_length() == 1)
+		{
+			datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
+										   Anum_pg_database_datctype);
+			ctype = TextDatumGetCString(datum);
+
+			loc = make_libc_ctype_locale(ctype);
+		}
+
 		ReleaseSysCache(tp);
 	}
 	else
@@ -327,6 +370,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
 	result->icu.locale = MemoryContextStrdup(context, iculocstr);
 	result->icu.ucol = collator;
+	result->icu.lt = loc;
 	result->deterministic = deterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 790db566e91..c5978d903cc 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -165,6 +165,7 @@ struct pg_locale_struct
 		{
 			const char *locale;
 			UCollator  *ucol;
+			locale_t	lt;
 		}			icu;
 #endif
 	};
-- 
2.43.0

From 5ffbafb4051e0bfd763a64a134d71644e66847a4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 27 Oct 2025 16:23:14 -0700
Subject: [PATCH v6 6/9] Avoid global LC_CTYPE dependency in ltree/crc32.c.

Use char_tolower() instead of tolower().
---
 contrib/ltree/crc32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/ltree/crc32.c b/contrib/ltree/crc32.c
index 134f46a805e..5969f75c158 100644
--- a/contrib/ltree/crc32.c
+++ b/contrib/ltree/crc32.c
@@ -12,7 +12,7 @@
 
 #ifdef LOWER_NODE
 #include <ctype.h>
-#define TOLOWER(x)	tolower((unsigned char) (x))
+#define TOLOWER(x)	char_tolower((unsigned char) (x), NULL)
 #else
 #define TOLOWER(x)	(x)
 #endif
-- 
2.43.0

From 7399368ce4ee497cf26c1a1f4abfe0fdf192bbd8 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 27 Oct 2025 16:24:18 -0700
Subject: [PATCH v6 7/9] Avoid global LC_CTYPE dependency in fuzzystrmatch.

Use char_toupper() instead of toupper().
---
 contrib/fuzzystrmatch/dmetaphone.c    |  5 ++++-
 contrib/fuzzystrmatch/fuzzystrmatch.c | 19 +++++++++++--------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/contrib/fuzzystrmatch/dmetaphone.c b/contrib/fuzzystrmatch/dmetaphone.c
index 6627b2b8943..152eb4b2ddf 100644
--- a/contrib/fuzzystrmatch/dmetaphone.c
+++ b/contrib/fuzzystrmatch/dmetaphone.c
@@ -99,6 +99,7 @@ The remaining code is authored by Andrew Dunstan <[email protected]> and
 #include "postgres.h"
 
 #include "utils/builtins.h"
+#include "utils/pg_locale.h"
 
 /* turn off assertions for embedded function */
 #define NDEBUG
@@ -116,6 +117,8 @@ The remaining code is authored by Andrew Dunstan <[email protected]> and
 #include <assert.h>
 #include <ctype.h>
 
+#define TOUPPER(x) char_toupper(x, NULL)
+
 /* prototype for the main function we got from the perl module */
 static void DoubleMetaphone(char *str, char **codes);
 
@@ -284,7 +287,7 @@ MakeUpper(metastring *s)
 	char	   *i;
 
 	for (i = s->str; *i; i++)
-		*i = toupper((unsigned char) *i);
+		*i = TOUPPER((unsigned char) *i);
 }
 
 
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index e7cc314b763..03530fb73ab 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -41,6 +41,7 @@
 #include <ctype.h>
 
 #include "utils/builtins.h"
+#include "utils/pg_locale.h"
 #include "utils/varlena.h"
 #include "varatt.h"
 
@@ -49,6 +50,8 @@ PG_MODULE_MAGIC_EXT(
 					.version = PG_VERSION
 );
 
+#define TOUPPER(x) char_toupper(x, NULL)
+
 /*
  * Soundex
  */
@@ -62,7 +65,7 @@ static const char *const soundex_table = "01230120022455012623010202";
 static char
 soundex_code(char letter)
 {
-	letter = toupper((unsigned char) letter);
+	letter = TOUPPER((unsigned char) letter);
 	/* Defend against non-ASCII letters */
 	if (letter >= 'A' && letter <= 'Z')
 		return soundex_table[letter - 'A'];
@@ -124,7 +127,7 @@ getcode(char c)
 {
 	if (isalpha((unsigned char) c))
 	{
-		c = toupper((unsigned char) c);
+		c = TOUPPER((unsigned char) c);
 		/* Defend against non-ASCII letters */
 		if (c >= 'A' && c <= 'Z')
 			return _codes[c - 'A'];
@@ -301,18 +304,18 @@ metaphone(PG_FUNCTION_ARGS)
  * accessing the array directly... */
 
 /* Look at the next letter in the word */
-#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
+#define Next_Letter (TOUPPER((unsigned char) word[w_idx+1]))
 /* Look at the current letter in the word */
-#define Curr_Letter (toupper((unsigned char) word[w_idx]))
+#define Curr_Letter (TOUPPER((unsigned char) word[w_idx]))
 /* Go N letters back. */
 #define Look_Back_Letter(n) \
-	(w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
+	(w_idx >= (n) ? TOUPPER((unsigned char) word[w_idx-(n)]) : '\0')
 /* Previous letter.  I dunno, should this return null on failure? */
 #define Prev_Letter (Look_Back_Letter(1))
 /* Look two letters down.  It makes sure you don't walk off the string. */
 #define After_Next_Letter \
-	(Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
-#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
+	(Next_Letter != '\0' ? TOUPPER((unsigned char) word[w_idx+2]) : '\0')
+#define Look_Ahead_Letter(n) TOUPPER((unsigned char) Lookahead(word+w_idx, n))
 
 
 /* Allows us to safely look ahead an arbitrary # of letters */
@@ -742,7 +745,7 @@ _soundex(const char *instr, char *outstr)
 	}
 
 	/* Take the first letter as is */
-	*outstr++ = (char) toupper((unsigned char) *instr++);
+	*outstr++ = (char) TOUPPER((unsigned char) *instr++);
 
 	count = 1;
 	while (*instr && count < SOUNDEX_LEN)
-- 
2.43.0

From 46420299904cfe1829896446a860c39f0824551e Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 9 Oct 2024 10:00:58 -0700
Subject: [PATCH v6 8/9] Don't include ICU headers in pg_locale.h.

Needed in order to include pg_locale.h in src/port/pgstrcasecmp.c.
---
 src/backend/commands/collationcmds.c  |  4 ++++
 src/backend/utils/adt/formatting.c    |  4 ----
 src/backend/utils/adt/pg_locale.c     |  4 ++++
 src/backend/utils/adt/pg_locale_icu.c |  1 +
 src/backend/utils/adt/varlena.c       |  4 ++++
 src/include/utils/pg_locale.h         | 14 +++++---------
 6 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8acbfbbeda0..a57fe93c387 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -14,6 +14,10 @@
  */
 #include "postgres.h"
 
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
 #include "access/htup_details.h"
 #include "access/table.h"
 #include "access/xact.h"
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 78e19ac39ac..9d0dfc48671 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -70,10 +70,6 @@
 #include <limits.h>
 #include <wctype.h>
 
-#ifdef USE_ICU
-#include <unicode/ustring.h>
-#endif
-
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "common/int.h"
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 363215edb80..255f660c644 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -33,6 +33,10 @@
 
 #include <time.h>
 
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
 #include "access/htup_details.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index da250a23630..0fd8171c1da 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -13,6 +13,7 @@
 
 #ifdef USE_ICU
 #include <unicode/ucnv.h>
+#include <unicode/ucol.h>
 #include <unicode/ustring.h>
 
 /*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2c398cd9e5c..cf34a96b988 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -17,6 +17,10 @@
 #include <ctype.h>
 #include <limits.h>
 
+#ifdef USE_ICU
+#include <unicode/uchar.h>
+#endif
+
 #include "access/detoast.h"
 #include "access/toast_compression.h"
 #include "catalog/pg_collation.h"
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index c5978d903cc..b668f77e1ca 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -14,15 +14,6 @@
 
 #include "mb/pg_wchar.h"
 
-#ifdef USE_ICU
-/* only include the C APIs, to avoid errors in cpluspluscheck */
-#undef U_SHOW_CPLUSPLUS_API
-#define U_SHOW_CPLUSPLUS_API 0
-#undef U_SHOW_CPLUSPLUS_HEADER_API
-#define U_SHOW_CPLUSPLUS_HEADER_API 0
-#include <unicode/ucol.h>
-#endif
-
 /* use for libc locale names */
 #define LOCALE_NAME_BUFLEN 128
 
@@ -54,6 +45,11 @@ extern void cache_locale_time(void);
 struct pg_locale_struct;
 typedef struct pg_locale_struct *pg_locale_t;
 
+#ifdef USE_ICU
+struct UCollator;
+typedef struct UCollator UCollator;
+#endif
+
 /* methods that define collation behavior */
 struct collate_methods
 {
-- 
2.43.0

From 22a6d36d82a26269f406b64cd0865a360224eb63 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 27 Oct 2025 16:08:54 -0700
Subject: [PATCH v6 9/9] Avoid global LC_CTYPE dependency in pgstrcasecmp.c
 for server.

For the server (but not the frontend), change to use
char_tolower()/char_toupper() instead of tolower()/toupper().
---
 src/port/pgstrcasecmp.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index ec2b3a75c3d..f295df6ef51 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -28,6 +28,17 @@
 
 #include <ctype.h>
 
+/*
+ * Beware multiple evaluation hazards.
+ */
+#ifndef FRONTEND
+#include "utils/pg_locale.h"
+#define TOLOWER(x) char_tolower(x, NULL)
+#define TOUPPER(x) char_toupper(x, NULL)
+#else
+#define TOLOWER(x) (isupper(x) ? tolower(x) : x)
+#define TOUPPER(x) (islower(x) ? toupper(x) : x)
+#endif
 
 /*
  * Case-independent comparison of two null-terminated strings.
@@ -44,13 +55,13 @@ pg_strcasecmp(const char *s1, const char *s2)
 		{
 			if (ch1 >= 'A' && ch1 <= 'Z')
 				ch1 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
-				ch1 = tolower(ch1);
+			else if (IS_HIGHBIT_SET(ch1))
+				ch1 = TOLOWER(ch1);
 
 			if (ch2 >= 'A' && ch2 <= 'Z')
 				ch2 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
-				ch2 = tolower(ch2);
+			else if (IS_HIGHBIT_SET(ch2))
+				ch2 = TOLOWER(ch2);
 
 			if (ch1 != ch2)
 				return (int) ch1 - (int) ch2;
@@ -77,13 +88,13 @@ pg_strncasecmp(const char *s1, const char *s2, size_t n)
 		{
 			if (ch1 >= 'A' && ch1 <= 'Z')
 				ch1 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
-				ch1 = tolower(ch1);
+			else if (IS_HIGHBIT_SET(ch1))
+				ch1 = TOLOWER(ch1);
 
 			if (ch2 >= 'A' && ch2 <= 'Z')
 				ch2 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
-				ch2 = tolower(ch2);
+			else if (IS_HIGHBIT_SET(ch2))
+				ch2 = TOLOWER(ch2);
 
 			if (ch1 != ch2)
 				return (int) ch1 - (int) ch2;
@@ -106,8 +117,8 @@ pg_toupper(unsigned char ch)
 {
 	if (ch >= 'a' && ch <= 'z')
 		ch += 'A' - 'a';
-	else if (IS_HIGHBIT_SET(ch) && islower(ch))
-		ch = toupper(ch);
+	else if (IS_HIGHBIT_SET(ch))
+		ch = TOUPPER(ch);
 	return ch;
 }
 
@@ -123,8 +134,8 @@ pg_tolower(unsigned char ch)
 {
 	if (ch >= 'A' && ch <= 'Z')
 		ch += 'a' - 'A';
-	else if (IS_HIGHBIT_SET(ch) && isupper(ch))
-		ch = tolower(ch);
+	else if (IS_HIGHBIT_SET(ch))
+		ch = TOLOWER(ch);
 	return ch;
 }
 
-- 
2.43.0

Reply via email to