On Wed, 2025-07-23 at 19:11 -0700, Jeff Davis wrote:
> On Fri, 2025-07-11 at 11:48 +1200, Thomas Munro wrote:
> > On Fri, Jul 11, 2025 at 6:22 AM Jeff Davis <[email protected]>
> > wrote:
> > > I don't have a great windows development environment, and it
> > > appears CI
> > > and the buildfarm don't offer great coverage either. Can I ask
> > > for
> > > a
> > > volunteer to do the windows side of this work?
> >
> > Me neither but I'm willing to help with that, and have done lots of
> > closely related things through trial-by-CI...
Attached a new patch series, v6.
Rather than creating new global locale_t objects, this series (along
with a separate patch for NLS[1]) removes the dependency on the global
LC_CTYPE entirely. It's a bunch of small patches that replace direct
calls to tolower()/toupper() with calls into the provider.
An assumption of these patches is that, in the UTF-8 encoding, the
logic in pg_tolower()/pg_toupper() is equivalent to
pg_ascii_tolower()/pg_ascii_toupper().
Generally these preserve existing behavior, but there are a couple of
differences:
* If using the builtin C locale (not C.UTF-8) along with a datctype
that's a non-C locale with a single-byte encoding, these patches could
change the results of downcase_identifier(), ltree, and fuzzystrmatch
for characters > 127. For ICU, I went to a bit of extra effort to
preserve the existing behavior here, because ICU is more likely to be
used with single-byte encodings.
* When using ICU or builtin C.UTF-8 along with a datctype of
"tr_TR.UTF-8", these patches will change ltree's and fuzzystrmatch's
treatment of i/I.
If these are a concern we can fix them with some hacks, but those
behaviors seem fairly obscure to me.
Regards,
Jeff Davis
[1]
https://www.postgresql.org/message-id/[email protected]
From 78fbb9220930918221dc0a6aa48b1d0023860707 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 14:58:02 -0700
Subject: [PATCH v6 1/9] Avoid global LC_CTYPE dependency in pg_locale_libc.c.
Call tolower_l()/toupper_l() directly instead of going through
pg_tolower()/pg_toupper(), because the latter depend on the global
LC_CTYPE.
---
src/backend/utils/adt/pg_locale_libc.c | 28 ++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 9c7fcd1fc7a..716f005066a 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -450,7 +450,12 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
for (p = dest; *p; p++)
{
if (locale->is_default)
- *p = pg_tolower((unsigned char) *p);
+ {
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+ *p = tolower_l((unsigned char) *p, loc);
+ }
else
*p = tolower_l((unsigned char) *p, loc);
}
@@ -535,9 +540,19 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
if (locale->is_default)
{
if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
+ {
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+ *p = tolower_l((unsigned char) *p, loc);
+ }
else
- *p = pg_toupper((unsigned char) *p);
+ {
+ if (*p >= 'a' && *p <= 'z')
+ *p -= 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
+ *p = toupper_l((unsigned char) *p, loc);
+ }
}
else
{
@@ -633,7 +648,12 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
for (p = dest; *p; p++)
{
if (locale->is_default)
- *p = pg_toupper((unsigned char) *p);
+ {
+ if (*p >= 'a' && *p <= 'z')
+ *p -= 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
+ *p = toupper_l((unsigned char) *p, loc);
+ }
else
*p = toupper_l((unsigned char) *p, loc);
}
--
2.43.0
From 631daededebd9649169951764c72d8a372897b5c Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 14:51:47 -0700
Subject: [PATCH v6 2/9] Define char_tolower()/char_toupper() for all locale
providers.
The behavior is defined for each locale provider rather than
unconditionally depending on the global LC_CTYPE setting. Needed as an
alternative to tolower()/toupper() for some callers.
---
src/backend/utils/adt/like.c | 4 +--
src/backend/utils/adt/pg_locale.c | 32 ++++++++++++++++-------
src/backend/utils/adt/pg_locale_builtin.c | 18 +++++++++++++
src/backend/utils/adt/pg_locale_icu.c | 23 ++++++++++++++++
src/backend/utils/adt/pg_locale_libc.c | 21 +++++++++++++--
src/include/utils/pg_locale.h | 10 +++----
6 files changed, 89 insertions(+), 19 deletions(-)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 4216ac17f43..37c1c86aee8 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -209,9 +209,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
* way.
*/
- if (locale->ctype_is_c ||
- (char_tolower_enabled(locale) &&
- pg_database_encoding_max_length() == 1))
+ if (locale->ctype_is_c || locale->ctype->pattern_casefold_char)
{
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 67299c55ed8..26a7244c3db 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1551,25 +1551,39 @@ char_is_cased(char ch, pg_locale_t locale)
}
/*
- * char_tolower_enabled()
+ * char_tolower()
*
- * Does the provider support char_tolower()?
+ * Convert single-byte char to lowercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
*/
-bool
-char_tolower_enabled(pg_locale_t locale)
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
{
- return (locale->ctype->char_tolower != NULL);
+ if (locale->ctype == NULL)
+ {
+ if (ch >= 'A' && ch <= 'Z')
+ return ch + ('a' - 'A');
+ return ch;
+ }
+ return locale->ctype->char_tolower(ch, locale);
}
/*
- * char_tolower()
+ * char_toupper()
*
- * Convert char (single-byte encoding) to lowercase.
+ * Convert single-byte char to uppercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
*/
char
-char_tolower(unsigned char ch, pg_locale_t locale)
+char_toupper(unsigned char ch, pg_locale_t locale)
{
- return locale->ctype->char_tolower(ch, locale);
+ if (locale->ctype == NULL)
+ {
+ if (ch >= 'a' && ch <= 'z')
+ return ch - ('a' - 'A');
+ return ch;
+ }
+ return locale->ctype->char_toupper(ch, locale);
}
/*
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 3dc611b50e1..cfef6a86377 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -169,6 +169,22 @@ wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
return pg_u_isxdigit(wc, !locale->builtin.casemap_full);
}
+static char
+char_tolower_builtin(unsigned char ch, pg_locale_t locale)
+{
+ if (ch >= 'A' && ch <= 'Z')
+ return ch + ('a' - 'A');
+ return ch;
+}
+
+static char
+char_toupper_builtin(unsigned char ch, pg_locale_t locale)
+{
+ if (ch >= 'a' && ch <= 'z')
+ return ch - ('a' - 'A');
+ return ch;
+}
+
static bool
char_is_cased_builtin(char ch, pg_locale_t locale)
{
@@ -203,6 +219,8 @@ static const struct ctype_methods ctype_methods_builtin = {
.wc_ispunct = wc_ispunct_builtin,
.wc_isspace = wc_isspace_builtin,
.wc_isxdigit = wc_isxdigit_builtin,
+ .char_tolower = char_tolower_builtin,
+ .char_toupper = char_toupper_builtin,
.char_is_cased = char_is_cased_builtin,
.wc_tolower = wc_tolower_builtin,
.wc_toupper = wc_toupper_builtin,
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index f5a0cc8fe41..449e3bbb7a6 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,6 +121,27 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings. XXX: consider fixing by decoding the
+ * single byte into a code point, and using u_tolower().
+ */
+static char
+char_tolower_icu(unsigned char ch, pg_locale_t locale)
+{
+ if (isupper(ch))
+ return tolower(ch);
+ return ch;
+}
+
+static char
+char_toupper_icu(unsigned char ch, pg_locale_t locale)
+{
+ if (islower(ch))
+ return toupper(ch);
+ return ch;
+}
+
static bool
char_is_cased_icu(char ch, pg_locale_t locale)
{
@@ -238,6 +259,8 @@ static const struct ctype_methods ctype_methods_icu = {
.wc_ispunct = wc_ispunct_icu,
.wc_isspace = wc_isspace_icu,
.wc_isxdigit = wc_isxdigit_icu,
+ .char_tolower = char_tolower_icu,
+ .char_toupper = char_toupper_icu,
.char_is_cased = char_is_cased_icu,
.wc_toupper = toupper_icu,
.wc_tolower = tolower_icu,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..b0428ad288e 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -251,8 +251,21 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
static char
char_tolower_libc(unsigned char ch, pg_locale_t locale)
{
- Assert(pg_database_encoding_max_length() == 1);
- return tolower_l(ch, locale->lt);
+ locale_t loc = locale->lt;
+
+ if (isupper_l(ch, loc))
+ return tolower_l(ch, loc);
+ return ch;
+}
+
+static char
+char_toupper_libc(unsigned char ch, pg_locale_t locale)
+{
+ locale_t loc = locale->lt;
+
+ if (islower_l(ch, loc))
+ return toupper_l(ch, loc);
+ return ch;
}
static bool
@@ -338,9 +351,11 @@ static const struct ctype_methods ctype_methods_libc_sb = {
.wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
+ .char_toupper = char_toupper_libc,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
.max_chr = UCHAR_MAX,
+ .pattern_casefold_char = true,
};
/*
@@ -363,6 +378,7 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
.wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
+ .char_toupper = char_toupper_libc,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
.max_chr = UCHAR_MAX,
@@ -384,6 +400,7 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
.wc_isxdigit = wc_isxdigit_libc_mb,
.char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
+ .char_toupper = char_toupper_libc,
.wc_toupper = toupper_libc_mb,
.wc_tolower = tolower_libc_mb,
};
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 683e1a0eef8..790db566e91 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -113,13 +113,13 @@ struct ctype_methods
/* required */
bool (*char_is_cased) (char ch, pg_locale_t locale);
+ char (*char_tolower) (unsigned char ch, pg_locale_t locale);
+ char (*char_toupper) (unsigned char ch, pg_locale_t locale);
/*
- * Optional. If defined, will only be called for single-byte encodings. If
- * not defined, or if the encoding is multibyte, will fall back to
- * pg_strlower().
+ * Use byte-at-a-time case folding for case-insensitive patterns.
*/
- char (*char_tolower) (unsigned char ch, pg_locale_t locale);
+ bool pattern_casefold_char;
/*
* For regex and pattern matching efficiency, the maximum char value
@@ -177,8 +177,8 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern bool char_is_cased(char ch, pg_locale_t locale);
-extern bool char_tolower_enabled(pg_locale_t locale);
extern char char_tolower(unsigned char ch, pg_locale_t locale);
+extern char char_toupper(unsigned char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dst, size_t dstsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
--
2.43.0
From a9f365b0ebd0c71ad2fec3bba8dbf7a21b502e3a Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 14:59:40 -0700
Subject: [PATCH v6 3/9] Avoid global LC_CTYPE dependency in like.c.
Call char_tolower() instead of pg_tolower().
---
src/backend/utils/adt/like.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 37c1c86aee8..364c39cf4fb 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -96,7 +96,14 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
if (locale->ctype_is_c)
return pg_ascii_tolower(c);
else if (locale->is_default)
- return pg_tolower(c);
+ {
+ if (c >= 'A' && c <= 'Z')
+ return c + ('a' - 'A');
+ else if (IS_HIGHBIT_SET(c))
+ return char_tolower(c, locale);
+ else
+ return c;
+ }
else
return char_tolower(c, locale);
}
--
2.43.0
From 0dad412eb555550dd8a5d4ef3581695328fb8f12 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 15:04:58 -0700
Subject: [PATCH v6 4/9] Avoid global LC_CTYPE dependency in scansup.c.
Call char_tolower() instead of tolower() in downcase_identifier().
The function downcase_identifier() may be called before locale support
is initialized -- e.g. during GUC processing in the postmaster -- so
if the locale is unavailable, char_tolower() uses plain ASCII
semantics.
That can result in a difference in behavior during that early stage of
processing, but previously it would have depended on the postmaster
environment variable LC_CTYPE, which would have been fragile anyway.
---
src/backend/parser/scansup.c | 5 +++--
src/backend/utils/adt/pg_locale.c | 16 ++++++++++++++--
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 2feb2b6cf5a..872075ba220 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -18,6 +18,7 @@
#include "mb/pg_wchar.h"
#include "parser/scansup.h"
+#include "utils/pg_locale.h"
/*
@@ -67,8 +68,8 @@ downcase_identifier(const char *ident, int len, bool warn, bool truncate)
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
- else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
- ch = tolower(ch);
+ else if (enc_is_single_byte && IS_HIGHBIT_SET(ch))
+ ch = char_tolower(ch, NULL);
result[i] = (char) ch;
}
result[i] = '\0';
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 26a7244c3db..363215edb80 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1555,11 +1555,17 @@ char_is_cased(char ch, pg_locale_t locale)
*
* Convert single-byte char to lowercase. Not correct for multibyte encodings,
* but needed for historical compatibility purposes.
+ *
+ * If locale is NULL, use the default database locale. This function may be
+ * called before the database locale is initialized, in which case it uses
+ * plain ASCII semantics.
*/
char
char_tolower(unsigned char ch, pg_locale_t locale)
{
- if (locale->ctype == NULL)
+ if (locale == NULL)
+ locale = default_locale;
+ if (locale == NULL || locale->ctype == NULL)
{
if (ch >= 'A' && ch <= 'Z')
return ch + ('a' - 'A');
@@ -1573,11 +1579,17 @@ char_tolower(unsigned char ch, pg_locale_t locale)
*
* Convert single-byte char to uppercase. Not correct for multibyte encodings,
* but needed for historical compatibility purposes.
+ *
+ * If locale is NULL, use the default database locale. This function may be
+ * called before the database locale is initialized, in which case it uses
+ * plain ASCII semantics.
*/
char
char_toupper(unsigned char ch, pg_locale_t locale)
{
- if (locale->ctype == NULL)
+ if (locale == NULL)
+ locale = default_locale;
+ if (locale == NULL || locale->ctype == NULL)
{
if (ch >= 'a' && ch <= 'z')
return ch - ('a' - 'A');
--
2.43.0
From af958c9318ade598d74ea1e7ae720c287c83dee0 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Sun, 26 Oct 2025 15:12:38 -0700
Subject: [PATCH v6 5/9] Avoid global LC_CTYPE dependency in pg_locale_icu.c.
ICU still depends on libc for compatibility with certain historical
behavior for single-byte encodings. Make the dependency explicit by
holding a locale_t object in the pg_locale_t object, so that at least
it does not depend on the global LC_CTYPE setting.
---
src/backend/utils/adt/pg_locale_icu.c | 66 ++++++++++++++++++++++-----
src/include/utils/pg_locale.h | 1 +
2 files changed, 56 insertions(+), 11 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 449e3bbb7a6..da250a23630 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,25 +121,34 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
-/*
- * ICU still depends on libc for compatibility with certain historical
- * behavior for single-byte encodings. XXX: consider fixing by decoding the
- * single byte into a code point, and using u_tolower().
- */
static char
char_tolower_icu(unsigned char ch, pg_locale_t locale)
{
- if (isupper(ch))
- return tolower(ch);
- return ch;
+ locale_t loc = locale->icu.lt;
+
+ if (loc)
+ {
+ if (isupper_l(ch, loc))
+ return tolower_l(ch, loc);
+ return ch;
+ }
+ else
+ return pg_ascii_tolower(ch);
}
static char
char_toupper_icu(unsigned char ch, pg_locale_t locale)
{
- if (islower(ch))
- return toupper(ch);
- return ch;
+ locale_t loc = locale->icu.lt;
+
+ if (loc)
+ {
+ if (islower_l(ch, loc))
+ return toupper_l(ch, loc);
+ return ch;
+ }
+ else
+ return pg_ascii_toupper(ch);
}
static bool
@@ -265,6 +274,29 @@ static const struct ctype_methods ctype_methods_icu = {
.wc_toupper = toupper_icu,
.wc_tolower = tolower_icu,
};
+
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings. See char_tolower_libc().
+ *
+ * XXX: consider fixing by decoding the single byte into a code point, and
+ * using u_tolower().
+ */
+static locale_t
+make_libc_ctype_locale(const char *ctype)
+{
+ locale_t loc;
+
+#ifndef WIN32
+ loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
+#else
+ loc = _create_locale(LC_ALL, ctype);
+#endif
+ if (!loc)
+ report_newlocale_failure(ctype);
+
+ return loc;
+}
#endif
pg_locale_t
@@ -275,11 +307,13 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
const char *iculocstr;
const char *icurules = NULL;
UCollator *collator;
+ locale_t loc = (locale_t) 0;
pg_locale_t result;
if (collid == DEFAULT_COLLATION_OID)
{
HeapTuple tp;
+ const char *ctype;
Datum datum;
bool isnull;
@@ -297,6 +331,15 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
if (!isnull)
icurules = TextDatumGetCString(datum);
+ if (pg_database_encoding_max_length() == 1)
+ {
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
+ Anum_pg_database_datctype);
+ ctype = TextDatumGetCString(datum);
+
+ loc = make_libc_ctype_locale(ctype);
+ }
+
ReleaseSysCache(tp);
}
else
@@ -327,6 +370,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
result->icu.locale = MemoryContextStrdup(context, iculocstr);
result->icu.ucol = collator;
+ result->icu.lt = loc;
result->deterministic = deterministic;
result->collate_is_c = false;
result->ctype_is_c = false;
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 790db566e91..c5978d903cc 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -165,6 +165,7 @@ struct pg_locale_struct
{
const char *locale;
UCollator *ucol;
+ locale_t lt;
} icu;
#endif
};
--
2.43.0
From 5ffbafb4051e0bfd763a64a134d71644e66847a4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 27 Oct 2025 16:23:14 -0700
Subject: [PATCH v6 6/9] Avoid global LC_CTYPE dependency in ltree/crc32.c.
Use char_tolower() instead of tolower().
---
contrib/ltree/crc32.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/contrib/ltree/crc32.c b/contrib/ltree/crc32.c
index 134f46a805e..5969f75c158 100644
--- a/contrib/ltree/crc32.c
+++ b/contrib/ltree/crc32.c
@@ -12,7 +12,7 @@
#ifdef LOWER_NODE
#include <ctype.h>
-#define TOLOWER(x) tolower((unsigned char) (x))
+#define TOLOWER(x) char_tolower((unsigned char) (x), NULL)
#else
#define TOLOWER(x) (x)
#endif
--
2.43.0
From 7399368ce4ee497cf26c1a1f4abfe0fdf192bbd8 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 27 Oct 2025 16:24:18 -0700
Subject: [PATCH v6 7/9] Avoid global LC_CTYPE dependency in fuzzystrmatch.
Use char_toupper() instead of toupper().
---
contrib/fuzzystrmatch/dmetaphone.c | 5 ++++-
contrib/fuzzystrmatch/fuzzystrmatch.c | 19 +++++++++++--------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/contrib/fuzzystrmatch/dmetaphone.c b/contrib/fuzzystrmatch/dmetaphone.c
index 6627b2b8943..152eb4b2ddf 100644
--- a/contrib/fuzzystrmatch/dmetaphone.c
+++ b/contrib/fuzzystrmatch/dmetaphone.c
@@ -99,6 +99,7 @@ The remaining code is authored by Andrew Dunstan <[email protected]> and
#include "postgres.h"
#include "utils/builtins.h"
+#include "utils/pg_locale.h"
/* turn off assertions for embedded function */
#define NDEBUG
@@ -116,6 +117,8 @@ The remaining code is authored by Andrew Dunstan <[email protected]> and
#include <assert.h>
#include <ctype.h>
+#define TOUPPER(x) char_toupper(x, NULL)
+
/* prototype for the main function we got from the perl module */
static void DoubleMetaphone(char *str, char **codes);
@@ -284,7 +287,7 @@ MakeUpper(metastring *s)
char *i;
for (i = s->str; *i; i++)
- *i = toupper((unsigned char) *i);
+ *i = TOUPPER((unsigned char) *i);
}
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index e7cc314b763..03530fb73ab 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -41,6 +41,7 @@
#include <ctype.h>
#include "utils/builtins.h"
+#include "utils/pg_locale.h"
#include "utils/varlena.h"
#include "varatt.h"
@@ -49,6 +50,8 @@ PG_MODULE_MAGIC_EXT(
.version = PG_VERSION
);
+#define TOUPPER(x) char_toupper(x, NULL)
+
/*
* Soundex
*/
@@ -62,7 +65,7 @@ static const char *const soundex_table = "01230120022455012623010202";
static char
soundex_code(char letter)
{
- letter = toupper((unsigned char) letter);
+ letter = TOUPPER((unsigned char) letter);
/* Defend against non-ASCII letters */
if (letter >= 'A' && letter <= 'Z')
return soundex_table[letter - 'A'];
@@ -124,7 +127,7 @@ getcode(char c)
{
if (isalpha((unsigned char) c))
{
- c = toupper((unsigned char) c);
+ c = TOUPPER((unsigned char) c);
/* Defend against non-ASCII letters */
if (c >= 'A' && c <= 'Z')
return _codes[c - 'A'];
@@ -301,18 +304,18 @@ metaphone(PG_FUNCTION_ARGS)
* accessing the array directly... */
/* Look at the next letter in the word */
-#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
+#define Next_Letter (TOUPPER((unsigned char) word[w_idx+1]))
/* Look at the current letter in the word */
-#define Curr_Letter (toupper((unsigned char) word[w_idx]))
+#define Curr_Letter (TOUPPER((unsigned char) word[w_idx]))
/* Go N letters back. */
#define Look_Back_Letter(n) \
- (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
+ (w_idx >= (n) ? TOUPPER((unsigned char) word[w_idx-(n)]) : '\0')
/* Previous letter. I dunno, should this return null on failure? */
#define Prev_Letter (Look_Back_Letter(1))
/* Look two letters down. It makes sure you don't walk off the string. */
#define After_Next_Letter \
- (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
-#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
+ (Next_Letter != '\0' ? TOUPPER((unsigned char) word[w_idx+2]) : '\0')
+#define Look_Ahead_Letter(n) TOUPPER((unsigned char) Lookahead(word+w_idx, n))
/* Allows us to safely look ahead an arbitrary # of letters */
@@ -742,7 +745,7 @@ _soundex(const char *instr, char *outstr)
}
/* Take the first letter as is */
- *outstr++ = (char) toupper((unsigned char) *instr++);
+ *outstr++ = (char) TOUPPER((unsigned char) *instr++);
count = 1;
while (*instr && count < SOUNDEX_LEN)
--
2.43.0
From 46420299904cfe1829896446a860c39f0824551e Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 9 Oct 2024 10:00:58 -0700
Subject: [PATCH v6 8/9] Don't include ICU headers in pg_locale.h.
Needed in order to include pg_locale.h in pgstrcasecmp.c.
---
src/backend/commands/collationcmds.c | 4 ++++
src/backend/utils/adt/formatting.c | 4 ----
src/backend/utils/adt/pg_locale.c | 4 ++++
src/backend/utils/adt/pg_locale_icu.c | 1 +
src/backend/utils/adt/varlena.c | 4 ++++
src/include/utils/pg_locale.h | 14 +++++---------
6 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8acbfbbeda0..a57fe93c387 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -14,6 +14,10 @@
*/
#include "postgres.h"
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 78e19ac39ac..9d0dfc48671 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -70,10 +70,6 @@
#include <limits.h>
#include <wctype.h>
-#ifdef USE_ICU
-#include <unicode/ustring.h>
-#endif
-
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "common/int.h"
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 363215edb80..255f660c644 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -33,6 +33,10 @@
#include <time.h>
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index da250a23630..0fd8171c1da 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -13,6 +13,7 @@
#ifdef USE_ICU
#include <unicode/ucnv.h>
+#include <unicode/ucol.h>
#include <unicode/ustring.h>
/*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2c398cd9e5c..cf34a96b988 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -17,6 +17,10 @@
#include <ctype.h>
#include <limits.h>
+#ifdef USE_ICU
+#include <unicode/uchar.h>
+#endif
+
#include "access/detoast.h"
#include "access/toast_compression.h"
#include "catalog/pg_collation.h"
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index c5978d903cc..b668f77e1ca 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -14,15 +14,6 @@
#include "mb/pg_wchar.h"
-#ifdef USE_ICU
-/* only include the C APIs, to avoid errors in cpluspluscheck */
-#undef U_SHOW_CPLUSPLUS_API
-#define U_SHOW_CPLUSPLUS_API 0
-#undef U_SHOW_CPLUSPLUS_HEADER_API
-#define U_SHOW_CPLUSPLUS_HEADER_API 0
-#include <unicode/ucol.h>
-#endif
-
/* use for libc locale names */
#define LOCALE_NAME_BUFLEN 128
@@ -54,6 +45,11 @@ extern void cache_locale_time(void);
struct pg_locale_struct;
typedef struct pg_locale_struct *pg_locale_t;
+#ifdef USE_ICU
+struct UCollator;
+typedef struct UCollator UCollator;
+#endif
+
/* methods that define collation behavior */
struct collate_methods
{
--
2.43.0
From 22a6d36d82a26269f406b64cd0865a360224eb63 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 27 Oct 2025 16:08:54 -0700
Subject: [PATCH v6 9/9] Avoid global LC_CTYPE dependency in strcasecmp.c for
server.
For the server (but not the frontend), change to use
char_tolower()/char_toupper() instead of tolower()/toupper().
---
src/port/pgstrcasecmp.c | 35 +++++++++++++++++++++++------------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index ec2b3a75c3d..f295df6ef51 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -28,6 +28,17 @@
#include <ctype.h>
+/*
+ * Beware multiple evaluation hazards.
+ */
+#ifndef FRONTEND
+#include "utils/pg_locale.h"
+#define TOLOWER(x) char_tolower(x, NULL)
+#define TOUPPER(x) char_toupper(x, NULL)
+#else
+#define TOLOWER(x) (isupper(x) ? tolower(x) : x)
+#define TOUPPER(x) (islower(x) ? toupper(x) : x)
+#endif
/*
* Case-independent comparison of two null-terminated strings.
@@ -44,13 +55,13 @@ pg_strcasecmp(const char *s1, const char *s2)
{
if (ch1 >= 'A' && ch1 <= 'Z')
ch1 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
- ch1 = tolower(ch1);
+ else if (IS_HIGHBIT_SET(ch1))
+ ch1 = TOLOWER(ch1);
if (ch2 >= 'A' && ch2 <= 'Z')
ch2 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
- ch2 = tolower(ch2);
+ else if (IS_HIGHBIT_SET(ch2))
+ ch2 = TOLOWER(ch2);
if (ch1 != ch2)
return (int) ch1 - (int) ch2;
@@ -77,13 +88,13 @@ pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
if (ch1 >= 'A' && ch1 <= 'Z')
ch1 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
- ch1 = tolower(ch1);
+ else if (IS_HIGHBIT_SET(ch1))
+ ch1 = TOLOWER(ch1);
if (ch2 >= 'A' && ch2 <= 'Z')
ch2 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
- ch2 = tolower(ch2);
+ else if (IS_HIGHBIT_SET(ch2))
+ ch2 = TOLOWER(ch2);
if (ch1 != ch2)
return (int) ch1 - (int) ch2;
@@ -106,8 +117,8 @@ pg_toupper(unsigned char ch)
{
if (ch >= 'a' && ch <= 'z')
ch += 'A' - 'a';
- else if (IS_HIGHBIT_SET(ch) && islower(ch))
- ch = toupper(ch);
+ else if (IS_HIGHBIT_SET(ch))
+ ch = TOUPPER(ch);
return ch;
}
@@ -123,8 +134,8 @@ pg_tolower(unsigned char ch)
{
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch) && isupper(ch))
- ch = tolower(ch);
+ else if (IS_HIGHBIT_SET(ch))
+ ch = TOLOWER(ch);
return ch;
}
--
2.43.0