On Wed, 2025-06-11 at 12:15 -0700, Jeff Davis wrote:
> I changed this to a global_libc_locale that includes both LC_COLLATE
> and LC_CTYPE (from datcollate and datctype), in case an extension is
> relying on strcoll for some reason.
..
> This patch series, at least so far, is designed to have zero behavior
> changes. Anything with a potential for a behavior change should be a
> separate commit, so that if we need to revert it, we can revert the
> behavior change without reintroducing a setlocale() dependency.
...
>
> I reworked it to be less confusing by changing wchar2char/char2wchar
> to
> take a locale_t instead of pg_locale_t. Hopefully it's an
> improvement.
...
>
> Changed it so that it only sets LC_COLLATE to C, and leaves LC_CTYPE
> set to datctype.
Attached rebased v3.
Regards,
Jeff Davis
From 454a8998196c49de9a17aa83d198464d52a3f278 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Fri, 6 Jun 2025 14:13:16 -0700
Subject: [PATCH v3 1/7] Hold datcollate/datctype in global_libc_locale.
Callers of locale-aware ctype operations should use the "_l" variants
of the functions and pass global_libc_locale for the locale. Doing so
avoids depending on setlocale().
Discussion: https://postgr.es/m/[email protected]
---
src/backend/utils/adt/pg_locale_libc.c | 77 ++++++++++++++++++++++++++
src/backend/utils/init/postinit.c | 2 +
src/include/utils/pg_locale.h | 7 +++
3 files changed, 86 insertions(+)
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index e9f9fc1e369..a3d8b51a7d9 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -81,6 +81,12 @@
*/
#define TEXTBUFLEN 1024
+/*
+ * Represents datcollate and datctype locales in a global variable, so that we
+ * don't need to rely on setlocale() anywhere.
+ *
+ * Starts out NULL; it is assigned by init_global_libc_locale().
+ */
+locale_t global_libc_locale = NULL;
+
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
static int strncoll_libc(const char *arg1, ssize_t len1,
@@ -665,6 +671,77 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_size;
}
+/*
+ * init_global_libc_locale
+ *
+ * Initialize global locale for LC_COLLATE and LC_CTYPE from datcollate and
+ * datctype, respectively.
+ *
+ * "collate" and "ctype" are the libc locale names to use for the two
+ * categories.  Every failure path reports FATAL, so (given that
+ * ereport(FATAL) does not return) global_libc_locale is a valid locale
+ * whenever this function returns.
+ *
+ * NB: should be consistent with make_libc_collator(), except that it must
+ * create the locale even for "C" and "POSIX".
+ */
+void
+init_global_libc_locale(const char *collate, const char *ctype)
+{
+	locale_t	loc = 0;
+
+	if (strcmp(collate, ctype) == 0)
+	{
+		/* Normal case where they're the same */
+#ifndef WIN32
+		loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate, NULL);
+#else
+		loc = _create_locale(LC_ALL, collate);
+#endif
+		if (!loc)
+			ereport(FATAL,
+					(errmsg("database locale is incompatible with operating system"),
+					 errdetail("The database was initialized with LC_COLLATE \"%s\", "
+							   " which is not recognized by setlocale().", collate),
+					 errhint("Recreate the database with another locale or install the missing locale.")));
+	}
+	else
+	{
+#ifndef WIN32
+		/* We need two newlocale() steps: LC_COLLATE first, then LC_CTYPE */
+		locale_t	loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
+
+		if (!loc1)
+			ereport(FATAL,
+					(errmsg("database locale is incompatible with operating system"),
+					 errdetail("The database was initialized with LC_COLLATE \"%s\", "
+							   " which is not recognized by setlocale().", collate),
+					 errhint("Recreate the database with another locale or install the missing locale.")));
+
+		/*
+		 * On success newlocale() takes over loc1 as the base of the result;
+		 * on failure loc1 is left untouched, so release it ourselves.
+		 */
+		loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
+		if (!loc)
+		{
+			freelocale(loc1);
+			ereport(FATAL,
+					(errmsg("database locale is incompatible with operating system"),
+					 errdetail("The database was initialized with LC_CTYPE \"%s\", "
+							   " which is not recognized by setlocale().", ctype),
+					 errhint("Recreate the database with another locale or install the missing locale.")));
+		}
+#else
+
+		/*
+		 * XXX The _create_locale() API doesn't appear to support this. Could
+		 * perhaps be worked around by changing pg_locale_t to contain two
+		 * separate fields.
+		 *
+		 * NOTE(review): this rejects any database whose datcollate and
+		 * datctype differ on this platform, which looks like a behavior
+		 * change for existing databases -- confirm that is acceptable.
+		 * Reported as FATAL to match the other failure paths above.
+		 */
+		ereport(FATAL,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+	}
+
+	global_libc_locale = loc;
+}
+
pg_locale_t
create_pg_locale_libc(Oid collid, MemoryContext context)
{
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index c86ceefda94..74f9df84fde 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -431,6 +431,8 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
" which is not recognized by setlocale().", ctype),
errhint("Recreate the database with another locale or install the missing locale.")));
+ init_global_libc_locale(collate, ctype);
+
if (strcmp(ctype, "C") == 0 ||
strcmp(ctype, "POSIX") == 0)
database_ctype_is_c = true;
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 44ff60a25b4..9735d15ceb2 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -34,6 +34,12 @@ extern PGDLLIMPORT char *localized_full_days[];
extern PGDLLIMPORT char *localized_abbrev_months[];
extern PGDLLIMPORT char *localized_full_months[];
+/*
+ * Represents datcollate and datctype locales in a global variable, so that we
+ * don't need to rely on setlocale() anywhere.
+ *
+ * Defined in pg_locale_libc.c, where it starts out NULL and is assigned by
+ * init_global_libc_locale().
+ */
+extern PGDLLIMPORT locale_t global_libc_locale;
+
/* is the databases's LC_CTYPE the C locale? */
extern PGDLLIMPORT bool database_ctype_is_c;
@@ -169,6 +175,7 @@ struct pg_locale_struct
} info;
};
+extern void init_global_libc_locale(const char *collate, const char *ctype);
extern void init_database_collation(void);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
--
2.43.0
From 9305b8065086a7d03900e2f4dc4396219c206768 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Tue, 10 Jun 2025 20:06:34 -0700
Subject: [PATCH v3 2/7] fuzzystrmatch: use global_libc_locale.
---
contrib/fuzzystrmatch/dmetaphone.c | 3 ++-
contrib/fuzzystrmatch/fuzzystrmatch.c | 19 +++++++++++--------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/contrib/fuzzystrmatch/dmetaphone.c b/contrib/fuzzystrmatch/dmetaphone.c
index 6627b2b8943..8777c1f5c04 100644
--- a/contrib/fuzzystrmatch/dmetaphone.c
+++ b/contrib/fuzzystrmatch/dmetaphone.c
@@ -99,6 +99,7 @@ The remaining code is authored by Andrew Dunstan <[email protected]> and
#include "postgres.h"
#include "utils/builtins.h"
+#include "utils/pg_locale.h"
/* turn off assertions for embedded function */
#define NDEBUG
@@ -284,7 +285,7 @@ MakeUpper(metastring *s)
char *i;
for (i = s->str; *i; i++)
- *i = toupper((unsigned char) *i);
+ *i = toupper_l((unsigned char) *i, global_libc_locale);
}
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index e7cc314b763..103dd07220c 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -41,6 +41,7 @@
#include <ctype.h>
#include "utils/builtins.h"
+#include "utils/pg_locale.h"
#include "utils/varlena.h"
#include "varatt.h"
@@ -56,13 +57,15 @@ static void _soundex(const char *instr, char *outstr);
#define SOUNDEX_LEN 4
+#define TOUPPER(x) toupper_l((unsigned char) (x), global_libc_locale)
+
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *const soundex_table = "01230120022455012623010202";
static char
soundex_code(char letter)
{
- letter = toupper((unsigned char) letter);
+ letter = TOUPPER((unsigned char) letter);
/* Defend against non-ASCII letters */
if (letter >= 'A' && letter <= 'Z')
return soundex_table[letter - 'A'];
@@ -124,7 +127,7 @@ getcode(char c)
{
if (isalpha((unsigned char) c))
{
- c = toupper((unsigned char) c);
+ c = TOUPPER((unsigned char) c);
/* Defend against non-ASCII letters */
if (c >= 'A' && c <= 'Z')
return _codes[c - 'A'];
@@ -301,18 +304,18 @@ metaphone(PG_FUNCTION_ARGS)
* accessing the array directly... */
/* Look at the next letter in the word */
-#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
+#define Next_Letter (TOUPPER((unsigned char) word[w_idx+1]))
/* Look at the current letter in the word */
-#define Curr_Letter (toupper((unsigned char) word[w_idx]))
+#define Curr_Letter (TOUPPER((unsigned char) word[w_idx]))
/* Go N letters back. */
#define Look_Back_Letter(n) \
- (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
+ (w_idx >= (n) ? TOUPPER((unsigned char) word[w_idx-(n)]) : '\0')
/* Previous letter. I dunno, should this return null on failure? */
#define Prev_Letter (Look_Back_Letter(1))
/* Look two letters down. It makes sure you don't walk off the string. */
#define After_Next_Letter \
- (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
-#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
+ (Next_Letter != '\0' ? TOUPPER((unsigned char) word[w_idx+2]) : '\0')
+#define Look_Ahead_Letter(n) TOUPPER((unsigned char) Lookahead(word+w_idx, n))
/* Allows us to safely look ahead an arbitrary # of letters */
@@ -742,7 +745,7 @@ _soundex(const char *instr, char *outstr)
}
/* Take the first letter as is */
- *outstr++ = (char) toupper((unsigned char) *instr++);
+ *outstr++ = (char) TOUPPER((unsigned char) *instr++);
count = 1;
while (*instr && count < SOUNDEX_LEN)
--
2.43.0
From 04db5e1ff309c08010ef7f87aba96e71fbd8f42c Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Tue, 10 Jun 2025 20:06:50 -0700
Subject: [PATCH v3 3/7] ltree: use global_libc_locale.
---
contrib/ltree/crc32.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/contrib/ltree/crc32.c b/contrib/ltree/crc32.c
index 134f46a805e..5f5c563471e 100644
--- a/contrib/ltree/crc32.c
+++ b/contrib/ltree/crc32.c
@@ -12,7 +12,7 @@
#ifdef LOWER_NODE
#include <ctype.h>
-#define TOLOWER(x) tolower((unsigned char) (x))
+#define TOLOWER(x) tolower_l((unsigned char) (x), global_libc_locale)
#else
#define TOLOWER(x) (x)
#endif
--
2.43.0
From 9c454496624da63948641b19e4592e4fbb4f609f Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Tue, 10 Jun 2025 20:07:01 -0700
Subject: [PATCH v3 4/7] Use global_libc_locale for downcase_identifier() and
pg_strcasecmp().
---
src/backend/parser/scansup.c | 3 ++-
src/port/pgstrcasecmp.c | 20 ++++++++++++++------
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 2feb2b6cf5a..d45bf275e42 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -18,6 +18,7 @@
#include "mb/pg_wchar.h"
#include "parser/scansup.h"
+#include "utils/pg_locale.h"
/*
@@ -68,7 +69,7 @@ downcase_identifier(const char *ident, int len, bool warn, bool truncate)
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
- ch = tolower(ch);
+ ch = tolower_l(ch, global_libc_locale);
result[i] = (char) ch;
}
result[i] = '\0';
diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index ec2b3a75c3d..812050598e7 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -28,6 +28,14 @@
#include <ctype.h>
+#ifndef FRONTEND
+/*
+ * In the backend, fold case using global_libc_locale rather than whatever
+ * setlocale() last installed.  global_libc_locale is NULL until it has been
+ * initialized, and passing an invalid locale handle to toupper_l() or
+ * tolower_l() is undefined, so fall back to the plain functions until then.
+ *
+ * NOTE(review): presumably these routines can be reached before the global
+ * is set (e.g. in processes that never connect to a database) -- confirm.
+ *
+ * Each macro evaluates its argument exactly once.
+ */
+extern PGDLLIMPORT locale_t global_libc_locale;
+#define TOUPPER(x) \
+	(global_libc_locale != (locale_t) 0 ? \
+	 toupper_l((unsigned char) (x), global_libc_locale) : \
+	 toupper((unsigned char) (x)))
+#define TOLOWER(x) \
+	(global_libc_locale != (locale_t) 0 ? \
+	 tolower_l((unsigned char) (x), global_libc_locale) : \
+	 tolower((unsigned char) (x)))
+#else
+/* Frontend code has no global_libc_locale; use the process-wide locale. */
+#define TOUPPER(x) toupper(x)
+#define TOLOWER(x) tolower(x)
+#endif
/*
* Case-independent comparison of two null-terminated strings.
@@ -45,12 +53,12 @@ pg_strcasecmp(const char *s1, const char *s2)
if (ch1 >= 'A' && ch1 <= 'Z')
ch1 += 'a' - 'A';
else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
- ch1 = tolower(ch1);
+ ch1 = TOLOWER(ch1);
if (ch2 >= 'A' && ch2 <= 'Z')
ch2 += 'a' - 'A';
else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
- ch2 = tolower(ch2);
+ ch2 = TOLOWER(ch2);
if (ch1 != ch2)
return (int) ch1 - (int) ch2;
@@ -78,12 +86,12 @@ pg_strncasecmp(const char *s1, const char *s2, size_t n)
if (ch1 >= 'A' && ch1 <= 'Z')
ch1 += 'a' - 'A';
else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
- ch1 = tolower(ch1);
+ ch1 = TOLOWER(ch1);
if (ch2 >= 'A' && ch2 <= 'Z')
ch2 += 'a' - 'A';
else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
- ch2 = tolower(ch2);
+ ch2 = TOLOWER(ch2);
if (ch1 != ch2)
return (int) ch1 - (int) ch2;
@@ -107,7 +115,7 @@ pg_toupper(unsigned char ch)
if (ch >= 'a' && ch <= 'z')
ch += 'A' - 'a';
else if (IS_HIGHBIT_SET(ch) && islower(ch))
- ch = toupper(ch);
+ ch = TOUPPER(ch);
return ch;
}
@@ -124,7 +132,7 @@ pg_tolower(unsigned char ch)
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
else if (IS_HIGHBIT_SET(ch) && isupper(ch))
- ch = tolower(ch);
+ ch = TOLOWER(ch);
return ch;
}
--
2.43.0
From cfc6a2d1dacc51dd7e09291eaa1d4cac350625c7 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 11 Jun 2025 10:11:16 -0700
Subject: [PATCH v3 5/7] Change wchar2char() and char2wchar() to accept a
locale_t.
These are libc-specific functions, so accepting a locale_t makes more
sense than accepting a pg_locale_t (which could use another provider).
Also, no longer accept NULL.
---
src/backend/tsearch/ts_locale.c | 4 +--
src/backend/tsearch/wparser_def.c | 2 +-
src/backend/utils/adt/pg_locale.c | 2 +-
src/backend/utils/adt/pg_locale_libc.c | 42 +++++++++-----------------
src/include/utils/pg_locale.h | 4 +--
5 files changed, 20 insertions(+), 34 deletions(-)
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index b77d8c23d36..4801fe90089 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -36,7 +36,7 @@ t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalpha(TOUCHAR(ptr));
@@ -51,7 +51,7 @@ t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalnum(TOUCHAR(ptr));
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 79bcd32a063..e2dd3da3aa3 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -299,7 +299,7 @@ TParserInit(char *str, int len)
*/
if (prs->charmaxlen > 1)
{
- pg_locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = 0; /* TODO */
prs->usewide = true;
if (database_ctype_is_c)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 97c2ac1faf9..ce50e9e15d0 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -998,7 +998,7 @@ get_iso_localename(const char *winlocname)
char *hyphen;
/* Locale names use only ASCII, any conversion locale suffices. */
- rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
+ rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), LC_C_LOCALE);
if (rc == -1 || rc == sizeof(iso_lc_messages))
return NULL;
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index a3d8b51a7d9..998bfa857f0 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -463,7 +463,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
- char2wchar(workspace, srclen + 1, src, srclen, locale);
+ char2wchar(workspace, srclen + 1, src, srclen, loc);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
workspace[curr_char] = towlower_l(workspace[curr_char], loc);
@@ -474,7 +474,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
max_size = curr_char * pg_database_encoding_max_length();
result = palloc(max_size + 1);
- result_size = wchar2char(result, workspace, max_size + 1, locale);
+ result_size = wchar2char(result, workspace, max_size + 1, loc);
if (result_size + 1 > destsize)
return result_size;
@@ -558,7 +558,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
- char2wchar(workspace, srclen + 1, src, srclen, locale);
+ char2wchar(workspace, srclen + 1, src, srclen, loc);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
@@ -575,7 +575,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
max_size = curr_char * pg_database_encoding_max_length();
result = palloc(max_size + 1);
- result_size = wchar2char(result, workspace, max_size + 1, locale);
+ result_size = wchar2char(result, workspace, max_size + 1, loc);
if (result_size + 1 > destsize)
return result_size;
@@ -646,7 +646,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
- char2wchar(workspace, srclen + 1, src, srclen, locale);
+ char2wchar(workspace, srclen + 1, src, srclen, loc);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
workspace[curr_char] = towupper_l(workspace[curr_char], loc);
@@ -657,7 +657,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
max_size = curr_char * pg_database_encoding_max_length();
result = palloc(max_size + 1);
- result_size = wchar2char(result, workspace, max_size + 1, locale);
+ result_size = wchar2char(result, workspace, max_size + 1, loc);
if (result_size + 1 > destsize)
return result_size;
@@ -1207,10 +1207,12 @@ wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
* zero-terminated. The output will be zero-terminated iff there is room.
*/
size_t
-wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
+wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
{
size_t result;
+ Assert(loc != NULL);
+
if (tolen == 0)
return 0;
@@ -1237,16 +1239,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
}
else
#endif /* WIN32 */
- if (locale == (pg_locale_t) 0)
- {
- /* Use wcstombs directly for the default locale */
- result = wcstombs(to, from, tolen);
- }
- else
- {
- /* Use wcstombs_l for nondefault locales */
- result = wcstombs_l(to, from, tolen, locale->info.lt);
- }
+ result = wcstombs_l(to, from, tolen, loc);
return result;
}
@@ -1262,10 +1255,12 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
*/
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
- pg_locale_t locale)
+ locale_t loc)
{
size_t result;
+ Assert(loc != NULL);
+
if (tolen == 0)
return 0;
@@ -1297,16 +1292,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
/* mbstowcs requires ending '\0' */
char *str = pnstrdup(from, fromlen);
- if (locale == (pg_locale_t) 0)
- {
- /* Use mbstowcs directly for the default locale */
- result = mbstowcs(to, str, tolen);
- }
- else
- {
- /* Use mbstowcs_l for nondefault locales */
- result = mbstowcs_l(to, str, tolen, locale->info.lt);
- }
+ result = mbstowcs_l(to, str, tolen, loc);
pfree(str);
}
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 9735d15ceb2..d008b49e3c7 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -218,8 +218,8 @@ extern void report_newlocale_failure(const char *localename);
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
- pg_locale_t locale);
+ locale_t loc);
extern size_t char2wchar(wchar_t *to, size_t tolen,
- const char *from, size_t fromlen, pg_locale_t locale);
+ const char *from, size_t fromlen, locale_t loc);
#endif /* _PG_LOCALE_ */
--
2.43.0
From 3040033010333689ee3135e476b31b9dd07cbe41 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 11 Jun 2025 10:07:29 -0700
Subject: [PATCH v3 6/7] tsearch: use global_libc_locale.
---
configure | 2 +-
configure.ac | 2 ++
meson.build | 2 ++
src/backend/tsearch/ts_locale.c | 8 +++---
src/backend/tsearch/wparser_def.c | 44 ++++++++++++++++++++++++++++---
src/include/pg_config.h.in | 6 +++++
6 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/configure b/configure
index 16ef5b58d1a..82dd3a04e3a 100755
--- a/configure
+++ b/configure
@@ -15616,7 +15616,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
-for ac_func in backtrace_symbols copyfile copy_file_range elf_aux_info getauxval getifaddrs getpeerucred inet_pton kqueue localeconv_l mbstowcs_l posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strsignal syncfs sync_file_range uselocale wcstombs_l
+for ac_func in backtrace_symbols copyfile copy_file_range elf_aux_info getauxval getifaddrs getpeerucred inet_pton iswxdigit_l isxdigit_l kqueue localeconv_l mbstowcs_l posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strsignal syncfs sync_file_range uselocale wcstombs_l
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.ac b/configure.ac
index b3efc49c97a..d23ef43f243 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1789,6 +1789,8 @@ AC_CHECK_FUNCS(m4_normalize([
getifaddrs
getpeerucred
inet_pton
+ iswxdigit_l
+ isxdigit_l
kqueue
localeconv_l
mbstowcs_l
diff --git a/meson.build b/meson.build
index 91fb4756ed4..c11a6f63a05 100644
--- a/meson.build
+++ b/meson.build
@@ -2885,6 +2885,8 @@ func_checks = [
['getpeerucred'],
['inet_aton'],
['inet_pton'],
+ ['iswxdigit_l'],
+ ['isxdigit_l'],
['kqueue'],
['localeconv_l'],
['mbstowcs_l'],
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 4801fe90089..6b66fd1c05b 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -36,14 +36,14 @@ t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = global_libc_locale; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalpha(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
- return iswalpha((wint_t) character[0]);
+ return iswalpha_l((wint_t) character[0], mylocale);
}
int
@@ -51,14 +51,14 @@ t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = global_libc_locale; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalnum(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
- return iswalnum((wint_t) character[0]);
+ return iswalnum_l((wint_t) character[0], mylocale);
}
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index e2dd3da3aa3..9a80d32b448 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -299,7 +299,7 @@ TParserInit(char *str, int len)
*/
if (prs->charmaxlen > 1)
{
- locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = global_libc_locale; /* TODO */
prs->usewide = true;
if (database_ctype_is_c)
@@ -411,6 +411,40 @@ TParserCopyClose(TParser *prs)
}
+#ifndef HAVE_ISXDIGIT_L
+/*
+ * Fallback for platforms where HAVE_ISXDIGIT_L is not defined.
+ *
+ * Takes and returns int, like isxdigit().  On Windows this forwards to
+ * _isxdigit_l(); elsewhere it temporarily installs "loc" with uselocale(),
+ * calls isxdigit(), and then restores the previous locale.
+ */
+static int
+isxdigit_l(int c, locale_t loc)
+{
+#ifdef WIN32
+	return _isxdigit_l(c, loc);
+#else
+	int			result;
+	locale_t	save_locale = uselocale(loc);
+
+	result = isxdigit(c);
+	uselocale(save_locale);
+	return result;
+#endif
+}
+#endif
+#ifndef HAVE_ISWXDIGIT_L
+/*
+ * Fallback for platforms where HAVE_ISWXDIGIT_L is not defined.
+ *
+ * On Windows this forwards to _iswxdigit_l(); elsewhere it temporarily
+ * installs "loc" with uselocale(), calls iswxdigit(), and then restores the
+ * previous locale.  The result is kept in an int, matching the return type
+ * of iswxdigit() and of this function.
+ */
+static int
+iswxdigit_l(wint_t wc, locale_t loc)
+{
+#ifdef WIN32
+	return _iswxdigit_l(wc, loc);
+#else
+	int			result;
+	locale_t	save_locale = uselocale(loc);
+
+	result = iswxdigit(wc);
+	uselocale(save_locale);
+	return result;
+#endif
+}
+#endif
+
+
/*
* Character-type support functions, equivalent to is* macros, but
* working with any possible encodings and locales. Notes:
@@ -434,11 +468,13 @@ p_is##type(TParser *prs) \
unsigned int c = *(prs->pgwstr + prs->state->poschar); \
if (c > 0x7f) \
return nonascii; \
- return is##type(c); \
+ return is##type##_l(c, global_libc_locale); \
} \
- return isw##type(*(prs->wstr + prs->state->poschar)); \
+ return isw##type##_l(*(prs->wstr + prs->state->poschar), \
+ global_libc_locale); \
} \
- return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
+ return is##type##_l(*(unsigned char *) (prs->str + prs->state->posbyte), \
+ global_libc_locale); \
} \
\
static int \
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..f06396c94f4 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -229,6 +229,12 @@
/* Define to 1 if you have the global variable 'int timezone'. */
#undef HAVE_INT_TIMEZONE
+/* Define to 1 if you have the `iswxdigit_l' function. */
+#undef HAVE_ISWXDIGIT_L
+
+/* Define to 1 if you have the `isxdigit_l' function. */
+#undef HAVE_ISXDIGIT_L
+
/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
--
2.43.0
From 13c2e61f4592c85645a4ea73cb3f2a3dd5da3a68 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Tue, 10 Jun 2025 11:32:01 -0700
Subject: [PATCH v3 7/7] Force LC_COLLATE to C in postmaster.
Avoid dependence on setlocale().
strcoll(), etc., are not called directly; all such calls should go
through pg_locale.c and use the appropriate provider. By setting
LC_COLLATE to C, we avoid accidentally depending on libc behavior when
using a different provider.
No behavior change in the backend, but it's possible that some
extensions will be affected. Such extensions should ordinarily be
updated to use the pg_locale_t APIs. If the extension must use libc
behavior, it can instead use the "_l" variants of functions along with
global_libc_locale.
Discussion: https://postgr.es/m/[email protected]
Reviewed-by: Peter Eisentraut <[email protected]>
---
src/backend/main/main.c | 16 ++++++++++------
src/backend/utils/init/postinit.c | 10 ++++------
2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 7d63cf94a6b..9e11557d91a 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -125,13 +125,17 @@ main(int argc, char *argv[])
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("postgres"));
/*
- * In the postmaster, absorb the environment values for LC_COLLATE and
- * LC_CTYPE. Individual backends will change these later to settings
- * taken from pg_database, but the postmaster cannot do that. If we leave
- * these set to "C" then message localization might not work well in the
- * postmaster.
+ * Collation is handled by pg_locale.c, and the behavior is dependent on
+ * the provider. strcoll(), etc., should not be called directly.
+ */
+ init_locale("LC_COLLATE", LC_COLLATE, "C");
+
+ /*
+ * In the postmaster, absorb the environment values for LC_CTYPE.
+ * Individual backends will change it later to pg_database.datctype, but
+ * the postmaster cannot do that. If we leave it set to "C" then message
+ * localization might not work well in the postmaster.
*/
- init_locale("LC_COLLATE", LC_COLLATE, "");
init_locale("LC_CTYPE", LC_CTYPE, "");
/*
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 74f9df84fde..6deabf7474c 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -417,12 +417,10 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
ctype = TextDatumGetCString(datum);
- if (pg_perm_setlocale(LC_COLLATE, collate) == NULL)
- ereport(FATAL,
- (errmsg("database locale is incompatible with operating system"),
- errdetail("The database was initialized with LC_COLLATE \"%s\", "
- " which is not recognized by setlocale().", collate),
- errhint("Recreate the database with another locale or install the missing locale.")));
+ /*
+ * Historically, we set LC_COLLATE from datcollate, as well, but that's no
+ * longer necessary.
+ */
if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL)
ereport(FATAL,
--
2.43.0