On Fri, 2024-07-26 at 19:38 +0200, Andreas Karlsson wrote: > Nice refactoring! > > Two small comments about CheckMyDatabase(). > > - Shouldn't we look at the default_locale.ctype_is_c when setting > database_ctype_is_c instead of doing a strcmp()? or maybe we should > even > remove the global variable and always look at the default_locale?
database_ctype_is_c refers to the LC_CTYPE environment of the database -- pg_database.datctype. default_locale.ctype_is_c is the ctype of the database's default collation. Confusing, I know, but it matters for a few things that still depend on the LC_CTYPE, such as tsearch and maybe a few extensions. See f413941f41. > - I think that the lookup of Anum_pg_database_datlocale could be done > later in the code since it is not needed when we use a libc locale. > E.g. > as below. Done, thank you. > Also is there any reason you do not squash the 4th and the 6th patch? Done. I had to rearrange the patch ordering a bit because prior to the cache refactoring patch, it's unsafe to call pg_newlocale_from_collation() without checking lc_collate_is_c() or lc_ctype_is_c() first. Regards, Jeff Davis
From 8a98af04912afedcb481d0e3851a485a63baf3d9 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:45:55 -0700 Subject: [PATCH v4 1/5] Make database default collation internal to pg_locale.c. --- src/backend/utils/adt/pg_locale.c | 69 +++++++++++++++++++++++++++++-- src/backend/utils/init/postinit.c | 44 ++++---------------- src/include/utils/pg_locale.h | 3 +- 3 files changed, 74 insertions(+), 42 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 38c40a40489..1653e997d9b 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -56,6 +56,7 @@ #include "access/htup_details.h" #include "catalog/pg_collation.h" +#include "catalog/pg_database.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" @@ -116,6 +117,8 @@ char *localized_full_months[12 + 1]; /* is the databases's LC_CTYPE the C locale? */ bool database_ctype_is_c = false; +static struct pg_locale_struct default_locale; + /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; static bool CurrentLCTimeValid = false; @@ -1443,8 +1446,6 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } -struct pg_locale_struct default_locale; - void make_icu_collator(const char *iculocstr, const char *icurules, @@ -1539,7 +1540,69 @@ pg_locale_deterministic(pg_locale_t locale) } /* - * Create a locale_t from a collation OID. Results are cached for the + * Initialize default_locale with database locale settings. 
+ */ +void +init_database_collation(void) +{ + HeapTuple tup; + Form_pg_database dbform; + Datum datum; + bool isnull; + + /* Fetch our pg_database row normally, via syscache */ + tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); + dbform = (Form_pg_database) GETSTRUCT(tup); + + if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) + { + char *datlocale; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + builtin_validate_locale(dbform->encoding, datlocale); + + default_locale.info.builtin.locale = MemoryContextStrdup( + TopMemoryContext, datlocale); + } + else if (dbform->datlocprovider == COLLPROVIDER_ICU) + { + char *datlocale; + char *icurules; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(datlocale, icurules, &default_locale); + } + else + { + Assert(dbform->datlocprovider == COLLPROVIDER_LIBC); + } + + default_locale.provider = dbform->datlocprovider; + + /* + * Default locale is currently always deterministic. Nondeterministic + * locales currently don't support pattern matching, which would break a + * lot of things if applied globally. + */ + default_locale.deterministic = true; + + ReleaseSysCache(tup); +} + +/* + * Create a pg_locale_t from a collation OID. Results are cached for the * lifetime of the backend. Thus, do not free the result with freelocale(). * * As a special optimization, the default/database collation returns 0. 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 25867c8bd5b..3537df37056 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -318,7 +318,6 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect bool isnull; char *collate; char *ctype; - char *datlocale; /* Fetch our pg_database row normally, via syscache */ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); @@ -423,42 +422,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect strcmp(ctype, "POSIX") == 0) database_ctype_is_c = true; - if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) - { - datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); - datlocale = TextDatumGetCString(datum); - - builtin_validate_locale(dbform->encoding, datlocale); - - default_locale.info.builtin.locale = MemoryContextStrdup( - TopMemoryContext, datlocale); - } - else if (dbform->datlocprovider == COLLPROVIDER_ICU) - { - char *icurules; - - datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); - datlocale = TextDatumGetCString(datum); - - datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); - if (!isnull) - icurules = TextDatumGetCString(datum); - else - icurules = NULL; - - make_icu_collator(datlocale, icurules, &default_locale); - } - else - datlocale = NULL; - - default_locale.provider = dbform->datlocprovider; - - /* - * Default locale is currently always deterministic. Nondeterministic - * locales currently don't support pattern matching, which would break a - * lot of things if applied globally. - */ - default_locale.deterministic = true; + init_database_collation(); /* * Check collation version. 
See similar code in @@ -478,7 +442,13 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect if (dbform->datlocprovider == COLLPROVIDER_LIBC) locale = collate; else + { + char *datlocale; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); locale = datlocale; + } actual_versionstr = get_collation_actual_version(dbform->datlocprovider, locale); if (!actual_versionstr) diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 040968d6ff2..3e14a261b16 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -93,13 +93,12 @@ struct pg_locale_struct typedef struct pg_locale_struct *pg_locale_t; -extern PGDLLIMPORT struct pg_locale_struct default_locale; - extern void make_icu_collator(const char *iculocstr, const char *icurules, struct pg_locale_struct *resultp); extern bool pg_locale_deterministic(pg_locale_t locale); +extern void init_database_collation(void); extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); -- 2.34.1
From b26ffe549028e204e564582fc486759bcdc5ab5b Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 15:02:26 -0700 Subject: [PATCH v4 2/5] Make database collation pg_locale_t always non-NULL. Previously, the database collation's pg_locale_t was NULL for the libc provider. This commit properly initializes a pg_locale_t object in all cases. --- src/backend/utils/adt/pg_locale.c | 191 +++++++++++++++++------------- 1 file changed, 110 insertions(+), 81 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 1653e997d9b..598b42b1767 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1446,6 +1446,103 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } +/* simple subroutine for reporting errors from newlocale() */ +static void +report_newlocale_failure(const char *localename) +{ + int save_errno; + + /* + * Windows doesn't provide any useful error indication from + * _create_locale(), and BSD-derived platforms don't seem to feel they + * need to set errno either (even though POSIX is pretty clear that + * newlocale should do so). So, if errno hasn't been set, assume ENOENT + * is what to report. + */ + if (errno == 0) + errno = ENOENT; + + /* + * ENOENT means "no such locale", not "no such file", so clarify that + * errno with an errdetail message. + */ + save_errno = errno; /* auxiliary funcs might change errno */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not create locale \"%s\": %m", + localename), + (save_errno == ENOENT ? + errdetail("The operating system could not find any locale data for the locale name \"%s\".", + localename) : 0))); +} + +/* + * Initialize the locale_t field. + * + * The "C" and "POSIX" locales are not actually handled by libc, so set the + * locale_t to zero in that case. 
+ */ +static void +make_libc_collator(const char *collate, const char *ctype, + pg_locale_t result) +{ + locale_t loc = 0; + + if (strcmp(collate, ctype) == 0) + { + if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0) + { + /* Normal case where they're the same */ + errno = 0; +#ifndef WIN32 + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate, + NULL); +#else + loc = _create_locale(LC_ALL, collate); +#endif + if (!loc) + report_newlocale_failure(collate); + } + } + else + { +#ifndef WIN32 + /* We need two newlocale() steps */ + locale_t loc1 = 0; + + if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0) + { + errno = 0; + loc1 = newlocale(LC_COLLATE_MASK, collate, NULL); + if (!loc1) + report_newlocale_failure(collate); + } + + if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0) + { + errno = 0; + loc = newlocale(LC_CTYPE_MASK, ctype, loc1); + if (!loc) + report_newlocale_failure(ctype); + } + else + loc = loc1; +#else + + /* + * XXX The _create_locale() API doesn't appear to support this. Could + * perhaps be worked around by changing pg_locale_t to contain two + * separate fields. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + result->info.lt = loc; +} + void make_icu_collator(const char *iculocstr, const char *icurules, @@ -1499,36 +1596,6 @@ make_icu_collator(const char *iculocstr, } -/* simple subroutine for reporting errors from newlocale() */ -static void -report_newlocale_failure(const char *localename) -{ - int save_errno; - - /* - * Windows doesn't provide any useful error indication from - * _create_locale(), and BSD-derived platforms don't seem to feel they - * need to set errno either (even though POSIX is pretty clear that - * newlocale should do so). So, if errno hasn't been set, assume ENOENT - * is what to report. 
- */ - if (errno == 0) - errno = ENOENT; - - /* - * ENOENT means "no such locale", not "no such file", so clarify that - * errno with an errdetail message. - */ - save_errno = errno; /* auxiliary funcs might change errno */ - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("could not create locale \"%s\": %m", - localename), - (save_errno == ENOENT ? - errdetail("The operating system could not find any locale data for the locale name \"%s\".", - localename) : 0))); -} - bool pg_locale_deterministic(pg_locale_t locale) { @@ -1586,7 +1653,17 @@ init_database_collation(void) } else { + const char *datcollate; + const char *datctype; + Assert(dbform->datlocprovider == COLLPROVIDER_LIBC); + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datcollate); + datcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); + datctype = TextDatumGetCString(datum); + + make_libc_collator(datcollate, datctype, &default_locale); } default_locale.provider = dbform->datlocprovider; @@ -1605,8 +1682,6 @@ init_database_collation(void) * Create a pg_locale_t from a collation OID. Results are cached for the * lifetime of the backend. Thus, do not free the result with freelocale(). * - * As a special optimization, the default/database collation returns 0. - * * For simplicity, we always generate COLLATE + CTYPE even though we * might only need one of them. Since this is called only once per session, * it shouldn't cost much. 
@@ -1620,12 +1695,7 @@ pg_newlocale_from_collation(Oid collid) Assert(OidIsValid(collid)); if (collid == DEFAULT_COLLATION_OID) - { - if (default_locale.provider == COLLPROVIDER_LIBC) - return (pg_locale_t) 0; - else - return &default_locale; - } + return &default_locale; cache_entry = lookup_collation_cache(collid, false); @@ -1664,55 +1734,14 @@ pg_newlocale_from_collation(Oid collid) else if (collform->collprovider == COLLPROVIDER_LIBC) { const char *collcollate; - const char *collctype pg_attribute_unused(); - locale_t loc; + const char *collctype; datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate); collcollate = TextDatumGetCString(datum); datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); collctype = TextDatumGetCString(datum); - if (strcmp(collcollate, collctype) == 0) - { - /* Normal case where they're the same */ - errno = 0; -#ifndef WIN32 - loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, - NULL); -#else - loc = _create_locale(LC_ALL, collcollate); -#endif - if (!loc) - report_newlocale_failure(collcollate); - } - else - { -#ifndef WIN32 - /* We need two newlocale() steps */ - locale_t loc1; - - errno = 0; - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); - if (!loc1) - report_newlocale_failure(collcollate); - errno = 0; - loc = newlocale(LC_CTYPE_MASK, collctype, loc1); - if (!loc) - report_newlocale_failure(collctype); -#else - - /* - * XXX The _create_locale() API doesn't appear to support - * this. Could perhaps be worked around by changing - * pg_locale_t to contain two separate fields. - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collations with different collate and ctype values are not supported on this platform"))); -#endif - } - - result.info.lt = loc; + make_libc_collator(collcollate, collctype, &result); } else if (collform->collprovider == COLLPROVIDER_ICU) { -- 2.34.1
From 6c083830ae4c5be22801b5670866689e84eb0510 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Fri, 14 Jun 2024 15:13:59 -0700 Subject: [PATCH v4 3/5] Refactor collation cache. Now that the result of pg_newlocale_from_collation() is always non-NULL, move the collate_is_c and ctype_is_c flags into pg_locale_t, and always use that. This commit eliminates the multi-stage initialization of the cache and the extra code in lc_collate_is_c() and lc_ctype_is_c(). It also makes it safe to call pg_newlocale_from_collation() before checking lc_collate_is_c() or lc_ctype_is_c(). --- src/backend/utils/adt/pg_locale.c | 180 +++++------------------------- src/include/utils/pg_locale.h | 14 +++ 2 files changed, 40 insertions(+), 154 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 598b42b1767..42d8bc5deda 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false; typedef struct { Oid collid; /* hash key: pg_collation OID */ - bool collate_is_c; /* is collation's LC_COLLATE C? */ - bool ctype_is_c; /* is collation's LC_CTYPE C? */ - bool flags_valid; /* true if above flags are valid */ pg_locale_t locale; /* locale_t struct, or 0 if not valid */ } collation_cache_entry; @@ -1208,29 +1205,13 @@ IsoLocaleName(const char *winlocname) /* * Cache mechanism for collation information. * - * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C - * (or POSIX), so we can optimize a few code paths in various places. - * For the built-in C and POSIX collations, we can know that without even - * doing a cache lookup, but we want to support aliases for C/POSIX too. - * For the "default" collation, there are separate static cache variables, - * since consulting the pg_collation catalog doesn't tell us what we need. 
- * - * Also, if a pg_locale_t has been requested for a collation, we cache that - * for the life of a backend. - * - * Note that some code relies on the flags not reporting false negatives - * (that is, saying it's not C when it is). For example, char2wchar() - * could fail if the locale is C, so str_tolower() shouldn't call it - * in that case. - * * Note that we currently lack any way to flush the cache. Since we don't * support ALTER COLLATION, this is OK. The worst case is that someone * drops a collation, and a useless cache entry hangs around in existing * backends. */ - static collation_cache_entry * -lookup_collation_cache(Oid collation, bool set_flags) +lookup_collation_cache(Oid collation) { collation_cache_entry *cache_entry; bool found; @@ -1256,59 +1237,9 @@ lookup_collation_cache(Oid collation, bool set_flags) * Make sure cache entry is marked invalid, in case we fail before * setting things. */ - cache_entry->flags_valid = false; cache_entry->locale = 0; } - if (set_flags && !cache_entry->flags_valid) - { - /* Attempt to set the flags */ - HeapTuple tp; - Form_pg_collation collform; - - tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for collation %u", collation); - collform = (Form_pg_collation) GETSTRUCT(tp); - - if (collform->collprovider == COLLPROVIDER_BUILTIN) - { - Datum datum; - const char *colllocale; - - datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); - colllocale = TextDatumGetCString(datum); - - cache_entry->collate_is_c = true; - cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0); - } - else if (collform->collprovider == COLLPROVIDER_LIBC) - { - Datum datum; - const char *collcollate; - const char *collctype; - - datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate); - collcollate = TextDatumGetCString(datum); - datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); - collctype = 
TextDatumGetCString(datum); - - cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || - (strcmp(collcollate, "POSIX") == 0)); - cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || - (strcmp(collctype, "POSIX") == 0)); - } - else - { - cache_entry->collate_is_c = false; - cache_entry->ctype_is_c = false; - } - - cache_entry->flags_valid = true; - - ReleaseSysCache(tp); - } - return cache_entry; } @@ -1326,47 +1257,6 @@ lc_collate_is_c(Oid collation) if (!OidIsValid(collation)) return false; - /* - * If we're asked about the default collation, we have to inquire of the C - * library. Cache the result so we only have to compute it once. - */ - if (collation == DEFAULT_COLLATION_OID) - { - static int result = -1; - const char *localeptr; - - if (result >= 0) - return (bool) result; - - if (default_locale.provider == COLLPROVIDER_BUILTIN) - { - result = true; - return (bool) result; - } - else if (default_locale.provider == COLLPROVIDER_ICU) - { - result = false; - return (bool) result; - } - else if (default_locale.provider == COLLPROVIDER_LIBC) - { - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); - } - else - elog(ERROR, "unexpected collation provider '%c'", - default_locale.provider); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else - result = false; - return (bool) result; - } - /* * If we're asked about the built-in C/POSIX collations, we know that. */ @@ -1377,7 +1267,7 @@ lc_collate_is_c(Oid collation) /* * Otherwise, we have to consult pg_collation, but we cache that. */ - return (lookup_collation_cache(collation, true))->collate_is_c; + return pg_newlocale_from_collation(collation)->collate_is_c; } /* @@ -1393,46 +1283,6 @@ lc_ctype_is_c(Oid collation) if (!OidIsValid(collation)) return false; - /* - * If we're asked about the default collation, we have to inquire of the C - * library. 
Cache the result so we only have to compute it once. - */ - if (collation == DEFAULT_COLLATION_OID) - { - static int result = -1; - const char *localeptr; - - if (result >= 0) - return (bool) result; - - if (default_locale.provider == COLLPROVIDER_BUILTIN) - { - localeptr = default_locale.info.builtin.locale; - } - else if (default_locale.provider == COLLPROVIDER_ICU) - { - result = false; - return (bool) result; - } - else if (default_locale.provider == COLLPROVIDER_LIBC) - { - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); - } - else - elog(ERROR, "unexpected collation provider '%c'", - default_locale.provider); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else - result = false; - return (bool) result; - } - /* * If we're asked about the built-in C/POSIX collations, we know that. */ @@ -1443,7 +1293,7 @@ lc_ctype_is_c(Oid collation) /* * Otherwise, we have to consult pg_collation, but we cache that. 
*/ - return (lookup_collation_cache(collation, true))->ctype_is_c; + return pg_newlocale_from_collation(collation)->ctype_is_c; } /* simple subroutine for reporting errors from newlocale() */ @@ -1632,6 +1482,9 @@ init_database_collation(void) builtin_validate_locale(dbform->encoding, datlocale); + default_locale.collate_is_c = true; + default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0); + default_locale.info.builtin.locale = MemoryContextStrdup( TopMemoryContext, datlocale); } @@ -1643,6 +1496,9 @@ init_database_collation(void) datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); datlocale = TextDatumGetCString(datum); + default_locale.collate_is_c = false; + default_locale.ctype_is_c = false; + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); if (!isnull) icurules = TextDatumGetCString(datum); @@ -1663,6 +1519,11 @@ init_database_collation(void) datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); datctype = TextDatumGetCString(datum); + default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) || + (strcmp(datcollate, "POSIX") == 0); + default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) || + (strcmp(datctype, "POSIX") == 0); + make_libc_collator(datcollate, datctype, &default_locale); } @@ -1697,7 +1558,7 @@ pg_newlocale_from_collation(Oid collid) if (collid == DEFAULT_COLLATION_OID) return &default_locale; - cache_entry = lookup_collation_cache(collid, false); + cache_entry = lookup_collation_cache(collid); if (cache_entry->locale == 0) { @@ -1726,6 +1587,9 @@ pg_newlocale_from_collation(Oid collid) datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); locstr = TextDatumGetCString(datum); + result.collate_is_c = true; + result.ctype_is_c = (strcmp(locstr, "C") == 0); + builtin_validate_locale(GetDatabaseEncoding(), locstr); result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext, @@ -1741,6 +1605,11 @@ pg_newlocale_from_collation(Oid
collid) datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); collctype = TextDatumGetCString(datum); + result.collate_is_c = (strcmp(collcollate, "C") == 0) || + (strcmp(collcollate, "POSIX") == 0); + result.ctype_is_c = (strcmp(collctype, "C") == 0) || + (strcmp(collctype, "POSIX") == 0); + make_libc_collator(collcollate, collctype, &result); } else if (collform->collprovider == COLLPROVIDER_ICU) @@ -1751,6 +1620,9 @@ pg_newlocale_from_collation(Oid collid) datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); iculocstr = TextDatumGetCString(datum); + result.collate_is_c = false; + result.ctype_is_c = false; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); if (!isnull) icurules = TextDatumGetCString(datum); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 3e14a261b16..f41d33975be 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -69,11 +69,25 @@ extern void cache_locale_time(void); /* * We use a discriminated union to hold either a locale_t or an ICU collator. * pg_locale_t is occasionally checked for truth, so make it a pointer. + * + * Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C + * (or POSIX), so we can optimize a few code paths in various places. For the + * built-in C and POSIX collations, we can know that without even doing a + * cache lookup, but we want to support aliases for C/POSIX too. For the + * "default" collation, there are separate static cache variables, since + * consulting the pg_collation catalog doesn't tell us what we need. + * + * Note that some code relies on the flags not reporting false negatives + * (that is, saying it's not C when it is). For example, char2wchar() + * could fail if the locale is C, so str_tolower() shouldn't call it + * in that case. 
*/ struct pg_locale_struct { char provider; bool deterministic; + bool collate_is_c; + bool ctype_is_c; union { struct -- 2.34.1
From 9327537087a1c5327cd524ffa2ad4d934b53f6d2 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 14:48:07 -0700 Subject: [PATCH v4 4/5] ts_locale.c: do not use NULL to mean the database collation. Use pg_newlocale_from_collation(DEFAULT_COLLATION_OID) to explicitly get the database collation. --- src/backend/tsearch/ts_locale.c | 25 +++++++++++++++++++------ src/backend/tsearch/wparser_def.c | 4 +++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index bc44599de6a..2d5e3905b41 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "catalog/pg_collation.h" #include "common/string.h" #include "storage/fd.h" #include "tsearch/ts_locale.h" @@ -36,7 +37,9 @@ t_isdigit(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isdigit(TOUCHAR(ptr)); @@ -51,7 +54,9 @@ t_isspace(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isspace(TOUCHAR(ptr)); @@ -66,7 +71,9 @@ t_isalpha(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isalpha(TOUCHAR(ptr)); @@ -81,7 +88,9 @@ t_isalnum(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: 
determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isalnum(TOUCHAR(ptr)); @@ -96,7 +105,9 @@ t_isprint(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);; if (clen == 1 || database_ctype_is_c) return isprint(TOUCHAR(ptr)); @@ -266,7 +277,9 @@ char * lowerstr_with_len(const char *str, int len) { char *out; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (len == 0) return pstrdup(""); diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 3919ef27b57..03b4a08244f 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -17,6 +17,7 @@ #include <limits.h> #include <wctype.h> +#include "catalog/pg_collation.h" #include "commands/defrem.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -299,7 +300,8 @@ TParserInit(char *str, int len) */ if (prs->charmaxlen > 1) { - pg_locale_t mylocale = 0; /* TODO */ + /* TODO: determine collation properly */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); prs->usewide = true; if (database_ctype_is_c) -- 2.34.1
From 5cdad53fc73e7de9d5c85eb50cd6ae27e0697698 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:58:59 -0700 Subject: [PATCH v4 5/5] Remove support for null pg_locale_t. Previously, passing NULL for pg_locale_t meant "use the libc provider and the server environment". Now that the database collation is represented as a proper pg_locale_t (not dependent on setlocale()), remove special cases for NULL. --- src/backend/access/hash/hashfunc.c | 10 +-- src/backend/regex/regc_pg_locale.c | 115 ++--------------------------- src/backend/utils/adt/formatting.c | 84 ++++++--------------- src/backend/utils/adt/like.c | 10 +-- src/backend/utils/adt/pg_locale.c | 77 ++++++------------- src/backend/utils/adt/varchar.c | 10 +-- src/backend/utils/adt/varlena.c | 28 +++---- 7 files changed, 69 insertions(+), 265 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index ce8ee0ea2ef..d151751e185 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -268,7 +268,7 @@ hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); Oid collid = PG_GET_COLLATION(); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -277,8 +277,7 @@ hashtext(PG_FUNCTION_ARGS) errmsg("could not determine which collation to use for string hashing"), errhint("Use the COLLATE clause to set the collation explicitly."))); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { @@ -322,7 +321,7 @@ hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); Oid collid = PG_GET_COLLATION(); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -331,8 +330,7 @@ hashtextextended(PG_FUNCTION_ARGS) errmsg("could not determine which collation to use for string hashing"), errhint("Use the COLLATE clause to set the 
collation explicitly."))); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 85f3238eb07..dfea0e75ad7 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -67,8 +67,6 @@ typedef enum { PG_REGEX_LOCALE_C, /* C locale (encoding independent) */ PG_REGEX_BUILTIN, /* built-in Unicode semantics */ - PG_REGEX_LOCALE_WIDE, /* Use <wctype.h> functions */ - PG_REGEX_LOCALE_1BYTE, /* Use <ctype.h> functions */ PG_REGEX_LOCALE_WIDE_L, /* Use locale_t <wctype.h> functions */ PG_REGEX_LOCALE_1BYTE_L, /* Use locale_t <ctype.h> functions */ PG_REGEX_LOCALE_ICU, /* Use ICU uchar.h functions */ @@ -262,29 +260,19 @@ pg_set_regex_collation(Oid collation) errmsg("nondeterministic collations are not supported for regular expressions"))); #ifdef USE_ICU - if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) + if (pg_regex_locale->provider == COLLPROVIDER_ICU) pg_regex_strategy = PG_REGEX_LOCALE_ICU; else #endif if (GetDatabaseEncoding() == PG_UTF8) { - if (pg_regex_locale) - { - if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN) - pg_regex_strategy = PG_REGEX_BUILTIN; - else - pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L; - } + if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN) + pg_regex_strategy = PG_REGEX_BUILTIN; else - pg_regex_strategy = PG_REGEX_LOCALE_WIDE; + pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L; } else - { - if (pg_regex_locale) - pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L; - else - pg_regex_strategy = PG_REGEX_LOCALE_1BYTE; - } + pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L; pg_regex_collation = collation; } @@ -300,13 +288,6 @@ pg_wc_isdigit(pg_wchar c) (pg_char_properties[c] & PG_ISDIGIT)); case PG_REGEX_BUILTIN: return pg_u_isdigit(c, true); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 
|| c <= (pg_wchar) 0xFFFF) - return iswdigit((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isdigit((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); @@ -334,13 +315,6 @@ pg_wc_isalpha(pg_wchar c) (pg_char_properties[c] & PG_ISALPHA)); case PG_REGEX_BUILTIN: return pg_u_isalpha(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalpha((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); @@ -368,13 +342,6 @@ pg_wc_isalnum(pg_wchar c) (pg_char_properties[c] & PG_ISALNUM)); case PG_REGEX_BUILTIN: return pg_u_isalnum(c, true); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalnum((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); @@ -411,13 +378,6 @@ pg_wc_isupper(pg_wchar c) (pg_char_properties[c] & PG_ISUPPER)); case PG_REGEX_BUILTIN: return pg_u_isupper(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isupper((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswupper_l((wint_t) c, pg_regex_locale->info.lt); @@ -445,13 +405,6 @@ pg_wc_islower(pg_wchar c) (pg_char_properties[c] & PG_ISLOWER)); case PG_REGEX_BUILTIN: return pg_u_islower(c); - case PG_REGEX_LOCALE_WIDE: - if 
(sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - islower((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswlower_l((wint_t) c, pg_regex_locale->info.lt); @@ -479,13 +432,6 @@ pg_wc_isgraph(pg_wchar c) (pg_char_properties[c] & PG_ISGRAPH)); case PG_REGEX_BUILTIN: return pg_u_isgraph(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isgraph((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); @@ -513,13 +459,6 @@ pg_wc_isprint(pg_wchar c) (pg_char_properties[c] & PG_ISPRINT)); case PG_REGEX_BUILTIN: return pg_u_isprint(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isprint((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswprint_l((wint_t) c, pg_regex_locale->info.lt); @@ -547,13 +486,6 @@ pg_wc_ispunct(pg_wchar c) (pg_char_properties[c] & PG_ISPUNCT)); case PG_REGEX_BUILTIN: return pg_u_ispunct(c, true); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - ispunct((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); @@ -581,13 +513,6 @@ pg_wc_isspace(pg_wchar c) (pg_char_properties[c] & PG_ISSPACE)); case PG_REGEX_BUILTIN: return pg_u_isspace(c); - case 
PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isspace((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswspace_l((wint_t) c, pg_regex_locale->info.lt); @@ -616,20 +541,6 @@ pg_wc_toupper(pg_wchar c) return c; case PG_REGEX_BUILTIN: return unicode_uppercase_simple(c); - case PG_REGEX_LOCALE_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return toupper((unsigned char) c); - return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return towupper_l((wint_t) c, pg_regex_locale->info.lt); @@ -658,20 +569,6 @@ pg_wc_tolower(pg_wchar c) return c; case PG_REGEX_BUILTIN: return unicode_lowercase_simple(c); - case PG_REGEX_LOCALE_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return tolower((unsigned char) c); - return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return towlower_l((wint_t) c, pg_regex_locale->info.lt); @@ -825,11 +722,9 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode) 
case PG_REGEX_BUILTIN: max_chr = (pg_wchar) MAX_SIMPLE_CHR; break; - case PG_REGEX_LOCALE_WIDE: case PG_REGEX_LOCALE_WIDE_L: max_chr = (pg_wchar) MAX_SIMPLE_CHR; break; - case PG_REGEX_LOCALE_1BYTE: case PG_REGEX_LOCALE_1BYTE_L: #if MAX_SIMPLE_CHR >= UCHAR_MAX max_chr = (pg_wchar) UCHAR_MAX; diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 8736ada4be2..68069fcfd3b 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1665,7 +1665,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + if (mylocale->provider == COLLPROVIDER_ICU) { int32_t len_uchar; int32_t len_conv; @@ -1681,7 +1681,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) } else #endif - if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) + if (mylocale->provider == COLLPROVIDER_BUILTIN) { const char *src = buff; size_t srclen = nbytes; @@ -1710,7 +1710,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) } else { - Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + Assert(mylocale->provider == COLLPROVIDER_LIBC); if (pg_database_encoding_max_length() > 1) { @@ -1730,12 +1730,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { - if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); - else - workspace[curr_char] = towlower(workspace[curr_char]); - } + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); /* * Make result large enough; case change might change number @@ -1761,12 +1756,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) * collations you get exactly what the collation says. 
*/ for (p = result; *p; p++) - { - if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); - else - *p = pg_tolower((unsigned char) *p); - } + *p = tolower_l((unsigned char) *p, mylocale->info.lt); } } } @@ -1813,7 +1803,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + if (mylocale->provider == COLLPROVIDER_ICU) { int32_t len_uchar, len_conv; @@ -1829,7 +1819,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) } else #endif - if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) + if (mylocale->provider == COLLPROVIDER_BUILTIN) { const char *src = buff; size_t srclen = nbytes; @@ -1858,7 +1848,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) } else { - Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + Assert(mylocale->provider == COLLPROVIDER_LIBC); if (pg_database_encoding_max_length() > 1) { @@ -1878,12 +1868,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { - if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - else - workspace[curr_char] = towupper(workspace[curr_char]); - } + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); /* * Make result large enough; case change might change number @@ -1909,12 +1894,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) * collations you get exactly what the collation says. 
*/ for (p = result; *p; p++) - { - if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - else - *p = pg_toupper((unsigned char) *p); - } + *p = toupper_l((unsigned char) *p, mylocale->info.lt); } } } @@ -2003,7 +1983,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + if (mylocale->provider == COLLPROVIDER_ICU) { int32_t len_uchar, len_conv; @@ -2019,7 +1999,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) } else #endif - if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) + if (mylocale->provider == COLLPROVIDER_BUILTIN) { const char *src = buff; size_t srclen = nbytes; @@ -2060,7 +2040,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) } else { - Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + Assert(mylocale->provider == COLLPROVIDER_LIBC); if (pg_database_encoding_max_length() > 1) { @@ -2081,22 +2061,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { - if (mylocale) - { - if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); - else - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); - } + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); else - { - if (wasalnum) - workspace[curr_char] = towlower(workspace[curr_char]); - else - workspace[curr_char] = towupper(workspace[curr_char]); - wasalnum = iswalnum(workspace[curr_char]); - } + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); } /* @@ -2124,22 +2093,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) */ for (p = result; *p; p++) { - if (mylocale) - { - if (wasalnum) - 
*p = tolower_l((unsigned char) *p, mylocale->info.lt); - else - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); - } + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); else - { - if (wasalnum) - *p = pg_tolower((unsigned char) *p); - else - *p = pg_toupper((unsigned char) *p); - wasalnum = isalnum((unsigned char) *p); - } + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); } } } diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 57ead66b5aa..0ecc96d48e5 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -174,8 +174,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) *p; int slen, plen; - pg_locale_t locale = 0; - bool locale_is_c = false; + pg_locale_t locale; if (!OidIsValid(collation)) { @@ -189,10 +188,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } - if (lc_ctype_is_c(collation)) - locale_is_c = true; - else - locale = pg_newlocale_from_collation(collation); + locale = pg_newlocale_from_collation(collation); if (!pg_locale_deterministic(locale)) ereport(ERROR, @@ -228,7 +224,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) plen = VARSIZE_ANY_EXHDR(pat); s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale, locale_is_c); + return SB_IMatchText(s, slen, p, plen, locale, locale->ctype_is_c); } } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 42d8bc5deda..2e6f624798f 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1154,7 +1154,7 @@ get_iso_localename(const char *winlocname) char *hyphen; /* Locale names use only ASCII, any conversion locale suffices. 
*/ - rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL); + rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), default_locale); if (rc == -1 || rc == sizeof(iso_lc_messages)) return NULL; @@ -1449,11 +1449,7 @@ make_icu_collator(const char *iculocstr, bool pg_locale_deterministic(pg_locale_t locale) { - /* default locale must always be deterministic */ - if (locale == NULL) - return true; - else - return locale->deterministic; + return locale->deterministic; } /* @@ -1813,7 +1809,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, int r; int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); #ifndef WIN32 Assert(false); @@ -1853,10 +1849,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - if (locale) - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); - else - result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -1882,7 +1875,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 if (GetDatabaseEncoding() == PG_UTF8) { @@ -1893,10 +1886,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) } else #endif /* WIN32 */ - if (locale) result = strcoll_l(arg1, arg2, locale->info.lt); - else - result = strcoll(arg1, arg2); return result; } @@ -1918,7 +1908,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, char *arg2n; int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider 
== COLLPROVIDER_LIBC); #ifdef WIN32 /* check for this case before doing the work for nul-termination */ @@ -2064,7 +2054,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strcoll_libc(arg1, arg2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2100,7 +2090,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strncoll_libc(arg1, len1, arg2, len2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2118,13 +2108,10 @@ static size_t pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale) { - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef TRUST_STRXFRM - if (locale) - return strxfrm_l(dest, src, destsize, locale->info.lt); - else - return strxfrm(dest, src, destsize); + return strxfrm_l(dest, src, destsize, locale->info.lt); #else /* shouldn't happen */ PGLOCALE_SUPPORT_ERROR(locale->provider); @@ -2141,7 +2128,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, size_t bufsize = srclen + 1; size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (bufsize > TEXTBUFLEN) buf = palloc(bufsize); @@ -2313,7 +2300,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen, bool pg_strxfrm_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) #ifdef TRUST_STRXFRM return true; #else @@ -2347,7 +2334,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) { size_t result = 0; /* keep compiler quiet */ - if (!locale || 
locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strxfrm_libc(dest, src, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2384,7 +2371,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, { size_t result = 0; /* keep compiler quiet */ - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2404,7 +2391,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, bool pg_strxfrm_prefix_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) return false; else if (locale->provider == COLLPROVIDER_ICU) return true; @@ -2434,13 +2421,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -2469,13 +2454,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -3032,7 +3015,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == 
COLLPROVIDER_LIBC); if (tolen == 0) return 0; @@ -3060,12 +3043,6 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) } else #endif /* WIN32 */ - if (locale == (pg_locale_t) 0) - { - /* Use wcstombs directly for the default locale */ - result = wcstombs(to, from, tolen); - } - else { /* Use wcstombs_l for nondefault locales */ result = wcstombs_l(to, from, tolen, locale->info.lt); @@ -3089,7 +3066,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; @@ -3122,16 +3099,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, /* mbstowcs requires ending '\0' */ char *str = pnstrdup(from, fromlen); - if (locale == (pg_locale_t) 0) - { - /* Use mbstowcs directly for the default locale */ - result = mbstowcs(to, str, tolen); - } - else - { - /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale->info.lt); - } + /* locale is never NULL here, so always use mbstowcs_l */ + result = mbstowcs_l(to, str, tolen, locale->info.lt); pfree(str); } diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 02dfe219f54..829375cd1a3 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -999,7 +999,7 @@ hashbpchar(PG_FUNCTION_ARGS) Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; - pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -1011,8 +1011,7 @@ hashbpchar(PG_FUNCTION_ARGS) keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { @@ -1054,7 +1053,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; - pg_locale_t mylocale = 0; +
pg_locale_t mylocale; Datum result; if (!collid) @@ -1066,8 +1065,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d2e2e9bbba0..52ab8c43c66 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1217,12 +1217,11 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) { int len1 = VARSIZE_ANY_EXHDR(t1); int len2 = VARSIZE_ANY_EXHDR(t2); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; check_collation_set(collid); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (!pg_locale_deterministic(mylocale)) ereport(ERROR, @@ -1619,18 +1618,14 @@ Datum texteq(PG_FUNCTION_ARGS) { Oid collid = PG_GET_COLLATION(); - bool locale_is_c = false; pg_locale_t mylocale = 0; bool result; check_collation_set(collid); - if (lc_collate_is_c(collid)) - locale_is_c = true; - else - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || pg_locale_deterministic(mylocale)) + if (pg_locale_deterministic(mylocale)) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1678,18 +1673,14 @@ Datum textne(PG_FUNCTION_ARGS) { Oid collid = PG_GET_COLLATION(); - bool locale_is_c = false; - pg_locale_t mylocale = 0; + pg_locale_t mylocale; bool result; check_collation_set(collid); - if (lc_collate_is_c(collid)) - locale_is_c = true; - else - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || pg_locale_deterministic(mylocale)) + if (pg_locale_deterministic(mylocale)) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = 
PG_GETARG_DATUM(1); @@ -1793,15 +1784,14 @@ text_starts_with(PG_FUNCTION_ARGS) Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); Oid collid = PG_GET_COLLATION(); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; bool result; Size len1, len2; check_collation_set(collid); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (!pg_locale_deterministic(mylocale)) ereport(ERROR, -- 2.34.1