On Fri, 2024-07-26 at 19:38 +0200, Andreas Karlsson wrote:
> Nice refactoring!
>
> Two small comments about CheckMyDatabase().
>
> - Shouldn't we look at the default_locale.ctype_is_c when setting
> database_ctype_is_c instead of doing a strcmp()? or maybe we should
> even
> remove the global variable and always look at the default_locale?
database_ctype_is_c refers to the LC_CTYPE environment of the database
-- pg_database.datctype. default_locale.ctype_is_c is the ctype of the
database's default collation.
Confusing, I know, but it matters for a few things that still depend on
the LC_CTYPE, such as tsearch and maybe a few extensions. See
f413941f41.
> - I think that the lookup of Anum_pg_database_datlocale could be done
> later in the code since it is not needed when we use a libc locale.
> E.g.
> as below.
Done, thank you.
> Also is there any reason you do not squash the 4th and the 6th patch?
Done. I had to rearrange the patch ordering a bit because prior to the
cache refactoring patch, it's unsafe to call
pg_newlocale_from_collation() without checking lc_collate_is_c() or
lc_ctype_is_c() first.
Regards,
Jeff Davis
From 8a98af04912afedcb481d0e3851a485a63baf3d9 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 5 Jun 2024 11:45:55 -0700
Subject: [PATCH v4 1/5] Make database default collation internal to
pg_locale.c.
---
src/backend/utils/adt/pg_locale.c | 69 +++++++++++++++++++++++++++++--
src/backend/utils/init/postinit.c | 44 ++++----------------
src/include/utils/pg_locale.h | 3 +-
3 files changed, 74 insertions(+), 42 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 38c40a40489..1653e997d9b 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -56,6 +56,7 @@
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_database.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
@@ -116,6 +117,8 @@ char *localized_full_months[12 + 1];
/* is the databases's LC_CTYPE the C locale? */
bool database_ctype_is_c = false;
+static struct pg_locale_struct default_locale;
+
/* indicates whether locale information cache is valid */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;
@@ -1443,8 +1446,6 @@ lc_ctype_is_c(Oid collation)
return (lookup_collation_cache(collation, true))->ctype_is_c;
}
-struct pg_locale_struct default_locale;
-
void
make_icu_collator(const char *iculocstr,
const char *icurules,
@@ -1539,7 +1540,69 @@ pg_locale_deterministic(pg_locale_t locale)
}
/*
- * Create a locale_t from a collation OID. Results are cached for the
+ * Initialize default_locale with database locale settings.
+ */
+void
+init_database_collation(void)
+{
+ HeapTuple tup;
+ Form_pg_database dbform;
+ Datum datum;
+ bool isnull;
+
+ /* Fetch our pg_database row normally, via syscache */
+ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
+ dbform = (Form_pg_database) GETSTRUCT(tup);
+
+ if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
+ {
+ char *datlocale;
+
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+ datlocale = TextDatumGetCString(datum);
+
+ builtin_validate_locale(dbform->encoding, datlocale);
+
+ default_locale.info.builtin.locale = MemoryContextStrdup(
+ TopMemoryContext, datlocale);
+ }
+ else if (dbform->datlocprovider == COLLPROVIDER_ICU)
+ {
+ char *datlocale;
+ char *icurules;
+
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+ datlocale = TextDatumGetCString(datum);
+
+ datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
+ if (!isnull)
+ icurules = TextDatumGetCString(datum);
+ else
+ icurules = NULL;
+
+ make_icu_collator(datlocale, icurules, &default_locale);
+ }
+ else
+ {
+ Assert(dbform->datlocprovider == COLLPROVIDER_LIBC);
+ }
+
+ default_locale.provider = dbform->datlocprovider;
+
+ /*
+ * Default locale is currently always deterministic. Nondeterministic
+ * locales currently don't support pattern matching, which would break a
+ * lot of things if applied globally.
+ */
+ default_locale.deterministic = true;
+
+ ReleaseSysCache(tup);
+}
+
+/*
+ * Create a pg_locale_t from a collation OID. Results are cached for the
* lifetime of the backend. Thus, do not free the result with freelocale().
*
* As a special optimization, the default/database collation returns 0.
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 25867c8bd5b..3537df37056 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -318,7 +318,6 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
bool isnull;
char *collate;
char *ctype;
- char *datlocale;
/* Fetch our pg_database row normally, via syscache */
tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
@@ -423,42 +422,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
strcmp(ctype, "POSIX") == 0)
database_ctype_is_c = true;
- if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
- {
- datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
- datlocale = TextDatumGetCString(datum);
-
- builtin_validate_locale(dbform->encoding, datlocale);
-
- default_locale.info.builtin.locale = MemoryContextStrdup(
- TopMemoryContext, datlocale);
- }
- else if (dbform->datlocprovider == COLLPROVIDER_ICU)
- {
- char *icurules;
-
- datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
- datlocale = TextDatumGetCString(datum);
-
- datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
- if (!isnull)
- icurules = TextDatumGetCString(datum);
- else
- icurules = NULL;
-
- make_icu_collator(datlocale, icurules, &default_locale);
- }
- else
- datlocale = NULL;
-
- default_locale.provider = dbform->datlocprovider;
-
- /*
- * Default locale is currently always deterministic. Nondeterministic
- * locales currently don't support pattern matching, which would break a
- * lot of things if applied globally.
- */
- default_locale.deterministic = true;
+ init_database_collation();
/*
* Check collation version. See similar code in
@@ -478,7 +442,13 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
if (dbform->datlocprovider == COLLPROVIDER_LIBC)
locale = collate;
else
+ {
+ char *datlocale;
+
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+ datlocale = TextDatumGetCString(datum);
locale = datlocale;
+ }
actual_versionstr = get_collation_actual_version(dbform->datlocprovider, locale);
if (!actual_versionstr)
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 040968d6ff2..3e14a261b16 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -93,13 +93,12 @@ struct pg_locale_struct
typedef struct pg_locale_struct *pg_locale_t;
-extern PGDLLIMPORT struct pg_locale_struct default_locale;
-
extern void make_icu_collator(const char *iculocstr,
const char *icurules,
struct pg_locale_struct *resultp);
extern bool pg_locale_deterministic(pg_locale_t locale);
+extern void init_database_collation(void);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
--
2.34.1
From b26ffe549028e204e564582fc486759bcdc5ab5b Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 5 Jun 2024 15:02:26 -0700
Subject: [PATCH v4 2/5] Make database collation pg_locale_t always non-NULL.
Previously, the database collation's pg_locale_t was NULL for the libc
provider.
This commit properly initializes a pg_locale_t object in all cases.
---
src/backend/utils/adt/pg_locale.c | 191 +++++++++++++++++-------------
1 file changed, 110 insertions(+), 81 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 1653e997d9b..598b42b1767 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1446,6 +1446,103 @@ lc_ctype_is_c(Oid collation)
return (lookup_collation_cache(collation, true))->ctype_is_c;
}
+/* simple subroutine for reporting errors from newlocale() */
+static void
+report_newlocale_failure(const char *localename)
+{
+ int save_errno;
+
+ /*
+ * Windows doesn't provide any useful error indication from
+ * _create_locale(), and BSD-derived platforms don't seem to feel they
+ * need to set errno either (even though POSIX is pretty clear that
+ * newlocale should do so). So, if errno hasn't been set, assume ENOENT
+ * is what to report.
+ */
+ if (errno == 0)
+ errno = ENOENT;
+
+ /*
+ * ENOENT means "no such locale", not "no such file", so clarify that
+ * errno with an errdetail message.
+ */
+ save_errno = errno; /* auxiliary funcs might change errno */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not create locale \"%s\": %m",
+ localename),
+ (save_errno == ENOENT ?
+ errdetail("The operating system could not find any locale data for the locale name \"%s\".",
+ localename) : 0)));
+}
+
+/*
+ * Initialize the locale_t field.
+ *
+ * The "C" and "POSIX" locales are not actually handled by libc, so set the
+ * locale_t to zero in that case.
+ */
+static void
+make_libc_collator(const char *collate, const char *ctype,
+ pg_locale_t result)
+{
+ locale_t loc = 0;
+
+ if (strcmp(collate, ctype) == 0)
+ {
+ if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
+ {
+ /* Normal case where they're the same */
+ errno = 0;
+#ifndef WIN32
+ loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
+ NULL);
+#else
+ loc = _create_locale(LC_ALL, collate);
+#endif
+ if (!loc)
+ report_newlocale_failure(collate);
+ }
+ }
+ else
+ {
+#ifndef WIN32
+ /* We need two newlocale() steps */
+ locale_t loc1 = 0;
+
+ if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
+ {
+ errno = 0;
+ loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
+ if (!loc1)
+ report_newlocale_failure(collate);
+ }
+
+ if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
+ {
+ errno = 0;
+ loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
+ if (!loc)
+ report_newlocale_failure(ctype);
+ }
+ else
+ loc = loc1;
+#else
+
+ /*
+ * XXX The _create_locale() API doesn't appear to support this. Could
+ * perhaps be worked around by changing pg_locale_t to contain two
+ * separate fields.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+ }
+
+ result->info.lt = loc;
+}
+
void
make_icu_collator(const char *iculocstr,
const char *icurules,
@@ -1499,36 +1596,6 @@ make_icu_collator(const char *iculocstr,
}
-/* simple subroutine for reporting errors from newlocale() */
-static void
-report_newlocale_failure(const char *localename)
-{
- int save_errno;
-
- /*
- * Windows doesn't provide any useful error indication from
- * _create_locale(), and BSD-derived platforms don't seem to feel they
- * need to set errno either (even though POSIX is pretty clear that
- * newlocale should do so). So, if errno hasn't been set, assume ENOENT
- * is what to report.
- */
- if (errno == 0)
- errno = ENOENT;
-
- /*
- * ENOENT means "no such locale", not "no such file", so clarify that
- * errno with an errdetail message.
- */
- save_errno = errno; /* auxiliary funcs might change errno */
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("could not create locale \"%s\": %m",
- localename),
- (save_errno == ENOENT ?
- errdetail("The operating system could not find any locale data for the locale name \"%s\".",
- localename) : 0)));
-}
-
bool
pg_locale_deterministic(pg_locale_t locale)
{
@@ -1586,7 +1653,17 @@ init_database_collation(void)
}
else
{
+ const char *datcollate;
+ const char *datctype;
+
Assert(dbform->datlocprovider == COLLPROVIDER_LIBC);
+
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datcollate);
+ datcollate = TextDatumGetCString(datum);
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
+ datctype = TextDatumGetCString(datum);
+
+ make_libc_collator(datcollate, datctype, &default_locale);
}
default_locale.provider = dbform->datlocprovider;
@@ -1605,8 +1682,6 @@ init_database_collation(void)
* Create a pg_locale_t from a collation OID. Results are cached for the
* lifetime of the backend. Thus, do not free the result with freelocale().
*
- * As a special optimization, the default/database collation returns 0.
- *
* For simplicity, we always generate COLLATE + CTYPE even though we
* might only need one of them. Since this is called only once per session,
* it shouldn't cost much.
@@ -1620,12 +1695,7 @@ pg_newlocale_from_collation(Oid collid)
Assert(OidIsValid(collid));
if (collid == DEFAULT_COLLATION_OID)
- {
- if (default_locale.provider == COLLPROVIDER_LIBC)
- return (pg_locale_t) 0;
- else
- return &default_locale;
- }
+ return &default_locale;
cache_entry = lookup_collation_cache(collid, false);
@@ -1664,55 +1734,14 @@ pg_newlocale_from_collation(Oid collid)
else if (collform->collprovider == COLLPROVIDER_LIBC)
{
const char *collcollate;
- const char *collctype pg_attribute_unused();
- locale_t loc;
+ const char *collctype;
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
collcollate = TextDatumGetCString(datum);
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
collctype = TextDatumGetCString(datum);
- if (strcmp(collcollate, collctype) == 0)
- {
- /* Normal case where they're the same */
- errno = 0;
-#ifndef WIN32
- loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
- NULL);
-#else
- loc = _create_locale(LC_ALL, collcollate);
-#endif
- if (!loc)
- report_newlocale_failure(collcollate);
- }
- else
- {
-#ifndef WIN32
- /* We need two newlocale() steps */
- locale_t loc1;
-
- errno = 0;
- loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
- if (!loc1)
- report_newlocale_failure(collcollate);
- errno = 0;
- loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
- if (!loc)
- report_newlocale_failure(collctype);
-#else
-
- /*
- * XXX The _create_locale() API doesn't appear to support
- * this. Could perhaps be worked around by changing
- * pg_locale_t to contain two separate fields.
- */
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("collations with different collate and ctype values are not supported on this platform")));
-#endif
- }
-
- result.info.lt = loc;
+ make_libc_collator(collcollate, collctype, &result);
}
else if (collform->collprovider == COLLPROVIDER_ICU)
{
--
2.34.1
From 6c083830ae4c5be22801b5670866689e84eb0510 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Fri, 14 Jun 2024 15:13:59 -0700
Subject: [PATCH v4 3/5] Refactor collation cache.
Now that the result of pg_newlocale_from_collation() is always
non-NULL, move the collate_is_c and ctype_is_c flags into pg_locale_t,
and always use that.
This commit eliminates the multi-stage initialization of the cache and
the extra code in lc_collate_is_c() and lc_ctype_is_c(). It also makes
it safe to call pg_newlocale_from_collation() before checking
lc_collate_is_c() or lc_ctype_is_c().
---
src/backend/utils/adt/pg_locale.c | 180 +++++-------------------------
src/include/utils/pg_locale.h | 14 +++
2 files changed, 40 insertions(+), 154 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 598b42b1767..42d8bc5deda 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
typedef struct
{
Oid collid; /* hash key: pg_collation OID */
- bool collate_is_c; /* is collation's LC_COLLATE C? */
- bool ctype_is_c; /* is collation's LC_CTYPE C? */
- bool flags_valid; /* true if above flags are valid */
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
} collation_cache_entry;
@@ -1208,29 +1205,13 @@ IsoLocaleName(const char *winlocname)
/*
* Cache mechanism for collation information.
*
- * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
- * (or POSIX), so we can optimize a few code paths in various places.
- * For the built-in C and POSIX collations, we can know that without even
- * doing a cache lookup, but we want to support aliases for C/POSIX too.
- * For the "default" collation, there are separate static cache variables,
- * since consulting the pg_collation catalog doesn't tell us what we need.
- *
- * Also, if a pg_locale_t has been requested for a collation, we cache that
- * for the life of a backend.
- *
- * Note that some code relies on the flags not reporting false negatives
- * (that is, saying it's not C when it is). For example, char2wchar()
- * could fail if the locale is C, so str_tolower() shouldn't call it
- * in that case.
- *
* Note that we currently lack any way to flush the cache. Since we don't
* support ALTER COLLATION, this is OK. The worst case is that someone
* drops a collation, and a useless cache entry hangs around in existing
* backends.
*/
-
static collation_cache_entry *
-lookup_collation_cache(Oid collation, bool set_flags)
+lookup_collation_cache(Oid collation)
{
collation_cache_entry *cache_entry;
bool found;
@@ -1256,59 +1237,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
* Make sure cache entry is marked invalid, in case we fail before
* setting things.
*/
- cache_entry->flags_valid = false;
cache_entry->locale = 0;
}
- if (set_flags && !cache_entry->flags_valid)
- {
- /* Attempt to set the flags */
- HeapTuple tp;
- Form_pg_collation collform;
-
- tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
- if (!HeapTupleIsValid(tp))
- elog(ERROR, "cache lookup failed for collation %u", collation);
- collform = (Form_pg_collation) GETSTRUCT(tp);
-
- if (collform->collprovider == COLLPROVIDER_BUILTIN)
- {
- Datum datum;
- const char *colllocale;
-
- datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
- colllocale = TextDatumGetCString(datum);
-
- cache_entry->collate_is_c = true;
- cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
- }
- else if (collform->collprovider == COLLPROVIDER_LIBC)
- {
- Datum datum;
- const char *collcollate;
- const char *collctype;
-
- datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
- collcollate = TextDatumGetCString(datum);
- datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
- collctype = TextDatumGetCString(datum);
-
- cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
- (strcmp(collcollate, "POSIX") == 0));
- cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
- (strcmp(collctype, "POSIX") == 0));
- }
- else
- {
- cache_entry->collate_is_c = false;
- cache_entry->ctype_is_c = false;
- }
-
- cache_entry->flags_valid = true;
-
- ReleaseSysCache(tp);
- }
-
return cache_entry;
}
@@ -1326,47 +1257,6 @@ lc_collate_is_c(Oid collation)
if (!OidIsValid(collation))
return false;
- /*
- * If we're asked about the default collation, we have to inquire of the C
- * library. Cache the result so we only have to compute it once.
- */
- if (collation == DEFAULT_COLLATION_OID)
- {
- static int result = -1;
- const char *localeptr;
-
- if (result >= 0)
- return (bool) result;
-
- if (default_locale.provider == COLLPROVIDER_BUILTIN)
- {
- result = true;
- return (bool) result;
- }
- else if (default_locale.provider == COLLPROVIDER_ICU)
- {
- result = false;
- return (bool) result;
- }
- else if (default_locale.provider == COLLPROVIDER_LIBC)
- {
- localeptr = setlocale(LC_CTYPE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_CTYPE setting");
- }
- else
- elog(ERROR, "unexpected collation provider '%c'",
- default_locale.provider);
-
- if (strcmp(localeptr, "C") == 0)
- result = true;
- else if (strcmp(localeptr, "POSIX") == 0)
- result = true;
- else
- result = false;
- return (bool) result;
- }
-
/*
* If we're asked about the built-in C/POSIX collations, we know that.
*/
@@ -1377,7 +1267,7 @@ lc_collate_is_c(Oid collation)
/*
* Otherwise, we have to consult pg_collation, but we cache that.
*/
- return (lookup_collation_cache(collation, true))->collate_is_c;
+ return pg_newlocale_from_collation(collation)->collate_is_c;
}
/*
@@ -1393,46 +1283,6 @@ lc_ctype_is_c(Oid collation)
if (!OidIsValid(collation))
return false;
- /*
- * If we're asked about the default collation, we have to inquire of the C
- * library. Cache the result so we only have to compute it once.
- */
- if (collation == DEFAULT_COLLATION_OID)
- {
- static int result = -1;
- const char *localeptr;
-
- if (result >= 0)
- return (bool) result;
-
- if (default_locale.provider == COLLPROVIDER_BUILTIN)
- {
- localeptr = default_locale.info.builtin.locale;
- }
- else if (default_locale.provider == COLLPROVIDER_ICU)
- {
- result = false;
- return (bool) result;
- }
- else if (default_locale.provider == COLLPROVIDER_LIBC)
- {
- localeptr = setlocale(LC_CTYPE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_CTYPE setting");
- }
- else
- elog(ERROR, "unexpected collation provider '%c'",
- default_locale.provider);
-
- if (strcmp(localeptr, "C") == 0)
- result = true;
- else if (strcmp(localeptr, "POSIX") == 0)
- result = true;
- else
- result = false;
- return (bool) result;
- }
-
/*
* If we're asked about the built-in C/POSIX collations, we know that.
*/
@@ -1443,7 +1293,7 @@ lc_ctype_is_c(Oid collation)
/*
* Otherwise, we have to consult pg_collation, but we cache that.
*/
- return (lookup_collation_cache(collation, true))->ctype_is_c;
+ return pg_newlocale_from_collation(collation)->ctype_is_c;
}
/* simple subroutine for reporting errors from newlocale() */
@@ -1632,6 +1482,9 @@ init_database_collation(void)
builtin_validate_locale(dbform->encoding, datlocale);
+ default_locale.collate_is_c = true;
+ default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
+
default_locale.info.builtin.locale = MemoryContextStrdup(
TopMemoryContext, datlocale);
}
@@ -1643,6 +1496,9 @@ init_database_collation(void)
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
datlocale = TextDatumGetCString(datum);
+ default_locale.collate_is_c = false;
+ default_locale.ctype_is_c = false;
+
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
@@ -1663,6 +1519,11 @@ init_database_collation(void)
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
datctype = TextDatumGetCString(datum);
+ default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
+ (strcmp(datcollate, "POSIX") == 0);
+ default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
+ (strcmp(datctype, "POSIX") == 0);
+
make_libc_collator(datcollate, datctype, &default_locale);
}
@@ -1697,7 +1558,7 @@ pg_newlocale_from_collation(Oid collid)
if (collid == DEFAULT_COLLATION_OID)
return &default_locale;
- cache_entry = lookup_collation_cache(collid, false);
+ cache_entry = lookup_collation_cache(collid);
if (cache_entry->locale == 0)
{
@@ -1726,6 +1587,9 @@ pg_newlocale_from_collation(Oid collid)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
locstr = TextDatumGetCString(datum);
+ result.collate_is_c = true;
+ result.ctype_is_c = (strcmp(locstr, "C") == 0);
+
builtin_validate_locale(GetDatabaseEncoding(), locstr);
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
@@ -1741,6 +1605,11 @@ pg_newlocale_from_collation(Oid collid)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
collctype = TextDatumGetCString(datum);
+ result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
+ (strcmp(collcollate, "POSIX") == 0);
+ result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
+ (strcmp(collctype, "POSIX") == 0);
+
make_libc_collator(collcollate, collctype, &result);
}
else if (collform->collprovider == COLLPROVIDER_ICU)
@@ -1751,6 +1620,9 @@ pg_newlocale_from_collation(Oid collid)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
iculocstr = TextDatumGetCString(datum);
+ result.collate_is_c = false;
+ result.ctype_is_c = false;
+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 3e14a261b16..f41d33975be 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -69,11 +69,25 @@ extern void cache_locale_time(void);
/*
* We use a discriminated union to hold either a locale_t or an ICU collator.
* pg_locale_t is occasionally checked for truth, so make it a pointer.
+ *
+ * Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
+ * (or POSIX), so we can optimize a few code paths in various places. For the
+ * built-in C and POSIX collations, we can know that without even doing a
+ * cache lookup, but we want to support aliases for C/POSIX too. For the
+ * "default" collation, the flags are set when the database collation is
+ * initialized, since pg_collation doesn't tell us what we need.
+ *
+ * Note that some code relies on the flags not reporting false negatives
+ * (that is, saying it's not C when it is). For example, char2wchar()
+ * could fail if the locale is C, so str_tolower() shouldn't call it
+ * in that case.
*/
struct pg_locale_struct
{
char provider;
bool deterministic;
+ bool collate_is_c;
+ bool ctype_is_c;
union
{
struct
--
2.34.1
From 9327537087a1c5327cd524ffa2ad4d934b53f6d2 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 5 Jun 2024 14:48:07 -0700
Subject: [PATCH v4 4/5] ts_locale.c: do not use NULL to mean the database
collation.
Use pg_newlocale_from_collation(DEFAULT_COLLATION_OID) to explicitly
get the database collation.
---
src/backend/tsearch/ts_locale.c | 25 +++++++++++++++++++------
src/backend/tsearch/wparser_def.c | 4 +++-
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index bc44599de6a..2d5e3905b41 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -13,6 +13,7 @@
*/
#include "postgres.h"
+#include "catalog/pg_collation.h"
#include "common/string.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
@@ -36,7 +37,9 @@ t_isdigit(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
if (clen == 1 || database_ctype_is_c)
return isdigit(TOUCHAR(ptr));
@@ -51,7 +54,9 @@ t_isspace(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
if (clen == 1 || database_ctype_is_c)
return isspace(TOUCHAR(ptr));
@@ -66,7 +71,9 @@ t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
if (clen == 1 || database_ctype_is_c)
return isalpha(TOUCHAR(ptr));
@@ -81,7 +88,9 @@ t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
if (clen == 1 || database_ctype_is_c)
return isalnum(TOUCHAR(ptr));
@@ -96,7 +105,9 @@ t_isprint(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
if (clen == 1 || database_ctype_is_c)
return isprint(TOUCHAR(ptr));
@@ -266,7 +277,9 @@ char *
lowerstr_with_len(const char *str, int len)
{
char *out;
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
if (len == 0)
return pstrdup("");
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 3919ef27b57..03b4a08244f 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -17,6 +17,7 @@
#include <limits.h>
#include <wctype.h>
+#include "catalog/pg_collation.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -299,7 +300,8 @@ TParserInit(char *str, int len)
*/
if (prs->charmaxlen > 1)
{
- pg_locale_t mylocale = 0; /* TODO */
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
prs->usewide = true;
if (database_ctype_is_c)
--
2.34.1
From 5cdad53fc73e7de9d5c85eb50cd6ae27e0697698 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 5 Jun 2024 11:58:59 -0700
Subject: [PATCH v4 5/5] Remove support for null pg_locale_t.
Previously, passing NULL for pg_locale_t meant "use the libc provider
and the server environment". Now that the database collation is
represented as a proper pg_locale_t (not dependent on setlocale()),
remove special cases for NULL.
---
src/backend/access/hash/hashfunc.c | 10 +--
src/backend/regex/regc_pg_locale.c | 115 ++---------------------------
src/backend/utils/adt/formatting.c | 84 ++++++---------------
src/backend/utils/adt/like.c | 10 +--
src/backend/utils/adt/pg_locale.c | 77 ++++++-------------
src/backend/utils/adt/varchar.c | 10 +--
src/backend/utils/adt/varlena.c | 28 +++----
7 files changed, 69 insertions(+), 265 deletions(-)
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index ce8ee0ea2ef..d151751e185 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -268,7 +268,7 @@ hashtext(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Oid collid = PG_GET_COLLATION();
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -277,8 +277,7 @@ hashtext(PG_FUNCTION_ARGS)
errmsg("could not determine which collation to use for string hashing"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
@@ -322,7 +321,7 @@ hashtextextended(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Oid collid = PG_GET_COLLATION();
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -331,8 +330,7 @@ hashtextextended(PG_FUNCTION_ARGS)
errmsg("could not determine which collation to use for string hashing"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 85f3238eb07..dfea0e75ad7 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -67,8 +67,6 @@ typedef enum
{
PG_REGEX_LOCALE_C, /* C locale (encoding independent) */
PG_REGEX_BUILTIN, /* built-in Unicode semantics */
- PG_REGEX_LOCALE_WIDE, /* Use <wctype.h> functions */
- PG_REGEX_LOCALE_1BYTE, /* Use <ctype.h> functions */
PG_REGEX_LOCALE_WIDE_L, /* Use locale_t <wctype.h> functions */
PG_REGEX_LOCALE_1BYTE_L, /* Use locale_t <ctype.h> functions */
PG_REGEX_LOCALE_ICU, /* Use ICU uchar.h functions */
@@ -262,29 +260,19 @@ pg_set_regex_collation(Oid collation)
errmsg("nondeterministic collations are not supported for regular expressions")));
#ifdef USE_ICU
- if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
+ if (pg_regex_locale->provider == COLLPROVIDER_ICU)
pg_regex_strategy = PG_REGEX_LOCALE_ICU;
else
#endif
if (GetDatabaseEncoding() == PG_UTF8)
{
- if (pg_regex_locale)
- {
- if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
- pg_regex_strategy = PG_REGEX_BUILTIN;
- else
- pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
- }
+ if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
+ pg_regex_strategy = PG_REGEX_BUILTIN;
else
- pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
+ pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
}
else
- {
- if (pg_regex_locale)
- pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
- else
- pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
- }
+ pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
pg_regex_collation = collation;
}
@@ -300,13 +288,6 @@ pg_wc_isdigit(pg_wchar c)
(pg_char_properties[c] & PG_ISDIGIT));
case PG_REGEX_BUILTIN:
return pg_u_isdigit(c, true);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
@@ -334,13 +315,6 @@ pg_wc_isalpha(pg_wchar c)
(pg_char_properties[c] & PG_ISALPHA));
case PG_REGEX_BUILTIN:
return pg_u_isalpha(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
@@ -368,13 +342,6 @@ pg_wc_isalnum(pg_wchar c)
(pg_char_properties[c] & PG_ISALNUM));
case PG_REGEX_BUILTIN:
return pg_u_isalnum(c, true);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
@@ -411,13 +378,6 @@ pg_wc_isupper(pg_wchar c)
(pg_char_properties[c] & PG_ISUPPER));
case PG_REGEX_BUILTIN:
return pg_u_isupper(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isupper((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
@@ -445,13 +405,6 @@ pg_wc_islower(pg_wchar c)
(pg_char_properties[c] & PG_ISLOWER));
case PG_REGEX_BUILTIN:
return pg_u_islower(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- islower((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
@@ -479,13 +432,6 @@ pg_wc_isgraph(pg_wchar c)
(pg_char_properties[c] & PG_ISGRAPH));
case PG_REGEX_BUILTIN:
return pg_u_isgraph(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
@@ -513,13 +459,6 @@ pg_wc_isprint(pg_wchar c)
(pg_char_properties[c] & PG_ISPRINT));
case PG_REGEX_BUILTIN:
return pg_u_isprint(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isprint((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
@@ -547,13 +486,6 @@ pg_wc_ispunct(pg_wchar c)
(pg_char_properties[c] & PG_ISPUNCT));
case PG_REGEX_BUILTIN:
return pg_u_ispunct(c, true);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
@@ -581,13 +513,6 @@ pg_wc_isspace(pg_wchar c)
(pg_char_properties[c] & PG_ISSPACE));
case PG_REGEX_BUILTIN:
return pg_u_isspace(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isspace((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
@@ -616,20 +541,6 @@ pg_wc_toupper(pg_wchar c)
return c;
case PG_REGEX_BUILTIN:
return unicode_uppercase_simple(c);
- case PG_REGEX_LOCALE_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return toupper((unsigned char) c);
- return c;
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return towupper_l((wint_t) c, pg_regex_locale->info.lt);
@@ -658,20 +569,6 @@ pg_wc_tolower(pg_wchar c)
return c;
case PG_REGEX_BUILTIN:
return unicode_lowercase_simple(c);
- case PG_REGEX_LOCALE_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return tolower((unsigned char) c);
- return c;
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return towlower_l((wint_t) c, pg_regex_locale->info.lt);
@@ -825,11 +722,9 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
case PG_REGEX_BUILTIN:
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
break;
- case PG_REGEX_LOCALE_WIDE:
case PG_REGEX_LOCALE_WIDE_L:
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
break;
- case PG_REGEX_LOCALE_1BYTE:
case PG_REGEX_LOCALE_1BYTE_L:
#if MAX_SIMPLE_CHR >= UCHAR_MAX
max_chr = (pg_wchar) UCHAR_MAX;
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 8736ada4be2..68069fcfd3b 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1665,7 +1665,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
- if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar;
int32_t len_conv;
@@ -1681,7 +1681,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
}
else
#endif
- if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+ if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@@ -1710,7 +1710,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
}
else
{
- Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+ Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
@@ -1730,12 +1730,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
- if (mylocale)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towlower(workspace[curr_char]);
- }
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
/*
* Make result large enough; case change might change number
@@ -1761,12 +1756,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
- {
- if (mylocale)
- *p = tolower_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = pg_tolower((unsigned char) *p);
- }
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
}
}
}
@@ -1813,7 +1803,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
- if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
@@ -1829,7 +1819,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
}
else
#endif
- if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+ if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@@ -1858,7 +1848,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
}
else
{
- Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+ Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
@@ -1878,12 +1868,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
- if (mylocale)
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towupper(workspace[curr_char]);
- }
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
/*
* Make result large enough; case change might change number
@@ -1909,12 +1894,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
- {
- if (mylocale)
- *p = toupper_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = pg_toupper((unsigned char) *p);
- }
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
}
}
}
@@ -2003,7 +1983,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
- if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
@@ -2019,7 +1999,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
}
else
#endif
- if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+ if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@@ -2060,7 +2040,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
}
else
{
- Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+ Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
@@ -2081,22 +2061,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
- if (mylocale)
- {
- if (wasalnum)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
- wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
- }
+ if (wasalnum)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
- {
- if (wasalnum)
- workspace[curr_char] = towlower(workspace[curr_char]);
- else
- workspace[curr_char] = towupper(workspace[curr_char]);
- wasalnum = iswalnum(workspace[curr_char]);
- }
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
}
/*
@@ -2124,22 +2093,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
*/
for (p = result; *p; p++)
{
- if (mylocale)
- {
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = toupper_l((unsigned char) *p, mylocale->info.lt);
- wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
- }
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
else
- {
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
- else
- *p = pg_toupper((unsigned char) *p);
- wasalnum = isalnum((unsigned char) *p);
- }
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
}
}
}
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 57ead66b5aa..0ecc96d48e5 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -174,8 +174,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
*p;
int slen,
plen;
- pg_locale_t locale = 0;
- bool locale_is_c = false;
+ pg_locale_t locale;
if (!OidIsValid(collation))
{
@@ -189,10 +188,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
- if (lc_ctype_is_c(collation))
- locale_is_c = true;
- else
- locale = pg_newlocale_from_collation(collation);
+ locale = pg_newlocale_from_collation(collation);
if (!pg_locale_deterministic(locale))
ereport(ERROR,
@@ -228,7 +224,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
+ return SB_IMatchText(s, slen, p, plen, locale, locale->ctype_is_c);
}
}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 42d8bc5deda..2e6f624798f 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1154,7 +1154,7 @@ get_iso_localename(const char *winlocname)
char *hyphen;
/* Locale names use only ASCII, any conversion locale suffices. */
- rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
+ rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), default_locale);
if (rc == -1 || rc == sizeof(iso_lc_messages))
return NULL;
@@ -1449,11 +1449,7 @@ make_icu_collator(const char *iculocstr,
bool
pg_locale_deterministic(pg_locale_t locale)
{
- /* default locale must always be deterministic */
- if (locale == NULL)
- return true;
- else
- return locale->deterministic;
+ return locale->deterministic;
}
/*
@@ -1813,7 +1809,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
int r;
int result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
Assert(GetDatabaseEncoding() == PG_UTF8);
#ifndef WIN32
Assert(false);
@@ -1853,10 +1849,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
((LPWSTR) a2p)[r] = 0;
errno = 0;
- if (locale)
- result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
- else
- result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+ result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
ereport(ERROR,
(errmsg("could not compare Unicode strings: %m")));
@@ -1882,7 +1875,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
{
int result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
#ifdef WIN32
if (GetDatabaseEncoding() == PG_UTF8)
{
@@ -1893,10 +1886,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
}
else
#endif /* WIN32 */
- if (locale)
result = strcoll_l(arg1, arg2, locale->info.lt);
- else
- result = strcoll(arg1, arg2);
return result;
}
@@ -1918,7 +1908,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
char *arg2n;
int result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
#ifdef WIN32
/* check for this case before doing the work for nul-termination */
@@ -2064,7 +2054,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
{
int result;
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strcoll_libc(arg1, arg2, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2100,7 +2090,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
{
int result;
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2118,13 +2108,10 @@ static size_t
pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
pg_locale_t locale)
{
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
#ifdef TRUST_STRXFRM
- if (locale)
- return strxfrm_l(dest, src, destsize, locale->info.lt);
- else
- return strxfrm(dest, src, destsize);
+ return strxfrm_l(dest, src, destsize, locale->info.lt);
#else
/* shouldn't happen */
PGLOCALE_SUPPORT_ERROR(locale->provider);
@@ -2141,7 +2128,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
size_t bufsize = srclen + 1;
size_t result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
if (bufsize > TEXTBUFLEN)
buf = palloc(bufsize);
@@ -2313,7 +2300,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
bool
pg_strxfrm_enabled(pg_locale_t locale)
{
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
#ifdef TRUST_STRXFRM
return true;
#else
@@ -2347,7 +2334,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
{
size_t result = 0; /* keep compiler quiet */
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strxfrm_libc(dest, src, destsize, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2384,7 +2371,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
{
size_t result = 0; /* keep compiler quiet */
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2404,7 +2391,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
bool
pg_strxfrm_prefix_enabled(pg_locale_t locale)
{
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
return false;
else if (locale->provider == COLLPROVIDER_ICU)
return true;
@@ -2434,13 +2421,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
{
size_t result = 0; /* keep compiler quiet */
- if (!locale)
- PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
+ if (locale->provider == COLLPROVIDER_ICU)
result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
else
+#endif
PGLOCALE_SUPPORT_ERROR(locale->provider);
return result;
@@ -2469,13 +2454,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
{
size_t result = 0; /* keep compiler quiet */
- if (!locale)
- PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
+ if (locale->provider == COLLPROVIDER_ICU)
result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
else
+#endif
PGLOCALE_SUPPORT_ERROR(locale->provider);
return result;
@@ -3032,7 +3015,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
{
size_t result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
if (tolen == 0)
return 0;
@@ -3060,12 +3043,6 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
}
else
#endif /* WIN32 */
- if (locale == (pg_locale_t) 0)
- {
- /* Use wcstombs directly for the default locale */
- result = wcstombs(to, from, tolen);
- }
- else
{
/* Use wcstombs_l for nondefault locales */
result = wcstombs_l(to, from, tolen, locale->info.lt);
@@ -3089,7 +3066,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
{
size_t result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
if (tolen == 0)
return 0;
@@ -3122,16 +3099,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
/* mbstowcs requires ending '\0' */
char *str = pnstrdup(from, fromlen);
- if (locale == (pg_locale_t) 0)
- {
- /* Use mbstowcs directly for the default locale */
- result = mbstowcs(to, str, tolen);
- }
- else
- {
- /* Use mbstowcs_l for nondefault locales */
- result = mbstowcs_l(to, str, tolen, locale->info.lt);
- }
+ /* The default-locale (NULL) branch is gone; always use mbstowcs_l */
+ result = mbstowcs_l(to, str, tolen, locale->info.lt);
pfree(str);
}
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 02dfe219f54..829375cd1a3 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -999,7 +999,7 @@ hashbpchar(PG_FUNCTION_ARGS)
Oid collid = PG_GET_COLLATION();
char *keydata;
int keylen;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -1011,8 +1011,7 @@ hashbpchar(PG_FUNCTION_ARGS)
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
@@ -1054,7 +1053,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
Oid collid = PG_GET_COLLATION();
char *keydata;
int keylen;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -1066,8 +1065,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba0..52ab8c43c66 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1217,12 +1217,11 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
{
int len1 = VARSIZE_ANY_EXHDR(t1);
int len2 = VARSIZE_ANY_EXHDR(t2);
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
check_collation_set(collid);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (!pg_locale_deterministic(mylocale))
ereport(ERROR,
@@ -1619,18 +1618,14 @@ Datum
texteq(PG_FUNCTION_ARGS)
{
Oid collid = PG_GET_COLLATION();
- bool locale_is_c = false;
pg_locale_t mylocale = 0;
bool result;
check_collation_set(collid);
- if (lc_collate_is_c(collid))
- locale_is_c = true;
- else
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
- if (locale_is_c || pg_locale_deterministic(mylocale))
+ if (pg_locale_deterministic(mylocale))
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
@@ -1678,18 +1673,14 @@ Datum
textne(PG_FUNCTION_ARGS)
{
Oid collid = PG_GET_COLLATION();
- bool locale_is_c = false;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
bool result;
check_collation_set(collid);
- if (lc_collate_is_c(collid))
- locale_is_c = true;
- else
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
- if (locale_is_c || pg_locale_deterministic(mylocale))
+ if (pg_locale_deterministic(mylocale))
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
@@ -1793,15 +1784,14 @@ text_starts_with(PG_FUNCTION_ARGS)
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
Oid collid = PG_GET_COLLATION();
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
bool result;
Size len1,
len2;
check_collation_set(collid);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (!pg_locale_deterministic(mylocale))
ereport(ERROR,
--
2.34.1