On 8/15/24 12:55 AM, Jeff Davis wrote:
This overlaps a bit with what Peter already proposed here:

https://www.postgresql.org/message-id/4f562d84-87f4-44dc-8946-01d6c437936f%40eisentraut.org

right?

Maybe I am missing something but not as far as I can see. It is related but I do not see any overlap.

0003-Remove-dubious-check-against-default-locale.patch

This patch removes a check against DEFAULT_COLLATION_OID which I
thought
looked really dubious. Shouldn't this just be a simple check for if
the
locale is deterministic? Since we know we have a valid locale that
should be enough, right?

Looks good to me. The code was correct in the sense that the default
collation is always deterministic, but (a) Peter is working on non-
deterministic default collations; and (b) it was a redundant check.

Suspected so.

I have attached a set of rebased patches with an added assert. Maybe I should start a new thread though.

Andreas
From 9715b6f429907a9f284284e697727bfc7b4082e7 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Wed, 14 Aug 2024 00:52:53 +0200
Subject: [PATCH v2 5/5] Remove pg_collate_deterministic() and check field
 directly

There is no clear benefit from using this thin accessor function,
espeically since other fields like collate_is_c and ctype_is_c are
accessed directly.
---
 src/backend/access/hash/hashfunc.c |  4 ++--
 src/backend/regex/regc_pg_locale.c |  2 +-
 src/backend/utils/adt/like.c       |  4 ++--
 src/backend/utils/adt/pg_locale.c  |  7 -------
 src/backend/utils/adt/varchar.c    |  8 ++++----
 src/backend/utils/adt/varlena.c    | 14 +++++++-------
 src/include/utils/pg_locale.h      |  1 -
 7 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 86f1adecb7..01bdf997e3 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -279,7 +279,7 @@ hashtext(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		result = hash_any((unsigned char *) VARDATA_ANY(key),
 						  VARSIZE_ANY_EXHDR(key));
@@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
 								   VARSIZE_ANY_EXHDR(key),
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 749ae6b4b2..1b8008a03e 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -254,7 +254,7 @@ pg_set_regex_collation(Oid collation)
 	{
 		pg_locale_t locale = pg_newlocale_from_collation(collation);
 
-		if (!pg_locale_deterministic(locale))
+		if (!locale->deterministic)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("nondeterministic collations are not supported for regular expressions")));
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index f87675d755..286b737fd7 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -151,7 +151,7 @@ GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation
 	{
 		pg_locale_t locale = pg_newlocale_from_collation(collation);
 
-		if (!pg_locale_deterministic(locale))
+		if (!locale->deterministic)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("nondeterministic collations are not supported for LIKE")));
@@ -188,7 +188,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 
 	locale = pg_newlocale_from_collation(collation);
 
-	if (!pg_locale_deterministic(locale))
+	if (!locale->deterministic)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondeterministic collations are not supported for ILIKE")));
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 3e37837b8d..6af78301b7 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1408,13 +1408,6 @@ make_icu_collator(const char *iculocstr,
 #endif							/* not USE_ICU */
 }
 
-
-bool
-pg_locale_deterministic(pg_locale_t locale)
-{
-	return locale->deterministic;
-}
-
 /*
  * Initialize default_locale with database locale settings.
  */
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 03e91c0333..8f86aca297 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -757,7 +757,7 @@ bpchareq(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
@@ -798,7 +798,7 @@ bpcharne(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
@@ -1005,7 +1005,7 @@ hashbpchar(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		result = hash_any((unsigned char *) keydata, keylen);
 	}
@@ -1061,7 +1061,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		result = hash_any_extended((unsigned char *) keydata, keylen,
 								   PG_GETARG_INT64(1));
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3a0f8ab6b5..fe8ac9a756 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1223,7 +1223,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (!pg_locale_deterministic(mylocale))
+	if (!mylocale->deterministic)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondeterministic collations are not supported for substring searches")));
@@ -1567,7 +1567,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 		result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
 
 		/* Break tie if necessary. */
-		if (result == 0 && pg_locale_deterministic(mylocale))
+		if (result == 0 && mylocale->deterministic)
 		{
 			result = memcmp(arg1, arg2, Min(len1, len2));
 			if ((result == 0) && (len1 != len2))
@@ -1618,7 +1618,7 @@ texteq(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		Datum		arg1 = PG_GETARG_DATUM(0);
 		Datum		arg2 = PG_GETARG_DATUM(1);
@@ -1673,7 +1673,7 @@ textne(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (mylocale->deterministic)
 	{
 		Datum		arg1 = PG_GETARG_DATUM(0);
 		Datum		arg2 = PG_GETARG_DATUM(1);
@@ -1786,7 +1786,7 @@ text_starts_with(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (!pg_locale_deterministic(mylocale))
+	if (!mylocale->deterministic)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondeterministic collations are not supported for substring searches")));
@@ -2198,7 +2198,7 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 	result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
 
 	/* Break tie if necessary. */
-	if (result == 0 && pg_locale_deterministic(sss->locale))
+	if (result == 0 && sss->locale->deterministic)
 		result = strcmp(sss->buf1, sss->buf2);
 
 	/* Cache result, perhaps saving an expensive strcoll() call next time */
@@ -2537,7 +2537,7 @@ btvarstrequalimage(PG_FUNCTION_ARGS)
 
 	locale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(locale))
+	if (locale->deterministic)
 		PG_RETURN_BOOL(true);
 	else
 		PG_RETURN_BOOL(false);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index ab1c37a44b..faae868bfc 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -108,7 +108,6 @@ extern void make_icu_collator(const char *iculocstr,
 							  const char *icurules,
 							  struct pg_locale_struct *resultp);
 
-extern bool pg_locale_deterministic(pg_locale_t locale);
 extern void init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
-- 
2.43.0

From e4742d692fbf8657e6710cd9d54173f890155249 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Wed, 14 Aug 2024 00:49:46 +0200
Subject: [PATCH v2 4/5] Do not check both for collate_is_c and deterministic

Since deterimistic will always be true if collate_is_c we can just
check for deterimistic and make the code cleaner to read.
---
 src/backend/utils/adt/varchar.c | 4 ++--
 src/backend/utils/adt/varlena.c | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index d2d1c5709d..03e91c0333 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -757,7 +757,7 @@ bpchareq(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (mylocale->collate_is_c || pg_locale_deterministic(mylocale))
+	if (pg_locale_deterministic(mylocale))
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
@@ -798,7 +798,7 @@ bpcharne(PG_FUNCTION_ARGS)
 
 	mylocale = pg_newlocale_from_collation(collid);
 
-	if (mylocale->collate_is_c || pg_locale_deterministic(mylocale))
+	if (pg_locale_deterministic(mylocale))
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 306f876e94..3a0f8ab6b5 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -2537,8 +2537,7 @@ btvarstrequalimage(PG_FUNCTION_ARGS)
 
 	locale = pg_newlocale_from_collation(collid);
 
-	if (locale->collate_is_c ||
-		pg_locale_deterministic(locale))
+	if (pg_locale_deterministic(locale))
 		PG_RETURN_BOOL(true);
 	else
 		PG_RETURN_BOOL(false);
-- 
2.43.0

From b79c102dc45ccf55dd4d2383624ae434c6c590e8 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Wed, 14 Aug 2024 00:47:08 +0200
Subject: [PATCH v2 3/5] Remove dubious check against default locale

---
 src/backend/utils/adt/varlena.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 6b664dfdd8..306f876e94 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -2538,7 +2538,6 @@ btvarstrequalimage(PG_FUNCTION_ARGS)
 	locale = pg_newlocale_from_collation(collid);
 
 	if (locale->collate_is_c ||
-		collid == DEFAULT_COLLATION_OID ||
 		pg_locale_deterministic(locale))
 		PG_RETURN_BOOL(true);
 	else
-- 
2.43.0

From b8df8e4dd7fcaa20c9beb7710f91c96142d87a71 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Wed, 14 Aug 2024 00:37:48 +0200
Subject: [PATCH v2 2/5] Remove lc_ctype_is_c()

Instead always fetch the locale and look at the ctype_is_c field.

Since regular expressions are used by pg_hba we need to hardcode
the logic for regular expression using C_COLLATE_OID.
---
 src/backend/regex/regc_pg_locale.c   | 21 +++++++++++++++------
 src/backend/utils/adt/formatting.c   | 27 ++++++++++++---------------
 src/backend/utils/adt/like.c         |  2 +-
 src/backend/utils/adt/like_support.c | 15 +++++----------
 src/backend/utils/adt/pg_locale.c    | 26 --------------------------
 src/include/catalog/pg_collation.dat |  3 +--
 src/include/utils/pg_locale.h        |  2 --
 7 files changed, 34 insertions(+), 62 deletions(-)

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 947d73f3e0..749ae6b4b2 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -243,41 +243,50 @@ pg_set_regex_collation(Oid collation)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
-	if (lc_ctype_is_c(collation))
+	if (collation == C_COLLATION_OID)
 	{
-		/* C/POSIX collations use this path regardless of database encoding */
+		/* Allow using regex in C locale without catalog */
 		pg_regex_strategy = PG_REGEX_LOCALE_C;
 		pg_regex_locale = 0;
 		pg_regex_collation = C_COLLATION_OID;
 	}
 	else
 	{
-		pg_regex_locale = pg_newlocale_from_collation(collation);
+		pg_locale_t locale = pg_newlocale_from_collation(collation);
 
-		if (!pg_locale_deterministic(pg_regex_locale))
+		if (!pg_locale_deterministic(locale))
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("nondeterministic collations are not supported for regular expressions")));
 
-		if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
+		if (locale->ctype_is_c)
+		{
+			/* C/POSIX collations use this path regardless of database encoding */
+			pg_regex_strategy = PG_REGEX_LOCALE_C;
+			locale = 0;
+			collation = C_COLLATION_OID;
+		}
+		else if (locale->provider == COLLPROVIDER_BUILTIN)
 		{
 			Assert(GetDatabaseEncoding() == PG_UTF8);
 			pg_regex_strategy = PG_REGEX_BUILTIN;
 		}
 #ifdef USE_ICU
-		else if (pg_regex_locale->provider == COLLPROVIDER_ICU)
+		else if (locale->provider == COLLPROVIDER_ICU)
 		{
 			pg_regex_strategy = PG_REGEX_LOCALE_ICU;
 		}
 #endif
 		else
 		{
+			Assert(pg_regex_locale->provider == COLLPROVIDER_LIBC);
 			if (GetDatabaseEncoding() == PG_UTF8)
 				pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
 			else
 				pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
 		}
 
+		pg_regex_locale = locale;
 		pg_regex_collation = collation;
 	}
 }
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 68069fcfd3..dc5178f56b 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1636,6 +1636,7 @@ char *
 str_tolower(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
+	pg_locale_t mylocale;
 
 	if (!buff)
 		return NULL;
@@ -1653,17 +1654,15 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	mylocale = pg_newlocale_from_collation(collid);
+
 	/* C/POSIX collations use this path regardless of database encoding */
-	if (lc_ctype_is_c(collid))
+	if (mylocale->ctype_is_c)
 	{
 		result = asc_tolower(buff, nbytes);
 	}
 	else
 	{
-		pg_locale_t mylocale;
-
-		mylocale = pg_newlocale_from_collation(collid);
-
 #ifdef USE_ICU
 		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
@@ -1774,6 +1773,7 @@ char *
 str_toupper(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
+	pg_locale_t mylocale;
 
 	if (!buff)
 		return NULL;
@@ -1791,17 +1791,15 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	mylocale = pg_newlocale_from_collation(collid);
+
 	/* C/POSIX collations use this path regardless of database encoding */
-	if (lc_ctype_is_c(collid))
+	if (mylocale->ctype_is_c)
 	{
 		result = asc_toupper(buff, nbytes);
 	}
 	else
 	{
-		pg_locale_t mylocale;
-
-		mylocale = pg_newlocale_from_collation(collid);
-
 #ifdef USE_ICU
 		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
@@ -1954,6 +1952,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
 	int			wasalnum = false;
+	pg_locale_t mylocale;
 
 	if (!buff)
 		return NULL;
@@ -1971,17 +1970,15 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	mylocale = pg_newlocale_from_collation(collid);
+
 	/* C/POSIX collations use this path regardless of database encoding */
-	if (lc_ctype_is_c(collid))
+	if (mylocale->ctype_is_c)
 	{
 		result = asc_initcap(buff, nbytes);
 	}
 	else
 	{
-		pg_locale_t mylocale;
-
-		mylocale = pg_newlocale_from_collation(collid);
-
 #ifdef USE_ICU
 		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 131616fa6b..f87675d755 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -147,7 +147,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
 static inline int
 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
 {
-	if (collation && !lc_ctype_is_c(collation))
+	if (collation)
 	{
 		pg_locale_t locale = pg_newlocale_from_collation(collation);
 
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index a579fa0157..fcfd2576b2 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -100,7 +100,7 @@ static Selectivity regex_selectivity(const char *patt, int pattlen,
 									 bool case_insensitive,
 									 int fixed_prefix_len);
 static int	pattern_char_isalpha(char c, bool is_multibyte,
-								 pg_locale_t locale, bool locale_is_c);
+								 pg_locale_t locale);
 static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
 								  Oid collation);
 static Datum string_to_datum(const char *str, Oid datatype);
@@ -1000,7 +1000,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 				match_pos;
 	bool		is_multibyte = (pg_database_encoding_max_length() > 1);
 	pg_locale_t locale = 0;
-	bool		locale_is_c = false;
 
 	/* the right-hand const is type text or bytea */
 	Assert(typeid == BYTEAOID || typeid == TEXTOID);
@@ -1024,11 +1023,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 					 errhint("Use the COLLATE clause to set the collation explicitly.")));
 		}
 
-		/* If case-insensitive, we need locale info */
-		if (lc_ctype_is_c(collation))
-			locale_is_c = true;
-		else
-			locale = pg_newlocale_from_collation(collation);
+		locale = pg_newlocale_from_collation(collation);
 	}
 
 	if (typeid != BYTEAOID)
@@ -1065,7 +1060,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 
 		/* Stop if case-varying character (it's sort of a wildcard) */
 		if (case_insensitive &&
-			pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+			pattern_char_isalpha(patt[pos], is_multibyte, locale))
 			break;
 
 		match[match_pos++] = patt[pos];
@@ -1499,9 +1494,9 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
  */
 static int
 pattern_char_isalpha(char c, bool is_multibyte,
-					 pg_locale_t locale, bool locale_is_c)
+					 pg_locale_t locale)
 {
-	if (locale_is_c)
+	if (locale->ctype_is_c)
 		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 	else if (is_multibyte && IS_HIGHBIT_SET(c))
 		return true;
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 4438b3817a..3e37837b8d 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1259,32 +1259,6 @@ lookup_collation_cache(Oid collation)
 	return cache_entry;
 }
 
-/*
- * Detect whether collation's LC_CTYPE property is C
- */
-bool
-lc_ctype_is_c(Oid collation)
-{
-	/*
-	 * If we're asked about "collation 0", return false, so that the code will
-	 * go into the non-C path and report that the collation is bogus.
-	 */
-	if (!OidIsValid(collation))
-		return false;
-
-	/*
-	 * If we're asked about the built-in C/POSIX collations, we know that.
-	 */
-	if (collation == C_COLLATION_OID ||
-		collation == POSIX_COLLATION_OID)
-		return true;
-
-	/*
-	 * Otherwise, we have to consult pg_collation, but we cache that.
-	 */
-	return pg_newlocale_from_collation(collation)->ctype_is_c;
-}
-
 /* simple subroutine for reporting errors from newlocale() */
 static void
 report_newlocale_failure(const char *localename)
diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat
index f126201276..af5c9aa582 100644
--- a/src/include/catalog/pg_collation.dat
+++ b/src/include/catalog/pg_collation.dat
@@ -19,8 +19,7 @@
   descr => 'standard C collation',
   collname => 'C', collprovider => 'c', collencoding => '-1',
   collcollate => 'C', collctype => 'C' },
-{ oid => '951', oid_symbol => 'POSIX_COLLATION_OID',
-  descr => 'standard POSIX collation',
+{ oid => '951', descr => 'standard POSIX collation',
   collname => 'POSIX', collprovider => 'c', collencoding => '-1',
   collcollate => 'POSIX', collctype => 'POSIX' },
 { oid => '962', descr => 'sorts by Unicode code point, C character semantics',
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 8ec24437f4..ab1c37a44b 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -54,8 +54,6 @@ extern PGDLLIMPORT bool database_ctype_is_c;
 extern bool check_locale(int category, const char *locale, char **canonname);
 extern char *pg_perm_setlocale(int category, const char *locale);
 
-extern bool lc_ctype_is_c(Oid collation);
-
 /*
  * Return the POSIX lconv struct (contains number/money formatting
  * information) with locale information for all categories.
-- 
2.43.0

From 849ab262a27017f5bbe1472b8f9888fbcab068f2 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Wed, 14 Aug 2024 00:37:23 +0200
Subject: [PATCH v2 1/5] Remove lc_collate_is_c()

Instead always look up the collation and check to collate_is_c field.

TODO: Check for performance regression?
---
 src/backend/access/spgist/spgtextproc.c |  2 +-
 src/backend/commands/collationcmds.c    |  8 ++-----
 src/backend/utils/adt/like_support.c    |  4 ++--
 src/backend/utils/adt/pg_locale.c       | 27 ---------------------
 src/backend/utils/adt/selfuncs.c        |  6 +++--
 src/backend/utils/adt/varchar.c         | 20 +++++-----------
 src/backend/utils/adt/varlena.c         | 32 ++++++++++++-------------
 src/include/utils/pg_locale.h           |  1 -
 8 files changed, 30 insertions(+), 70 deletions(-)

diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index 3f08d330b6..d5237a68b5 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -427,7 +427,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
 {
 	spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
 	spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
-	bool		collate_is_c = lc_collate_is_c(PG_GET_COLLATION());
+	bool		collate_is_c = pg_newlocale_from_collation(PG_GET_COLLATION())->collate_is_c;
 	text	   *reconstructedValue;
 	text	   *reconstrText;
 	int			maxReconstrLen;
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 63ef9a0841..53b6a479aa 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -377,13 +377,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 	if (!OidIsValid(newoid))
 		return InvalidObjectAddress;
 
-	/*
-	 * Check that the locales can be loaded.  NB: pg_newlocale_from_collation
-	 * is only supposed to be called on non-C-equivalent locales.
-	 */
+	/* Check that the locales can be loaded. */
 	CommandCounterIncrement();
-	if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid))
-		(void) pg_newlocale_from_collation(newoid);
+	(void) pg_newlocale_from_collation(newoid);
 
 	ObjectAddressSet(address, CollationRelationId, newoid);
 
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 2635050861..a579fa0157 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -433,7 +433,7 @@ match_pattern_prefix(Node *leftop,
 	 * collation.
 	 */
 	if (collation_aware &&
-		!lc_collate_is_c(indexcollation))
+		!pg_newlocale_from_collation(indexcollation)->collate_is_c)
 		return NIL;
 
 	/*
@@ -1603,7 +1603,7 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
 		else
 			workstr = TextDatumGetCString(str_const->constvalue);
 		len = strlen(workstr);
-		if (lc_collate_is_c(collation) || len == 0)
+		if (pg_newlocale_from_collation(collation)->collate_is_c || len == 0)
 			cmpstr = str_const->constvalue;
 		else
 		{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 643cca05d3..4438b3817a 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1259,33 +1259,6 @@ lookup_collation_cache(Oid collation)
 	return cache_entry;
 }
 
-
-/*
- * Detect whether collation's LC_COLLATE property is C
- */
-bool
-lc_collate_is_c(Oid collation)
-{
-	/*
-	 * If we're asked about "collation 0", return false, so that the code will
-	 * go into the non-C path and report that the collation is bogus.
-	 */
-	if (!OidIsValid(collation))
-		return false;
-
-	/*
-	 * If we're asked about the built-in C/POSIX collations, we know that.
-	 */
-	if (collation == C_COLLATION_OID ||
-		collation == POSIX_COLLATION_OID)
-		return true;
-
-	/*
-	 * Otherwise, we have to consult pg_collation, but we cache that.
-	 */
-	return pg_newlocale_from_collation(collation)->collate_is_c;
-}
-
 /*
  * Detect whether collation's LC_CTYPE property is C
  */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf42393bec..03d7fb5f48 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4646,6 +4646,7 @@ static char *
 convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 {
 	char	   *val;
+	pg_locale_t mylocale;
 
 	switch (typid)
 	{
@@ -4671,9 +4672,10 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 			return NULL;
 	}
 
-	if (!lc_collate_is_c(collid))
+	mylocale = pg_newlocale_from_collation(collid);
+
+	if (!mylocale->collate_is_c)
 	{
-		pg_locale_t mylocale = pg_newlocale_from_collation(collid);
 		char	   *xfrmstr;
 		size_t		xfrmlen;
 		size_t		xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 0c219dcc77..d2d1c5709d 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -748,20 +748,16 @@ bpchareq(PG_FUNCTION_ARGS)
 				len2;
 	bool		result;
 	Oid			collid = PG_GET_COLLATION();
-	bool		locale_is_c = false;
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 
 	check_collation_set(collid);
 
 	len1 = bcTruelen(arg1);
 	len2 = bcTruelen(arg2);
 
-	if (lc_collate_is_c(collid))
-		locale_is_c = true;
-	else
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
-	if (locale_is_c || pg_locale_deterministic(mylocale))
+	if (mylocale->collate_is_c || pg_locale_deterministic(mylocale))
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
@@ -793,20 +789,16 @@ bpcharne(PG_FUNCTION_ARGS)
 				len2;
 	bool		result;
 	Oid			collid = PG_GET_COLLATION();
-	bool		locale_is_c = false;
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 
 	check_collation_set(collid);
 
 	len1 = bcTruelen(arg1);
 	len2 = bcTruelen(arg2);
 
-	if (lc_collate_is_c(collid))
-		locale_is_c = true;
-	else
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
-	if (locale_is_c || pg_locale_deterministic(mylocale))
+	if (mylocale->collate_is_c || pg_locale_deterministic(mylocale))
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 7c6391a276..6b664dfdd8 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1538,10 +1538,13 @@ int
 varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 {
 	int			result;
+	pg_locale_t mylocale;
 
 	check_collation_set(collid);
 
-	if (lc_collate_is_c(collid))
+	mylocale = pg_newlocale_from_collation(collid);
+
+	if (mylocale->collate_is_c)
 	{
 		result = memcmp(arg1, arg2, Min(len1, len2));
 		if ((result == 0) && (len1 != len2))
@@ -1549,10 +1552,6 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 	}
 	else
 	{
-		pg_locale_t mylocale;
-
-		mylocale = pg_newlocale_from_collation(collid);
-
 		/*
 		 * memcmp() can't tell us which of two unequal strings sorts first,
 		 * but it's a cheap way to tell if they're equal.  Testing shows that
@@ -1859,10 +1858,12 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 	bool		abbreviate = ssup->abbreviate;
 	bool		collate_c = false;
 	VarStringSortSupport *sss;
-	pg_locale_t locale = 0;
+	pg_locale_t locale;
 
 	check_collation_set(collid);
 
+	locale = pg_newlocale_from_collation(collid);
+
 	/*
 	 * If possible, set ssup->comparator to a function which can be used to
 	 * directly compare two datums.  If we can do this, we'll avoid the
@@ -1876,7 +1877,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 	 * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
 	 * memcmp() rather than strcoll().
 	 */
-	if (lc_collate_is_c(collid))
+	if (locale->collate_is_c)
 	{
 		if (typid == BPCHAROID)
 			ssup->comparator = bpcharfastcmp_c;
@@ -1893,13 +1894,6 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 	}
 	else
 	{
-		/*
-		 * We need a collation-sensitive comparison.  To make things faster,
-		 * we'll figure out the collation based on the locale id and cache the
-		 * result.
-		 */
-		locale = pg_newlocale_from_collation(collid);
-
 		/*
 		 * We use varlenafastcmp_locale except for type NAME.
 		 */
@@ -1950,7 +1944,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 		sss->last_len2 = -1;
 		/* Initialize */
 		sss->last_returned = 0;
-		sss->locale = locale;
+		if (!collate_c)
+			sss->locale = locale;
 
 		/*
 		 * To avoid somehow confusing a strxfrm() blob and an original string,
@@ -2536,12 +2531,15 @@ btvarstrequalimage(PG_FUNCTION_ARGS)
 {
 	/* Oid		opcintype = PG_GETARG_OID(0); */
 	Oid			collid = PG_GET_COLLATION();
+	pg_locale_t locale;
 
 	check_collation_set(collid);
 
-	if (lc_collate_is_c(collid) ||
+	locale = pg_newlocale_from_collation(collid);
+
+	if (locale->collate_is_c ||
 		collid == DEFAULT_COLLATION_OID ||
-		get_collation_isdeterministic(collid))
+		pg_locale_deterministic(locale))
 		PG_RETURN_BOOL(true);
 	else
 		PG_RETURN_BOOL(false);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f41d33975b..8ec24437f4 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -54,7 +54,6 @@ extern PGDLLIMPORT bool database_ctype_is_c;
 extern bool check_locale(int category, const char *locale, char **canonname);
 extern char *pg_perm_setlocale(int category, const char *locale);
 
-extern bool lc_collate_is_c(Oid collation);
 extern bool lc_ctype_is_c(Oid collation);
 
 /*
-- 
2.43.0

Reply via email to