> Attached are the patches rebased on latest master.
>
> I've removed the ASCII fast-path patch 0006 as it turned out to be more
> complicated to make work than expected.
>
> I kept the radix sort patch because it gives a decent speedup but I
> would like to focus for now on getting patches 0001 - 0004 merged.
> They're all simple and, the way I see it, uncontroversial.
>
> I remeasured the savings of 0001 - 0004, which comes on top of the
> already committed patch that inlined the comparison function, which gave
> another ~5%:
>
> Data set | Patched (ms) | Master (ms) | Speedup
> --------------------|--------------|--------------|----------
> movies(plot) | 8,058 | 10,311 | 1.27x
> lineitem(l_comment) | 223,233 | 256,986 | 1.19x
>
> I've also registered the change at the commit fest, see
> https://commitfest.postgresql.org/patch/6418/.
Attached is v5 that removes an incorrect assertion from the radix sort code.
--
David Geier
From 19988b78b4d3c5baff19277b09a99d08f3289d7e Mon Sep 17 00:00:00 2001
From: David Geier <[email protected]>
Date: Tue, 11 Nov 2025 13:18:59 +0100
Subject: [PATCH v5 5/5] Optimize generate_trgm() with radix sort
---
contrib/pg_trgm/trgm_op.c | 63 ++++++++++++++++++++++++++++++++-------
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 07daf111729..d09e319dda1 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -235,14 +235,6 @@ CMPTRGM_CHOOSE(const void *a, const void *b)
}
/* Define our specialized sort function name */
-#define ST_SORT trigram_qsort_signed
-#define ST_ELEMENT_TYPE_VOID
-#define ST_COMPARE(a, b) CMPTRGM_SIGNED(a, b)
-#define ST_SCOPE static
-#define ST_DEFINE
-#define ST_DECLARE
-#include "lib/sort_template.h"
-
#define ST_SORT trigram_qsort_unsigned
#define ST_ELEMENT_TYPE_VOID
#define ST_COMPARE(a, b) CMPTRGM_UNSIGNED(a, b)
@@ -564,6 +556,57 @@ generate_trgm_only(growable_trgm_array *dst, char *str, int slen, TrgmBound **bo
pfree(buf);
}
+/*
+ * Needed to properly handle negative numbers in case char is signed.
+ */
+static inline unsigned char FlipSign(char x)
+{
+ return x^0x80;
+}
+
+static void radix_sort_trigrams_signed(trgm *trg, int count)
+{
+ trgm *buffer = palloc_array(trgm, count);
+ trgm *starts[256];
+ trgm *from = trg;
+ trgm *to = buffer;
+ int freqs[3][256];
+
+ /*
+ * Compute frequencies to partition the buffer.
+ */
+ memset(freqs, 0, sizeof(freqs));
+
+ for (int i=0; i<count; i++)
+ for (int j=0; j<3; j++)
+ freqs[j][FlipSign(trg[i][j])]++;
+
+ /*
+ * Do the sorting. Start with last character because that's the is "LSB"
+ * in a trigram. Avoid unnecessary copies by ping-ponging between the buffers.
+ */
+ for (int i=2; i>=0; i--)
+ {
+ trgm *old_from = from;
+ trgm *next = to;
+
+ for (int j=0; j<256; j++)
+ {
+ starts[j] = next;
+ next += freqs[i][j];
+ }
+
+ for (int j=0; j<count; j++)
+ memcpy(starts[FlipSign(from[j][i])]++, from[j], sizeof(trgm));
+
+ from = to;
+ to = old_from;
+ }
+
+ memcpy(trg, buffer, sizeof(trgm) * count);
+ pfree(buffer);
+}
+
/*
* Make array of trigrams with sorting and removing duplicate items.
*
@@ -589,7 +632,7 @@ generate_trgm(char *str, int slen)
if (len > 1)
{
if (GetDefaultCharSignedness())
- trigram_qsort_signed((void *) GETARR(trg), len, sizeof(trgm));
+ radix_sort_trigrams_signed((trgm *)GETARR(trg), len);
else
trigram_qsort_unsigned((void *) GETARR(trg), len, sizeof(trgm));
@@ -1124,7 +1167,7 @@ generate_wildcard_trgm(const char *str, int slen)
if (len > 1)
{
if (GetDefaultCharSignedness())
- trigram_qsort_signed((void *) GETARR(trg), len, sizeof(trgm));
+ radix_sort_trigrams_signed((trgm *)GETARR(trg), len);
else
trigram_qsort_unsigned((void *) GETARR(trg), len, sizeof(trgm));
--
2.51.0
From 563dd153369a92be1170ba77f0475567c9223f9e Mon Sep 17 00:00:00 2001
From: David Geier <[email protected]>
Date: Wed, 12 Nov 2025 14:27:13 +0100
Subject: [PATCH v5 4/5] Faster qunique() comparator in generate_trgm()
---
contrib/pg_trgm/trgm_op.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 85df5ef2310..07daf111729 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -211,6 +211,14 @@ CMPTRGM_UNSIGNED(const void *a, const void *b)
: CMPPCHAR_UNS(a, b, 2));
}
+static inline int
+CMPTRGM_EQ(const void *a, const void *b)
+{
+ char *aa = (char *)a;
+ char *bb = (char *)b;
+ return aa[0] != bb[0] || aa[1] != bb[1] || aa[2] != bb[2] ? 1 : 0;
+}
+
/*
* This gets called on the first call. It replaces the function pointer so
* that subsequent calls are routed directly to the chosen implementation.
@@ -581,15 +589,11 @@ generate_trgm(char *str, int slen)
if (len > 1)
{
if (GetDefaultCharSignedness())
- {
trigram_qsort_signed((void *) GETARR(trg), len, sizeof(trgm));
- len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_SIGNED);
- }
else
- {
trigram_qsort_unsigned((void *) GETARR(trg), len, sizeof(trgm));
- len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_UNSIGNED);
- }
+
+ len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_EQ);
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
@@ -1120,15 +1124,11 @@ generate_wildcard_trgm(const char *str, int slen)
if (len > 1)
{
if (GetDefaultCharSignedness())
- {
trigram_qsort_signed((void *) GETARR(trg), len, sizeof(trgm));
- len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_SIGNED);
- }
else
- {
trigram_qsort_unsigned((void *) GETARR(trg), len, sizeof(trgm));
- len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_UNSIGNED);
- }
+
+ len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_EQ);
}
trg->flag = ARRKEY;
--
2.51.0
From eefe78c6a12a45eee7f787ea10a4692ec4b28b62 Mon Sep 17 00:00:00 2001
From: David Geier <[email protected]>
Date: Mon, 10 Nov 2025 15:40:11 +0100
Subject: [PATCH v5 3/5] Make btint4cmp() branchless
---
src/backend/access/nbtree/nbtcompare.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index 1d343377e98..ac16e3d993d 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -61,6 +61,7 @@
#include "utils/fmgrprotos.h"
#include "utils/skipsupport.h"
#include "utils/sortsupport.h"
+#include "common/int.h"
#ifdef STRESS_SORT_INT_MIN
#define A_LESS_THAN_B INT_MIN
@@ -203,12 +204,7 @@ btint4cmp(PG_FUNCTION_ARGS)
int32 a = PG_GETARG_INT32(0);
int32 b = PG_GETARG_INT32(1);
- if (a > b)
- PG_RETURN_INT32(A_GREATER_THAN_B);
- else if (a == b)
- PG_RETURN_INT32(0);
- else
- PG_RETURN_INT32(A_LESS_THAN_B);
+ PG_RETURN_INT32(pg_cmp_s32(a, b));
}
Datum
--
2.51.0
From 9492ca0439d44d9806f2f0d6128852e48dc105fe Mon Sep 17 00:00:00 2001
From: David Geier <[email protected]>
Date: Mon, 10 Nov 2025 13:35:11 +0100
Subject: [PATCH v5 2/5] Optimize generate_trgm() with sort_template.h
---
contrib/pg_trgm/trgm_op.c | 47 ++++++++++++++++++++++++++++++---------
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index ee89e548d16..85df5ef2310 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -226,6 +226,23 @@ CMPTRGM_CHOOSE(const void *a, const void *b)
return CMPTRGM(a, b);
}
+/* Define our specialized sort function name */
+#define ST_SORT trigram_qsort_signed
+#define ST_ELEMENT_TYPE_VOID
+#define ST_COMPARE(a, b) CMPTRGM_SIGNED(a, b)
+#define ST_SCOPE static
+#define ST_DEFINE
+#define ST_DECLARE
+#include "lib/sort_template.h"
+
+#define ST_SORT trigram_qsort_unsigned
+#define ST_ELEMENT_TYPE_VOID
+#define ST_COMPARE(a, b) CMPTRGM_UNSIGNED(a, b)
+#define ST_SCOPE static
+#define ST_DEFINE
+#define ST_DECLARE
+#include "lib/sort_template.h"
+
/*
* Deprecated function.
* Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
@@ -281,12 +298,6 @@ show_limit(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT4(similarity_threshold);
}
-static int
-comp_trgm(const void *a, const void *b)
-{
- return CMPTRGM(a, b);
-}
-
/*
* Finds first word in string, returns pointer to the word,
* endword points to the character after word
@@ -569,8 +580,16 @@ generate_trgm(char *str, int slen)
*/
if (len > 1)
{
- qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
- len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
+ if (GetDefaultCharSignedness())
+ {
+ trigram_qsort_signed((void *) GETARR(trg), len, sizeof(trgm));
+ len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_SIGNED);
+ }
+ else
+ {
+ trigram_qsort_unsigned((void *) GETARR(trg), len, sizeof(trgm));
+ len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_UNSIGNED);
+ }
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
@@ -1100,8 +1119,16 @@ generate_wildcard_trgm(const char *str, int slen)
len = arr.length;
if (len > 1)
{
- qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
- len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
+ if (GetDefaultCharSignedness())
+ {
+ trigram_qsort_signed((void *) GETARR(trg), len, sizeof(trgm));
+ len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_SIGNED);
+ }
+ else
+ {
+ trigram_qsort_unsigned((void *) GETARR(trg), len, sizeof(trgm));
+ len = qunique(GETARR(trg), len, sizeof(trgm), CMPTRGM_UNSIGNED);
+ }
}
trg->flag = ARRKEY;
--
2.51.0
From 7ab2b2af2d84b1a5e0e79d1ce9dbd4d7b5d4b921 Mon Sep 17 00:00:00 2001
From: David Geier <[email protected]>
Date: Wed, 14 Jan 2026 10:54:32 +0100
Subject: [PATCH v5 1/5] Optimize sort and deduplication in ginExtractEntries
---
src/backend/access/gin/ginutil.c | 152 +++++++++++++------------------
src/include/access/gin_private.h | 2 +-
2 files changed, 66 insertions(+), 88 deletions(-)
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index ff927279cc3..15f4a686795 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -28,6 +28,7 @@
#include "utils/index_selfuncs.h"
#include "utils/rel.h"
#include "utils/typcache.h"
+#include "lib/qunique.h"
/*
@@ -387,19 +388,6 @@ GinInitMetabuffer(Buffer b)
((char *) metadata + sizeof(GinMetaPageData)) - (char *) page;
}
-/*
- * Support for sorting key datums in ginExtractEntries
- *
- * Note: we only have to worry about null and not-null keys here;
- * ginExtractEntries never generates more than one placeholder null,
- * so it doesn't have to sort those.
- */
-typedef struct
-{
- Datum datum;
- bool isnull;
-} keyEntryData;
-
typedef struct
{
FmgrInfo *cmpDatumFunc;
@@ -410,24 +398,14 @@ typedef struct
static int
cmpEntries(const void *a, const void *b, void *arg)
{
- const keyEntryData *aa = (const keyEntryData *) a;
- const keyEntryData *bb = (const keyEntryData *) b;
+ const Datum *aa = (const Datum *) a;
+ const Datum *bb = (const Datum *) b;
cmpEntriesArg *data = (cmpEntriesArg *) arg;
int res;
- if (aa->isnull)
- {
- if (bb->isnull)
- res = 0; /* NULL "=" NULL */
- else
- res = 1; /* NULL ">" not-NULL */
- }
- else if (bb->isnull)
- res = -1; /* not-NULL "<" NULL */
- else
- res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
- data->collation,
- aa->datum, bb->datum));
+ res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
+ data->collation,
+ *aa, *bb));
/*
* Detect if we have any duplicates. If there are equal keys, qsort must
@@ -440,6 +418,14 @@ cmpEntries(const void *a, const void *b, void *arg)
return res;
}
+#define ST_SORT qsort_arg_entries
+#define ST_ELEMENT_TYPE Datum
+#define ST_COMPARE_ARG_TYPE cmpEntriesArg
+#define ST_COMPARE(a, b, arg) cmpEntries(a, b, arg)
+#define ST_SCOPE static
+#define ST_DEFINE
+#define ST_DECLARE
+#include "lib/sort_template.h"
/*
* Extract the index key values from an indexable item
@@ -450,11 +436,13 @@ cmpEntries(const void *a, const void *b, void *arg)
Datum *
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
Datum value, bool isNull,
- int32 *nentries, GinNullCategory **categories)
+ int32 *nentries_p, GinNullCategory **categories_p)
{
Datum *entries;
bool *nullFlags;
- int32 i;
+ GinNullCategory *categories;
+ bool hasNull;
+ int32 nentries;
/*
* We don't call the extractValueFn on a null item. Instead generate a
@@ -462,42 +450,60 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
*/
if (isNull)
{
- *nentries = 1;
+ *nentries_p = 1;
entries = palloc_object(Datum);
entries[0] = (Datum) 0;
- *categories = palloc_object(GinNullCategory);
- (*categories)[0] = GIN_CAT_NULL_ITEM;
+ *categories_p = palloc_object(GinNullCategory);
+ (*categories_p)[0] = GIN_CAT_NULL_ITEM;
return entries;
}
/* OK, call the opclass's extractValueFn */
nullFlags = NULL; /* in case extractValue doesn't set it */
+ nentries = 0;
entries = (Datum *)
DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
ginstate->supportCollation[attnum - 1],
value,
- PointerGetDatum(nentries),
+ PointerGetDatum(&nentries),
PointerGetDatum(&nullFlags)));
/*
* Generate a placeholder if the item contained no keys.
*/
- if (entries == NULL || *nentries <= 0)
+ if (entries == NULL || nentries <= 0)
{
- *nentries = 1;
+ *nentries_p = 1;
entries = palloc_object(Datum);
entries[0] = (Datum) 0;
- *categories = palloc_object(GinNullCategory);
- (*categories)[0] = GIN_CAT_EMPTY_ITEM;
+ *categories_p = palloc_object(GinNullCategory);
+ (*categories_p)[0] = GIN_CAT_EMPTY_ITEM;
return entries;
}
/*
- * If the extractValueFn didn't create a nullFlags array, create one,
- * assuming that everything's non-null.
+ * Scan the items for any NULLs. All NULLs are considered equal, so we
+ * just need to check and remember if there are any. We remove them from
+ * the array here, and if necessary, put back one NULL entry to represent
+ * them all after deduplication.
*/
- if (nullFlags == NULL)
- nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
+ hasNull = false;
+ if (nullFlags)
+ {
+ int32 numNonNulls = 0;
+
+ for (int32 i = 0; i < nentries; i++)
+ {
+ if (nullFlags[i])
+ hasNull = true;
+ else
+ {
+ entries[numNonNulls] = entries[i];
+ numNonNulls++;
+ }
+ }
+ nentries = numNonNulls;
+ }
/*
* If there's more than one key, sort and unique-ify.
@@ -506,63 +512,35 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
* pretty bad too. For small numbers of keys it'd likely be better to use
* a simple insertion sort.
*/
- if (*nentries > 1)
+ if (nentries > 1)
{
- keyEntryData *keydata;
cmpEntriesArg arg;
-
- keydata = palloc_array(keyEntryData, *nentries);
- for (i = 0; i < *nentries; i++)
- {
- keydata[i].datum = entries[i];
- keydata[i].isnull = nullFlags[i];
- }
-
arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
arg.collation = ginstate->supportCollation[attnum - 1];
arg.haveDups = false;
- qsort_arg(keydata, *nentries, sizeof(keyEntryData),
- cmpEntries, &arg);
- if (arg.haveDups)
- {
- /* there are duplicates, must get rid of 'em */
- int32 j;
-
- entries[0] = keydata[0].datum;
- nullFlags[0] = keydata[0].isnull;
- j = 1;
- for (i = 1; i < *nentries; i++)
- {
- if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
- {
- entries[j] = keydata[i].datum;
- nullFlags[j] = keydata[i].isnull;
- j++;
- }
- }
- *nentries = j;
- }
- else
- {
- /* easy, no duplicates */
- for (i = 0; i < *nentries; i++)
- {
- entries[i] = keydata[i].datum;
- nullFlags[i] = keydata[i].isnull;
- }
- }
+ qsort_arg_entries(entries, nentries, &arg);
- pfree(keydata);
+ if (arg.haveDups)
+ nentries = qunique_arg(entries, nentries, sizeof(Datum), cmpEntries, &arg);
}
/*
- * Create GinNullCategory representation from nullFlags.
+ * Create GinNullCategory representation.
*/
- *categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory));
- for (i = 0; i < *nentries; i++)
- (*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY);
+ StaticAssertStmt(GIN_CAT_NORM_KEY == 0, "Assuming GIN_CAT_NORM_KEY=0");
+ categories = palloc0_array(GinNullCategory, nentries + (hasNull ? 1 : 0));
+
+ /* Put back a NULL entry, if there were any */
+ if (hasNull)
+ {
+ entries[nentries] = (Datum) 0;
+ categories[nentries] = GIN_CAT_NULL_KEY;
+ nentries++;
+ }
+ *nentries_p = nentries;
+ *categories_p = categories;
return entries;
}
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 7c3b4db94cd..c878546b9d2 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -99,7 +99,7 @@ extern void GinInitPage(Page page, uint32 f, Size pageSize);
extern void GinInitMetabuffer(Buffer b);
extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
Datum value, bool isNull,
- int32 *nentries, GinNullCategory **categories);
+ int32 *nentries_p, GinNullCategory **categories_p);
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
--
2.51.0