Following up on my previous messages about optimizing eqjoinsel() and eqjoinsel_semi() for Var1 = Var2 clauses, I’d like to share detailed profiling results showing the effect of the patch on JOB for different values of default_statistics_target.

The first table shows the total planner time (summed over all 113 queries) before and after applying the patch, along with the speedup achieved:

default_statistics_target | Planner Speedup (×) | Planner Before (ms) | Planner After (ms)
--------------------------+---------------------+---------------------+--------------------
                     100  | *1.00x*       | 1828.433     |        1820.556
                    1000  | *1.12x*       | 2194.282     |        1963.110
                    2500  | *2.15x*       | 4606.705     |        2140.126
                    5000  | *6.37x*       | 16661.581     |        2616.109
                    7500  | *11.76x*       | 35988.569     |        3061.161                    10000  | *19.01x*       | 66616.620     |        3504.144


The second table shows the profiling of eqjoinsel() using *perf*, demonstrating that the function, which dominates planning at high statistics targets, becomes essentially negligible after the patch:

default_statistics_target | eqjoinsel() Before (perf) | eqjoinsel() After (perf)
--------------------------+---------------------------+--------------------------
                     100  |                     0.01% |                     0.04%                     1000  |                     6.23% |                     0.06%                     2500  |                    35.45% |                     0.23%                     5000  |                    66.14% |                     0.53%                     7500  |                    72.70% |                     0.97%                    10000  |                    75.42% |                     1.25%

I’ve attached v3 of the patch. This version adds a check for NULL values when comparing MCV entries, ensuring correctness in edge cases.

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC,
https://tantorlabs.com
From 65757822257e13b1e2a50bf86b9080ed8b86adec Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Wed, 3 Sep 2025 19:40:50 +0300
Subject: [PATCH v3] Optimize selectivity estimation for Var = Var clauses
 using merge-based MCV comparison

Previously, selectivity estimation for join and semi-join clauses of the form
Var = Var relied on a nested-loop comparison of MCV lists, resulting in O(N^2)
behavior with respect to the number of common values.

This patch introduces a merge-based strategy when the underlying type supports
BTREE ordering. In such cases, the MCVs are sorted using the datatype-specific
comparison operator, and selectivity is estimated via linear-time merge scan.
---
 src/backend/utils/adt/selfuncs.c    | 311 ++++++++++++++++++++++------
 src/backend/utils/cache/lsyscache.c |  61 ++++++
 src/include/utils/lsyscache.h       |   1 +
 src/include/utils/selfuncs.h        |   8 +
 4 files changed, 321 insertions(+), 60 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1c480cfaaf7..400c5fdcb9b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -217,6 +217,7 @@ static bool get_actual_variable_endpoint(Relation heapRel,
 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
 static double btcost_correlation(IndexOptInfo *index,
 								 VariableStatData *vardata);
+static int compare_mcv_items(const void *a, const void *b, void *arg);
 
 
 /*
@@ -2463,7 +2464,6 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
 		 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
 		 */
 		LOCAL_FCINFO(fcinfo, 2);
-		FmgrInfo	eqproc;
 		bool	   *hasmatch1;
 		bool	   *hasmatch2;
 		double		nullfrac1 = stats1->stanullfrac;
@@ -2479,52 +2479,139 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
 					totalsel2;
 		int			i,
 					nmatches;
-
-		fmgr_info(opfuncoid, &eqproc);
-
-		/*
-		 * Save a few cycles by setting up the fcinfo struct just once. Using
-		 * FunctionCallInvoke directly also avoids failure if the eqproc
-		 * returns NULL, though really equality functions should never do
-		 * that.
-		 */
-		InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
-								 NULL, NULL);
-		fcinfo->args[0].isnull = false;
-		fcinfo->args[1].isnull = false;
+		Oid			compare_func_oid = InvalidOid;
 
 		hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
 		hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
 
 		/*
 		 * Note we assume that each MCV will match at most one member of the
-		 * other MCV list.  If the operator isn't really equality, there could
-		 * be multiple matches --- but we don't look for them, both for speed
-		 * and because the math wouldn't add up...
+		 * other MCV list.
 		 */
 		matchprodfreq = 0.0;
 		nmatches = 0;
-		for (i = 0; i < sslot1->nvalues; i++)
+		if (get_comparison_op_for_sort(opfuncoid, &compare_func_oid))
 		{
-			int			j;
+			LOCAL_FCINFO(merge_fcinfo, 2);
+			FmgrInfo			cmpproc;
+			McvItem			   *mcvs1,
+							   *mcvs2;
+			int					ptr1,
+								ptr2;
+
+			fmgr_info(compare_func_oid, &cmpproc);
+
+			InitFunctionCallInfoData(*merge_fcinfo, &cmpproc, 2,
+										collation, NULL, NULL);
+
+			mcvs1 = (McvItem *) palloc(sslot1->nvalues * sizeof(McvItem));
+			mcvs2 = (McvItem *) palloc(sslot2->nvalues * sizeof(McvItem));
 
-			fcinfo->args[0].value = sslot1->values[i];
+			/*
+ 			 * Copy the MCV values and their frequencies into a temporary array
+ 			 * so we can safely sort them without altering the original statistics.
+ 			 */
+			for (i = 0; i < sslot1->nvalues; i++)
+			{
+				mcvs1[i].value = sslot1->values[i];
+				mcvs1[i].original_idx = i;
+				mcvs1[i].frequency = sslot1->numbers[i];
+			}
 
-			for (j = 0; j < sslot2->nvalues; j++)
+			for (i = 0; i < sslot2->nvalues; i++)
 			{
-				Datum		fresult;
+				mcvs2[i].value = sslot2->values[i];
+				mcvs2[i].original_idx = i;
+				mcvs2[i].frequency = sslot2->numbers[i];
+			}
 
-				if (hasmatch2[j])
-					continue;
-				fcinfo->args[1].value = sslot2->values[j];
-				fcinfo->isnull = false;
-				fresult = FunctionCallInvoke(fcinfo);
-				if (!fcinfo->isnull && DatumGetBool(fresult))
+			qsort_arg(mcvs1, sslot1->nvalues, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+			qsort_arg(mcvs2, sslot2->nvalues, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+
+			ptr1 = ptr2 = 0;
+			while (ptr1 < sslot1->nvalues && ptr2 < sslot2->nvalues)
+			{
+				McvItem *item1 = &mcvs1[ptr1];
+				McvItem *item2 = &mcvs2[ptr2];
+				Datum result_datum;
+
+				merge_fcinfo->args[0].value = item1->value;
+				merge_fcinfo->args[1].value = item2->value;
+				merge_fcinfo->isnull = false;
+
+				result_datum = FunctionCallInvoke(merge_fcinfo);
+				if (!merge_fcinfo->isnull)
 				{
-					hasmatch1[i] = hasmatch2[j] = true;
-					matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
-					nmatches++;
-					break;
+					int32 cmp_result = DatumGetInt32(result_datum);
+					if (cmp_result == 0)
+					{ 
+						hasmatch1[item1->original_idx] = true;
+						hasmatch2[item2->original_idx] = true;
+						
+						matchprodfreq += item1->frequency * item2->frequency;
+						nmatches++;
+
+						ptr1++;
+						ptr2++;
+					}
+					else if (cmp_result < 0)
+						ptr1++;
+					else
+						ptr2++;
+				}
+				else
+				{
+					ptr1++;
+					ptr2++;
+				}
+			}
+
+			pfree(mcvs1);
+			pfree(mcvs2);
+		}
+		else
+		{
+			FmgrInfo	eqproc;
+
+			fmgr_info(opfuncoid, &eqproc);
+			/*
+			* Save a few cycles by setting up the fcinfo struct just once. Using
+			* FunctionCallInvoke directly also avoids failure if the eqproc
+			* returns NULL, though really equality functions should never do
+			* that.
+			*/
+			InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+									NULL, NULL);
+			fcinfo->args[0].isnull = false;
+			fcinfo->args[1].isnull = false;
+
+			/*
+			 * If the operator isn't really equality, there could
+		 	 * be multiple matches --- but we don't look for them, both for speed
+		 	 * and because the math wouldn't add up...
+		 	 */
+			for (i = 0; i < sslot1->nvalues; i++)
+			{
+				int         j;
+
+				fcinfo->args[0].value = sslot1->values[i];
+
+				for (j = 0; j < sslot2->nvalues; j++)
+				{
+					Datum		fresult;
+
+					if (hasmatch2[j])
+						continue;
+					fcinfo->args[1].value = sslot2->values[j];
+					fcinfo->isnull = false;
+					fresult = FunctionCallInvoke(fcinfo);
+					if (!fcinfo->isnull && DatumGetBool(fresult))
+					{
+						hasmatch1[i] = hasmatch2[j] = true;
+						matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
+						nmatches++;
+						break;
+					}
 				}
 			}
 		}
@@ -2690,7 +2777,6 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
 		 * in a skewed distribution this gives us a big leg up in accuracy.
 		 */
 		LOCAL_FCINFO(fcinfo, 2);
-		FmgrInfo	eqproc;
 		bool	   *hasmatch1;
 		bool	   *hasmatch2;
 		double		nullfrac1 = stats1->stanullfrac;
@@ -2700,6 +2786,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
 		int			i,
 					nmatches,
 					clamped_nvalues2;
+		Oid			compare_func_oid = InvalidOid;
 
 		/*
 		 * The clamping above could have resulted in nd2 being less than
@@ -2710,19 +2797,6 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
 		 */
 		clamped_nvalues2 = Min(sslot2->nvalues, nd2);
 
-		fmgr_info(opfuncoid, &eqproc);
-
-		/*
-		 * Save a few cycles by setting up the fcinfo struct just once. Using
-		 * FunctionCallInvoke directly also avoids failure if the eqproc
-		 * returns NULL, though really equality functions should never do
-		 * that.
-		 */
-		InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
-								 NULL, NULL);
-		fcinfo->args[0].isnull = false;
-		fcinfo->args[1].isnull = false;
-
 		hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
 		hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
 
@@ -2733,26 +2807,120 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
 		 * and because the math wouldn't add up...
 		 */
 		nmatches = 0;
-		for (i = 0; i < sslot1->nvalues; i++)
+		if (get_comparison_op_for_sort(opfuncoid, &compare_func_oid))
 		{
-			int			j;
+			LOCAL_FCINFO(merge_fcinfo, 2);
+			FmgrInfo			cmpproc;
+			McvItem			   *mcvs1,
+							   *mcvs2;
+			int					ptr1,
+								ptr2;
+
+			fmgr_info(compare_func_oid, &cmpproc);
+			InitFunctionCallInfoData(*merge_fcinfo, &cmpproc, 2,
+										collation, NULL, NULL);
 
-			fcinfo->args[0].value = sslot1->values[i];
+			mcvs1 = (McvItem *) palloc(sslot1->nvalues * sizeof(McvItem));
+			mcvs2 = (McvItem *) palloc(clamped_nvalues2 * sizeof(McvItem));
 
-			for (j = 0; j < clamped_nvalues2; j++)
+			/*
+ 			 * Copy the MCV values and their frequencies into a temporary array
+ 			 * so we can safely sort them without altering the original statistics.
+ 			 */
+			for (i = 0; i < sslot1->nvalues; i++)
 			{
-				Datum		fresult;
+				mcvs1[i].value = sslot1->values[i];
+				mcvs1[i].original_idx = i;
+				mcvs1[i].frequency = sslot1->numbers[i];
+			}
 
-				if (hasmatch2[j])
-					continue;
-				fcinfo->args[1].value = sslot2->values[j];
-				fcinfo->isnull = false;
-				fresult = FunctionCallInvoke(fcinfo);
-				if (!fcinfo->isnull && DatumGetBool(fresult))
+			for (i = 0; i < clamped_nvalues2; i++)
+			{
+				mcvs2[i].value = sslot2->values[i];
+				mcvs2[i].original_idx = i;
+				mcvs2[i].frequency = sslot2->numbers[i];
+			}
+
+			qsort_arg(mcvs1, sslot1->nvalues, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+			qsort_arg(mcvs2, clamped_nvalues2, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+
+			ptr1 = ptr2 = 0;
+			while (ptr1 < sslot1->nvalues && ptr2 < clamped_nvalues2)
+			{
+				McvItem *item1 = &mcvs1[ptr1];
+				McvItem *item2 = &mcvs2[ptr2];
+				Datum result_datum;
+
+				merge_fcinfo->args[0].value = item1->value;
+				merge_fcinfo->args[1].value = item2->value;
+				merge_fcinfo->isnull = false;
+
+				result_datum = FunctionCallInvoke(merge_fcinfo);
+				if (!merge_fcinfo->isnull)
 				{
-					hasmatch1[i] = hasmatch2[j] = true;
-					nmatches++;
-					break;
+					int32 cmp_result = DatumGetInt32(result_datum);
+					if (cmp_result == 0)
+					{ 
+						hasmatch1[item1->original_idx] = hasmatch2[item2->original_idx] = true;
+						
+						nmatches++;
+
+						ptr1++;
+						ptr2++;
+					}
+					else if (cmp_result < 0)
+						ptr1++;
+					else
+						ptr2++;
+				}
+				else
+				{
+					ptr1++;
+					ptr2++;
+				}
+			}
+
+			pfree(mcvs1);
+			pfree(mcvs2);
+		}
+		else
+		{
+			FmgrInfo	eqproc;
+
+			fmgr_info(opfuncoid, &eqproc);
+			InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+								 NULL, NULL);
+
+			fcinfo->args[0].isnull = false;
+			fcinfo->args[1].isnull = false;
+
+			/*
+			* Save a few cycles by setting up the fcinfo struct just once. Using
+			* FunctionCallInvoke directly also avoids failure if the eqproc
+			* returns NULL, though really equality functions should never do
+			* that.
+			*/
+			for (i = 0; i < sslot1->nvalues; i++)
+			{
+				int			j;
+
+				fcinfo->args[0].value = sslot1->values[i];
+
+				for (j = 0; j < clamped_nvalues2; j++)
+				{
+					Datum		fresult;
+
+						if (hasmatch2[j])
+							continue;
+						fcinfo->args[1].value = sslot2->values[j];
+						fcinfo->isnull = false;
+						fresult = FunctionCallInvoke(fcinfo);
+						if (!fcinfo->isnull && DatumGetBool(fresult))
+						{
+							hasmatch1[i] = hasmatch2[j] = true;
+							nmatches++;
+							break;
+					}
 				}
 			}
 		}
@@ -8793,3 +8961,26 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 
 	*indexPages = index->pages;
 }
+
+/*
+ * Comparator for sorting MCV items
+ */
+static int
+compare_mcv_items(const void *a, const void *b, void *arg)
+{
+	FunctionCallInfo	fcinfo_comp = *((FunctionCallInfo *) arg);
+	Datum       		result_datum;
+
+    const McvItem *item1 = (const McvItem *) a;
+    const McvItem *item2 = (const McvItem *) b;
+
+    fcinfo_comp->args[0].value = item1->value;
+    fcinfo_comp->args[0].isnull = false;
+
+    fcinfo_comp->args[1].value = item2->value;
+    fcinfo_comp->args[1].isnull = false;
+
+    result_datum = FunctionCallInvoke(fcinfo_comp);
+
+    return DatumGetInt32(result_datum);
+}
\ No newline at end of file
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index fa7cd7e06a7..8d868f3a3f3 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -17,6 +17,7 @@
 
 #include "access/hash.h"
 #include "access/htup_details.h"
+#include "access/nbtree.h"
 #include "bootstrap/bootstrap.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_am.h"
@@ -40,6 +41,7 @@
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_transform.h"
 #include "catalog/pg_type.h"
+#include "commands/defrem.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "utils/array.h"
@@ -316,6 +318,65 @@ get_ordering_op_properties(Oid opno,
 	return result;
 }
 
+/*
+ * get_comparison_op_for_sort
+ *      Get the OID of a datatype-specific comparison operator
+ *      associated with a given equality operator.
+ *
+ * This is typically used in clauses like Var = Var, where both arguments
+ * are of the same type. We look up the corresponding BTORDER_PROC for
+ * the default B-tree opclass of the datatype.
+ *
+ * However, for special cases like array_eq and record_eq, a comparison
+ * operator may exist (btarraycmp, btrecordcmp), but sorting is only
+ * possible if all element or field types are themselves sortable.
+ * Rather than reimplement that logic here, we simply exclude such
+ * cases by checking for btarraycmp and btrecordcmp explicitly.
+ *
+ * Returns false if no suitable comparison operator is found.
+ */
+bool
+get_comparison_op_for_sort(Oid opno, Oid *cmpopno)
+{
+	bool		result = false;
+	HeapTuple   proctup;
+
+	proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(opno));
+	if (HeapTupleIsValid(proctup))
+	{
+		Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+		/* Only consider binary operators where both arguments are of the same type. */
+		if (procform->pronargs == 2 &&
+			OidIsValid(procform->proargtypes.values[0]) &&
+			OidIsValid(procform->proargtypes.values[1]) &&
+			procform->proargtypes.values[0] == procform->proargtypes.values[1])
+		{
+			Oid         type_oid = procform->proargtypes.values[0];
+			Oid         opclass_oid = GetDefaultOpClass(type_oid, BTREE_AM_OID);
+
+			if (OidIsValid(opclass_oid))
+			{
+				Oid	opfamily_oid = get_opclass_family(opclass_oid);
+
+				*cmpopno = get_opfamily_proc(opfamily_oid, type_oid, type_oid, BTORDER_PROC);
+
+				/*
+ 				 * Skip array_eq and record_eq: their comparison functions exist,
+ 				 * but sorting is only possible if all elements or fields are sortable.
+ 				 */
+				if (OidIsValid(*cmpopno) &&
+					*cmpopno != F_BTARRAYCMP &&
+					*cmpopno != F_BTRECORDCMP)
+					result = true;
+			}
+		}
+		ReleaseSysCache(proctup);
+	}
+
+	return result;
+}
+
 /*
  * get_equality_op_for_ordering_op
  *		Get the OID of the datatype-specific equality operator
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index c65cee4f24c..7e1188c04dd 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -80,6 +80,7 @@ extern Oid	get_opfamily_member_for_cmptype(Oid opfamily, Oid lefttype, Oid right
 extern bool get_ordering_op_properties(Oid opno,
 									   Oid *opfamily, Oid *opcintype, CompareType *cmptype);
 extern Oid	get_equality_op_for_ordering_op(Oid opno, bool *reverse);
+extern bool get_comparison_op_for_sort(Oid opno, Oid *cmpopno);
 extern Oid	get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type);
 extern List *get_mergejoin_opfamilies(Oid opno);
 extern bool get_compatible_hash_operators(Oid opno,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index fb4fa53363d..eb42f0df930 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -138,6 +138,14 @@ typedef struct
 	double		num_sa_scans;	/* # indexscans from ScalarArrayOpExprs */
 } GenericCosts;
 
+/* Represents an MCV statistic entry */
+typedef struct McvItem
+{
+	Datum       value;
+	int         original_idx;	/* The original index of the item in the array before sorting. */
+	double      frequency;		/* Frequency of value in statistics */
+} McvItem;
+
 /* Hooks for plugins to get control when we ask for stats */
 typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root,
 											  RangeTblEntry *rte,
-- 
2.34.1

Reply via email to