Hello,

During the review of using extended statistics to improve join estimates
[1], I found some code level optimization opportunities which apply to
existing code as well. To make the discussion easier, open this new
thread.

I don't know how to name the thread name, just use the patch 1 for the
naming.

commit d228d9734e70b4f43ad824d736fb1279d2aad5fc (HEAD -> misc_stats)
Author: yizhi.fzh <yizhi....@alibaba-inc.com>
Date:   Mon Apr 8 11:43:37 2024 +0800

    Fast path for empty clauselist in clauselist_selectivity_ext
    
    It should be common in the real life, for example:
    
    SELECT * FROM t1, t2 WHERE t1.a = t2.a AND t1.a > 3;
    
    clauses == NIL at the scan level of t2.
    
    This would also make some debug life happier.

commit e852ce631f9348d5d29c8a53090ee71f7253767c
Author: yizhi.fzh <yizhi....@alibaba-inc.com>
Date:   Mon Apr 8 11:13:57 2024 +0800

    Improve FunctionCall2Coll with FunctionCallInvoke
    
    If a FunctionCall2Coll is called multi times with the same FmgrInfo,
    turning it into FunctionCallInvoke will be helpful since the later one
    can use the shared FunctionCallInfo.
    
    There are many other places like GITSTATE which have the similar
    chances, but I'd see if such changes is necessary at the first
    place.


[1]
https://www.postgresql.org/message-id/c8c0ff31-3a8a-7562-bbd3-78b2ec65f16c%40enterprisedb.com
 

-- 
Best Regards
Andy Fan
>From e852ce631f9348d5d29c8a53090ee71f7253767c Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi....@alibaba-inc.com>
Date: Mon, 8 Apr 2024 11:13:57 +0800
Subject: [PATCH v1 1/2] Improve FunctionCall2Coll with FunctionCallInvoke

If a FunctionCall2Coll is called multi times with the same FmgrInfo,
turning it into FunctionCallInvoke will be helpful since the later one
can use the shared FunctionCallInfo.

There are many other places like GITSTATE which have the similar
chances, but I'd see if such changes is necessary at the first place.
---
 src/backend/utils/adt/selfuncs.c | 35 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 35f8f306ee..8dfeba5444 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -196,7 +196,7 @@ static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 							   Oid sortop, Oid collation,
 							   Datum *min, Datum *max);
 static void get_stats_slot_range(AttStatsSlot *sslot,
-								 Oid opfuncoid, FmgrInfo *opproc,
+								 FunctionCallInfo fcinfo,
 								 Oid collation, int16 typLen, bool typByVal,
 								 Datum *min, Datum *max, bool *p_have_data);
 static bool get_actual_variable_range(PlannerInfo *root,
@@ -5905,6 +5905,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 	bool		typByVal;
 	Oid			opfuncoid;
 	FmgrInfo	opproc;
+	LOCAL_FCINFO(fcinfo, 2);
 	AttStatsSlot sslot;
 
 	/*
@@ -5936,8 +5937,6 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 									   (opfuncoid = get_opcode(sortop))))
 		return false;
 
-	opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
-
 	get_typlenbyval(vardata->atttype, &typLen, &typByVal);
 
 	/*
@@ -5957,6 +5956,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 		free_attstatsslot(&sslot);
 	}
 
+	fmgr_info(opfuncoid, &opproc);
+	InitFunctionCallInfoData(*fcinfo, &opproc, 2, collation, NULL, NULL);
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].isnull = false;
+
 	/*
 	 * Otherwise, if there is a histogram with some other ordering, scan it
 	 * and get the min and max values according to the ordering we want.  This
@@ -5968,7 +5972,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
 						 ATTSTATSSLOT_VALUES))
 	{
-		get_stats_slot_range(&sslot, opfuncoid, &opproc,
+		get_stats_slot_range(&sslot, fcinfo,
 							 collation, typLen, typByVal,
 							 &tmin, &tmax, &have_data);
 		free_attstatsslot(&sslot);
@@ -6003,7 +6007,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 		}
 
 		if (use_mcvs)
-			get_stats_slot_range(&sslot, opfuncoid, &opproc,
+			get_stats_slot_range(&sslot, fcinfo,
 								 collation, typLen, typByVal,
 								 &tmin, &tmax, &have_data);
 		free_attstatsslot(&sslot);
@@ -6021,7 +6025,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
  * to what we find in the statistics array.
  */
 static void
-get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
+get_stats_slot_range(AttStatsSlot *sslot, FunctionCallInfo fcinfo,
 					 Oid collation, int16 typLen, bool typByVal,
 					 Datum *min, Datum *max, bool *p_have_data)
 {
@@ -6031,10 +6035,6 @@ get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
 	bool		found_tmin = false;
 	bool		found_tmax = false;
 
-	/* Look up the comparison function, if we didn't already do so */
-	if (opproc->fn_oid != opfuncoid)
-		fmgr_info(opfuncoid, opproc);
-
 	/* Scan all the slot's values */
 	for (int i = 0; i < sslot->nvalues; i++)
 	{
@@ -6045,16 +6045,19 @@ get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
 			*p_have_data = have_data = true;
 			continue;
 		}
-		if (DatumGetBool(FunctionCall2Coll(opproc,
-										   collation,
-										   sslot->values[i], tmin)))
+
+		fcinfo->args[0].value = sslot->values[i];
+		fcinfo->args[1].value = tmin;
+		if (DatumGetBool(FunctionCallInvoke(fcinfo)))
 		{
 			tmin = sslot->values[i];
 			found_tmin = true;
 		}
-		if (DatumGetBool(FunctionCall2Coll(opproc,
-										   collation,
-										   tmax, sslot->values[i])))
+
+		fcinfo->args[0].value = tmax;
+		fcinfo->args[1].value = sslot->values[i];
+
+		if (DatumGetBool(FunctionCallInvoke(fcinfo)))
 		{
 			tmax = sslot->values[i];
 			found_tmax = true;
-- 
2.34.1

>From d228d9734e70b4f43ad824d736fb1279d2aad5fc Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi....@alibaba-inc.com>
Date: Mon, 8 Apr 2024 11:43:37 +0800
Subject: [PATCH v1 2/2] Fast path for empty clauselist in
 clauselist_selectivity_ext

It should be common in the real life, for example:

SELECT * FROM t1, t2 WHERE t1.a = t2.a AND t1.a > 3;

clauses == NIL at the scan level of t2.

This would also make some debug life happier.
---
 src/backend/optimizer/path/clausesel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 0ab021c1e8..dff498d581 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -128,6 +128,9 @@ clauselist_selectivity_ext(PlannerInfo *root,
 	ListCell   *l;
 	int			listidx;
 
+	if (list_length(clauses) == 0)
+		return 1.0;
+
 	/*
 	 * If there's exactly one clause, just go directly to
 	 * clause_selectivity_ext(). None of what we might do below is relevant.
-- 
2.34.1

Reply via email to