rebase and refactor format for cfbot
вт, 30 июн. 2026 г. в 14:26, prankware <[email protected]>:
>
> Hi everyone,
>
> The planner ignores column statistics when an equality has a COALESCE
> expression on one side. For a clause like COALESCE(a, b) = $1, or a join on
> COALESCE(t1.a, t1.b) = COALESCE(t2.c, t2.d), there are no statistics on the
> COALESCE node itself, so eqsel() and eqjoinsel() return the generic 0.005
> estimate while the per-column stats for a, b, c and d sit unused. The only way
> around this today is an expression index or extended statistics on that exact
> expression, which doesn't scale across many different COALESCE clauses.
> estimate_hash_bucket_stats() has the same gap: a COALESCE hash key gets a
> default ndistinct and therefore a default bucket size. Since these expressions
> are common in joins and filters over nullable or fallback columns, the default
> estimate can be far enough off to flip the join order or join method.
>
> The idea is to estimate straight from the existing per-column stats, with no
> extra statistics object. COALESCE(arg_1, ..., arg_n) returns arg_i only when
> arg_1 .. arg_{i-1} are all NULL, so the chance of reaching branch i is the
> product of stanullfrac over the earlier branches. Selectivity of
> COALESCE(l_1..l_M) = COALESCE(r_1..r_N) is then the sum over branch pairs of
> P(reach l_i) * P(reach r_j) * sel(l_i = r_j), and each sel(l_i = r_j) is a
> recursive call back into eqsel()/eqjoinsel(). A non-COALESCE side is treated
> as
> a one-branch list, so scalar COALESCE(a, b) = const falls out of the same
> code,
> and the same decomposition feeds estimate_hash_bucket_stats(). If any branch
> is
> missing stats, the code bails and today's behavior is unchanged.
>
> A minimal example:
>
> CREATE TABLE t (a int, b int);
> INSERT INTO t
> SELECT CASE WHEN i % 5 < 2 THEN NULL ELSE i END, i
> FROM generate_series(1, 1000) i;
> ANALYZE t;
>
> EXPLAIN SELECT * FROM t WHERE COALESCE(a, b) = 42;
>
> After ANALYZE, a is NULL in 400 of the 1000 rows (stanullfrac 0.4, ndistinct
> 600) and b has no NULLs (stanullfrac 0, ndistinct 1000). COALESCE(a, b) is
> unique, so exactly one row matches. Each branch is weighted by the probability
> of reaching it, the product of stanullfrac over the branches before it:
>
> branch a: reach 1.0, sel(a = 42) = (1 - 0.4) / 600 = 0.001
> branch b: reach 0.4, sel(b = 42) = (1 - 0.0) / 1000 = 0.001
>
> selectivity = 1.0 * 0.001 + 0.4 * 0.001 = 0.0014 -> ~1 row out of 1000
>
> The patch lands on ~1 row, matching reality. The 0.005 default (5 rows) is not
> derived from the table at all: it is the 1/DEFAULT_NUM_DISTINCT constant the
> planner falls back to with no statistics on the expression, so it stays 5 rows
> regardless of the null fraction or the ndistinct of a and b.
>
> Feedback is welcome.
>
> Regards,
> Egor Savelev,
> Tantor Labs LLC,
> https://tantorlabs.com/
From 7f72f2575e7e132d0ed21a45f8306c9685f682b6 Mon Sep 17 00:00:00 2001
From: prankware <[email protected]>
Date: Tue, 30 Jun 2026 16:45:14 +0300
Subject: [PATCH v1] Coalesce eqsel eqjoinsel
---
src/backend/utils/adt/selfuncs.c | 464 +++++++++++++++++++++++++++++--
1 file changed, 434 insertions(+), 30 deletions(-)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index cbc70fde716..eb38fa724f0 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -302,6 +302,278 @@ eqsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
}
+/*
+ * Extract the estimable branches of a CoalesceExpr. Strips RelabelType,
+ * skips NULL-constant branches, and stops at the first non-NULL Const
+ * (setting *terminates_with_const). Returns false if the expression
+ * is constant or no branches remain.
+ */
+static bool
+match_coalesce_join_side(Node *side,
+ List **stripped_args,
+ bool *terminates_with_const)
+{
+ CoalesceExpr *c;
+ ListCell *lc;
+ List *result = NIL;
+
+ *terminates_with_const = false;
+ *stripped_args = NIL;
+
+ if (side == NULL || !IsA(side, CoalesceExpr))
+ return false;
+
+ c = (CoalesceExpr *) side;
+
+ if (list_length(c->args) < 2)
+ return false;
+
+ foreach(lc, c->args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ /* strip RelabelType */
+ while (arg && IsA(arg, RelabelType))
+ arg = (Node *) ((RelabelType *) arg)->arg;
+
+ /* leading Const makes COALESCE itself constant */
+ if (arg == NULL || (lc == list_head(c->args) && IsA(arg, Const)))
+ {
+ list_free(result);
+ return false;
+ }
+
+ if (IsA(arg, Const))
+ {
+ if (((Const *) arg)->consttype != c->coalescetype)
+ {
+ list_free(result);
+ return false;
+ }
+
+ /* NULL Const is skipped */
+ if (!((Const *) arg)->constisnull)
+ {
+ result = lappend(result, arg);
+ *terminates_with_const = true;
+ break;
+ }
+ }
+ else
+ {
+ if (exprType(arg) != c->coalescetype)
+ {
+ list_free(result);
+ return false;
+ }
+ result = lappend(result, arg);
+ }
+ }
+
+ if (result == NIL)
+ return false;
+
+ *stripped_args = result;
+ return true;
+}
+
+/*
+ * Fill prefix_probs[i] = Prod_{j<i} stanullfrac(args[j]) for a stripped
+ * COALESCE arg list. Returns false if any non-Const arg lacks statistics.
+ */
+static bool
+get_coalesce_prefix_probs(PlannerInfo *root, List *args, double *prefix_probs)
+{
+ ListCell *lc;
+ int i = 0;
+ double prefix = 1.0;
+
+ foreach(lc, args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ prefix_probs[i++] = prefix;
+
+ /* nothing to fold for the last arg or a Const */
+ if (lnext(args, lc) == NULL || IsA(arg, Const))
+ continue;
+
+ {
+ VariableStatData vd;
+ double p_i;
+
+ examine_variable(root, arg, 0, &vd);
+ if (!HeapTupleIsValid(vd.statsTuple))
+ {
+ ReleaseVariableStats(vd);
+ return false;
+ }
+ p_i = ((Form_pg_statistic) GETSTRUCT(vd.statsTuple))->stanullfrac;
+ ReleaseVariableStats(vd);
+
+ if (p_i < 0.0 || p_i > 1.0)
+ return false;
+
+ prefix *= p_i;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Common guts of eqsel/eqjoinsel when one or both operands are CoalesceExpr.
+ * Computes:
+ *
+ * sel(COALESCE(l_1,...,l_M) = COALESCE(r_1,...,r_N)) =
+ * Sum_{i,j} P_L(reach i) * P_R(reach j) * sel(l_i = r_j)
+ *
+ * where P(reach i) = Prod_{j<i} stanullfrac(arg_j). A non-COALESCE operand
+ * is treated as a single-element list.
+ */
+static bool
+try_coalesce_eq(PG_FUNCTION_ARGS, double *selec_out)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ bool is_eqjoin = (fcinfo->flinfo != NULL &&
+ fcinfo->flinfo->fn_oid == F_EQJOINSEL);
+ SpecialJoinInfo *sjinfo = is_eqjoin ?
+ (SpecialJoinInfo *) PG_GETARG_POINTER(4) : NULL;
+ int eqsel_varRelid = is_eqjoin ? 0 : PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
+ Node *left;
+ Node *right;
+ List *left_args = NIL;
+ List *right_args = NIL;
+ bool left_is_coalesce;
+ bool right_is_coalesce;
+ bool left_term_const = false;
+ bool right_term_const = false;
+ double *left_prefix;
+ double *right_prefix;
+ double acc_selec = 0.0;
+ ListCell *llc;
+ int li;
+
+ if (list_length(args) != 2)
+ return false;
+
+ left = (Node *) linitial(args);
+ right = (Node *) lsecond(args);
+
+ left_is_coalesce = match_coalesce_join_side(left, &left_args,
+ &left_term_const);
+ right_is_coalesce = match_coalesce_join_side(right, &right_args,
+ &right_term_const);
+
+ if (!left_is_coalesce && !right_is_coalesce)
+ return false;
+
+ /* one side is a CoalesceExpr that match_coalesce_join_side rejected */
+ if ((left_is_coalesce && !right_is_coalesce && IsA(right, CoalesceExpr)) ||
+ (right_is_coalesce && !left_is_coalesce && IsA(left, CoalesceExpr)))
+ {
+ list_free(left_args);
+ list_free(right_args);
+ return false;
+ }
+
+ if (!left_is_coalesce)
+ {
+ left_args = list_make1(left);
+ left_term_const = IsA(left, Const);
+ }
+ if (!right_is_coalesce)
+ {
+ right_args = list_make1(right);
+ right_term_const = IsA(right, Const);
+ }
+
+ if (is_eqjoin && left_term_const && right_term_const)
+ {
+ list_free(left_args);
+ list_free(right_args);
+ return false;
+ }
+
+ left_prefix = (double *) palloc(sizeof(double) * list_length(left_args));
+ right_prefix = (double *) palloc(sizeof(double) * list_length(right_args));
+
+ if (!get_coalesce_prefix_probs(root, left_args, left_prefix) ||
+ !get_coalesce_prefix_probs(root, right_args, right_prefix))
+ {
+ pfree(left_prefix);
+ pfree(right_prefix);
+ list_free(left_args);
+ list_free(right_args);
+ return false;
+ }
+
+ li = 0;
+ foreach(llc, left_args)
+ {
+ Node *larg = (Node *) lfirst(llc);
+ bool lconst = IsA(larg, Const);
+ ListCell *rlc;
+ int ri = 0;
+
+ if (left_prefix[li] < 1.0e-12)
+ break;
+
+ foreach(rlc, right_args)
+ {
+ Node *rarg = (Node *) lfirst(rlc);
+ bool rconst = IsA(rarg, Const);
+ List *sub_args;
+ Selectivity contrib;
+
+ if (right_prefix[ri] < 1.0e-12)
+ break;
+
+ sub_args = list_make2(copyObject(larg), copyObject(rarg));
+
+ if (!is_eqjoin || lconst || rconst)
+ {
+ contrib = DatumGetFloat8(DirectFunctionCall4Coll(eqsel,
+ collation,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(sub_args),
+ Int32GetDatum(eqsel_varRelid)));
+ }
+ else
+ {
+ contrib = DatumGetFloat8(DirectFunctionCall5Coll(eqjoinsel,
+ collation,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(sub_args),
+ Int16GetDatum(JOIN_INNER),
+ PointerGetDatum(sjinfo)));
+ }
+
+ CLAMP_PROBABILITY(contrib);
+ acc_selec += left_prefix[li] * right_prefix[ri] * contrib;
+
+ list_free(sub_args);
+ ri++;
+ }
+
+ li++;
+ }
+
+ pfree(left_prefix);
+ pfree(right_prefix);
+ list_free(left_args);
+ list_free(right_args);
+
+ CLAMP_PROBABILITY(acc_selec);
+ *selec_out = acc_selec;
+ return true;
+}
+
/*
* Common code for eqsel() and neqsel()
*/
@@ -318,6 +590,9 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
bool varonleft;
double selec;
+ if (try_coalesce_eq(fcinfo, &selec))
+ return selec;
+
/*
* When asked about <>, we do the estimation using the corresponding =
* operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
@@ -2418,6 +2693,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
bool join_is_reversed;
RelOptInfo *inner_rel;
+ if (try_coalesce_eq(fcinfo, &selec))
+ PG_RETURN_FLOAT8((float8) selec);
+
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
@@ -4374,6 +4652,154 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
return otherclauses;
}
+/*
+ * Most-common-value frequency for vardata. Falls back to 1/ntuples when
+ * only a histogram slot is present. Returns 0.0 if no statistics are
+ * available.
+ */
+static void
+get_variable_mcv_freq(VariableStatData *vardata, Selectivity *mcv_freq)
+{
+ AttStatsSlot sslot;
+
+ *mcv_freq = 0.0;
+
+ if (!HeapTupleIsValid(vardata->statsTuple))
+ return;
+
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ if (sslot.nnumbers > 0)
+ *mcv_freq = sslot.numbers[0];
+ free_attstatsslot(&sslot);
+ }
+ else if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ 0))
+ {
+ /* no MCVs but histogram present: column is likely unique */
+ if (vardata->rel && vardata->rel->tuples > 0)
+ *mcv_freq = 1.0 / vardata->rel->tuples;
+ }
+}
+
+/*
+ * Estimate bucket stats for a CoalesceExpr hashkey when examine_variable()
+ * returned a default ndistinct. Uses per-branch ndistinct and mcv_freq,
+ * weighted by null fall-through probability.
+ */
+static bool
+hash_bucket_stats_coalesce_dispatch(PlannerInfo *root,
+ Node *hashkey,
+ double nbuckets,
+ Selectivity *mcv_freq,
+ Selectivity *bucketsize_frac)
+{
+ List *stripped_args;
+ bool terminates_with_const;
+ double *prefix;
+ int nargs;
+ int i;
+ ListCell *lc;
+ double nd_mix = 0.0;
+ Selectivity mcv_mix = 0.0;
+ double rel_rows_proxy = 0.0;
+ double rel_tuples_proxy = 0.0;
+ double estfract;
+
+ if (!match_coalesce_join_side(hashkey, &stripped_args, &terminates_with_const))
+ return false;
+
+ nargs = list_length(stripped_args);
+ prefix = (double *) palloc(sizeof(double) * nargs);
+
+ if (!get_coalesce_prefix_probs(root, stripped_args, prefix))
+ {
+ pfree(prefix);
+ list_free(stripped_args);
+ return false;
+ }
+
+ i = 0;
+ foreach(lc, stripped_args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ if (IsA(arg, Const))
+ {
+ mcv_mix = Max(mcv_mix, prefix[i]);
+ if (prefix[i] > 0.0)
+ nd_mix += 1.0;
+ }
+ else
+ {
+ VariableStatData vd;
+ double nd_i;
+ bool isdefault;
+ Selectivity mcv_i;
+
+ examine_variable(root, arg, 0, &vd);
+ nd_i = get_variable_numdistinct(&vd, &isdefault);
+
+ if (isdefault)
+ {
+ ReleaseVariableStats(vd);
+ pfree(prefix);
+ list_free(stripped_args);
+ return false;
+ }
+
+ get_variable_mcv_freq(&vd, &mcv_i);
+
+ nd_mix = Max(nd_mix, nd_i);
+ mcv_mix = Max(mcv_mix, prefix[i] * mcv_i);
+
+ if (i == 0 && vd.rel && vd.rel->tuples > 0)
+ {
+ rel_rows_proxy = vd.rel->rows;
+ rel_tuples_proxy = vd.rel->tuples;
+ }
+
+ ReleaseVariableStats(vd);
+ }
+
+ i++;
+ }
+
+ pfree(prefix);
+ list_free(stripped_args);
+
+ if (rel_tuples_proxy > 0.0)
+ {
+ nd_mix *= rel_rows_proxy / rel_tuples_proxy;
+ nd_mix = clamp_row_est(nd_mix);
+ }
+
+ if (nd_mix <= 0.0)
+ return false;
+
+ if (nd_mix > nbuckets)
+ estfract = 1.0 / nbuckets;
+ else
+ estfract = 1.0 / nd_mix;
+
+ CLAMP_PROBABILITY(mcv_mix);
+ *mcv_freq = Max(*mcv_freq, mcv_mix);
+ CLAMP_PROBABILITY(*mcv_freq);
+ estfract = Max(estfract, *mcv_freq);
+
+ if (estfract < 1.0e-6)
+ estfract = 1.0e-6;
+ else if (estfract > 1.0)
+ estfract = 1.0;
+
+ *bucketsize_frac = (Selectivity) estfract;
+
+ return true;
+}
+
/*
* Estimate hash bucket statistics when the specified expression is used
* as a hash key for the given number of buckets.
@@ -4427,43 +4853,21 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
double estfract,
ndistinct;
bool isdefault;
- AttStatsSlot sslot;
examine_variable(root, hashkey, 0, &vardata);
- /* Initialize *mcv_freq to "unknown" */
- *mcv_freq = 0.0;
-
- /* Look up the frequency of the most common value, if available */
- if (HeapTupleIsValid(vardata.statsTuple))
- {
- if (get_attstatsslot(&sslot, vardata.statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_NUMBERS))
- {
- /*
- * The first MCV stat is for the most common value.
- */
- if (sslot.nnumbers > 0)
- *mcv_freq = sslot.numbers[0];
- free_attstatsslot(&sslot);
- }
- else if (get_attstatsslot(&sslot, vardata.statsTuple,
- STATISTIC_KIND_HISTOGRAM, InvalidOid,
- 0))
- {
- /*
- * If there are no recorded MCVs, but we do have a histogram, then
- * assume that ANALYZE determined that the column is unique.
- */
- if (vardata.rel && vardata.rel->tuples > 0)
- *mcv_freq = 1.0 / vardata.rel->tuples;
- }
- }
+ get_variable_mcv_freq(&vardata, mcv_freq);
/* Get number of distinct values */
ndistinct = get_variable_numdistinct(&vardata, &isdefault);
+ if (isdefault && hash_bucket_stats_coalesce_dispatch(root, hashkey, nbuckets,
+ mcv_freq, bucketsize_frac))
+ {
+ ReleaseVariableStats(vardata);
+ return;
+ }
+
/*
* If ndistinct isn't real, punt. We normally return 0.1, but if the
* mcv_freq is known to be even higher than that, use it instead.
base-commit: cd3ad3bc03567ee120a638c840112b8865e055a8
--
2.43.0