On 8/3/2024 18:53, Andy Fan wrote:
I just reviewed the bad queries plan for the past half years internally,
I found many queries used the Nested loop which is the direct cause. now
I think I find out a new reason for this, because the missed optimizer
statistics cause the rows in outer relation to be 1, which make the Nest
loop is choosed. I'm not sure your idea could help on this or can help
on this than mine at this aspect.
Having had the same problem for a long time, I've made an attempt and
invented a patch that probes an index to determine whether the estimated
constant is within statistics' scope.
I remember David's remark on the overhead problem, but I don't argue it
here. This patch is on the table to have one more solution sketch for
further discussion.
Also, Andy, if you have a specific problem with index choosing, you can
try a tiny option that makes the index-picking technique less dependent
on the ordering of index lists [1].
[1]
https://www.postgresql.org/message-id/[email protected]
--
regards,
Andrei Lepikhov
Postgres Professional
diff --git a/src/backend/utils/adt/like_support.c
b/src/backend/utils/adt/like_support.c
index 2635050861..bc8e59bbf3 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -643,7 +643,7 @@ patternsel_common(PlannerInfo *root,
/*
* Pattern specifies an exact match, so estimate as for '='
*/
- result = var_eq_const(&vardata, eqopr, collation,
prefix->constvalue,
+ result = var_eq_const(root, &vardata, eqopr, collation,
prefix->constvalue,
false, true, false);
}
else
@@ -1294,7 +1294,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData
*vardata,
* probably off the end of the histogram, and thus we probably got a
very
* small estimate from the >= condition; so we still need to clamp.
*/
- eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
+ eq_sel = var_eq_const(root, vardata, eqopr, collation,
prefixcon->constvalue,
false, true, false);
prefixsel = Max(prefixsel, eq_sel);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index cea777e9d4..be6213046e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -273,7 +273,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
* in the query.)
*/
if (IsA(other, Const))
- selec = var_eq_const(&vardata, operator, collation,
+ selec = var_eq_const(root, &vardata, operator, collation,
((Const *)
other)->constvalue,
((Const *)
other)->constisnull,
varonleft, negate);
@@ -286,13 +286,133 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
return selec;
}
+/*
+ * Lookup the value in the index and try to estimate number of the tuples with
+ * the same value.
+ */
+static Selectivity
+estimate_ntuples_by_index(PlannerInfo *root, VariableStatData *vardata,
+ Datum constval,
+ Oid collation, Oid
regprocoid, int32 stawidth)
+{
+ RelOptInfo *rel = vardata->rel;
+ RangeTblEntry *rte = root->simple_rte_array[rel->relid];
+ ListCell *lc;
+ int ntuples = 0;
+ Selectivity selec = -1.;
+
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+ ScanKeyData scankeys[1];
+ SnapshotData SnapshotNonVacuumable;
+ IndexScanDesc index_scan;
+ Relation heapRel;
+ Relation indexRel;
+
+ if (index->relam != BTREE_AM_OID)
+ continue;
+ if (index->indpred != NIL)
+ continue;
+ if (index->hypothetical)
+ continue;
+ if (!match_index_to_operand(vardata->var, 0, index))
+ continue;
+
+ heapRel = table_open(rte->relid, NoLock);
+ indexRel = index_open(index->indexoid, NoLock);
+
+ ScanKeyEntryInitialize(&scankeys[0],
+ 0,
+ 1,
+
BTEqualStrategyNumber,
+
vardata->atttype,
+ collation,
+ regprocoid,
+ constval);
+ InitNonVacuumableSnapshot(SnapshotNonVacuumable,
+
GlobalVisTestFor(heapRel));
+
+ index_scan = index_beginscan(heapRel, indexRel,
+
&SnapshotNonVacuumable,
+ 1, 0);
+ index_scan->xs_want_itup = true;
+ index_rescan(index_scan, scankeys, 1, NULL, 0);
+ while (index_getnext_tid(index_scan, ForwardScanDirection) !=
NULL)
+ {
+ ntuples++;
+ }
+
+ selec = (ntuples > 0) ? ntuples / vardata->rel->tuples :
+ 1. /
vardata->rel->tuples;
+
+ index_endscan(index_scan);
+ index_close(indexRel, NoLock);
+ table_close(heapRel, NoLock);
+ break;
+ }
+ return selec;
+}
+
+/*
+ * Returns 0 if the value covered by the histogram.
+ */
+static bool
+const_out_of_scope(VariableStatData *vardata, Datum value)
+{
+ AttStatsSlot sslot;
+ bool result = false;
+
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM,
InvalidOid,
+ ATTSTATSSLOT_VALUES))
+ {
+ FmgrInfo opproc;
+ TypeCacheEntry *type;
+ Oid funcoid;
+ bool cmpres;
+
+ /* Load operators for the type */
+ type = lookup_type_cache(vardata->vartype, TYPECACHE_LT_OPR |
TYPECACHE_GT_OPR);
+
+ /* Check: low boundary greater than value */
+ funcoid = get_opcode(type->gt_opr);
+ fmgr_info(funcoid, &opproc);
+ cmpres = DatumGetBool(FunctionCall2Coll(&opproc,
+
type->typcollation,
+
sslot.values[0],
+
value));
+
+
+ if (cmpres)
+ result = true;
+ else
+ {
+ /* Check: top boundary lower than value */
+ funcoid = get_opcode(type->lt_opr);
+ fmgr_info(funcoid, &opproc);
+ cmpres = DatumGetBool(FunctionCall2Coll(&opproc,
+
type->typcollation,
+
sslot.values[sslot.nvalues - 1],
+
value));
+ if (cmpres)
+ result = true;
+ }
+
+ free_attstatsslot(&sslot);
+ }
+
+ return result;
+}
+
/*
* var_eq_const --- eqsel for var = const case
*
* This is exported so that some other estimation functions can use it.
*/
double
-var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
+var_eq_const(PlannerInfo *root, VariableStatData *vardata, Oid oproid, Oid
collation,
Datum constval, bool constisnull,
bool varonleft, bool negate)
{
@@ -300,6 +420,7 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid
collation,
double nullfrac = 0.0;
bool isdefault;
Oid opfuncoid;
+ Form_pg_statistic stats;
/*
* If the constant is NULL, assume operator is strict and return zero,
ie,
@@ -314,8 +435,6 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid
collation,
*/
if (HeapTupleIsValid(vardata->statsTuple))
{
- Form_pg_statistic stats;
-
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
nullfrac = stats->stanullfrac;
}
@@ -402,6 +521,17 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid
collation,
*/
selec = sslot.numbers[i];
}
+ else if (const_out_of_scope(vardata, constval) &&
+ (selec = estimate_ntuples_by_index(root,
vardata, constval,
+
collation, opfuncoid,
+
stats->stawidth)) >= 0.)
+ {
+ /*
+ * value to estimate is out of the boundaries of the
histogram. And
+ * we attempted to find an index and probe it.
+ * At this point we successfully done it.
+ */
+ }
else
{
/*
@@ -1521,7 +1651,7 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
* A boolean variable V is equivalent to the clause V = 't', so
we
* compute the selectivity as if that is what we have.
*/
- selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
+ selec = var_eq_const(root, &vardata, BooleanEqualOperator,
InvalidOid,
BoolGetDatum(true),
false, true, false);
}
else
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 2fa4c4fc1b..fe789483f0 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -180,7 +180,7 @@ extern double ineq_histogram_selectivity(PlannerInfo *root,
bool isgt, bool iseq,
Oid collation,
Datum constval, Oid consttype);
-extern double var_eq_const(VariableStatData *vardata,
+extern double var_eq_const(PlannerInfo *root, VariableStatData *vardata,
Oid oproid, Oid collation,
Datum constval, bool
constisnull,
bool varonleft, bool negate);
diff --git a/src/test/regress/expected/partition_join.out
b/src/test/regress/expected/partition_join.out
index 6560fe2416..6522dc64f2 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2229,8 +2229,8 @@ select * from prtx1
where not exists (select 1 from prtx2
where prtx2.a=prtx1.a and prtx2.b=prtx1.b and prtx2.c=123)
and a<20 and c=120;
- QUERY PLAN
--------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------
Append
-> Nested Loop Anti Join
-> Seq Scan on prtx1_1
@@ -2244,17 +2244,12 @@ where not exists (select 1 from prtx2
-> Bitmap Index Scan on prtx2_1_c_idx
Index Cond: (c = 123)
-> Nested Loop Anti Join
+ Join Filter: ((prtx2_2.a = prtx1_2.a) AND (prtx2_2.b = prtx1_2.b))
-> Seq Scan on prtx1_2
Filter: ((a < 20) AND (c = 120))
- -> Bitmap Heap Scan on prtx2_2
- Recheck Cond: ((b = prtx1_2.b) AND (c = 123))
- Filter: (a = prtx1_2.a)
- -> BitmapAnd
- -> Bitmap Index Scan on prtx2_2_b_idx
- Index Cond: (b = prtx1_2.b)
- -> Bitmap Index Scan on prtx2_2_c_idx
- Index Cond: (c = 123)
-(23 rows)
+ -> Index Scan using prtx2_2_c_idx on prtx2_2
+ Index Cond: (c = 123)
+(18 rows)
select * from prtx1
where not exists (select 1 from prtx2