On 5/20/24 16:40, Andrei Lepikhov wrote:
On 20/5/2024 15:52, Andy Fan wrote:
+ if (clauselist_selectivity_hook)
+ *return* clauselist_selectivity_hook(root, clauses, ..)
Of course - library may estimate not all the clauses - it is a reason,
why I added input/output parameter 'estimatedclauses' by analogy with
statext_clauselist_selectivity.
Here is a polished and a bit modified version of the hook proposed.
Additionally, I propose exporting the statext_mcv_clauselist_selectivity
routine, likewise dependencies_clauselist_selectivity. This could
potentially enhance the functionality of the PostgreSQL estimation code.
To clarify the purpose, I want an optional, loaded as a library, more
conservative estimation based on distinct statistics. Let's provide (a
bit degenerate) example:
CREATE TABLE is_test(x1 integer, x2 integer, x3 integer, x4 integer);
INSERT INTO is_test (x1,x2,x3,x4)
SELECT x%5,x%7,x%11,x%13 FROM generate_series(1,1E3) AS x;
INSERT INTO is_test (x1,x2,x3,x4)
SELECT 14,14,14,14 FROM generate_series(1,100) AS x;
CREATE STATISTICS ist_stat (dependencies,ndistinct)
ON x1,x2,x3,x4 FROM is_test;
ANALYZE is_test;
EXPLAIN (ANALYZE, COSTS ON, SUMMARY OFF, TIMING OFF)
SELECT * FROM is_test WHERE x1=14 AND x2=14 AND x3=14 AND x4=14;
DROP TABLE is_test CASCADE;
I see:
(cost=0.00..15.17 rows=3 width=16) (actual rows=100 loops=1)
Dependency works great if it is the same for all the data in the
columns. But we get underestimations if we have different laws for
subsets of rows. So, if we don't have MCV statistics, sometimes we need
to pass over dependency statistics and use ndistinct instead.
--
regards,
Andrei Lepikhov
Postgres Professional
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 0ab021c1e8..1508a1beea 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -128,6 +128,11 @@ clauselist_selectivity_ext(PlannerInfo *root,
ListCell *l;
int listidx;
+ if (clauselist_selectivity_hook)
+ s1 = clauselist_selectivity_hook(root, clauses, varRelid, jointype,
+ sjinfo, &estimatedclauses,
+ use_extended_stats);
+
/*
* If there's exactly one clause, just go directly to
* clause_selectivity_ext(). None of what we might do below is relevant.
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 99fdf208db..b1722f5a60 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1712,7 +1712,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
* 0-based 'clauses' indexes we estimate for and also skip clause items that
* already have a bit set.
*/
-static Selectivity
+Selectivity
statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo,
RelOptInfo *rel, Bitmapset **estimatedclauses,
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5f5d7959d8..ff98fda08c 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -146,6 +146,7 @@
/* Hooks for plugins to get control when we ask for stats */
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
+clauselist_selectivity_hook_type clauselist_selectivity_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 7f2bf18716..436f30bdde 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -104,6 +104,14 @@ extern void BuildRelationExtStatistics(Relation onerel, bool inh, double totalro
extern int ComputeExtStatisticsRows(Relation onerel,
int natts, VacAttrStats **vacattrstats);
extern bool statext_is_kind_built(HeapTuple htup, char type);
+extern Selectivity statext_mcv_clauselist_selectivity(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ Bitmapset **estimatedclauses,
+ bool is_or);
extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root,
List *clauses,
int varRelid,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index f2563ad1cb..253f584c65 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -148,6 +148,17 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root,
VariableStatData *vardata);
extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
+/* Hooks for plugins to get control when we ask for selectivity estimation */
+typedef Selectivity (*clauselist_selectivity_hook_type) (
+ PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ Bitmapset **estimatedclauses,
+ bool use_extended_stats);
+extern PGDLLIMPORT clauselist_selectivity_hook_type clauselist_selectivity_hook;
+
/* Functions in selfuncs.c */
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,