On Sun, Dec 22, 2019 at 06:16:48PM -0600, Justin Pryzby wrote:
> On Tue, Nov 19, 2019 at 01:34:21PM -0600, Justin Pryzby wrote:
> > Tom implemented "Planner support functions":
> > https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=a391ff3c3d418e404a2c6e4ff0865a107752827b
> > https://www.postgresql.org/docs/12/xfunc-optimization.html
> >
> > I wondered whether there was any consideration to extend that to allow
> > providing improved estimates of "group by". That currently requires
> > manually creating an expression index, which is only possible if the
> > function is IMMUTABLE (which is not true for eg. date_trunc of timestamptz).
>
> I didn't hear back so tried implementing this for date_trunc(). Currently,
> I assume that the input data has 1 second granularity:
...
> If the input timestamps have (say) hourly granularity, rowcount will be
> *underestimated* by 3600x, which is worse than the behavior in master of
> overestimating by (for "day") 24x.
>
> I'm trying to think of ways to address that:
In the attached, I handled that by using the histogram and the variable's
initial ndistinct estimate, giving good estimates even for intermediate
granularities of input timestamps.
|postgres=# DROP TABLE IF EXISTS t; CREATE TABLE t(i) AS SELECT a FROM
generate_series(now(), now()+'11 day'::interval, '15
minutes')a,generate_series(1,9)b; ANALYZE t;
|
|postgres=# explain analyze SELECT date_trunc('hour',i) i FROM t GROUP BY 1;
| HashAggregate (cost=185.69..188.99 rows=264 width=8) (actual
time=42.110..42.317 rows=265 loops=1)
|
|postgres=# explain analyze SELECT date_trunc('minute',i) i FROM t GROUP BY 1;
| HashAggregate (cost=185.69..198.91 rows=1057 width=8) (actual
time=41.685..42.264 rows=1057 loops=1)
|
|postgres=# explain analyze SELECT date_trunc('day',i) i FROM t GROUP BY 1;
| HashAggregate (cost=185.69..185.83 rows=11 width=8) (actual
time=46.672..46.681 rows=12 loops=1)
|
|postgres=# explain analyze SELECT date_trunc('second',i) i FROM t GROUP BY 1;
| HashAggregate (cost=185.69..198.91 rows=1057 width=8) (actual
time=41.816..42.435 rows=1057 loops=1)
>From 772876dbd64ea0b1d2bb28f9ab67f577c4050468 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Sun, 15 Dec 2019 20:27:24 -0600
Subject: [PATCH v2 1/2] Planner support functions for GROUP BY f()..
..implemented for date_trunc()
See also a391ff3c3d418e404a2c6e4ff0865a107752827b
---
src/backend/optimizer/util/plancat.c | 47 +++++++++++++++++
src/backend/utils/adt/selfuncs.c | 28 +++++++++++
src/backend/utils/adt/timestamp.c | 97 ++++++++++++++++++++++++++++++++++++
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_proc.dat | 15 ++++--
src/include/nodes/nodes.h | 3 +-
src/include/nodes/supportnodes.h | 17 +++++++
src/include/optimizer/plancat.h | 2 +
8 files changed, 206 insertions(+), 5 deletions(-)
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index c15654e..2469ca6 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2009,6 +2009,53 @@ get_function_rows(PlannerInfo *root, Oid funcid, Node *node)
}
/*
+ * Return a multiplier [0..1] to help estimate effect on rowcount of GROUP BY
+ * f(x), relative to input x.
+ */
+double
+get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var)
+{
+ HeapTuple proctup;
+ Form_pg_proc procform;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ elog(ERROR, "cache lookup failed for function %u", funcid);
+ procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+ if (OidIsValid(procform->prosupport))
+ {
+ SupportRequestGroupBy *sresult;
+ SupportRequestGroupBy req;
+
+ req.type = T_SupportRequestGroupBy;
+ req.root = root;
+ req.funcid = funcid;
+ req.node = node;
+ req.var = var;
+ req.factor = 1; /* just for sanity */
+
+ sresult = (SupportRequestGroupBy *)
+ DatumGetPointer(OidFunctionCall1(procform->prosupport,
+ PointerGetDatum(&req)));
+
+ if (sresult == &req)
+ {
+ /* Success */
+ ReleaseSysCache(proctup);
+ return req.factor;
+ }
+ }
+
+ /* XXX No support function, or it failed */
+
+ ReleaseSysCache(proctup);
+
+ return 1;
+}
+
+
+/*
* has_unique_index
*
* Detect whether there is a unique index on the specified attribute
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index ff02b5a..eb0b86f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3154,10 +3154,38 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
*/
foreach(l2, varshere)
{
+ double ret;
Node *var = (Node *) lfirst(l2);
examine_variable(root, var, 0, &vardata);
varinfos = add_unique_group_var(root, varinfos, var, &vardata);
+
+ /* If we group by a function of a simple var, try to call its support function to help estimate GROUP BY */
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ IsA(groupexpr, FuncExpr) && IsA(var, Var))
+ // && (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
+ {
+ Form_pg_statistic stats = (Form_pg_statistic)GETSTRUCT(vardata.statsTuple);
+ FuncExpr *expr = (FuncExpr *) groupexpr;
+
+ Var *v = (Var*) var;
+ RangeTblEntry *rte = root->simple_rte_array[v->varno];
+ char *reln = get_rel_name(rte->relid);
+ char *coln = get_attname(rte->relid, v->varattno, true);
+ ret = get_function_groupby(root, expr->funcid, groupexpr, var);
+
+ fprintf(stderr, "HERE %s %d %s.%s ndistinct=%f ret=%f\n", __FUNCTION__, __LINE__,
+ reln?reln:"null",
+ coln?coln:"null",
+ stats->stadistinct, ret);
+
+ Assert(ret>=0);
+ Assert(ret<=1);
+ numdistinct *= ret;
+
+ /* Can we do anything special with stats->stadistinct that's not already done in general? */
+ }
+
ReleaseVariableStats(vardata);
}
}
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 945b8f8..359ad32 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -5554,3 +5554,100 @@ generate_series_timestamptz(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
}
+
+
+/*
+ * Planner support function for date_trunc
+ * Try to estimate the factor by which to correct the estimate of ngroups for GROUP BY.
+ */
+Datum
+date_trunc_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq;
+ SupportRequestGroupBy *req;
+ List *args;
+ Node *arg1, *arg2;
+ Node *var;
+ char *start;
+ int typmod;
+
+ rawreq = (Node *) PG_GETARG_POINTER(0);
+ if (!IsA(rawreq, SupportRequestGroupBy))
+ PG_RETURN_POINTER(NULL);
+
+ req = (SupportRequestGroupBy *) rawreq;
+ if (!is_funcclause(req->node)) /* be paranoid */
+ PG_RETURN_POINTER(NULL);
+
+ args = ((FuncExpr *) req->node)->args;
+ arg1 = linitial(args);
+ arg2 = lsecond(args);
+ /* arg3 may be the timezone */
+
+ var = req->var;
+
+ /* XXX Assumes the input has 1-second granularity */
+
+ // XXX: only work on const?
+ start = TextDatumGetCString(((Const*)arg1)->constvalue);
+
+ // XXX: not working due to promotion ?
+ // exprType(var) is still not right, since date_trunc(a, b::date) uses b's type and not date..
+ // ...but date_trunc('', x::date) is weird
+ // exprType(arg2)==TIMESTAMPOID || exprType(arg2)==TIMESTAMPTZOID)
+ // if (req->funcid==1217 || req->funcid==1284 || req->funcid==2020)
+ /* Reset if it's not a timestamp */
+ if (exprType(var)==DATEOID) {
+ req->factor = 60*60*24;
+ } else if (exprType(var)==TIMESTAMPOID || exprType(var)==TIMESTAMPTZOID) {
+ int typmod = exprTypmod(arg2); // XXX: vartypmod ?
+ /* If the input has decimal digits, the grouping effect is stronger */
+ if (typmod != -1) {
+ req->factor /= 2<<typmod;
+ if (strcmp(start, "microseconds")==0) {
+ /* do nothing? */
+ } else if (strcmp(start, "milliseconds")==0) {
+ /* do nothing? */
+ }
+ }
+
+ if (strcmp(start, "second")==0) {
+ /* do nothing */
+ } else if (strcmp(start, "minute")==0) {
+ req->factor /= 60;
+ } else if (strcmp(start, "hour")==0) {
+ req->factor /= 60*60;
+ }
+ }
+
+ // else { elog(ERROR, "unknown type %u", exprType(var)); }
+
+ if (strcmp(start, "day")==0) {
+ req->factor /= 60*60*24;
+ } else if (strcmp(start, "week")==0) {
+ req->factor /= 60*60*24*7;
+ } else if (strcmp(start, "month")==0) {
+ /* 30 days */
+ req->factor /= 60*60*24*30;
+ } else if (strcmp(start, "quarter")==0) {
+ req->factor /= 60*60*24*30*3;
+ } else if (strcmp(start, "year")==0) {
+ req->factor /= 60*60*24*365.24;
+ } else if (strcmp(start, "decade")==0) {
+ req->factor /= 60*60*24*365.25*10;
+ } else if (strcmp(start, "century")==0) {
+ req->factor /= 60*60*24*365.25*100;
+ } else if (strcmp(start, "millennium")==0) {
+ req->factor /= 60*60*24*365.25*1000;
+ } else if (req->factor > 1) {
+ /* Maybe a DATE with finer graularity trunc */
+ req->factor = 1;
+ }
+ // else { elog(ERROR, "", ); }
+
+ /* Fudge Factor, since the input may be already "grouped", say at multiples of 15min, */
+ /* or otherwise have course granularity to begin with */
+ // factor/=4;
+
+ PG_RETURN_POINTER(req);
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index eca67a1..e2c05be 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201911242
+#define CATALOG_VERSION_NO 201911243
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index aae50d6..7a8c3d1 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2350,11 +2350,14 @@
{ oid => '1217',
descr => 'truncate timestamp with time zone to specified units',
proname => 'date_trunc', provolatile => 's', prorettype => 'timestamptz',
- proargtypes => 'text timestamptz', prosrc => 'timestamptz_trunc' },
+ proargtypes => 'text timestamptz', prosrc => 'timestamptz_trunc',
+ prosupport => 'date_trunc_support', },
{ oid => '1284',
descr => 'truncate timestamp with time zone to specified units in specified time zone',
proname => 'date_trunc', provolatile => 's', prorettype => 'timestamptz',
- proargtypes => 'text timestamptz text', prosrc => 'timestamptz_trunc_zone' },
+ proargtypes => 'text timestamptz text', prosrc => 'timestamptz_trunc_zone',
+ prosupport => 'date_trunc_support', },
+# XXX:
{ oid => '1218', descr => 'truncate interval to specified units',
proname => 'date_trunc', prorettype => 'interval',
proargtypes => 'text interval', prosrc => 'interval_trunc' },
@@ -5632,7 +5635,13 @@
proargtypes => 'timestamptz', prosrc => 'timestamptz_time' },
{ oid => '2020', descr => 'truncate timestamp to specified units',
proname => 'date_trunc', prorettype => 'timestamp',
- proargtypes => 'text timestamp', prosrc => 'timestamp_trunc' },
+ proargtypes => 'text timestamp', prosrc => 'timestamp_trunc',
+ prosupport => 'date_trunc_support', },
+
+{ oid => '5449', descr => 'planner support for date_trunc',
+ proname => 'date_trunc_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'date_trunc_support', },
+
{ oid => '2021', descr => 'extract field from timestamp',
proname => 'date_part', prorettype => 'float8',
proargtypes => 'text timestamp', prosrc => 'timestamp_part' },
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 8692a32..97e7796 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -513,7 +513,8 @@ typedef enum NodeTag
T_SupportRequestSelectivity, /* in nodes/supportnodes.h */
T_SupportRequestCost, /* in nodes/supportnodes.h */
T_SupportRequestRows, /* in nodes/supportnodes.h */
- T_SupportRequestIndexCondition /* in nodes/supportnodes.h */
+ T_SupportRequestIndexCondition, /* in nodes/supportnodes.h */
+ T_SupportRequestGroupBy, /* in nodes/supportnodes.h */
} NodeTag;
/*
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index 460d75b..cb4ea44 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -168,6 +168,23 @@ typedef struct SupportRequestRows
double rows; /* number of rows expected to be returned */
} SupportRequestRows;
+/* How many fewer rows are output after GROUPing BY a function */
+typedef struct SupportRequestGroupBy
+{
+ NodeTag type;
+
+ /* Input fields: */
+ struct PlannerInfo *root; /* Planner's infrastructure (could be NULL) */
+ Oid funcid; /* function we are inquiring about */
+ Node *var; /* original (2nd) argument */
+ Node *node; /* parse node invoking function */
+
+ /* Output fields: */
+ double factor; /* [0..1] fraction of rows in GROUP BY f(x)
+ relative to GROUP BY x */
+
+} SupportRequestGroupBy;
+
/*
* The IndexCondition request allows the support function to generate
* a directly-indexable condition based on a target function call that is
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index bbb27f8..cbe5386 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -70,6 +70,8 @@ extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
+extern double get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var);
+
extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
--
2.7.4
>From e287fa474fea487ace0ee7d476f84b6f787cc2a7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Tue, 24 Dec 2019 22:02:22 -0600
Subject: [PATCH v2 2/2] Pass ndistinct and minmax to allow good estimates even
with timestamps of granularity other than 1sec
---
src/backend/optimizer/util/plancat.c | 8 ++-
src/backend/utils/adt/selfuncs.c | 36 ++++++++---
src/backend/utils/adt/timestamp.c | 113 ++++++++++++++++++++++-------------
src/include/nodes/supportnodes.h | 2 +
src/include/optimizer/plancat.h | 2 +-
5 files changed, 110 insertions(+), 51 deletions(-)
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 2469ca6..aab794c 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2011,9 +2011,13 @@ get_function_rows(PlannerInfo *root, Oid funcid, Node *node)
/*
* Return a multiplier [0..1] to help estimate effect on rowcount of GROUP BY
* f(x), relative to input x.
+ *
+ * minmax is an array of (min,max) values for the variable, which might be
+ * useful to determine its granularity (like timestamps per second, minute or
+ * hour).
*/
double
-get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var)
+get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var, double ndistinct, Datum *minmax)
{
HeapTuple proctup;
Form_pg_proc procform;
@@ -2033,6 +2037,8 @@ get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var)
req.funcid = funcid;
req.node = node;
req.var = var;
+ req.ndistinct = ndistinct;
+ req.minmax = minmax;
req.factor = 1; /* just for sanity */
sresult = (SupportRequestGroupBy *)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index eb0b86f..a7f396d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -116,6 +116,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
@@ -3168,16 +3169,37 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
Form_pg_statistic stats = (Form_pg_statistic)GETSTRUCT(vardata.statsTuple);
FuncExpr *expr = (FuncExpr *) groupexpr;
- Var *v = (Var*) var;
+ Datum minmax[2];
+ TypeCacheEntry *tce;
+ Var *v = (Var*) var;
RangeTblEntry *rte = root->simple_rte_array[v->varno];
- char *reln = get_rel_name(rte->relid);
- char *coln = get_attname(rte->relid, v->varattno, true);
- ret = get_function_groupby(root, expr->funcid, groupexpr, var);
-
- fprintf(stderr, "HERE %s %d %s.%s ndistinct=%f ret=%f\n", __FUNCTION__, __LINE__,
+ char *reln = get_rel_name(rte->relid);
+ char *coln = get_attname(rte->relid, v->varattno, true);
+
+ /* Seems like maybe this should be defined here and pass a single argument to groupby helper? */
+ SupportRequestGroupBy req = {
+ .type = T_SupportRequestGroupBy,
+ .root = root,
+ .funcid = expr->funcid,
+ .node = groupexpr,
+ .var = var,
+ .ndistinct = stats->stadistinct >= 0 ? stats->stadistinct :
+ -stats->stadistinct * vardata.rel->tuples,
+ .minmax = minmax,
+ .factor = 1, /* just for sanity */
+ };
+
+ fprintf(stderr, "HERE %s %d %s.%s tuples=%f, stadistinct=%f ndistinct=%f ret=%f\n", __FUNCTION__, __LINE__,
reln?reln:"null",
coln?coln:"null",
- stats->stadistinct, ret);
+ vardata.rel->tuples,
+ stats->stadistinct, req.ndistinct, ret);
+
+ tce = lookup_type_cache(v->vartype, TYPECACHE_LT_OPR);
+ if (get_variable_range(root, &vardata, tce->lt_opr, minmax, minmax+1))
+ ret = get_function_groupby(root, expr->funcid, groupexpr, var, req.ndistinct, minmax);
+ else
+ ret = get_function_groupby(root, expr->funcid, groupexpr, var, req.ndistinct, NULL);
Assert(ret>=0);
Assert(ret<=1);
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 359ad32..0314ef9 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -5563,13 +5563,44 @@ generate_series_timestamptz(PG_FUNCTION_ARGS)
Datum
date_trunc_support(PG_FUNCTION_ARGS)
{
- Node *rawreq;
+ Node *rawreq;
SupportRequestGroupBy *req;
- List *args;
- Node *arg1, *arg2;
- Node *var;
+ List *args;
+ Node *arg1, *arg2;
+ Node *var;
char *start;
- int typmod;
+ int typmod, i;
+ long unsigned int diff;
+
+ const struct {
+ const char *name; /* Granularity name */
+ const int factor; /* Multiplier */
+ const double minsecsec; /* Minimum number of distinct second values per second in range of the histogram */
+ } grans[] = {
+ /* XXX: these factors not applied unless...typmod>1 ? */
+ { "microseconds", 1 },
+ { "milliseconds", 1 },
+
+ /*
+ * These factors are not applied unless minmax indicates the incoming
+ * timestamp is at fine enough granularity that truncating to coarser
+ * granularity would affect the result.
+ */
+ { "second", 1, 1.0 },
+ { "minute", 60, 1.0/60 },
+ { "hour", 60, 1.0/60/60 },
+ { "day", 24, 1.0/60/60/24 }, // XXX: should not handle for DATEOID
+
+ /* These factors applied up to the specified granularity */
+ { "week", 7 },
+ { "month", 30 },
+ { "quarter", 3 },
+ { "year", 4 },
+ { "decade", 10 },
+ { "century", 10 },
+ { "millennium", 10 },
+
+ };
rawreq = (Node *) PG_GETARG_POINTER(0);
if (!IsA(rawreq, SupportRequestGroupBy))
@@ -5584,13 +5615,45 @@ date_trunc_support(PG_FUNCTION_ARGS)
arg2 = lsecond(args);
/* arg3 may be the timezone */
- var = req->var;
+ // XXX: handle if these are null ?
+ diff = req->minmax ? timestamptz_to_time_t(DatumGetTimestamp(req->minmax[1])) -
+ timestamptz_to_time_t(DatumGetTimestamp(req->minmax[0]))
+ : 0;
- /* XXX Assumes the input has 1-second granularity */
+ fprintf(stderr, "got distinct %f diff=%ld\n", req->ndistinct, diff);
// XXX: only work on const?
start = TextDatumGetCString(((Const*)arg1)->constvalue);
+ for (i=0; ; ++i) {
+ if (i >= sizeof(grans)/sizeof(*grans))
+ /* Unhandled truncation granularity */
+ PG_RETURN_POINTER(NULL);
+
+ fprintf(stderr, "applying factor %s %d: cur=%f %f -gt %f\n",
+ grans[i].name, grans[i].factor, req->factor,
+ req->ndistinct/diff, grans[i].minsecsec );
+
+ if (req->ndistinct / diff >= grans[i].minsecsec) {
+ if (req->ndistinct / diff > grans[i-1].minsecsec)
+ /* Apply the factor in full strength */
+ req->factor /= grans[i].factor;
+ else {
+ /* interpolate: if at (say) 15 minute granularity, then apply a 4x hourly correction, not 60x */
+ // req->factor /= grans[i].factor / (req->ndistinct / diff / grans[i].minsecsec);
+ req->factor /= req->ndistinct / diff / grans[i].minsecsec; // XXX: is this right
+ fprintf(stderr, "applying partial factor %f\n", req->ndistinct / diff / grans[i].minsecsec);
+ }
+ }
+
+ if (strcmp(start, grans[i].name) == 0)
+ break;
+ }
+
+ PG_RETURN_POINTER(req);
+
+#if 0
+ // var = req->var;
// XXX: not working due to promotion ?
// exprType(var) is still not right, since date_trunc(a, b::date) uses b's type and not date..
// ...but date_trunc('', x::date) is weird
@@ -5604,50 +5667,16 @@ date_trunc_support(PG_FUNCTION_ARGS)
/* If the input has decimal digits, the grouping effect is stronger */
if (typmod != -1) {
req->factor /= 2<<typmod;
- if (strcmp(start, "microseconds")==0) {
- /* do nothing? */
- } else if (strcmp(start, "milliseconds")==0) {
- /* do nothing? */
- }
}
- if (strcmp(start, "second")==0) {
- /* do nothing */
- } else if (strcmp(start, "minute")==0) {
- req->factor /= 60;
- } else if (strcmp(start, "hour")==0) {
- req->factor /= 60*60;
- }
}
// else { elog(ERROR, "unknown type %u", exprType(var)); }
- if (strcmp(start, "day")==0) {
- req->factor /= 60*60*24;
- } else if (strcmp(start, "week")==0) {
- req->factor /= 60*60*24*7;
- } else if (strcmp(start, "month")==0) {
- /* 30 days */
- req->factor /= 60*60*24*30;
- } else if (strcmp(start, "quarter")==0) {
- req->factor /= 60*60*24*30*3;
- } else if (strcmp(start, "year")==0) {
- req->factor /= 60*60*24*365.24;
- } else if (strcmp(start, "decade")==0) {
- req->factor /= 60*60*24*365.25*10;
- } else if (strcmp(start, "century")==0) {
- req->factor /= 60*60*24*365.25*100;
- } else if (strcmp(start, "millennium")==0) {
- req->factor /= 60*60*24*365.25*1000;
- } else if (req->factor > 1) {
/* Maybe a DATE with finer graularity trunc */
req->factor = 1;
}
- // else { elog(ERROR, "", ); }
- /* Fudge Factor, since the input may be already "grouped", say at multiples of 15min, */
- /* or otherwise have course granularity to begin with */
- // factor/=4;
+#endif
- PG_RETURN_POINTER(req);
}
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index cb4ea44..f5f33bf 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -178,6 +178,8 @@ typedef struct SupportRequestGroupBy
Oid funcid; /* function we are inquiring about */
Node *var; /* original (2nd) argument */
Node *node; /* parse node invoking function */
+ double ndistinct; /* Initial estimate of ndistinct of the variable */
+ Datum *minmax; /* Array of (min,max) values for variable */
/* Output fields: */
double factor; /* [0..1] fraction of rows in GROUP BY f(x)
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cbe5386..c13d40d 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -70,7 +70,7 @@ extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
-extern double get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var);
+extern double get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var, double ndistinct, Datum *minmax);
extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
--
2.7.4