When looking at [1], I noticed that we don't have a prosupport
function for the timestamp version of generate_series.
We have this for the integer versions of generate_series(), per:
postgres=# explain analyze select * from generate_series(1, 256, 2);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..1.28 rows=128 width=4)
(actual time=0.142..0.183 rows=128 loops=1)
The timestamp version just gives the default 1000 row estimate:
postgres=# explain analyze select * from generate_series('2024-01-01',
'2025-01-01', interval '1 day');
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..10.00 rows=1000
width=8) (actual time=0.604..0.718 rows=367 loops=1)
I had some spare time today, so wrote a patch, which gives you:
postgres=# explain analyze select * from generate_series('2024-01-01',
'2025-01-01', interval '1 day');
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..3.67 rows=367 width=8)
(actual time=0.258..0.291 rows=367 loops=1)
This required a bit of hackery to not have timestamp_mi() error out in
the planner when the timestamp difference calculation overflows. I
considered adding ereturn support to fix that, but that felt like
opening Pandora's box. Instead, I added some pre-checks similar to
what's in timestamp_mi() to have the support function fall back on the
1000 row estimate when there will be an overflow.
Also, there's no interval_div, so the patch has a macro that converts
interval to microseconds and does floating point division. I think
that's good enough for row estimations.
I'll park this here until July CF.
(I understand this doesn't help the case in [1] as the generate_series
inputs are not const there)
David
[1]
https://www.postgresql.org/message-id/CAMPYKo0FouB-HZ1k-_Ur2v%2BkK71q0T5icQGrp%2BSPbQJGq0H2Rw%40mail.gmail.com
From ca0e982215d1335d015a2b515f038b7186935af0 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Sun, 14 Apr 2024 14:49:39 +1200
Subject: [PATCH v1] Add support function for generate_series for timestamps
This provides the planner with row estimates for
generate_series(TIMESTAMP, TIMESTAMP, INTERVAL) and
generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL)
---
src/backend/utils/adt/timestamp.c | 83 +++++++++++++++++++++++++++++++
src/include/catalog/pg_proc.dat | 9 +++-
2 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/src/backend/utils/adt/timestamp.c
b/src/backend/utils/adt/timestamp.c
index 963f2ec74a..25f2680243 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -27,6 +27,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "optimizer/optimizer.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "parser/scansup.h"
@@ -6668,6 +6669,88 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
return generate_series_timestamptz_internal(fcinfo);
}
+/*
+ * Planner support function for generate_series(timestamp, timestamp, interval)
+ */
+Datum
+generate_series_timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1, *arg2, *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root,
linitial(args));
+ arg2 = estimate_expression_value(req->root,
lsecond(args));
+ arg3 = estimate_expression_value(req->root,
lthird(args));
+
+ /*
+ * If any argument is constant NULL, we can safely
assume that
+ * zero rows are returned. Otherwise, if they're all
non-NULL
+ * constants, we can calculate the number of rows that
will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull)
||
+ (IsA(arg2, Const) && ((Const *)
arg2)->constisnull) ||
+ (IsA(arg3, Const) && ((Const *)
arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) && IsA(arg2, Const) &&
IsA(arg3, Const))
+ {
+ Timestamp start, finish;
+ Interval *step;
+ Datum diff;
+ double dstep;
+ int64 dummy;
+
+ start = DatumGetTimestamp(((Const *)
arg1)->constvalue);
+ finish = DatumGetTimestamp(((Const *)
arg2)->constvalue);
+ step = DatumGetIntervalP(((Const *)
arg3)->constvalue);
+
+ /*
+ * Protect against overflows in timestamp_mi.
XXX convert to
+ * ereturn one day?
+ */
+ if (!TIMESTAMP_NOT_FINITE(start) &&
!TIMESTAMP_NOT_FINITE(finish) &&
+ !pg_sub_s64_overflow(finish, start,
&dummy))
+ {
+ diff = DirectFunctionCall2(timestamp_mi,
+
TimestampGetDatum(finish),
+
TimestampGetDatum(start));
+
+#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH +
(i)->day)) * USECS_PER_DAY + (i)->time)
+
+ dstep = INTERVAL_TO_MICROSECONDS(step);
+
+ if (dstep != 0.0)
+ {
+ Interval *idiff =
DatumGetIntervalP(diff);
+ double ddiff =
INTERVAL_TO_MICROSECONDS(idiff);
+
+ req->rows = floor(ddiff / dstep
+ 1.0);
+ ret = (Node *) req;
+ }
+#undef INTERVAL_TO_MICROSECONDS
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
/* timestamp_at_local()
* timestamptz_at_local()
*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 134e3b22fd..279ff2384b 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8328,15 +8328,20 @@
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
{ oid => '939', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
provolatile => 's', prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval',
prosrc => 'generate_series_timestamptz' },
{ oid => '6274', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval text',
prosrc => 'generate_series_timestamptz_at_zone' },
+{ oid => '8402', descr => 'planner support for generate_series',
+ proname => 'generate_series_timestamp_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
# boolean aggregates
{ oid => '2515', descr => 'aggregate transition function',
--
2.40.1.windows.1