On Wed, Feb 26, 2020 at 10:50:19AM +0800, John Naylor wrote: > Hi, > > When analyzing time-series data, it's useful to be able to bin > timestamps into equally spaced ranges. date_trunc() is only able to > bin on a specified whole unit.
Thanks for adding this very handy feature! > In the attached patch for the March > commitfest, I propose a new function date_trunc_interval(), which can > truncate to arbitrary intervals, e.g.: > > select date_trunc_interval('15 minutes', timestamp '2020-02-16 > 20:48:40'); date_trunc_interval > --------------------- > 2020-02-16 20:45:00 > (1 row) I believe the following should error out, but doesn't. # SELECT date_trunc_interval('1 year 1 ms', TIMESTAMP '2001-02-16 20:38:40'); date_trunc_interval ═════════════════════ 2001-01-01 00:00:00 (1 row) > With this addition, it might be possible to turn the existing > date_trunc() functions into wrappers. I haven't done that here because > it didn't seem practical at this point. For one, the existing > functions have special treatment for weeks, centuries, and millennia. I agree that turning it into a wrapper would be separate work. > Note: I've only written the implementation for the type timestamp > without timezone. Adding timezone support would be pretty simple, > but I wanted to get feedback on the basic idea first before making > it complete. I've also written tests and very basic documentation. Please find attached an update that I believe fixes the bug I found in a principled way. Best, David. -- David Fetter <david(at)fetter(dot)org> http://fetter.org/ Phone: +1 415 235 3778 Remember to vote! Consider donating to Postgres: http://www.postgresql.org/about/donate
>From 5e36c4c888c65e358d2f87d84b64bc14d52f2b39 Mon Sep 17 00:00:00 2001 From: David Fetter <da...@fetter.org> Date: Tue, 25 Feb 2020 23:49:35 -0800 Subject: [PATCH v2] Add date_trunc_interval(interval, timestamp) To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.24.1" This is a multi-part message in MIME format. --------------2.24.1 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit per John Naylor diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ceda48e0fc..3863c222a2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6949,6 +6949,15 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); <entry><literal>2 days 03:00:00</literal></entry> </row> + <row> + <entry><literal><function>date_trunc_interval(<type>interval</type>, <type>timestamp</type>)</function></literal></entry> + <entry><type>timestamp</type></entry> + <entry>Truncate to specified precision; see <xref linkend="functions-datetime-trunc"/> + </entry> + <entry><literal>date_trunc_interval('15 minutes', timestamp '2001-02-16 20:38:40')</literal></entry> + <entry><literal>2001-02-16 20:30:00</literal></entry> + </row> + <row> <entry> <indexterm> @@ -7818,7 +7827,7 @@ SELECT date_part('hour', INTERVAL '4 hours 3 minutes'); </sect2> <sect2 id="functions-datetime-trunc"> - <title><function>date_trunc</function></title> + <title><function>date_trunc</function>, <function>date_trunc_interval</function></title> <indexterm> <primary>date_trunc</primary> @@ -7902,6 +7911,21 @@ SELECT date_trunc('hour', INTERVAL '3 days 02:47:33'); <lineannotation>Result: </lineannotation><computeroutput>3 days 02:00:00</computeroutput> </screen> </para> + + <para> + The function <function>date_trunc_interval</function> is + similar to the <function>date_trunc</function>, except that it + truncates to an arbitrary interval. + </para> + + <para> + Example: +<screen> +SELECT date_trunc_interval('5 minutes', TIMESTAMP '2001-02-16 20:38:40'); +<lineannotation>Result: </lineannotation><computeroutput>2001-02-16 20:35:00</computeroutput> +</screen> + </para> + </sect2> <sect2 id="functions-datetime-zoneconvert"> diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 0b6c9d5ea8..ed742592af 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -30,6 +30,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" #include "parser/scansup.h" +#include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/datetime.h" @@ -3804,6 +3805,144 @@ timestamptz_age(PG_FUNCTION_ARGS) *---------------------------------------------------------*/ +/* timestamp_trunc_interval() + * Truncate timestamp to specified interval. + */ +Datum +timestamp_trunc_interval(PG_FUNCTION_ARGS) +{ + Interval *interval = PG_GETARG_INTERVAL_P(0); + Timestamp timestamp = PG_GETARG_TIMESTAMP(1); + Timestamp result; + fsec_t ifsec, + tfsec; + uint32_t unit = 0, + popcount = 0; + enum TimeUnit { + us = 1 << 0, + ms = 1 << 1, + second = 1 << 2, + minute = 1 << 3, + hour = 1 << 4, + day = 1 << 5, + month = 1 << 6, + year = 1 << 7 + }; + + struct pg_tm it; + struct pg_tm tt; + + if (TIMESTAMP_NOT_FINITE(timestamp)) + PG_RETURN_TIMESTAMP(timestamp); + + if (interval2tm(*interval, &it, &ifsec) != 0) + elog(ERROR, "could not convert interval to tm"); + + if (timestamp2tm(timestamp, NULL, &tt, &tfsec, NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + if (it.tm_year != 0) + { + tt.tm_year = it.tm_year * (tt.tm_year / it.tm_year); + unit |= year; + } + if (it.tm_mon != 0) + { + tt.tm_mon = it.tm_mon * (tt.tm_mon / it.tm_mon); + unit |= month; + } + if (it.tm_mday != 0) + { + tt.tm_mday = it.tm_mday * (tt.tm_mday / it.tm_mday); + unit |= day; + } + if (it.tm_hour != 0) + { + tt.tm_hour = it.tm_hour * (tt.tm_hour / it.tm_hour); + unit |= hour; + } + if (it.tm_min != 0) + { + tt.tm_min = it.tm_min * (tt.tm_min / it.tm_min); + unit |= minute; + } + if (it.tm_sec != 0) + { + tt.tm_sec = it.tm_sec * (tt.tm_sec / it.tm_sec); + unit |= second; + } + if (ifsec > 0) + { + tfsec = ifsec * (tfsec / ifsec); + + if (ifsec >= 1000) + unit |= ms; + else + unit |= us; + } + if (unit == 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval not initialized"))); + + popcount = pg_popcount32(unit); + + if ( popcount > 1 ) + goto error; + /* + * Justify all lower timestamp units and throw an error if any + * of the lower interval units are non-zero. + */ + switch (unit) + { + case year: + tt.tm_mon = 1; + if (it.tm_mon != 0) + goto error; + case month: + tt.tm_mday = 1; + if (it.tm_mday != 0) + goto error; + case day: + tt.tm_hour = 0; + if (it.tm_hour != 0) + goto error; + case hour: + tt.tm_min = 0; + if (it.tm_min != 0) + goto error; + case minute: + tt.tm_sec = 0; + if (it.tm_sec != 0) + goto error; + case second: + tfsec = 0; + case ms: + case us: + break; + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("interval unit not supported"))); + + } + + if (tm2timestamp(&tt, tfsec, NULL, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + PG_RETURN_TIMESTAMP(result); + +error: + ereport(ERROR, + // WIP is there a better errcode? + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("only one interval unit allowed for truncation"))); +} + /* timestamp_trunc() * Truncate timestamp to specified units. */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index eb3c1a88d1..0cec6c6799 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5660,6 +5660,9 @@ { oid => '2020', descr => 'truncate timestamp to specified units', proname => 'date_trunc', prorettype => 'timestamp', proargtypes => 'text timestamp', prosrc => 'timestamp_trunc' }, +{ oid => '8989', descr => 'truncate timestamp to specified interval', + proname => 'date_trunc_interval', prorettype => 'timestamp', + proargtypes => 'interval timestamp', prosrc => 'timestamp_trunc_interval' }, { oid => '2021', descr => 'extract field from timestamp', proname => 'date_part', prorettype => 'float8', proargtypes => 'text timestamp', prosrc => 'timestamp_part' }, diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out index 5f97505a30..174790e872 100644 --- a/src/test/regress/expected/timestamp.out +++ b/src/test/regress/expected/timestamp.out @@ -545,6 +545,35 @@ SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17 | Mon Feb 23 00:00:00 2004 (1 row) +SELECT + interval, + date_trunc_interval(interval::interval, ts) +FROM ( + VALUES + ('5 years'), + ('1 month'), + ('7 days'), + ('2 hours'), + ('15 minutes'), + ('10 seconds'), + ('100 millisecond'), + ('250 microseconds') +) intervals (interval), +(SELECT TIMESTAMP '2004-02-29 15:44:17.71393') ts (ts); + interval | date_trunc_interval +------------------+-------------------------------- + 5 years | Sat Jan 01 00:00:00 2000 + 1 month | Sun Feb 01 00:00:00 2004 + 7 days | Sat Feb 28 00:00:00 2004 + 2 hours | Sun Feb 29 14:00:00 2004 + 15 minutes | Sun Feb 29 15:30:00 2004 + 10 seconds | Sun Feb 29 15:44:10 2004 + 100 millisecond | Sun Feb 29 15:44:17.7 2004 + 250 microseconds | Sun Feb 29 15:44:17.71375 2004 +(8 rows) + +SELECT date_trunc_interval('1 year 1 ms'::interval, '2004-02-29 15:44:17.71393'); +ERROR: only one interval unit allowed for truncation -- Test casting within a BETWEEN qualifier SELECT '' AS "54", d1 - timestamp without time zone '1997-01-02' AS diff FROM TIMESTAMP_TBL diff --git a/src/test/regress/sql/timestamp.sql b/src/test/regress/sql/timestamp.sql index 7b58c3cfa5..f46c229f6a 100644 --- a/src/test/regress/sql/timestamp.sql +++ b/src/test/regress/sql/timestamp.sql @@ -166,6 +166,24 @@ SELECT '' AS "54", d1 - timestamp without time zone '1997-01-02' AS diff SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc; +SELECT + interval, + date_trunc_interval(interval::interval, ts) +FROM ( + VALUES + ('5 years'), + ('1 month'), + ('7 days'), + ('2 hours'), + ('15 minutes'), + ('10 seconds'), + ('100 millisecond'), + ('250 microseconds') +) intervals (interval), +(SELECT TIMESTAMP '2004-02-29 15:44:17.71393') ts (ts); + +SELECT date_trunc_interval('1 year 1 ms'::interval, '2004-02-29 15:44:17.71393'); + -- Test casting within a BETWEEN qualifier SELECT '' AS "54", d1 - timestamp without time zone '1997-01-02' AS diff FROM TIMESTAMP_TBL --------------2.24.1--