Attached is a patch that provides an implementation of width_bucket() for the float8 data type, per earlier discussion. The implementation is fairly ugly, but I don't see an easy way to simplify it.
When writing this, I noticed that my previous implementation of width_bucket() probably doesn't handle NaN correctly: postgres=# select width_bucket('NaN', 1, 5, 5); width_bucket -------------- 6 (1 row) AFAICS SQL:2003 does not define a NaN value, so it doesn't address how width_bucket() should behave here. The patch changes width_bucket() so that ereport(ERROR) is raised if NaN is specified for the operand or the lower or upper bounds to width_bucket(). I think this is reasonable behavior -- any objections? float8 also allows positive and negative infinity. I disallowed infinite values for the histogram bounds, but allowed it for the operand. -Neil
Index: doc/src/sgml/func.sgml =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/doc/src/sgml/func.sgml,v retrieving revision 1.348 diff -c -p -r1.348 func.sgml *** doc/src/sgml/func.sgml 21 Dec 2006 16:05:12 -0000 1.348 --- doc/src/sgml/func.sgml 11 Jan 2007 20:21:23 -0000 *************** *** 854,859 **** --- 854,869 ---- <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry> <entry><literal>3</literal></entry> </row> + + <row> + <entry><literal><function>width_bucket</function>(<parameter>op</parameter> <type>float8</type>, <parameter>b1</parameter> <type>float8</type>, <parameter>b2</parameter> <type>float8</type>, <parameter>count</parameter> <type>int</type>)</literal></entry> + <entry><type>int</type></entry> + <entry>return the bucket to which <parameter>operand</> would + be assigned in an equidepth histogram with <parameter>count</> + buckets, in the range <parameter>b1</> to <parameter>b2</></entry> + <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry> + <entry><literal>3</literal></entry> + </row> </tbody> </tgroup> </table> Index: src/backend/utils/adt/float.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/float.c,v retrieving revision 1.146 diff -c -p -r1.146 float.c *** src/backend/utils/adt/float.c 6 Jan 2007 20:21:29 -0000 1.146 --- src/backend/utils/adt/float.c 12 Jan 2007 00:09:26 -0000 *************** float84ge(PG_FUNCTION_ARGS) *** 2745,2750 **** --- 2745,2831 ---- PG_RETURN_BOOL(float8_cmp_internal(arg1, arg2) >= 0); } + /* + * Implements the float8 version of the width_bucket() function + * defined by SQL2003. See also width_bucket_numeric(). + * + * 'bound1' and 'bound2' are the lower and upper bounds of the + * histogram's range, respectively. 'count' is the number of buckets + * in the histogram. width_bucket() returns an integer indicating the + * bucket number that 'operand' belongs in for an equiwidth histogram + * with the specified characteristics. An operand smaller than the + * lower bound is assigned to bucket 0. An operand greater than the + * upper bound is assigned to an additional bucket (with number + * count+1). + */ + Datum + width_bucket_float8(PG_FUNCTION_ARGS) + { + float8 operand = PG_GETARG_FLOAT8(0); + float8 bound1 = PG_GETARG_FLOAT8(1); + float8 bound2 = PG_GETARG_FLOAT8(2); + int32 count = PG_GETARG_INT32(3); + int32 result; + + if (count <= 0.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("count must be greater than zero"))); + + if (isnan(operand) || isnan(bound1) || isnan(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("operand, lower bound and upper bound cannot be NaN"))); + + /* Note that we allow "operand" to be infinite */ + if (is_infinite(bound1) || is_infinite(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower and upper bounds must be finite"))); + + if (bound1 < bound2) + { + if (operand < bound1) + result = 0; + else if (operand >= bound2) + { + result = count + 1; + /* check for overflow */ + if (result < count) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else + result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1; + } + else if (bound1 > bound2) + { + if (operand > bound1) + result = 0; + else if (operand <= bound2) + { + result = count + 1; + /* check for overflow */ + if (result < count) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + } + else + result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("lower bound cannot equal upper bound"))); + result = 0; /* keep the compiler quiet */ + } + + PG_RETURN_INT32(result); + } + /* ========== PRIVATE ROUTINES ========== */ #ifndef HAVE_CBRT Index: src/backend/utils/adt/numeric.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/numeric.c,v retrieving revision 1.98 diff -c -p -r1.98 numeric.c *** src/backend/utils/adt/numeric.c 5 Jan 2007 22:19:41 -0000 1.98 --- src/backend/utils/adt/numeric.c 12 Jan 2007 00:08:04 -0000 *************** numeric_floor(PG_FUNCTION_ARGS) *** 874,880 **** } /* ! * width_bucket_numeric() - * * 'bound1' and 'bound2' are the lower and upper bounds of the * histogram's range, respectively. 'count' is the number of buckets --- 874,881 ---- } /* ! * Implements the numeric version of the width_bucket() function ! * defined by SQL2003. See also width_bucket_float8(). * * 'bound1' and 'bound2' are the lower and upper bounds of the * histogram's range, respectively. 'count' is the number of buckets *************** width_bucket_numeric(PG_FUNCTION_ARGS) *** 901,906 **** --- 902,914 ---- (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); + if (NUMERIC_IS_NAN(operand) || + NUMERIC_IS_NAN(bound1) || + NUMERIC_IS_NAN(bound2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), + errmsg("operand, lower bound and upper bound cannot be NaN"))); + init_var(&result_var); init_var(&count_var); *************** width_bucket_numeric(PG_FUNCTION_ARGS) *** 937,942 **** --- 945,951 ---- break; } + /* if result exceeds the range of a legal int4, we ereport here */ result = numericvar_to_int4(&result_var); free_var(&count_var); *************** width_bucket_numeric(PG_FUNCTION_ARGS) *** 946,953 **** } /* - * compute_bucket() - - * * If 'operand' is not outside the bucket range, determine the correct * bucket for it to go. The calculations performed by this function * are derived directly from the SQL2003 spec. --- 955,960 ---- Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.436 diff -c -p -r1.436 pg_proc.h *** src/include/catalog/pg_proc.h 5 Jan 2007 22:19:53 -0000 1.436 --- src/include/catalog/pg_proc.h 11 Jan 2007 20:21:25 -0000 *************** DATA(insert OID = 309 ( float84gt P *** 636,641 **** --- 636,643 ---- DESCR("greater-than"); DATA(insert OID = 310 ( float84ge PGNSP PGUID 12 f f t f i 2 16 "701 700" _null_ _null_ _null_ float84ge - _null_ )); DESCR("greater-than-or-equal"); + DATA(insert OID = 320 ( width_bucket PGNSP PGUID 12 f f t f i 4 23 "701 701 701 23" _null_ _null_ _null_ width_bucket_float8 - _null_ )); + DESCR("bucket number of operand in equidepth histogram"); DATA(insert OID = 311 ( float8 PGNSP PGUID 12 f f t f i 1 701 "700" _null_ _null_ _null_ ftod - _null_ )); DESCR("convert float4 to float8"); Index: src/include/utils/builtins.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v retrieving revision 1.285 diff -c -p -r1.285 builtins.h *** src/include/utils/builtins.h 5 Jan 2007 22:19:58 -0000 1.285 --- src/include/utils/builtins.h 11 Jan 2007 20:21:25 -0000 *************** extern Datum float84lt(PG_FUNCTION_ARGS) *** 380,385 **** --- 380,386 ---- extern Datum float84le(PG_FUNCTION_ARGS); extern Datum float84gt(PG_FUNCTION_ARGS); extern Datum float84ge(PG_FUNCTION_ARGS); + extern Datum width_bucket_float8(PG_FUNCTION_ARGS); /* dbsize.c */ extern Datum pg_tablespace_size_oid(PG_FUNCTION_ARGS); Index: src/test/regress/sql/numeric.sql =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/test/regress/sql/numeric.sql,v retrieving revision 1.12 diff -c -p -r1.12 numeric.sql *** src/test/regress/sql/numeric.sql 26 Jun 2005 03:04:37 -0000 1.12 --- src/test/regress/sql/numeric.sql 11 Jan 2007 22:09:29 -0000 *************** INSERT INTO ceil_floor_round VALUES ('-0 *** 667,688 **** SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; DROP TABLE ceil_floor_round; ! -- Testing for width_bucket() ! -- NULL result ! SELECT width_bucket(NULL, NULL, NULL, NULL); -- errors SELECT width_bucket(5.0, 3.0, 4.0, 0); SELECT width_bucket(5.0, 3.0, 4.0, -5); ! SELECT width_bucket(3.0, 3.0, 3.0, 888); -- normal operation ! CREATE TABLE width_bucket_test (operand numeric); ! COPY width_bucket_test FROM stdin; -5.2 ! -0.0000000000001 ! 0.0000000000001 1 1.99999999999999 2 --- 667,692 ---- SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; DROP TABLE ceil_floor_round; ! -- Testing for width_bucket(). For convenience, we test both the ! -- numeric and float8 versions of the function in this file. -- errors SELECT width_bucket(5.0, 3.0, 4.0, 0); SELECT width_bucket(5.0, 3.0, 4.0, -5); ! SELECT width_bucket(3.5, 3.0, 3.0, 888); ! SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0); ! SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5); ! SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); ! SELECT width_bucket('NaN', 3.0, 4.0, 888); ! SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); -- normal operation ! CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8); ! COPY width_bucket_test (operand_num) FROM stdin; -5.2 ! -0.0000000001 ! 0.000000000001 1 1.99999999999999 2 *************** COPY width_bucket_test FROM stdin; *** 699,716 **** 9.99999999999999 10 10.0000000000001 - NaN \. SELECT ! operand, ! width_bucket(operand, 0, 10, 5) AS wb_1, ! width_bucket(operand, 10, 0, 5) AS wb_2, ! width_bucket(operand, 2, 8, 4) AS wb_3, ! width_bucket(operand, 5.0, 5.5, 20) AS wb_4, ! width_bucket(operand, -25, 25, 10) AS wb_5 FROM width_bucket_test; DROP TABLE width_bucket_test; -- TO_CHAR() --- 703,733 ---- 9.99999999999999 10 10.0000000000001 \. + UPDATE width_bucket_test SET operand_f8 = operand_num::float8; + SELECT ! operand_num, operand_f8, ! width_bucket(operand_num, 0, 10, 5) AS wb_1, ! width_bucket(operand_f8, 0, 10, 5) AS wb_1f, ! width_bucket(operand_num, 10, 0, 5) AS wb_2, ! width_bucket(operand_f8, 10, 0, 5) AS wb_2f, ! width_bucket(operand_num, 2, 8, 4) AS wb_3, ! width_bucket(operand_f8, 2, 8, 4) AS wb_3f, ! width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4, ! width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f, ! width_bucket(operand_num, -25, 25, 10) AS wb_5, ! width_bucket(operand_f8, -25, 25, 10) AS wb_5f FROM width_bucket_test; + -- for float8 only, check positive and negative infinity: we require + -- finite bucket bounds, but allow an infinite operand + SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error + SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error + SELECT width_bucket('Infinity'::float8, 1, 10, 10), + width_bucket('-Infinity'::float8, 1, 10, 10); + DROP TABLE width_bucket_test; -- TO_CHAR() *************** SELECT '' AS to_char_1, to_char(val, '9G *** 719,725 **** FROM num_data; SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999') ! FROM num_data; SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR') FROM num_data; --- 736,742 ---- FROM num_data; SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999') ! FROM num_data; SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR') FROM num_data;
---------------------------(end of broadcast)--------------------------- TIP 1: if posting/reading through Usenet, please send an appropriate subscribe-nomail command to [EMAIL PROTECTED] so that your message can get through to the mailing list cleanly