[pgsql-patches] width_bucket() for float8

Neil Conway Thu, 11 Jan 2007 18:11:56 -0800

Attached is a patch that provides an implementation of width_bucket()
for the float8 data type, per earlier discussion. The implementation is
fairly ugly, but I don't see an easy way to simplify it.


When writing this, I noticed that my previous implementation of
width_bucket() probably doesn't handle NaN correctly:

    postgres=# select width_bucket('NaN', 1, 5, 5);
     width_bucket 
    --------------
                6
    (1 row)

AFAICS SQL:2003 does not define a NaN value, so it doesn't address how
width_bucket() should behave here. The patch changes width_bucket() so
that ereport(ERROR) is raised if NaN is specified for the operand or the
lower or upper bounds to width_bucket(). I think this is reasonable
behavior -- any objections?

float8 also allows positive and negative infinity. I disallowed infinite
values for the histogram bounds, but allowed it for the operand.

-Neil

Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.348
diff -c -p -r1.348 func.sgml
*** doc/src/sgml/func.sgml	21 Dec 2006 16:05:12 -0000	1.348
--- doc/src/sgml/func.sgml	11 Jan 2007 20:21:23 -0000
***************
*** 854,859 ****
--- 854,869 ----
         <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
         <entry><literal>3</literal></entry>
        </row>
+ 
+       <row>
+        <entry><literal><function>width_bucket</function>(<parameter>op</parameter> <type>float8</type>, <parameter>b1</parameter> <type>float8</type>, <parameter>b2</parameter> <type>float8</type>, <parameter>count</parameter> <type>int</type>)</literal></entry>
+        <entry><type>int</type></entry>
+        <entry>return the bucket to which <parameter>operand</> would
+        be assigned in an equidepth histogram with <parameter>count</>
+        buckets, in the range <parameter>b1</> to <parameter>b2</></entry>
+        <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
+        <entry><literal>3</literal></entry>
+       </row>
       </tbody>
      </tgroup>
     </table>
Index: src/backend/utils/adt/float.c
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/float.c,v
retrieving revision 1.146
diff -c -p -r1.146 float.c
*** src/backend/utils/adt/float.c	6 Jan 2007 20:21:29 -0000	1.146
--- src/backend/utils/adt/float.c	12 Jan 2007 00:09:26 -0000
*************** float84ge(PG_FUNCTION_ARGS)
*** 2745,2750 ****
--- 2745,2831 ----
  	PG_RETURN_BOOL(float8_cmp_internal(arg1, arg2) >= 0);
  }
  
+ /*
+  * Implements the float8 version of the width_bucket() function
+  * defined by SQL2003. See also width_bucket_numeric().
+  *
+  * 'bound1' and 'bound2' are the lower and upper bounds of the
+  * histogram's range, respectively. 'count' is the number of buckets
+  * in the histogram. width_bucket() returns an integer indicating the
+  * bucket number that 'operand' belongs in for an equiwidth histogram
+  * with the specified characteristics. An operand smaller than the
+  * lower bound is assigned to bucket 0. An operand greater than the
+  * upper bound is assigned to an additional bucket (with number
+  * count+1).
+  */
+ Datum
+ width_bucket_float8(PG_FUNCTION_ARGS)
+ {
+ 	float8 		operand = PG_GETARG_FLOAT8(0);
+ 	float8 		bound1 	= PG_GETARG_FLOAT8(1);
+ 	float8 		bound2 	= PG_GETARG_FLOAT8(2);
+ 	int32 		count 	= PG_GETARG_INT32(3);
+ 	int32 		result;
+ 
+ 	if (count <= 0.0)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ 				 errmsg("count must be greater than zero")));
+ 
+ 	if (isnan(operand) || isnan(bound1) || isnan(bound2))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ 				 errmsg("operand, lower bound and upper bound cannot be NaN")));
+ 
+ 	/* Note that we allow "operand" to be infinite */
+ 	if (is_infinite(bound1) || is_infinite(bound2))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ 				 errmsg("lower and upper bounds must be finite")));
+ 
+ 	if (bound1 < bound2)
+ 	{
+ 		if (operand < bound1)
+ 			result = 0;
+ 		else if (operand >= bound2)
+ 		{
+ 			result = count + 1;
+ 			/* check for overflow */
+ 			if (result < count)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ 						 errmsg("integer out of range")));
+ 		}
+ 		else
+ 			result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1;
+ 	}
+ 	else if (bound1 > bound2)
+ 	{
+ 		if (operand > bound1)
+ 			result = 0;
+ 		else if (operand <= bound2)
+ 		{
+ 			result = count + 1;
+ 			/* check for overflow */
+ 			if (result < count)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ 						 errmsg("integer out of range")));
+ 		}
+ 		else
+ 			result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1;
+ 	}
+ 	else
+ 	{
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ 				 errmsg("lower bound cannot equal upper bound")));
+ 		result = 0;		/* keep the compiler quiet */
+ 	}
+ 
+ 	PG_RETURN_INT32(result);
+ }
+ 
  /* ========== PRIVATE ROUTINES ========== */
  
  #ifndef HAVE_CBRT
Index: src/backend/utils/adt/numeric.c
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/numeric.c,v
retrieving revision 1.98
diff -c -p -r1.98 numeric.c
*** src/backend/utils/adt/numeric.c	5 Jan 2007 22:19:41 -0000	1.98
--- src/backend/utils/adt/numeric.c	12 Jan 2007 00:08:04 -0000
*************** numeric_floor(PG_FUNCTION_ARGS)
*** 874,880 ****
  }
  
  /*
!  * width_bucket_numeric() -
   *
   * 'bound1' and 'bound2' are the lower and upper bounds of the
   * histogram's range, respectively. 'count' is the number of buckets
--- 874,881 ----
  }
  
  /*
!  * Implements the numeric version of the width_bucket() function
!  * defined by SQL2003. See also width_bucket_float8().
   *
   * 'bound1' and 'bound2' are the lower and upper bounds of the
   * histogram's range, respectively. 'count' is the number of buckets
*************** width_bucket_numeric(PG_FUNCTION_ARGS)
*** 901,906 ****
--- 902,914 ----
  				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
  				 errmsg("count must be greater than zero")));
  
+ 	if (NUMERIC_IS_NAN(operand) ||
+ 		NUMERIC_IS_NAN(bound1) ||
+ 		NUMERIC_IS_NAN(bound2))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ 				 errmsg("operand, lower bound and upper bound cannot be NaN")));
+ 
  	init_var(&result_var);
  	init_var(&count_var);
  
*************** width_bucket_numeric(PG_FUNCTION_ARGS)
*** 937,942 ****
--- 945,951 ----
  			break;
  	}
  
+ 	/* if result exceeds the range of a legal int4, we ereport here */
  	result = numericvar_to_int4(&result_var);
  
  	free_var(&count_var);
*************** width_bucket_numeric(PG_FUNCTION_ARGS)
*** 946,953 ****
  }
  
  /*
-  * compute_bucket() -
-  *
   * If 'operand' is not outside the bucket range, determine the correct
   * bucket for it to go. The calculations performed by this function
   * are derived directly from the SQL2003 spec.
--- 955,960 ----
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.436
diff -c -p -r1.436 pg_proc.h
*** src/include/catalog/pg_proc.h	5 Jan 2007 22:19:53 -0000	1.436
--- src/include/catalog/pg_proc.h	11 Jan 2007 20:21:25 -0000
*************** DATA(insert OID = 309 (  float84gt		   P
*** 636,641 ****
--- 636,643 ----
  DESCR("greater-than");
  DATA(insert OID = 310 (  float84ge		   PGNSP PGUID 12 f f t f i 2 16 "701 700" _null_ _null_ _null_ float84ge - _null_ ));
  DESCR("greater-than-or-equal");
+ DATA(insert OID = 320 ( width_bucket	   PGNSP PGUID 12 f f t f i 4 23 "701 701 701 23" _null_ _null_ _null_	width_bucket_float8 - _null_ ));
+ DESCR("bucket number of operand in equidepth histogram");
  
  DATA(insert OID = 311 (  float8			   PGNSP PGUID 12 f f t f i 1 701 "700" _null_ _null_ _null_	ftod - _null_ ));
  DESCR("convert float4 to float8");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.285
diff -c -p -r1.285 builtins.h
*** src/include/utils/builtins.h	5 Jan 2007 22:19:58 -0000	1.285
--- src/include/utils/builtins.h	11 Jan 2007 20:21:25 -0000
*************** extern Datum float84lt(PG_FUNCTION_ARGS)
*** 380,385 ****
--- 380,386 ----
  extern Datum float84le(PG_FUNCTION_ARGS);
  extern Datum float84gt(PG_FUNCTION_ARGS);
  extern Datum float84ge(PG_FUNCTION_ARGS);
+ extern Datum width_bucket_float8(PG_FUNCTION_ARGS);
  
  /* dbsize.c */
  extern Datum pg_tablespace_size_oid(PG_FUNCTION_ARGS);
Index: src/test/regress/sql/numeric.sql
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/test/regress/sql/numeric.sql,v
retrieving revision 1.12
diff -c -p -r1.12 numeric.sql
*** src/test/regress/sql/numeric.sql	26 Jun 2005 03:04:37 -0000	1.12
--- src/test/regress/sql/numeric.sql	11 Jan 2007 22:09:29 -0000
*************** INSERT INTO ceil_floor_round VALUES ('-0
*** 667,688 ****
  SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round;
  DROP TABLE ceil_floor_round;
  
! -- Testing for width_bucket()
! -- NULL result
! SELECT width_bucket(NULL, NULL, NULL, NULL);
  
  -- errors
  SELECT width_bucket(5.0, 3.0, 4.0, 0);
  SELECT width_bucket(5.0, 3.0, 4.0, -5);
! SELECT width_bucket(3.0, 3.0, 3.0, 888);
  
  -- normal operation
! CREATE TABLE width_bucket_test (operand numeric);
  
! COPY width_bucket_test FROM stdin;
  -5.2
! -0.0000000000001
! 0.0000000000001
  1
  1.99999999999999
  2
--- 667,692 ----
  SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round;
  DROP TABLE ceil_floor_round;
  
! -- Testing for width_bucket(). For convenience, we test both the
! -- numeric and float8 versions of the function in this file.
  
  -- errors
  SELECT width_bucket(5.0, 3.0, 4.0, 0);
  SELECT width_bucket(5.0, 3.0, 4.0, -5);
! SELECT width_bucket(3.5, 3.0, 3.0, 888);
! SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0);
! SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);
! SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
! SELECT width_bucket('NaN', 3.0, 4.0, 888);
! SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
  
  -- normal operation
! CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8);
  
! COPY width_bucket_test (operand_num) FROM stdin;
  -5.2
! -0.0000000001
! 0.000000000001
  1
  1.99999999999999
  2
*************** COPY width_bucket_test FROM stdin;
*** 699,716 ****
  9.99999999999999
  10
  10.0000000000001
- NaN
  \.
  
  SELECT
!     operand,
!     width_bucket(operand, 0, 10, 5) AS wb_1,
!     width_bucket(operand, 10, 0, 5) AS wb_2,
!     width_bucket(operand, 2, 8, 4) AS wb_3,
!     width_bucket(operand, 5.0, 5.5, 20) AS wb_4,
!     width_bucket(operand, -25, 25, 10) AS wb_5
      FROM width_bucket_test;
  
  DROP TABLE width_bucket_test;
  
  -- TO_CHAR()
--- 703,733 ----
  9.99999999999999
  10
  10.0000000000001
  \.
  
+ UPDATE width_bucket_test SET operand_f8 = operand_num::float8;
+ 
  SELECT
!     operand_num, operand_f8,
!     width_bucket(operand_num, 0, 10, 5) AS wb_1,
!     width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
!     width_bucket(operand_num, 10, 0, 5) AS wb_2,
!     width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
!     width_bucket(operand_num, 2, 8, 4) AS wb_3,
!     width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
!     width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
!     width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
!     width_bucket(operand_num, -25, 25, 10) AS wb_5,
!     width_bucket(operand_f8, -25, 25, 10) AS wb_5f
      FROM width_bucket_test;
  
+ -- for float8 only, check positive and negative infinity: we require
+ -- finite bucket bounds, but allow an infinite operand
+ SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error
+ SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error
+ SELECT width_bucket('Infinity'::float8, 1, 10, 10),
+        width_bucket('-Infinity'::float8, 1, 10, 10);
+ 
  DROP TABLE width_bucket_test;
  
  -- TO_CHAR()
*************** SELECT '' AS to_char_1, to_char(val, '9G
*** 719,725 ****
  	FROM num_data;
  
  SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999')
! 	FROM num_data;	
  
  SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR')
  	FROM num_data;
--- 736,742 ----
  	FROM num_data;
  
  SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999')
! 	FROM num_data;
  
  SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR')
  	FROM num_data;

---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
       subscribe-nomail command to [EMAIL PROTECTED] so that your
       message can get through to the mailing list cleanly

[pgsql-patches] width_bucket() for float8

Reply via email to