On 01/09/14 01:42, Tom Lane wrote:
BTW, was there a reason for not noticing the case of exact match in
the search loop, and falling out early? As it stands the code will
reliably choose the leftmost match if there are multiple equal items
in the search array, but do we care about such cases?
I am not sure if we care, probably not.
Anyway I attached patch that I am happy with. I am not yet sure what to
do with naming.
--
Petr Jelinek http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index daa56e9..bda8386 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -901,25 +901,36 @@
<indexterm>
<primary>width_bucket</primary>
</indexterm>
- <literal><function>width_bucket(<parameter>op</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>int</type>)</function></literal>
+ <literal><function>width_bucket(<parameter>operand</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>int</type>)</function></literal>
</entry>
<entry><type>int</type></entry>
<entry>return the bucket to which <parameter>operand</> would
be assigned in an equidepth histogram with <parameter>count</>
- buckets, in the range <parameter>b1</> to <parameter>b2</></entry>
+ buckets spanning the range <parameter>b1</> to <parameter>b2</></entry>
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
<entry><literal>3</literal></entry>
</row>
<row>
- <entry><literal><function>width_bucket(<parameter>op</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry>
+ <entry><literal><function>width_bucket(<parameter>operand</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry>
<entry><type>int</type></entry>
<entry>return the bucket to which <parameter>operand</> would
be assigned in an equidepth histogram with <parameter>count</>
- buckets, in the range <parameter>b1</> to <parameter>b2</></entry>
+ buckets spanning the range <parameter>b1</> to <parameter>b2</></entry>
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
<entry><literal>3</literal></entry>
</row>
+
+ <row>
+ <entry><literal><function>width_bucket(<parameter>operand</parameter> <type>anyelement</type>, <parameter>thresholds</parameter> <type>anyarray</type>)</function></literal></entry>
+ <entry><type>int</type></entry>
+ <entry>return the bucket to which <parameter>operand</> would
+ be assigned given an array listing the upper bounds of the buckets;
+ the <parameter>thresholds</> array <emphasis>must be sorted</>,
+ smallest first, or unexpected results will be obtained</entry>
+ <entry><literal>width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])</literal></entry>
+ <entry><literal>2</literal></entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index f8e94ec..57376ea 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -15,8 +15,10 @@
#include "postgres.h"
#include <ctype.h>
+#include <math.h>
#include "access/htup_details.h"
+#include "catalog/pg_type.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
@@ -130,6 +132,15 @@ static ArrayType *array_replace_internal(ArrayType *array,
Datum replace, bool replace_isnull,
bool remove, Oid collation,
FunctionCallInfo fcinfo);
+static int width_bucket_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
+static int width_bucket_fixed_float8(Datum operand, ArrayType *thresholds);
+static int width_bucket_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
/*
@@ -5502,3 +5513,219 @@ array_replace(PG_FUNCTION_ARGS)
fcinfo);
PG_RETURN_ARRAYTYPE_P(array);
}
+
+/*
+ * Implements width_bucket(anyelement, anyarray).
+ *
+ * 'thresholds' is an array containing upper bound values for each bucket;
+ * these must be sorted from smallest to largest, or bogus results will be
+ * produced. If N thresholds are supplied, the output is from 0 to N:
+ * 0 is for inputs < first threshold, N is for inputs >= last threshold.
+ */
+Datum
+width_bucket_generic(PG_FUNCTION_ARGS)
+{
+ Datum operand = PG_GETARG_DATUM(0);
+ ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1);
+ Oid element_type = ARR_ELEMTYPE(thresholds);
+ int result;
+
+ /* Check input */
+ if (ARR_NDIM(thresholds) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("thresholds must be one-dimensional array")));
+
+ if (array_contains_nulls(thresholds))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("thresholds array must not contain NULLs")));
+
+ /* For float8 use optimized implementation */
+ if (element_type == FLOAT8OID)
+ result = width_bucket_fixed_float8(operand, thresholds);
+ else
+ {
+ Oid collation = PG_GET_COLLATION();
+ TypeCacheEntry *typentry;
+
+ /* Cache information about the input type */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ /* We have two generic implementation paths for fixed- and variable-width types */
+ if (typentry->typlen > 0)
+ result = width_bucket_fixed(operand, thresholds, collation, typentry);
+ else
+ result = width_bucket_variable(operand, thresholds, collation, typentry);
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(thresholds, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * width_bucket for fixed-width data types.
+ */
+static int
+width_bucket_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ char *thresholds_data;
+ int typlen;
+ FunctionCallInfoData locfcinfo;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+ typlen = typentry->typlen;
+
+ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid;
+ char *ptr;
+ int32 cmpresult;
+
+ mid = (left + right) / 2;
+
+ ptr = thresholds_data + mid * typlen;
+
+ locfcinfo.arg[0] = operand;
+ locfcinfo.arg[1] = fetch_att(ptr, typentry->typbyval, typlen);
+ locfcinfo.argnull[0] = false;
+ locfcinfo.argnull[1] = false;
+ locfcinfo.isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo));
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * Optimized width_bucket for float8 data types.
+ *
+ * See width_bucket_fixed.
+ */
+int
+width_bucket_fixed_float8(Datum operand, ArrayType *thresholds)
+{
+ float8 op = DatumGetFloat8(operand);
+ float8 *thresholds_data;
+ int left;
+ int right;
+
+ thresholds_data = (float8 *) ARR_DATA_PTR(thresholds);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right) {
+ int mid = (left + right) / 2;
+
+ if ((isnan(thresholds_data[mid]) && !isnan(op)) || op < thresholds_data[mid])
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket for variable-width data types.
+ */
+static int
+width_bucket_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ char *thresholds_data;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ FunctionCallInfoData locfcinfo;
+ int left;
+ int right;
+
+ thresholds_data = ARR_DATA_PTR(thresholds);
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int i;
+ int mid;
+ char *ptr = thresholds_data;
+ int32 cmpresult;
+
+ mid = (left + right) / 2;
+
+ for (i = left; i < mid; i++)
+ {
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ }
+
+ locfcinfo.arg[0] = operand;
+ locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen);
+ locfcinfo.argnull[0] = false;
+ locfcinfo.argnull[1] = false;
+ locfcinfo.isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo));
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ {
+ /*
+ * Move the thresholds pointer so we don't have to seek from
+ * the beginning of array again.
+ * */
+ thresholds_data = att_addlength_pointer(ptr, typlen, ptr);
+ thresholds_data = (char *) att_align_nominal(thresholds_data, typalign);
+ left = mid + 1;
+ }
+ }
+
+ return left;
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 5176ed0..9b55acc 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -885,6 +885,8 @@ DATA(insert OID = 2334 ( array_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f
DESCR("aggregate final function");
DATA(insert OID = 2335 ( array_agg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 2277 "2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
DESCR("concatenate aggregate input into an array");
+DATA(insert OID = 3218 ( width_bucket PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "2283 2277" _null_ _null_ _null_ _null_ width_bucket_generic _null_ _null_ _null_ ));
+DESCR("bucket number of operand given a sorted array of bucket upper bounds");
DATA(insert OID = 3816 ( array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ ));
DESCR("array typanalyze");
DATA(insert OID = 3817 ( arraycontsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ arraycontsel _null_ _null_ _null_ ));
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 9bbfaae..2abaccd 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -214,6 +214,7 @@ extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS);
extern Datum array_unnest(PG_FUNCTION_ARGS);
extern Datum array_remove(PG_FUNCTION_ARGS);
extern Datum array_replace(PG_FUNCTION_ARGS);
+extern Datum width_bucket_generic(PG_FUNCTION_ARGS);
extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx,
int arraytyplen, int elmlen, bool elmbyval, char elmalign,
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 4286691..04f2400 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1706,3 +1706,102 @@ select length(md5((f1[1]).c2)) from dest;
drop table dest;
drop type textandtext;
+-- Tests for generic form of width_bucket()
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 5, 10.0]) AS wb_1,
+ width_bucket(op, ARRAY[0, 5.5, 9.99]) AS wb_2,
+ width_bucket(op, ARRAY[-6, -5, 2.0]) AS wb_3
+FROM (VALUES
+ (-5.2),
+ (-0.0000000001),
+ (0.000000000001),
+ (1),
+ (1.99999999999999),
+ (2),
+ (2.00000000000001),
+ (3),
+ (4),
+ (4.5),
+ (5),
+ (5.5),
+ (6),
+ (7),
+ (8),
+ (9),
+ (9.99999999999999),
+ (10),
+ (10.0000000000001)
+) v(op);
+ op | wb_1 | wb_2 | wb_3
+------------------+------+------+------
+ -5.2 | 0 | 0 | 1
+ -0.0000000001 | 0 | 0 | 2
+ 0.000000000001 | 0 | 1 | 2
+ 1 | 1 | 1 | 2
+ 1.99999999999999 | 1 | 1 | 2
+ 2 | 1 | 1 | 3
+ 2.00000000000001 | 1 | 1 | 3
+ 3 | 2 | 1 | 3
+ 4 | 2 | 1 | 3
+ 4.5 | 2 | 1 | 3
+ 5 | 3 | 1 | 3
+ 5.5 | 3 | 2 | 3
+ 6 | 3 | 2 | 3
+ 7 | 3 | 2 | 3
+ 8 | 3 | 2 | 3
+ 9 | 3 | 2 | 3
+ 9.99999999999999 | 3 | 3 | 3
+ 10 | 4 | 3 | 3
+ 10.0000000000001 | 4 | 3 | 3
+(19 rows)
+
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 5, 10]) AS wb_1
+FROM generate_series(0,11) as op;
+ op | wb_1
+----+------
+ 0 | 0
+ 1 | 1
+ 2 | 1
+ 3 | 2
+ 4 | 2
+ 5 | 3
+ 6 | 3
+ 7 | 3
+ 8 | 3
+ 9 | 3
+ 10 | 4
+ 11 | 4
+(12 rows)
+
+SELECT width_bucket(now(),
+ array['yesterday', 'today', 'tomorrow']::timestamptz[]);
+ width_bucket
+--------------
+ 2
+(1 row)
+
+SELECT width_bucket(5, ARRAY[3]);
+ width_bucket
+--------------
+ 1
+(1 row)
+
+SELECT width_bucket(5, '{}');
+ width_bucket
+--------------
+ 0
+(1 row)
+
+-- error cases
+SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]);
+ERROR: function width_bucket(text, integer[]) does not exist
+LINE 1: SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]);
+ ^
+HINT: No function matches the given name and argument types. You might need to add explicit type casts.
+SELECT width_bucket(5, ARRAY[3, 4, NULL]);
+ERROR: thresholds array must not contain NULLs
+SELECT width_bucket(5, ARRAY[ARRAY[1, 2], ARRAY[3, 4]]);
+ERROR: thresholds must be one-dimensional array
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index d9f7cbf..eef3eb3 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -476,3 +476,48 @@ drop table src;
select length(md5((f1[1]).c2)) from dest;
drop table dest;
drop type textandtext;
+
+-- Tests for generic form of width_bucket()
+
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 5, 10.0]) AS wb_1,
+ width_bucket(op, ARRAY[0, 5.5, 9.99]) AS wb_2,
+ width_bucket(op, ARRAY[-6, -5, 2.0]) AS wb_3
+FROM (VALUES
+ (-5.2),
+ (-0.0000000001),
+ (0.000000000001),
+ (1),
+ (1.99999999999999),
+ (2),
+ (2.00000000000001),
+ (3),
+ (4),
+ (4.5),
+ (5),
+ (5.5),
+ (6),
+ (7),
+ (8),
+ (9),
+ (9.99999999999999),
+ (10),
+ (10.0000000000001)
+) v(op);
+
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 5, 10]) AS wb_1
+FROM generate_series(0,11) as op;
+
+SELECT width_bucket(now(),
+ array['yesterday', 'today', 'tomorrow']::timestamptz[]);
+
+SELECT width_bucket(5, ARRAY[3]);
+SELECT width_bucket(5, '{}');
+
+-- error cases
+SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]);
+SELECT width_bucket(5, ARRAY[3, 4, NULL]);
+SELECT width_bucket(5, ARRAY[ARRAY[1, 2], ARRAY[3, 4]]);
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers