Here is a new function which could produce an array of numbers with a controllable array length and duplicated elements in these arrays. I used it when working with gin index, and I think it would be helpful for others as well, so here it is.
select * from normal_rand_array(5, 10, 1.8::numeric, 3.5::numeric); normal_rand_array ----------------------------------------------- {3.3,2.3,2.7,3.2,2.0,2.7,3.4,2.7,2.3,2.9} {3.3,1.8,2.9,3.4,2.0,1.8,2.0,3.5,2.8,2.5} {2.1,1.9,2.3,1.9,2.5,2.7,2.4,2.9,1.8} {2.3,2.5,2.4,2.7,2.7,2.3,2.9,3.3,3.3,1.9,3.5} {2.8,3.4,2.7,1.8,3.3,2.3,2.2,3.5,2.6,2.5} (5 rows) select * from normal_rand_array(5, 10, 1.8::int4, 3.5::int4); normal_rand_array ------------------------------------- {3,2,2,3,4,2} {2,4,2,3,3,3,3,2,2,3,3,2,3,2} {2,4,3} {4,2,3,4,2,4,2,2,3,4,3,3,2,4,4,2,3} {4,3,3,4,3,3,4,2,4} (5 rows) the 5 means it needs to produce 5 rows in total and the 10 is the average array length, and 1.8 is the minvalue for the random function and 3.5 is the maxvalue. -- Best Regards Andy Fan
>From 397dcaf67f29057b80aebbb6116b49ac8344547c Mon Sep 17 00:00:00 2001 From: Andy Fan <zhihuifan1...@163.com> Date: Sat, 8 Jun 2024 13:21:08 +0800 Subject: [PATCH v20240608 1/1] Add function normal_rand_array function to contrib/tablefunc. It can produce an array of numbers with n controllable array length and duplicated elements in these arrays. --- contrib/tablefunc/Makefile | 2 +- contrib/tablefunc/expected/tablefunc.out | 26 ++++ contrib/tablefunc/sql/tablefunc.sql | 10 ++ contrib/tablefunc/tablefunc--1.0--1.1.sql | 7 ++ contrib/tablefunc/tablefunc.c | 140 ++++++++++++++++++++++ contrib/tablefunc/tablefunc.control | 2 +- doc/src/sgml/tablefunc.sgml | 10 ++ src/backend/utils/adt/arrayfuncs.c | 7 ++ 8 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile index 191a3a1d38..f0c67308fd 100644 --- a/contrib/tablefunc/Makefile +++ b/contrib/tablefunc/Makefile @@ -3,7 +3,7 @@ MODULES = tablefunc EXTENSION = tablefunc -DATA = tablefunc--1.0.sql +DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql PGFILEDESC = "tablefunc - various functions that return tables" REGRESS = tablefunc diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out index ddece79029..9f0cbbfbbe 100644 --- a/contrib/tablefunc/expected/tablefunc.out +++ b/contrib/tablefunc/expected/tablefunc.out @@ -12,6 +12,32 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2); -- negative number of tuples SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2); ERROR: number of rows cannot be negative +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::numeric, 8::numeric) as i; + count | avg +-------+-------------------- + 10 | 3.0000000000000000 +(1 row) + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int4, 8::int4) as i; + count | avg +-------+-------------------- + 10 | 3.0000000000000000 +(1 row) + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int8, 8::int8) as i; + count | avg +-------+-------------------- + 10 | 3.0000000000000000 +(1 row) + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::float8, 8::float8) as i; + count | avg +-------+-------------------- + 10 | 3.0000000000000000 +(1 row) + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 'abc'::text, 'def'::text) as i; +ERROR: unsupported type 25 in normal_rand_array. -- -- crosstab() -- diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql index 0fb8e40de2..dec57cfc66 100644 --- a/contrib/tablefunc/sql/tablefunc.sql +++ b/contrib/tablefunc/sql/tablefunc.sql @@ -8,6 +8,16 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2); -- negative number of tuples SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2); +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::numeric, 8::numeric) as i; + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int4, 8::int4) as i; + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int8, 8::int8) as i; + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::float8, 8::float8) as i; + +SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 'abc'::text, 'def'::text) as i; + -- -- crosstab() -- diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql new file mode 100644 index 0000000000..9d13e80ff0 --- /dev/null +++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql @@ -0,0 +1,7 @@ +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION normal_rand_array(int4, int4, anyelement, anyelement) +RETURNS setof anyarray +AS 'MODULE_PATHNAME','normal_rand_array' +LANGUAGE C VOLATILE STRICT; diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c index 7d1b5f5143..6d26aa843b 100644 --- a/contrib/tablefunc/tablefunc.c +++ b/contrib/tablefunc/tablefunc.c @@ -42,7 +42,9 @@ #include "lib/stringinfo.h" #include "miscadmin.h" #include "tablefunc.h" +#include "utils/array.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" PG_MODULE_MAGIC; @@ -91,6 +93,13 @@ typedef struct bool use_carry; /* use second generated value */ } normal_rand_fctx; +typedef struct +{ + int carry_len; + FunctionCallInfo fcinfo; + FunctionCallInfo random_len_fcinfo; +} normal_rand_array_fctx; + #define xpfree(var_) \ do { \ if (var_ != NULL) \ @@ -269,6 +278,137 @@ normal_rand(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funcctx); } +/* + * normal_rand_array - return requested number of random arrays + * with a Gaussian (Normal) distribution. + * + * inputs are int numvals, int mean_len, anyelement minvalue, + * anyelement maxvalue returns setof anyelement[] + */ +PG_FUNCTION_INFO_V1(normal_rand_array); +Datum +normal_rand_array(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + uint64 call_cntr; + uint64 max_calls; + normal_rand_array_fctx *fctx; + MemoryContext oldcontext; + Datum minvalue, maxvalue; + int array_mean_len; + Oid target_oid, random_fn_oid; + + array_mean_len = PG_GETARG_INT32(1); + minvalue = PG_GETARG_DATUM(2); + maxvalue = PG_GETARG_DATUM(3); + + target_oid = get_fn_expr_argtype(fcinfo->flinfo, 2); + + if (target_oid == INT4OID) + random_fn_oid = F_RANDOM_INT4_INT4; + else if (target_oid == INT8OID) + random_fn_oid = F_RANDOM_INT8_INT8; + else if (target_oid == FLOAT8OID) + random_fn_oid = F_RANDOM_; + else if (target_oid == NUMERICOID) + random_fn_oid = F_RANDOM_NUMERIC_NUMERIC; + else + elog(ERROR, "unsupported type %d in normal_rand_array.", + target_oid); + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + int32 num_tuples; + FmgrInfo *random_len_flinfo, *random_val_flinfo; + FunctionCallInfo random_len_fcinfo, random_val_fcinfo; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* total number of tuples to be returned */ + num_tuples = PG_GETARG_INT32(0); + if (num_tuples < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of rows cannot be negative"))); + funcctx->max_calls = num_tuples; + + /* allocate memory for user context */ + fctx = (normal_rand_array_fctx *) palloc(sizeof(normal_rand_array_fctx)); + + random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2)); + random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); + fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo); + InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2, InvalidOid, NULL, NULL); + + random_len_fcinfo->args[0].isnull = false; + random_len_fcinfo->args[1].isnull = false; + random_len_fcinfo->args[0].value = 0; + random_len_fcinfo->args[1].value = array_mean_len; + + random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2)); + random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); + fmgr_info(random_fn_oid, random_val_flinfo); + InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2, InvalidOid, NULL, NULL); + + random_val_fcinfo->args[0].isnull = false; + random_val_fcinfo->args[1].isnull = false; + random_val_fcinfo->args[0].value = minvalue; + random_val_fcinfo->args[1].value = maxvalue; + + fctx->carry_len = -1; + fctx->fcinfo = random_val_fcinfo; + fctx->random_len_fcinfo = random_len_fcinfo; + + funcctx->user_fctx = fctx; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + call_cntr = funcctx->call_cntr; + max_calls = funcctx->max_calls; + fctx = funcctx->user_fctx; + + if (call_cntr < max_calls) /* do when there is more left to send */ + { + int array_len; + int i; + Datum *results; + + if (fctx->carry_len != -1) + { + array_len = fctx->carry_len; + fctx->carry_len = -1; + } + else + { + array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo)); + fctx->carry_len = 2 * array_mean_len - array_len; + } + + results = palloc(array_len * sizeof(Datum)); + + for(i = 0; i < array_len; i++) + results[i] = FunctionCallInvoke(fctx->fcinfo); + + + SRF_RETURN_NEXT(funcctx, PointerGetDatum( + construct_array_builtin(results, array_len, target_oid))); + } + else + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); +} + /* * get_normal_pair() * Assigns normally distributed (Gaussian) values to a pair of provided diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control index 7b25d16170..9cc6222a4f 100644 --- a/contrib/tablefunc/tablefunc.control +++ b/contrib/tablefunc/tablefunc.control @@ -1,6 +1,6 @@ # tablefunc extension comment = 'functions that manipulate whole tables, including crosstab' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/tablefunc' relocatable = true trusted = true diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml index e10fe7009d..014c36b81c 100644 --- a/doc/src/sgml/tablefunc.sgml +++ b/doc/src/sgml/tablefunc.sgml @@ -53,6 +53,16 @@ </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <function>normal_rand_array</function> ( <parameter>numvals</parameter> <type>integer</type>, <parameter>meanarraylen</parameter> <type>int4</type>, <parameter>minval</parameter> <type>anyelement</type>, <parameter>maxval</parameter> <type>anyelement</type> ) + <returnvalue>setof anyarray</returnvalue> + </para> + <para> + Produces a set of normally distributed random array of numbers. + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> ) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index d6641b570d..7c95cc05bc 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3397,6 +3397,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype) elmalign = TYPALIGN_INT; break; + case FLOAT8OID: + elmlen = sizeof(float8); + elmbyval = FLOAT8PASSBYVAL; + elmalign = TYPALIGN_DOUBLE; + break; + case INT2OID: elmlen = sizeof(int16); elmbyval = true; @@ -3429,6 +3435,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype) break; case TEXTOID: + case NUMERICOID: elmlen = -1; elmbyval = false; elmalign = TYPALIGN_INT; -- 2.45.2