Am 24.07.22 um 21:42 schrieb Fabien COELHO:

Duno. I'm still wondering what it should do. I'm pretty sure that the documentation should be clear about a shared seed, if any. I do not think that departing from the standard is a good thing, either.

Are sure it violates the standard? I could not find anything about it. The private prng state for random() was introduced in 2018 [0]. Neither commit nor discussion mentions any standard compliance.

[0] https://www.postgresql.org/message-id/E1gdNAo-00036g-TB%40gemulon.postgresql.org

I updated the documentation for setseed().

If someone wants a limit, they can easily "LEAST(#1 dim size, other limit)" to get it, it is easy enough with a strict function.

Convinced. It errors out now if n is out of bounds.

Martin
From afb7c022abd26b82a4fd3611313a83f144909554 Mon Sep 17 00:00:00 2001
From: Martin Kalcher <martin.kalc...@aboutsource.net>
Date: Sun, 17 Jul 2022 18:06:04 +0200
Subject: [PATCH v2] Introduce array_shuffle() and array_sample()

* array_shuffle() shuffles the elements of an array.
* array_sample() chooses n elements from an array by random.

The new functions share its prng state with random() and thus interoperate
with setseed().
---
 doc/src/sgml/func.sgml                  |  40 +++++-
 src/backend/utils/adt/Makefile          |   1 +
 src/backend/utils/adt/array_userfuncs.c | 180 ++++++++++++++++++++++++
 src/backend/utils/adt/float.c           |  37 +----
 src/backend/utils/adt/user_prng.c       |  87 ++++++++++++
 src/include/catalog/pg_proc.dat         |   6 +
 src/include/utils/user_prng.h           |  19 +++
 src/test/regress/expected/arrays.out    |  66 +++++++++
 src/test/regress/sql/arrays.sql         |  16 +++
 9 files changed, 417 insertions(+), 35 deletions(-)
 create mode 100644 src/backend/utils/adt/user_prng.c
 create mode 100644 src/include/utils/user_prng.h

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b6783b7ad0..8c51a17eda 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1820,7 +1820,8 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
         <returnvalue>void</returnvalue>
        </para>
        <para>
-        Sets the seed for subsequent <literal>random()</literal> calls;
+        Sets the seed for subsequent calls to random functions, like
+        <literal>random()</literal> or <literal>array_shuffle()</literal>;
         argument must be between -1.0 and 1.0, inclusive
        </para>
        <para>
@@ -1838,7 +1839,8 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
    applications; see the <xref linkend="pgcrypto"/> module for a more
    secure alternative.
    If <function>setseed()</function> is called, the series of results of
-   subsequent <function>random()</function> calls in the current session
+   subsequent calls to random functions, like <function>random()</function> or
+   <function>array_shuffle()</function>, in the current session
    can be repeated by re-issuing <function>setseed()</function> with the same
    argument.
   </para>
@@ -19395,6 +19397,40 @@ SELECT NULLIF(value, '(none)') ...
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>array_sample</primary>
+        </indexterm>
+        <function>array_sample</function> ( <parameter>array</parameter> <type>anyarray</type>, <parameter>n</parameter> <type>integer</type> )
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Returns <parameter>n</parameter> randomly chosen elements from <parameter>array</parameter> in selection order.
+       </para>
+       <para>
+        <literal>array_sample(ARRAY[[1,2],[3,4],[5,6]], 2)</literal>
+        <returnvalue>{{5,6},{1,2}}</returnvalue>
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>array_shuffle</primary>
+        </indexterm>
+        <function>array_shuffle</function> ( <type>anyarray</type> )
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Shuffles the first dimension of the array.
+       </para>
+       <para>
+        <literal>array_shuffle(ARRAY[[1,2],[3,4],[5,6]])</literal>
+        <returnvalue>{{5,6},{1,2},{3,4}}</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 7c722ea2ce..42b65e58bb 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -109,6 +109,7 @@ OBJS = \
 	tsvector.o \
 	tsvector_op.o \
 	tsvector_parser.o \
+	user_prng.o \
 	uuid.o \
 	varbit.o \
 	varchar.o \
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index ca70590d7d..d62f1aa4ef 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -17,6 +17,7 @@
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
+#include "utils/user_prng.h"
 #include "utils/typcache.h"
 
 
@@ -902,3 +903,182 @@ array_positions(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
 }
+
+/*
+ * Produce array with n randomly chosen items from given array in random order.
+ *
+ * array: array object to examine (must not be NULL)
+ * n: number of items (must not be greater than the size of the arrays first dimension)
+ * elmtyp, elmlen, elmbyval, elmalign: info for the datatype of the items
+ *
+ * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info
+ * from the system catalogs, given the elmtyp. However, the caller is
+ * in a better position to cache this info across multiple uses, or even
+ * to hard-wire values if the element type is hard-wired.
+ */
+static ArrayType *
+array_shuffle_n(ArrayType *array, int n,
+				Oid elmtyp, int elmlen, bool elmbyval, char elmalign)
+{
+	ArrayType  *result;
+	int			ndim,
+			   *dims,
+			   *lbs,
+				rdims[MAXDIM],
+				nelm,
+				nitem,
+				i,
+				j,
+				k;
+	Datum		elm,
+			   *elms,
+			   *ielms,
+			   *jelms;
+	bool		nul,
+			   *nuls,
+			   *inuls,
+			   *jnuls;
+
+	ndim = ARR_NDIM(array);
+	dims = ARR_DIMS(array);
+	lbs = ARR_LBOUND(array);
+
+	if (ndim < 1 || dims[0] < 1 || n < 1)
+		return construct_empty_array(elmtyp);
+
+	deconstruct_array(array, elmtyp, elmlen, elmbyval, elmalign,
+					  &elms, &nuls, &nelm);
+
+	nitem = dims[0];			/* total number of items */
+	nelm /= nitem;				/* number of elements per item */
+
+	ielms = elms;
+	inuls = nuls;
+
+	/*
+	 * Shuffle array using Fisher-Yates algorithm. Iterate array and swap head
+	 * (ielms) with an randomly chosen item (jelms) at each iteration.
+	 */
+	for (i = 0; i < n; i++)
+	{
+		j = (int) user_prng_uint64_range(0, nitem - i - 1) * nelm;
+		jelms = ielms + j;
+		jnuls = inuls + j;
+
+		/* Swap all elements in item (i) with elements in item (j). */
+		for (k = 0; k < nelm; k++)
+		{
+			elm = *ielms;
+			nul = *inuls;
+			*ielms = *jelms;
+			*inuls = *jnuls;
+			*jelms = elm;
+			*jnuls = nul;
+			ielms++;
+			inuls++;
+			jelms++;
+			jnuls++;
+		}
+	}
+
+	memcpy(rdims, dims, ndim * sizeof(int));
+	rdims[0] = n;
+
+	result = construct_md_array(elms, nuls, ndim, rdims, lbs,
+								elmtyp, elmlen, elmbyval, elmalign);
+
+	pfree(elms);
+	pfree(nuls);
+
+	return result;
+}
+
+/*
+ * Shuffle the elements of an array.
+ */
+Datum
+array_shuffle(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array,
+			   *result;
+	int16		elmlen;
+	bool		elmbyval;
+	char		elmalign;
+	Oid			elmtyp;
+	TypeCacheEntry *typentry;
+	int			n;
+
+	array = PG_GETARG_ARRAYTYPE_P(0);
+
+	/* Return empty array immediately. */
+	if (ARR_NDIM(array) < 1)
+		PG_RETURN_ARRAYTYPE_P(array);
+
+	n = ARR_DIMS(array)[0];
+
+	/* There is no point in shuffling arrays with less then two items. */
+	if (n < 2)
+		PG_RETURN_ARRAYTYPE_P(array);
+
+	elmtyp = ARR_ELEMTYPE(array);
+	typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+	if (typentry == NULL || typentry->type_id != elmtyp)
+	{
+		typentry = lookup_type_cache(elmtyp, 0);
+		fcinfo->flinfo->fn_extra = (void *) typentry;
+	}
+	elmlen = typentry->typlen;
+	elmbyval = typentry->typbyval;
+	elmalign = typentry->typalign;
+
+	result = array_shuffle_n(array, n,
+							 elmtyp, elmlen, elmbyval, elmalign);
+
+	PG_FREE_IF_COPY(array, 0);
+
+	PG_RETURN_ARRAYTYPE_P(result);
+}
+
+/*
+ * Choose N random elements from an array.
+ */
+Datum
+array_sample(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array,
+			   *result;
+	int16		elmlen;
+	bool		elmbyval;
+	char		elmalign;
+	Oid			elmtyp;
+	TypeCacheEntry *typentry;
+	int			n,
+				nitem;
+
+	array = PG_GETARG_ARRAYTYPE_P(0);
+	n = PG_GETARG_INT32(1);
+	nitem = (ARR_NDIM(array) < 1) ? 0 : ARR_DIMS(array)[0];
+
+	if (n < 0 || n > nitem)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("sample size must be between 0 and %d", nitem)));
+
+	elmtyp = ARR_ELEMTYPE(array);
+	typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+	if (typentry == NULL || typentry->type_id != elmtyp)
+	{
+		typentry = lookup_type_cache(elmtyp, 0);
+		fcinfo->flinfo->fn_extra = (void *) typentry;
+	}
+	elmlen = typentry->typlen;
+	elmbyval = typentry->typbyval;
+	elmalign = typentry->typalign;
+
+	result = array_shuffle_n(array, n,
+							 elmtyp, elmlen, elmbyval, elmalign);
+
+	PG_FREE_IF_COPY(array, 0);
+
+	PG_RETURN_ARRAYTYPE_P(result);
+}
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index fc8f39a7a9..29dabbbc6a 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -21,15 +21,13 @@
 
 #include "catalog/pg_type.h"
 #include "common/int.h"
-#include "common/pg_prng.h"
 #include "common/shortest_dec.h"
 #include "libpq/pqformat.h"
-#include "miscadmin.h"
 #include "utils/array.h"
 #include "utils/float.h"
 #include "utils/fmgrprotos.h"
+#include "utils/user_prng.h"
 #include "utils/sortsupport.h"
-#include "utils/timestamp.h"
 
 
 /*
@@ -64,10 +62,6 @@ float8		degree_c_sixty = 60.0;
 float8		degree_c_one_half = 0.5;
 float8		degree_c_one = 1.0;
 
-/* State for drandom() and setseed() */
-static bool drandom_seed_set = false;
-static pg_prng_state drandom_seed;
-
 /* Local function prototypes */
 static double sind_q1(double x);
 static double cosd_q1(double x);
@@ -2749,30 +2743,8 @@ datanh(PG_FUNCTION_ARGS)
 Datum
 drandom(PG_FUNCTION_ARGS)
 {
-	float8		result;
-
-	/* Initialize random seed, if not done yet in this process */
-	if (unlikely(!drandom_seed_set))
-	{
-		/*
-		 * If possible, initialize the seed using high-quality random bits.
-		 * Should that fail for some reason, we fall back on a lower-quality
-		 * seed based on current time and PID.
-		 */
-		if (unlikely(!pg_prng_strong_seed(&drandom_seed)))
-		{
-			TimestampTz now = GetCurrentTimestamp();
-			uint64		iseed;
-
-			/* Mix the PID with the most predictable bits of the timestamp */
-			iseed = (uint64) now ^ ((uint64) MyProcPid << 32);
-			pg_prng_seed(&drandom_seed, iseed);
-		}
-		drandom_seed_set = true;
-	}
-
-	/* pg_prng_double produces desired result range [0.0 - 1.0) */
-	result = pg_prng_double(&drandom_seed);
+	/* user_prng_double produces desired result range [0.0 - 1.0) */
+	float8		result = user_prng_double();
 
 	PG_RETURN_FLOAT8(result);
 }
@@ -2792,8 +2764,7 @@ setseed(PG_FUNCTION_ARGS)
 				 errmsg("setseed parameter %g is out of allowed range [-1,1]",
 						seed)));
 
-	pg_prng_fseed(&drandom_seed, seed);
-	drandom_seed_set = true;
+	user_prng_seed(seed);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/adt/user_prng.c b/src/backend/utils/adt/user_prng.c
new file mode 100644
index 0000000000..a541e955ef
--- /dev/null
+++ b/src/backend/utils/adt/user_prng.c
@@ -0,0 +1,87 @@
+/*-------------------------------------------------------------------------
+ *
+ * user_prng.c
+ *	  Random number generator for user facing functions.
+ *
+ * Portions Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/user_prng.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "common/pg_prng.h"
+#include "miscadmin.h"
+#include "utils/user_prng.h"
+#include "utils/timestamp.h"
+
+/*
+ * Process wide state vector used by all user facing random functions,
+ * like random() or array_shuffle().
+ */
+static pg_prng_state user_prng_state;
+static bool user_prng_state_set = false;
+
+/*
+ * Ensure the prng is seeded.
+ */
+static inline void
+user_prng_ensure_seed(void)
+{
+	/* Initialize prng state, if not done yet in this process. */
+	if (unlikely(!user_prng_state_set))
+	{
+		/*
+		 * If possible, initialize the state using high-quality random bits.
+		 * Should that fail for some reason, we fall back on a lower-quality
+		 * seed based on current time and PID.
+		 */
+		if (unlikely(!pg_prng_strong_seed(&user_prng_state)))
+		{
+			uint64		now = (uint64) GetCurrentTimestamp();
+			uint64		pid = (uint64) MyProcPid;
+
+			/* Mix PID with the most predictable bits of the timestamp. */
+			pg_prng_seed(&user_prng_state, now ^ (pid << 32));
+		}
+
+		user_prng_state_set = true;
+	}
+}
+
+/*
+ * Seed the prng.
+ */
+void
+user_prng_seed(float8 seed)
+{
+	pg_prng_fseed(&user_prng_state, seed);
+
+	user_prng_state_set = true;
+}
+
+/*
+ * Select a random double from the range [0.0, 1.0).
+ */
+float8
+user_prng_double(void)
+{
+	user_prng_ensure_seed();
+
+	return pg_prng_double(&user_prng_state);
+}
+
+/*
+ * Select a random uint64 from the range [min, max].
+ */
+uint64
+user_prng_uint64_range(uint64 min, uint64 max)
+{
+	user_prng_ensure_seed();
+
+	return pg_prng_uint64_range(&user_prng_state, min, max);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 2e41f4d9e8..9933d10f6d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1681,6 +1681,12 @@
   proname => 'arraycontjoinsel', provolatile => 's', prorettype => 'float8',
   proargtypes => 'internal oid internal int2 internal',
   prosrc => 'arraycontjoinsel' },
+{ oid => '8464', descr => 'shuffle array',
+  proname => 'array_shuffle', provolatile => 'v', proparallel => 'r', proisstrict => 't',
+  prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'array_shuffle' },
+{ oid => '8465', descr => 'take samples from array',
+  proname => 'array_sample', provolatile => 'v', proparallel => 'r', proisstrict => 't',
+  prorettype => 'anyarray', proargtypes => 'anyarray int4', prosrc => 'array_sample' },
 
 { oid => '764', descr => 'large object import',
   proname => 'lo_import', provolatile => 'v', proparallel => 'u',
diff --git a/src/include/utils/user_prng.h b/src/include/utils/user_prng.h
new file mode 100644
index 0000000000..959c1d727d
--- /dev/null
+++ b/src/include/utils/user_prng.h
@@ -0,0 +1,19 @@
+/*-------------------------------------------------------------------------
+ *
+ * user_prng.h
+ *	  Random number generator for user facing functions.
+ *
+ * Portions Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * src/include/utils/user_prng.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef USER_PRNG_H
+#define USER_PRNG_H
+
+extern void user_prng_seed(float8 seed);
+extern float8 user_prng_double(void);
+extern uint64 user_prng_uint64_range(uint64 min, uint64 max);
+
+#endif							/* USER_PRNG_H */
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index ce6f3a65f9..bd6061fa1a 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2445,3 +2445,69 @@ SELECT trim_array(ARRAY[1, 2, 3], -1); -- fail
 ERROR:  number of elements to trim must be between 0 and 3
 SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail
 ERROR:  number of elements to trim must be between 0 and 3
+-- array_shuffle
+SELECT setseed(0.1); -- ensure predictable results
+ setseed 
+---------
+ 
+(1 row)
+
+SELECT array_shuffle('{NULL,1,2,3,4,5}'::int[]);
+  array_shuffle   
+------------------
+ {5,NULL,1,2,4,3}
+(1 row)
+
+SELECT array_shuffle('{NULL,1,2,3,4,5}'::int[]);
+  array_shuffle   
+------------------
+ {4,1,2,NULL,3,5}
+(1 row)
+
+SELECT array_shuffle('[-1:2][2:3]={{1,2},{3,4},{5,6},{7,8}}'::int[]);
+             array_shuffle             
+---------------------------------------
+ [-1:2][2:3]={{7,8},{3,4},{1,2},{5,6}}
+(1 row)
+
+SELECT array_shuffle('{{{1,2},{3,4}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[]);
+                 array_shuffle                  
+------------------------------------------------
+ {{{5,6},{7,8}},{{1,2},{3,4}},{{9,10},{11,12}}}
+(1 row)
+
+-- array_sample
+SELECT setseed(0.1); -- ensure predictable results
+ setseed 
+---------
+ 
+(1 row)
+
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], 3);
+ array_sample 
+--------------
+ {5,NULL,1}
+(1 row)
+
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], 3);
+ array_sample 
+--------------
+ {5,3,4}
+(1 row)
+
+SELECT array_sample('[-1:2][2:3]={{1,2},{3,4},{5,6},{7,8}}'::int[], 3);
+          array_sample           
+---------------------------------
+ [-1:1][2:3]={{1,2},{3,4},{5,6}}
+(1 row)
+
+SELECT array_sample('{{{1,2},{3,4}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2);
+           array_sample           
+----------------------------------
+ {{{1,2},{3,4}},{{9,10},{11,12}}}
+(1 row)
+
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], -1); -- fail
+ERROR:  sample size must be between 0 and 6
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], 7); --fail
+ERROR:  sample size must be between 0 and 6
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index f774faf856..09a3ecff61 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -754,3 +754,19 @@ FROM
 
 SELECT trim_array(ARRAY[1, 2, 3], -1); -- fail
 SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail
+
+-- array_shuffle
+SELECT setseed(0.1); -- ensure predictable results
+SELECT array_shuffle('{NULL,1,2,3,4,5}'::int[]);
+SELECT array_shuffle('{NULL,1,2,3,4,5}'::int[]);
+SELECT array_shuffle('[-1:2][2:3]={{1,2},{3,4},{5,6},{7,8}}'::int[]);
+SELECT array_shuffle('{{{1,2},{3,4}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[]);
+
+-- array_sample
+SELECT setseed(0.1); -- ensure predictable results
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], 3);
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], 3);
+SELECT array_sample('[-1:2][2:3]={{1,2},{3,4},{5,6},{7,8}}'::int[], 3);
+SELECT array_sample('{{{1,2},{3,4}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2);
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], -1); -- fail
+SELECT array_sample('{NULL,1,2,3,4,5}'::int[], 7); --fail
-- 
2.37.1

Reply via email to