Hello Hironobu-san,

Here is a v15 which is a rebase, plus a large simplification of the modmul function if an int128 type is available, which is probably always…

Could you have a look and possibly revalidate?

Sorry for the late reply. I reviewed this patch.

Function nbits(), which was previously discussed, has been simplified by using the function pg_popcount64().

By adding the mathematical explanation, it has been easier to understand the operation of this function.

I believe that these improvements will have a positive impact on maintenance.

The patch could be applied successfully and the tests passed without problems.

So, I think the latest patch is fine.


Best regards,



On 3/3/19 12:55 PM, Fabien COELHO wrote:

Indeed, the patch needs a rebase & conflit resolution. I'll do it. Later.

Here is an update:

  - take advantage of pg_bitutils (although I noted that the "slow"
    popcount there could be speeded-up and shorten with a bitwise operator
    implementation that I just removed from pgbench).

  - add comments about the bijective transformations in the code.

As already stated, this function makes sense for people who want to test performance with pgbench using non uniform rands. If you do not want to do that, you will probably find the function pretty useless. I can't help it.

Also, non uniform rands is also a way to test pg lock contention behavior.

You have signed up as a reviewer for this patch.  Do you know when you'll have time to do the review?

Regards,



--
Fabien Coelho - CRI, MINES ParisTech
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index ef22a484e7..24a2c147cc 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -938,7 +938,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
 
       <row>
        <entry> <literal>default_seed</literal> </entry>
-       <entry>seed used in hash functions by default</entry>
+       <entry>seed used in hash and pseudo-random permutation functions by default</entry>
       </row>
 
       <row>
@@ -1434,6 +1434,13 @@ SELECT 2 AS two, 3 AS three \gset p_
        <entry><literal>pow(2.0, 10)</literal>, <literal>power(2.0, 10)</literal></entry>
        <entry><literal>1024.0</literal></entry>
       </row>
+      <row>
+       <entry><literal><function>pr_perm(<replaceable>i</replaceable>, <replaceable>size</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry>pseudo-random permutation in [0,size)</entry>
+       <entry><literal>pr_perm(0, 4)</literal></entry>
+       <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal> or <literal>3</literal></entry>
+      </row>
       <row>
        <entry><literal><function>random(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>)</function></literal></entry>
        <entry>integer</entry>
@@ -1599,6 +1606,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
 </programlisting>
   </para>
 
+  <para>
+    Function <literal>pr_perm</literal> implements a pseudo-random permutation.
+    It allows to mix the output of non uniform random functions so that
+    values drawn more often are not trivially correlated.
+    It permutes integers in [0, size) using a seed by applying rounds of
+    simple invertible functions, similarly to an encryption function,
+    although beware that it is not at all cryptographically secure.
+    Compared to <literal>hash</literal> functions discussed above, the function
+    ensures that a perfect permutation is applied: there are no collisions
+    nor holes in the output values.
+    Values outside the interval are interpreted modulo the size.
+    The function errors if size is not positive.
+    If no seed is provided, <literal>:default_seed</literal> is used.
+    For a given size and seed, the function is fully deterministic: if two
+    permutations on the same size must not be correlated, use distinct seeds
+    as outlined in the previous example about hash functions.
+  </para>
+
   <para>
    As an example, the full definition of the built-in TPC-B-like
    transaction is:
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index 17c9ec32c5..13410787d4 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
 #define PGBENCH_NARGS_VARIABLE	(-1)
 #define PGBENCH_NARGS_CASE		(-2)
 #define PGBENCH_NARGS_HASH		(-3)
+#define PGBENCH_NARGS_PRPERM	(-4)
 
 PgBenchExpr *expr_parse_result;
 
@@ -370,6 +371,9 @@ static const struct
 	{
 		"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
 	},
+	{
+		"pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM
+	},
 	/* keep as last array element */
 	{
 		NULL, 0, 0
@@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
 			}
 			break;
 
+		/* pseudo-random permutation function with optional seed argument */
+		case PGBENCH_NARGS_PRPERM:
+			if (len < 2 || len > 3)
+				expr_yyerror_more(yyscanner, "unexpected number of arguments",
+								  PGBENCH_FUNCTIONS[fnumber].fname);
+
+			if (len == 2)
+			{
+				PgBenchExpr *var = make_variable("default_seed");
+				args = make_elist(var, args);
+			}
+			break;
+
 		/* common case: positive arguments number */
 		default:
 			Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 8b84658ccd..c532978398 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -38,6 +38,7 @@
 #include "getopt_long.h"
 #include "libpq-fe.h"
 #include "portability/instr_time.h"
+#include "port/pg_bitutils.h"
 
 #include <ctype.h>
 #include <float.h>
@@ -1039,6 +1040,307 @@ getHashMurmur2(int64 val, uint64 seed)
 	return (int64) result;
 }
 
+/* pseudo-random permutation */
+
+/* 16 so that % 16 can be optimized to & 0x0f */
+#define PRP_PRIMES 16
+/*
+ * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/
+ * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19
+ */
+static uint64 primes[PRP_PRIMES] = {
+	UINT64CONST(8388617),
+	UINT64CONST(8912921),
+	UINT64CONST(9437189),
+	UINT64CONST(9961487),
+	UINT64CONST(10485767),
+	UINT64CONST(11010059),
+	UINT64CONST(11534351),
+	UINT64CONST(12058679),
+	UINT64CONST(12582917),
+	UINT64CONST(13107229),
+	UINT64CONST(13631489),
+	UINT64CONST(14155777),
+	UINT64CONST(14680067),
+	UINT64CONST(15204391),
+	UINT64CONST(15728681),
+	UINT64CONST(16252967)
+};
+
+/* how many "encryption" rounds to apply */
+#define PRP_ROUNDS 4
+
+/* return smallest mask holding n  */
+static uint64
+compute_mask(uint64 n)
+{
+	n |= n >> 1;
+	n |= n >> 2;
+	n |= n >> 4;
+	n |= n >> 8;
+	n |= n >> 16;
+	n |= n >> 32;
+	return n;
+}
+
+#if !defined(PG_INT128_TYPE)
+/* length of n binary representation */
+static int
+nbits(uint64 n)
+{
+	/* set lower bits to 1 and count them */
+	return pg_popcount64(compute_mask(n));
+}
+#endif
+
+/*
+ * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64).
+ *
+ * Use improved interleaved modular multiplication algorithm to avoid
+ * overflows when necessary.
+ */
+static uint64
+modular_multiply(uint64 x, uint64 y, const uint64 m)
+{
+#if defined(PG_INT128_TYPE)
+	return (PG_INT128_TYPE) x * (PG_INT128_TYPE) y % (PG_INT128_TYPE) m;
+#else
+	int			i,
+				bits;
+	uint64		r;
+
+	Assert(m >= 1);
+
+	/* Because of (x * y) % m = (x % m * y % m) % m */
+	if (x >= m)
+		x %= m;
+	if (y >= m)
+		y %= m;
+
+	/* Return the trivial result. */
+	if (x == 0 || y == 0 || m == 1)
+		return 0;
+
+	/* Return the result if (x * y) can be multiplied without overflow. */
+	if (nbits(x) + nbits(y) <= 64)
+		return (x * y) % m;
+
+	/* To reduce the for loop in the algorithm below, ensure y <= x. */
+	if (x < y)
+	{
+		uint64		tmp = x;
+
+		x = y;
+		y = tmp;
+	}
+
+	/*-----
+	 * Interleaved modular multiplication algorithm from:
+	 *
+	 * D.N. Amanor et al., "Efficient hardware architecture for modular
+	 * multiplication on FPGAs", in International Conference on Field
+	 * Programmable Logic and Applications, Aug 2005, pp. 539-542.
+	 *
+	 * This algorithm is usually used in the field of digital circuit design.
+	 *
+	 * Input: X, Y, M; 0 <= X, Y <= M;
+	 * Output: R = X *  Y mod M;
+	 * bits: number of bits of Y
+	 * Y[i]: i th bit of Y
+	 *
+	 * 1. R = 0;
+	 * 2. for (i = bits - 1; i >= 0; i--) {
+	 * 3.   R = 2 * R;
+	 * 4.   if (Y[i] == 0x1)
+	 * 5.     R += X;
+	 * 6.   if (R >= M) R -= M;
+	 * 7.   if (R >= M) R -= M;
+	 *    }
+	 *
+	 * In Steps 3 and 5, overflow should be avoided. The details are explained
+	 * in each step.
+	 * Steps 6 and 7 can be instead a modular operation (R %= M).
+	 *
+	 * Note that, in this implementation, the distributive property of modular
+	 * operation shown in below is used whenever it can be applied.
+	 *   (X * Y) % M = X % M * Y % M, (X + Y) % M = X % M + Y % M
+	 *
+	 * For ease of understanding, an example is shown.
+	 * Example: (11 * 5) % 7
+	 *
+	 * [Notation] ":=" means substitution.
+	 *
+	 * [initial value]
+	 * R := 0x0000
+	 *
+	 * [i=2]
+	 *  R := (0x0000 * 2) % 0x0111 = 0x0000
+	 *  R := (R + 0x1011) % 0x0111 = 0x0100
+	 *
+	 * [i=1]
+	 *  R := (0x0100 * 2) % 0x0111 = 0x1000 % 0x0111 = 0x0001
+	 *
+	 * [i=0]
+	 *  R := (0x0001 * 2) % 0x0111 = 0x0010
+	 *  R := (R + 0x1011) % 0x0111 = 0x1101 % 0x0111 = 0x0110
+	 *
+	 * [result]
+	 * R = 6
+	 */
+
+	bits = nbits(y);
+	r = 0;
+
+	for (i = bits - 1; i >= 0; i--)
+	{
+		if (r > UINT64CONST(0x7fffffffffffffff))
+			/*-----
+			 * To avoid overflow, transform from (2 * r) to (2 * r) % m, and
+			 * further transform to mathematically equivalent form below:
+			 *
+			 * For ease of understanding, an example using one digit decimal number,
+			 * where r = 6 and m = 7, is shown.
+			 *  (6 * 2) % 7 = (7 - 1) * 2 % 7 = (7 % 7 - 1 % 7) * 2 % 7
+			 *              = (0 - 1) * 2 % 7 = -2 % 7 = 7 - 2 = 5.
+			 * Generally, if (r * 2) overflows,
+			 * (r * 2) % m = m - (m - r) * 2.
+			 */
+			r = m - ((m - r) << 1);
+		else
+			r <<= 1;
+
+		if ((y >> i) & 0x1)
+		{
+			if (r > UINT64CONST(0xffffffffffffffff) - x)
+				/*-----
+				 * To compute (r + x) without overflow: transform to
+				 * (r + x) % m, and then to (r + x - m).
+				 *
+				 * An example using one digit decimal number, where r = 6, x = 5 and
+				 * m = 7, is shown.
+				 *  (6 + 5) % 7 = (6 + (5 - 7 + 7)) % 7 = 6 % 7 + (5 - 7) % 7 + 7 % 7
+				 *              = 6 + (5 - 7) + 0 = 4.
+				 * Generally, if (r + x) overflows,
+				 * (r + x) % m = r + (x - m).
+				 */
+				r += x - m;
+			else
+				r += x;
+		}
+
+		r %= m;
+	}
+
+	return r;
+#endif
+}
+
+/*
+ * Donald Knuth's linear congruential generator
+ *
+ * Relying on multiplication overflows is part of the design
+ * of this simple pseudo random number generator.
+ */
+#define DK_LCG_MUL UINT64CONST(6364136223846793005)
+#define DK_LCG_INC UINT64CONST(1442695040888963407)
+
+/* do not use all small bits */
+#define LCG_SHIFT 13
+
+/*
+ * PRP: parametric pseudo-random permutation
+ *
+ * Result in [0, size) is a permutation for inputs in the same set.
+ *
+ * Note that this function does not pass statistical tests: eg
+ * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable.
+ * Things worsen for large sizes as there are many more permutations
+ * (size!) than seeds to select them (2^64 < 21!).
+ * However it is inexpensive compared to an actual encryption function,
+ * and the quality is good enough to avoid trivial correlations on
+ * large sizes, which is the expected use case.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+pseudorandom_perm(const int64 data, const int64 isize, const int64 seed)
+{
+	/* computations are performed on unsigned values */
+	uint64		size = (uint64) isize;
+	uint64		v = (uint64) data % size;
+	uint64		key = (uint64) seed;
+	/* size-1 ensures 2 possibly overlapping halves */
+	uint64		mask = compute_mask(size - 1) >> 1;
+
+	/* nothing to permute */
+	if (isize == 1)
+		return 0;
+
+	Assert(isize >= 2);
+
+	/*-----
+	 * Apply 4 rounds of bijective transformations using key updated
+	 * at each stage:
+	 *
+	 * (1) scramble: partial xors on overlapping power-of-2 subsets
+	 *     for instance with v in 0 .. 14 (i.e. with size == 15): 
+	 *     if v is in 0 .. 7 do v = (v ^ k) % 8
+	 *     if v is in 7 .. 14 do v = 14 - ((14-v) ^ k) % 8
+	 *     note that because of the overlap (here 7), v may be changed twice.
+	 *     this transformation if bijective because the condition to apply it
+	 *     is still true after applying it, and xor itself is bijective on a
+	 *     power-of-2 size.
+	 *
+	 * (2) scatter: linear modulo
+	 *     v = (v * p + k) % size
+	 *     this transformation is bijective is p & size are prime, which is
+	 *     ensured in the code by the while loop which discards primes when
+	 *     size is a multiple of it.
+	 *
+	 */
+	for (unsigned int i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES)
+	{
+		uint64		t;
+
+		/* first "half" whitening, for v in 0 .. mask */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		if (v <= mask)
+			v ^= (key >> LCG_SHIFT) & mask;
+
+		/* second (possibly overlapping) "half" whitening */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		t = size - 1 - v;
+		if (t <= mask)
+		{
+			t ^= (key >> LCG_SHIFT) & mask;
+			v = size - 1 - t;
+		}
+
+		/* at most 2 primes are skipped for a given size */
+		while (unlikely(size % primes[p] == 0))
+			p = (p + 1) % PRP_PRIMES;
+
+		/* scatter values with a prime multiplication */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+
+		/* Performance shortcut for 24 bit primes, ok for size up to ~10E12 */
+		if ((v & UINT64CONST(0xffffffffff)) == v)
+			v = (primes[p] * v + (key >> LCG_SHIFT)) % size;
+		else
+			/*-----
+			 * Note: the add cannot overflow as size is under 63 bits:
+			 * mmv = mm(prime, v, size) < size <= 0x7fffffffffffffff = (1<<63)-1
+			 * ks = key >> LCG_SHIFTS <= 2^51
+			 * => mmv + ks < (1<<64) - 1
+			 */
+			v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size;
+	}
+
+	/* back to signed */
+	return (int64) v;
+}
+
 /*
  * Initialize the given SimpleStats struct to all zeroes
  */
@@ -2383,6 +2685,27 @@ evalStandardFunc(CState *st,
 				return true;
 			}
 
+		case PGBENCH_PRPERM:
+			{
+				int64	val, size, seed;
+
+				Assert(nargs == 3);
+
+				if (!coerceToInt(&vargs[0], &val) ||
+					!coerceToInt(&vargs[1], &size) ||
+					!coerceToInt(&vargs[2], &seed))
+					return false;
+
+				if (size < 1)
+				{
+					fprintf(stderr, "pr_perm size parameter must be >= 1\n");
+					return false;
+				}
+
+				setIntValue(retval, pseudorandom_perm(val, size, seed));
+				return true;
+			}
+
 		default:
 			/* cannot get here */
 			Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index c4a1e298e0..573a67ec69 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
 	PGBENCH_IS,
 	PGBENCH_CASE,
 	PGBENCH_HASH_FNV1A,
-	PGBENCH_HASH_MURMUR2
+	PGBENCH_HASH_MURMUR2,
+	PGBENCH_PRPERM
 } PgBenchFunction;
 
 typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index dc2c72fa92..5547ee4466 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -331,6 +331,17 @@ pgbench(
 		qr{command=98.: int 5432\b},                    # :random_seed
 		qr{command=99.: int -9223372036854775808\b},    # min int
 		qr{command=100.: int 9223372036854775807\b},    # max int
+		qr{command=101.: boolean true\b},
+		qr{command=102.: boolean true\b},
+		qr{command=103.: boolean true\b},
+		qr{command=104.: boolean true\b},
+		qr{command=105.: boolean true\b},
+		qr{command=109.: boolean true\b},
+		qr{command=110.: boolean true\b},
+		qr{command=111.: boolean true\b},
+		qr{command=112.: boolean true\b},
+		qr{command=113.: int 9223372036854775797\b},
+		qr{command=114.: boolean true\b},
 	],
 	'pgbench expressions',
 	{
@@ -458,6 +469,37 @@ SELECT :v0, :v1, :v2, :v3;
 -- minint constant parsing
 \set min debug(-9223372036854775808)
 \set max debug(-(:min + 1))
+-- pseudo-random permutation
+\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1)
+\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3)
+\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6)
+\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10)
+\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \
+             pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \
+             pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \
+             pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120)
+-- random sanity check
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p pr_perm(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size))
+-- actual values
+\set t debug(pr_perm(:v, 1) = 0)
+\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \
+             pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0)
+-- ~50 bits test to exercise full modular multiplication
+\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \
+             pr_perm(1999999999999999, 1000000000000000, 54321) = 54570773902028 and \
+             pr_perm(2500000000000000, 1000000000000000, 54321) = 771082311307468)
+-- 63 bits tests
+\set size debug(:max - 10)
+\set ok debug(pr_perm(:size-1, :size, 5432) = 7214172101785397543 and \
+              pr_perm(:size-2, :size, 5432) = 4060085360303920649 and \
+              pr_perm(:size-3, :size, 5432) =  919477102797071521 and \
+              pr_perm(:size-4, :size, 5432) = 7588404289602340497 and \
+              pr_perm(:size-5, :size, 5432) = 5568354808723584469 and \
+              pr_perm(:size-6, :size, 5432) = 2410458883211907565 and \
+              pr_perm(:size-7, :size, 5432) = 1738667467693569814)
 }
 	});
 
@@ -771,9 +813,13 @@ SELECT LEAST(} . join(', ', (':i') x 256) . q{)}
 	],
 	[ 'misc empty script', 1, [qr{empty command list for script}], q{} ],
 	[
-		'bad boolean',                     2,
+	   'bad boolean',                     2,
 		[qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
 	],
+	[
+		'invalid pr_perm size',				2,
+		[qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)}
+	],
 
 	# GSET
 	[
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 69a6d03bb3..0acbb7fbbb 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -306,6 +306,16 @@ my @script_tests = (
 		'double overflow 3',
 		[qr{double constant overflow}],
 		{ 'overflow-3.sql' => "\\set d .1E310\n" }
+	],
+	[
+		'not enough arguments for pr_perm',
+		[qr{unexpected number of arguments \(pr_perm\)}],
+		{ 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" }
+	],
+	[
+		'too many arguments for pr_perm',
+		[qr{unexpected number of arguments \(pr_perm\)}],
+		{ 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" }
 	],);
 
 for my $t (@script_tests)

Reply via email to