Hello Hironobu-san,

However, the implementation of the scatter operation in this patch overflows in many cases if the variable:size is 38 bit integer or greater. Because the variable:size and the item of the array:primes[] which stores 27-29 bit integers are multiplicated. If overflow occurs, the scatter operation does not satisfy bijective.

Indeed. Again, thanks for the debug! As you contributed some code, I added you as a co-author in the CF entry.

Attached a v3, based on your fix, plus some additional changes:
 - explicitly declare unsigned variables where appropriate, to avoid casts
 - use smaller 24 bits primes instead of 27-29 bits
 - add a shortcut for multiplier below 24 bits and y value below 40 bits,
   which should avoid the manually implemented multiplication in most
   practical cases (tables with over 2^40 rows are pretty rare...).
 - change the existing shortcut to look a the number of bits instead of
   using 32 limits.
 - add a test for minimal code coverage with over 40 bits sizes
 - attempt to improve the documentation
 - some comments were updates, hopefully for the better

The main idea behind the smaller primes is to avoid the expensive modmul implementation on most realistic cases.

--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 88cf8b3933..9b8e90e26f 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -917,7 +917,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
 
       <row>
        <entry> <literal>default_seed</literal> </entry>
-       <entry>seed used in hash functions by default</entry>
+       <entry>seed used in hash and pseudo-random permutation functions by default</entry>
       </row>
 
       <row>
@@ -1370,6 +1370,13 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
        <entry><literal>pow(2.0, 10)</literal>, <literal>power(2.0, 10)</literal></entry>
        <entry><literal>1024.0</literal></entry>
       </row>
+      <row>
+       <entry><literal><function>pr_perm(<replaceable>i</replaceable>, <replaceable>size</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry>pseudo-random permutation in [0,size)</entry>
+       <entry><literal>pr_perm(0, 4)</literal></entry>
+       <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal> or <literal>3</literal></entry>
+      </row>
       <row>
        <entry><literal><function>random(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>)</function></literal></entry>
        <entry>integer</entry>
@@ -1531,6 +1538,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
 </programlisting>
   </para>
 
+  <para>
+    Function <literal>pr_perm</literal> implements a pseudo-random permutation.
+    It allows to mix the output of non uniform random functions so that
+    values drawn more often are not trivially correlated.
+    It permutes integers in [0, size) using a seed by applying rounds of
+    simple invertible functions, similarly to an encryption function,
+    although beware that it is not at all cryptographically secure.
+    Compared to <literal>hash</literal> functions discussed above, the function
+    ensures that a perfect permutation is applied: there are no collisions
+    nor holes in the output values.
+    Values outside the interval are interpreted modulo the size.
+    The function errors if size is not positive.
+    If no seed is provided, <literal>:default_seed</literal> is used.
+    For a given size and seed, the function is fully deterministic: if two
+    permutations on the same size must not be correlated, use distinct seeds
+    as outlined in the previous example about hash functions.
+  </para>
+
   <para>
    As an example, the full definition of the built-in TPC-B-like
    transaction is:
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index f7c56cc6a3..762a62959b 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
 #define PGBENCH_NARGS_VARIABLE	(-1)
 #define PGBENCH_NARGS_CASE		(-2)
 #define PGBENCH_NARGS_HASH		(-3)
+#define PGBENCH_NARGS_PRPERM	(-4)
 
 PgBenchExpr *expr_parse_result;
 
@@ -366,6 +367,9 @@ static const struct
 	{
 		"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
 	},
+	{
+		"pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM
+	},
 	/* keep as last array element */
 	{
 		NULL, 0, 0
@@ -478,6 +482,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
 			}
 			break;
 
+		/* pseudo-random permutation function with optional seed argument */
+		case PGBENCH_NARGS_PRPERM:
+			if (len < 2 || len > 3)
+				expr_yyerror_more(yyscanner, "unexpected number of arguments",
+								  PGBENCH_FUNCTIONS[fnumber].fname);
+
+			if (len == 2)
+			{
+				PgBenchExpr *var = make_variable("default_seed");
+				args = make_elist(var, args);
+			}
+			break;
+
 		/* common case: positive arguments number */
 		default:
 			Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 41b756c089..895e424452 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -986,6 +986,249 @@ getHashMurmur2(int64 val, uint64 seed)
 	return (int64) result;
 }
 
+/* pseudo-random permutation */
+
+/* 16 so that % 16 can be optimized to & 0x0f */
+#define PRP_PRIMES 16
+/*
+ * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/
+ * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19
+ */
+static uint64 primes[PRP_PRIMES] = {
+	UINT64CONST(8388617),
+	UINT64CONST(8912921),
+	UINT64CONST(9437189),
+	UINT64CONST(9961487),
+	UINT64CONST(10485767),
+	UINT64CONST(11010059),
+	UINT64CONST(11534351),
+	UINT64CONST(12058679),
+	UINT64CONST(12582917),
+	UINT64CONST(13107229),
+	UINT64CONST(13631489),
+	UINT64CONST(14155777),
+	UINT64CONST(14680067),
+	UINT64CONST(15204391),
+	UINT64CONST(15728681),
+	UINT64CONST(16252967)
+};
+
+/* how many "encryption" rounds to apply */
+#define PRP_ROUNDS 4
+
+/* return largest mask in 0 .. n-1 */
+static uint64 compute_prp_mask(uint64 n)
+{
+	n |= n >> 1;
+	n |= n >> 2;
+	n |= n >> 4;
+	n |= n >> 8;
+	n |= n >> 16;
+	n |= n >> 32;
+	return n >> 1;
+}
+
+/*
+ * Count bits in integer representation
+ *
+ * There are SSE instructions & some compiler builtins for that,
+ * but they are not portable.
+ */
+static int popcount64(uint64 n)
+{
+	n -= (n >> 1) & UINT64CONST(0x5555555555555555);
+	n = (n & UINT64CONST(0x3333333333333333)) + ((n >> 2) & UINT64CONST(0x3333333333333333));
+	n = (n + (n >> 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+	return (n * UINT64CONST(0x0101010101010101)) >> 56;
+}
+
+/* length of n binary representation */
+static int nbits(uint64 n)
+{
+	/* set all lower bits to 1 */
+	n |= n >> 1;
+	n |= n >> 2;
+	n |= n >> 4;
+	n |= n >> 8;
+	n |= n >> 16;
+	n |= n >> 32;
+	/* then count them */
+	return popcount64(n);
+}
+
+/*
+ * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64).
+ *
+ * Use improved interleaved modular multiplication algorithm to avoid
+ * overflows when necessary.
+ */
+static uint64 modular_multiply(uint64 x, uint64 y, const uint64 m)
+{
+	int		i, bits;
+	uint64		r;
+
+	Assert(m >= 1);
+
+	/* Performance shortcut for our 24 bit primes, ok for m up to ~10E12 */
+	if ((x & UINT64CONST(0xffffff)) == x && (y & UINT64CONST(0xffffffffff)) == y)
+		return (x * y) % m;
+
+	/* Because of (x * y) % m = (x % m * y % m) % m */
+	if (x >= m)
+		x %= m;
+	if (y >= m)
+		y %= m;
+
+	/* Return the trivial result. */
+	if (x == 0 || y == 0 || m == 1)
+		return 0;
+
+	/* Return the result if (x * y) can be multiplied without overflow. */
+	if (nbits(x) + nbits(y) <= 64)
+		return (x * y) % m;
+
+	/* To reduce the for loop in the algorithm below, ensure y <= x. */
+	if (x < y)
+	{
+		uint64 tmp = x;
+		x = y;
+		y = tmp;
+	}
+
+	/* Interleaved modular multiplication algorithm from:
+	 *
+	 * D.N. Amanor et al., "Efficient hardware architecture for
+	 * modular multiplication on FPGAs", in International Conference on
+	 * Field Programmable Logic and Applications, Aug 2005, pp. 539-542.
+	 *
+	 * This algorithm is usually used in the field of digital circuit
+	 * design.
+	 *
+	 * Input: X, Y, M; 0 <= X, Y <= M;
+	 * Output: R = X *  Y mod M;
+	 * bits: number of bits of Y
+	 * Y[i]: i th bit of Y
+	 *
+	 * 1. R = 0;
+	 * 2. for (i = bits - 1; i >= 0; i--) {
+	 * 3. 	R = 2 * R;
+	 * 4. 	if (Y[i] == 0x1)
+	 * 5. 		R += X;
+	 * 6. 	if (R >= M) R -= M;
+	 * 7.	if (R >= M) R -= M;
+	 *   }
+	 *
+	 * In Steps 3 and 5, overflow should be avoided.
+	 * Steps 6 and 7 can be instead a modular operation (R %= M).
+	 */
+
+	bits = nbits(y);
+	r = 0;
+
+	for (i = bits - 1; i >= 0; i--)
+	{
+		if (r > UINT64CONST(0x7fffffffffffffff))
+			/* To avoid overflow, transform from (2 * r) to
+			 * (2 * r) % m, and further transform to
+			 * mathematically equivalent form shown below:
+			 */
+			r = m - ((m - r) << 1);
+		else
+			r <<= 1;
+
+		if ((y >> i) & 0x1)
+		{
+			/* Compute (r + x) without overflow using same
+			 * transformations described in the above comment.
+			 */
+			if (m > UINT64CONST(0x7fffffffffffffff))
+				r = ((m - r) > x) ? r + x : r + x - m;
+			else
+				r = (r > m) ? r - m + x : r + x;
+		}
+
+		r %= m;
+	}
+
+	return r;
+}
+
+/* Donald Knuth linear congruential generator */
+#define DK_LCG_MUL UINT64CONST(6364136223846793005)
+#define DK_LCG_INC UINT64CONST(1442695040888963407)
+
+/* do not use all small bits */
+#define LCG_SHIFT 13
+
+/*
+ * PRP: parametric pseudo-random permutation
+ *
+ * Result in [0, size) is a permutation for inputs in the same set.
+ *
+ * Note that this function does not pass statistical tests: eg
+ * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable.
+ * Things worsen for large sizes as there are many more permutations
+ * (size!) than seeds to select them (2^64 < 21!).
+ * However it is inexpensive compared to an actual encryption function,
+ * and the quality is good enough to avoid trivial correlations on
+ * large sizes, which is the expected use case.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * PLEASE DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+pseudorandom_perm(const int64 data, const int64 isize, const int64 seed)
+{
+	/* computations are performed on unsigned values */
+	uint64 size = (uint64) isize;
+	uint64 v = (uint64) data % size;
+	uint64 key = (uint64) seed;
+	/* size-1: ensures 2 possibly overlapping halves */
+	uint64 mask = compute_prp_mask(size-1);
+
+	unsigned int i, p;
+
+	/* nothing to permute */
+	if (isize == 1)
+		return 0;
+
+	Assert(isize >= 2);
+
+	/* apply 4 rounds of bijective transformations:
+	 * (1) scramble: partial xors on overlapping power-or-2 subsets
+	 * (2) scatter: linear modulo
+	 */
+	for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES)
+	{
+		uint64 t;
+
+		/* first "half" whitening, for v in 0 .. mask */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		if (v <= mask)
+			v ^= (key >> LCG_SHIFT) & mask;
+
+		/* second (possibly overlapping) "half" whitening */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		t = size - 1 - v;
+		if (t <= mask)
+		{
+			t ^= (key >> LCG_SHIFT) & mask;
+			v = size - 1 - t;
+		}
+
+		/* at most 2 primes are skipped for a given size */
+		while (unlikely(size % primes[p] == 0))
+			p = (p + 1) % PRP_PRIMES;
+
+		/* scatter values with a prime multiplication */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size;
+	}
+
+	/* back to signed */
+	return (int64) v;
+}
+
 /*
  * Initialize the given SimpleStats struct to all zeroes
  */
@@ -2319,6 +2562,27 @@ evalStandardFunc(TState *thread, CState *st,
 				return true;
 			}
 
+		case PGBENCH_PRPERM:
+			{
+				int64	val, size, seed;
+
+				Assert(nargs == 3);
+
+				if (!coerceToInt(&vargs[0], &val) ||
+					!coerceToInt(&vargs[1], &size) ||
+					!coerceToInt(&vargs[2], &seed))
+					return false;
+
+				if (size < 1)
+				{
+					fprintf(stderr, "pr_perm size parameter must be >= 1\n");
+					return false;
+				}
+
+				setIntValue(retval, pseudorandom_perm(val, size, seed));
+				return true;
+			}
+
 		default:
 			/* cannot get here */
 			Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 6983865b92..665c450c2c 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
 	PGBENCH_IS,
 	PGBENCH_CASE,
 	PGBENCH_HASH_FNV1A,
-	PGBENCH_HASH_MURMUR2
+	PGBENCH_HASH_MURMUR2,
+	PGBENCH_PRPERM
 } PgBenchFunction;
 
 typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 2fc021dde7..7ea4914bf2 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -322,6 +322,15 @@ pgbench(
 		qr{command=96.: int 1\b},       # :scale
 		qr{command=97.: int 0\b},       # :client_id
 		qr{command=98.: int 5432\b},    # :random_seed
+		qr{command=99.: boolean true\b},
+		qr{command=100.: boolean true\b},
+		qr{command=101.: boolean true\b},
+		qr{command=102.: boolean true\b},
+		qr{command=103.: boolean true\b},
+		qr{command=107.: boolean true\b},
+		qr{command=108.: boolean true\b},
+		qr{command=109.: boolean true\b},
+		qr{command=110.: boolean true\b},
 	],
 	'pgbench expressions',
 	{
@@ -447,6 +456,28 @@ SELECT :v0, :v1, :v2, :v3;
 \set sc debug(:scale)
 \set ci debug(:client_id)
 \set rs debug(:random_seed)
+-- pseudo-random permutation
+\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1)
+\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3)
+\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6)
+\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10)
+\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \
+             pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \
+             pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \
+             pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120)
+-- random sanity check
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p pr_perm(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size))
+-- actual values
+\set t debug(pr_perm(:v, 1) = 0)
+\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \
+             pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0)
+-- ~50 bits test to exercise full modular multiplication
+\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \
+             pr_perm(1999999999999999, 1000000000000000, 54321) = 54570773902028 and \
+             pr_perm(2500000000000000, 1000000000000000, 54321) = 771082311307468)
 }
 	});
 
@@ -731,6 +762,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
 	[
 		'bad boolean',                     0,
 		[qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
+	],
+	[
+		'invalid pr_perm size',				0,
+		[qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)}
 	],);
 
 
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index c1c2c1e3d4..ff02cfb46b 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -290,6 +290,16 @@ my @script_tests = (
 		'too many arguments for hash',
 		[qr{unexpected number of arguments \(hash\)}],
 		{ 'bad-hash-2.sql' => "\\set i hash(1,2,3)\n" }
+	],
+	[
+		'not enough arguments for pr_perm',
+		[qr{unexpected number of arguments \(pr_perm\)}],
+		{ 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" }
+	],
+	[
+		'too many arguments for pr_perm',
+		[qr{unexpected number of arguments \(pr_perm\)}],
+		{ 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" }
 	],);
 
 for my $t (@script_tests)

Reply via email to