Hello,

> Also, the alternate implementation should not change the result, so
> something looks amiss in your version. Moreover, I'm unclear how to
> implement an overflow multiply with the safe no overflow version.
(snip)

I made an honest mistake. I had assumed the modulo number of Knuth's LCG is (2 ^ 64 - 1).


BTW, I found other overflow issue.
In pseudorandom_perm(), `modular_multiply() + (key >> LCG_SHIFT)` may overflow if the result of modular_multiply() is large. Therefore, I've improved it.

Also, I've simplified Step 5 in modular_multiply().


I attached pgbench-prp-func-9.patch.

Best regards,
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index af2dea1..5b09f73 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -327,6 +327,26 @@ fi])# PGAC_C_BUILTIN_BSWAP64
 
 
 
+# PGAC_C_BUILTIN_CLZLL
+# -------------------------
+# Check if the C compiler understands __builtin_clzll(),
+# and define HAVE__BUILTIN_CLZLL if so.
+# Both GCC & CLANG seem to have one.
+AC_DEFUN([PGAC_C_BUILTIN_CLZLL],
+[AC_CACHE_CHECK(for __builtin_clzll, pgac_cv__builtin_clzll,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static unsigned long int x = __builtin_clzll(0xaabbccddeeff0011);]
+)],
+[pgac_cv__builtin_clzll=yes],
+[pgac_cv__builtin_clzll=no])])
+if test x"$pgac_cv__builtin_clzll" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CLZLL, 1,
+          [Define to 1 if your compiler understands __builtin_clzll.])
+fi])# PGAC_C_BUILTIN_CLZLL
+
+
+
+
 # PGAC_C_BUILTIN_CONSTANT_P
 # -------------------------
 # Check if the C compiler understands __builtin_constant_p(),
diff --git a/configure b/configure
index 43ae8c8..c455505 100755
--- a/configure
+++ b/configure
@@ -13952,6 +13952,30 @@ if test x"$pgac_cv__builtin_bswap64" = xyes ; then
 $as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h
 
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzll" >&5
+$as_echo_n "checking for __builtin_clzll... " >&6; }
+if ${pgac_cv__builtin_clzll+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static unsigned long int x = __builtin_clzll(0xaabbccddeeff0011);
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv__builtin_clzll=yes
+else
+  pgac_cv__builtin_clzll=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzll" >&5
+$as_echo "$pgac_cv__builtin_clzll" >&6; }
+if test x"$pgac_cv__builtin_clzll" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_CLZLL 1" >>confdefs.h
+
+fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
 $as_echo_n "checking for __builtin_constant_p... " >&6; }
 if ${pgac_cv__builtin_constant_p+:} false; then :
diff --git a/configure.in b/configure.in
index 519ecd5..f7eb78a 100644
--- a/configure.in
+++ b/configure.in
@@ -1486,6 +1486,7 @@ PGAC_C_TYPES_COMPATIBLE
 PGAC_C_BUILTIN_BSWAP16
 PGAC_C_BUILTIN_BSWAP32
 PGAC_C_BUILTIN_BSWAP64
+PGAC_C_BUILTIN_CLZLL
 PGAC_C_BUILTIN_CONSTANT_P
 PGAC_C_BUILTIN_UNREACHABLE
 PGAC_C_COMPUTED_GOTO
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index b5e3a62..77cbfcd 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -929,7 +929,7 @@ pgbench <optional> <replaceable>options</replaceable> 
</optional> <replaceable>d
 
       <row>
        <entry> <literal>default_seed</literal> </entry>
-       <entry>seed used in hash functions by default</entry>
+       <entry>seed used in hash and pseudo-random permutation functions by 
default</entry>
       </row>
 
       <row>
@@ -1390,6 +1390,13 @@ pgbench <optional> <replaceable>options</replaceable> 
</optional> <replaceable>d
        <entry><literal>1024.0</literal></entry>
       </row>
       <row>
+       <entry><literal><function>pr_perm(<replaceable>i</replaceable>, 
<replaceable>size</replaceable> [, <replaceable>seed</replaceable> ] 
)</function></literal></entry>
+       <entry>integer</entry>
+       <entry>pseudo-random permutation in [0,size)</entry>
+       <entry><literal>pr_perm(0, 4)</literal></entry>
+       <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal> 
or <literal>3</literal></entry>
+      </row>
+      <row>
        <entry><literal><function>random(<replaceable>lb</replaceable>, 
<replaceable>ub</replaceable>)</function></literal></entry>
        <entry>integer</entry>
        <entry>uniformly-distributed random integer in <literal>[lb, 
ub]</literal></entry>
@@ -1551,6 +1558,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) 
/
   </para>
 
   <para>
+    Function <literal>pr_perm</literal> implements a pseudo-random permutation.
+    It allows to mix the output of non uniform random functions so that
+    values drawn more often are not trivially correlated.
+    It permutes integers in [0, size) using a seed by applying rounds of
+    simple invertible functions, similarly to an encryption function,
+    although beware that it is not at all cryptographically secure.
+    Compared to <literal>hash</literal> functions discussed above, the function
+    ensures that a perfect permutation is applied: there are no collisions
+    nor holes in the output values.
+    Values outside the interval are interpreted modulo the size.
+    The function errors if size is not positive.
+    If no seed is provided, <literal>:default_seed</literal> is used.
+    For a given size and seed, the function is fully deterministic: if two
+    permutations on the same size must not be correlated, use distinct seeds
+    as outlined in the previous example about hash functions.
+  </para>
+
+  <para>
    As an example, the full definition of the built-in TPC-B-like
    transaction is:
 
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index bab6f8a..94fcebb 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
 #define PGBENCH_NARGS_VARIABLE (-1)
 #define PGBENCH_NARGS_CASE             (-2)
 #define PGBENCH_NARGS_HASH             (-3)
+#define PGBENCH_NARGS_PRPERM   (-4)
 
 PgBenchExpr *expr_parse_result;
 
@@ -370,6 +371,9 @@ static const struct
        {
                "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
        },
+       {
+               "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM
+       },
        /* keep as last array element */
        {
                NULL, 0, 0
@@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList 
*args)
                        }
                        break;
 
+               /* pseudo-random permutation function with optional seed 
argument */
+               case PGBENCH_NARGS_PRPERM:
+                       if (len < 2 || len > 3)
+                               expr_yyerror_more(yyscanner, "unexpected number 
of arguments",
+                                                                 
PGBENCH_FUNCTIONS[fnumber].fname);
+
+                       if (len == 2)
+                       {
+                               PgBenchExpr *var = 
make_variable("default_seed");
+                               args = make_elist(var, args);
+                       }
+                       break;
+
                /* common case: positive arguments number */
                default:
                        Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 81bc6d8..7322291 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -1066,6 +1066,253 @@ getHashMurmur2(int64 val, uint64 seed)
        return (int64) result;
 }
 
+/* pseudo-random permutation */
+
+/* 16 so that % 16 can be optimized to & 0x0f */
+#define PRP_PRIMES 16
+/*
+ * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/
+ * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19
+ */
+static uint64 primes[PRP_PRIMES] = {
+       UINT64CONST(8388617),
+       UINT64CONST(8912921),
+       UINT64CONST(9437189),
+       UINT64CONST(9961487),
+       UINT64CONST(10485767),
+       UINT64CONST(11010059),
+       UINT64CONST(11534351),
+       UINT64CONST(12058679),
+       UINT64CONST(12582917),
+       UINT64CONST(13107229),
+       UINT64CONST(13631489),
+       UINT64CONST(14155777),
+       UINT64CONST(14680067),
+       UINT64CONST(15204391),
+       UINT64CONST(15728681),
+       UINT64CONST(16252967)
+};
+
+/* how many "encryption" rounds to apply */
+#define PRP_ROUNDS 4
+
+/* return smallest mask holding n  */
+static uint64 compute_mask(uint64 n)
+{
+       n |= n >> 1;
+       n |= n >> 2;
+       n |= n >> 4;
+       n |= n >> 8;
+       n |= n >> 16;
+       n |= n >> 32;
+       return n;
+}
+
+/* length of n binary representation */
+static int nbits(uint64 n)
+{
+#ifdef HAVE__BUILTIN_CLZLL
+       return 64 - (n != 0 ? __builtin_clzll(n) : 64);
+#else /* use clever no branching bitwise operator version */
+       /* set all lower bits to 1 */
+       n = compute_mask(n);
+       /* then count them */
+       n -= (n >> 1) & UINT64CONST(0x5555555555555555);
+       n = (n & UINT64CONST(0x3333333333333333)) + ((n >> 2) & 
UINT64CONST(0x3333333333333333));
+       n = (n + (n >> 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+       return (n * UINT64CONST(0x0101010101010101)) >> 56;
+#endif /* HAVE__BUILTIN_CLZLL */
+}
+
+/*
+ * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64).
+ *
+ * Use improved interleaved modular multiplication algorithm to avoid
+ * overflows when necessary.
+ */
+static uint64 modular_multiply(uint64 x, uint64 y, const uint64 m)
+{
+       int             i, bits;
+       uint64          r;
+
+       Assert(m >= 1);
+
+       /* Because of (x * y) % m = (x % m * y % m) % m */
+       if (x >= m)
+               x %= m;
+       if (y >= m)
+               y %= m;
+
+       /* Return the trivial result. */
+       if (x == 0 || y == 0 || m == 1)
+               return 0;
+
+       /* Return the result if (x * y) can be multiplied without overflow. */
+       if (nbits(x) + nbits(y) <= 64)
+               return (x * y) % m;
+
+       /* To reduce the for loop in the algorithm below, ensure y <= x. */
+       if (x < y)
+       {
+               uint64 tmp = x;
+               x = y;
+               y = tmp;
+       }
+
+       /* Interleaved modular multiplication algorithm from:
+        *
+        * D.N. Amanor et al., "Efficient hardware architecture for
+        * modular multiplication on FPGAs", in International Conference on
+        * Field Programmable Logic and Applications, Aug 2005, pp. 539-542.
+        *
+        * This algorithm is usually used in the field of digital circuit
+        * design.
+        *
+        * Input: X, Y, M; 0 <= X, Y <= M;
+        * Output: R = X *  Y mod M;
+        * bits: number of bits of Y
+        * Y[i]: i th bit of Y
+        *
+        * 1. R = 0;
+        * 2. for (i = bits - 1; i >= 0; i--) {
+        * 3.   R = 2 * R;
+        * 4.   if (Y[i] == 0x1)
+        * 5.           R += X;
+        * 6.   if (R >= M) R -= M;
+        * 7.   if (R >= M) R -= M;
+        *   }
+        *
+        * In Steps 3 and 5, overflow should be avoided.
+        * Steps 6 and 7 can be instead a modular operation (R %= M).
+        */
+
+       bits = nbits(y);
+       r = 0;
+
+       for (i = bits - 1; i >= 0; i--)
+       {
+               if (r > UINT64CONST(0x7fffffffffffffff))
+                       /* To avoid overflow, transform from (2 * r) to
+                        * (2 * r) % m, and further transform to
+                        * mathematically equivalent form shown below:
+                        */
+                       r = m - ((m - r) << 1);
+               else
+                       r <<= 1;
+
+               if ((y >> i) & 0x1)
+               {
+                       if (r > UINT64CONST(0xffffffffffffffff) - x)
+                               /* To calculate (r + x) without overflow,
+                                * transform to (r + x) % m, and further
+                                * transform to (r + x - m).
+                                */
+                               r += x - m;
+                       else
+                               r += x;
+               }
+
+               r %= m;
+       }
+
+       return r;
+}
+
+/*
+ * Donald Knuth's linear congruential generator
+ *
+ * Relying on multiplication overflows is part of the design
+ * of this simple pseudo random number generator.
+ */
+#define DK_LCG_MUL UINT64CONST(6364136223846793005)
+#define DK_LCG_INC UINT64CONST(1442695040888963407)
+
+/* do not use all small bits */
+#define LCG_SHIFT 13
+
+/*
+ * PRP: parametric pseudo-random permutation
+ *
+ * Result in [0, size) is a permutation for inputs in the same set.
+ *
+ * Note that this function does not pass statistical tests: eg
+ * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable.
+ * Things worsen for large sizes as there are many more permutations
+ * (size!) than seeds to select them (2^64 < 21!).
+ * However it is inexpensive compared to an actual encryption function,
+ * and the quality is good enough to avoid trivial correlations on
+ * large sizes, which is the expected use case.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+pseudorandom_perm(const int64 data, const int64 isize, const int64 seed)
+{
+       /* computations are performed on unsigned values */
+       uint64 size = (uint64) isize;
+       uint64 v = (uint64) data % size;
+       uint64 key = (uint64) seed;
+       /* size-1: ensures 2 possibly overlapping halves */
+       uint64 mask = compute_mask(size-1) >> 1;
+
+       unsigned int i, p;
+
+       /* nothing to permute */
+       if (isize == 1)
+               return 0;
+
+       Assert(isize >= 2);
+
+       /* apply 4 rounds of bijective transformations:
+        * (1) scramble: partial xors on overlapping power-or-2 subsets
+        * (2) scatter: linear modulo
+        */
+       for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % 
PRP_PRIMES)
+       {
+               uint64 t;
+
+               /* first "half" whitening, for v in 0 .. mask */
+               key = key * DK_LCG_MUL + DK_LCG_INC;
+               if (v <= mask)
+                       v ^= (key >> LCG_SHIFT) & mask;
+
+               /* second (possibly overlapping) "half" whitening */
+               key = key * DK_LCG_MUL + DK_LCG_INC;
+               t = size - 1 - v;
+               if (t <= mask)
+               {
+                       t ^= (key >> LCG_SHIFT) & mask;
+                       v = size - 1 - t;
+               }
+
+               /* at most 2 primes are skipped for a given size */
+               while (unlikely(size % primes[p] == 0))
+                       p = (p + 1) % PRP_PRIMES;
+
+               /* scatter values with a prime multiplication */
+               key = key * DK_LCG_MUL + DK_LCG_INC;
+
+               /* Performance shortcut for our 24 bit primes, ok for size up 
to ~10E12 */
+               if ((v & UINT64CONST(0xffffffffff)) == v)
+                       v = (primes[p] * v + (key >> LCG_SHIFT)) % size;
+               else
+               {
+                       v = modular_multiply(primes[p], v, size);
+                       if (v > UINT64CONST(0xffffffffffffffff) - (key >> 
LCG_SHIFT))
+                               /* Even if `size` is subtracted for avoiding 
overflow,
+                                * the result is the same because of modulo 
operation.
+                                */
+                               v = (v + (key >> LCG_SHIFT) - size) % size;
+                       else
+                               v = (v + (key >> LCG_SHIFT)) % size;
+               }
+       }
+
+       /* back to signed */
+       return (int64) v;
+}
+
 /*
  * Initialize the given SimpleStats struct to all zeroes
  */
@@ -2420,6 +2667,27 @@ evalStandardFunc(TState *thread, CState *st,
                                return true;
                        }
 
+               case PGBENCH_PRPERM:
+                       {
+                               int64   val, size, seed;
+
+                               Assert(nargs == 3);
+
+                               if (!coerceToInt(&vargs[0], &val) ||
+                                       !coerceToInt(&vargs[1], &size) ||
+                                       !coerceToInt(&vargs[2], &seed))
+                                       return false;
+
+                               if (size < 1)
+                               {
+                                       fprintf(stderr, "pr_perm size parameter 
must be >= 1\n");
+                                       return false;
+                               }
+
+                               setIntValue(retval, pseudorandom_perm(val, 
size, seed));
+                               return true;
+                       }
+
                default:
                        /* cannot get here */
                        Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index de50340..12d5c29 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
        PGBENCH_IS,
        PGBENCH_CASE,
        PGBENCH_HASH_FNV1A,
-       PGBENCH_HASH_MURMUR2
+       PGBENCH_HASH_MURMUR2,
+       PGBENCH_PRPERM
 } PgBenchFunction;
 
 typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl 
b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 0081989..6ebbcb7 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -323,6 +323,13 @@ pgbench(
                qr{command=98.: int 5432\b},    # :random_seed
                qr{command=99.: int -9223372036854775808\b},    # min int
                qr{command=100.: int 9223372036854775807\b},    # max int
+               qr{command=101.: boolean true\b},
+               qr{command=102.: boolean true\b},
+               qr{command=103.: boolean true\b},
+               qr{command=104.: boolean true\b},
+               qr{command=105.: boolean true\b},
+               qr{command=109.: boolean true\b},
+               qr{command=110.: boolean true\b},
        ],
        'pgbench expressions',
        {
@@ -450,6 +457,28 @@ SELECT :v0, :v1, :v2, :v3;
 -- minint constant parsing
 \set min debug(-9223372036854775808)
 \set max debug(-(:min + 1))
+-- pseudo-random permutation
+\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1)
+\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3)
+\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6)
+\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + 
pr_perm(4, 5) = 10)
+\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) 
+ \
+             pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) 
+ \
+             pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 
16) + \
+             pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 
16) = 120)
+-- random sanity check
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p pr_perm(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p 
<> pr_perm(:v + 1, :size))
+-- actual values
+\set t debug(pr_perm(:v, 1) = 0)
+\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \
+             pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0)
+-- ~50 bits test to exercise full modular multiplication
+\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \
+             pr_perm(1999999999999999, 1000000000000000, 54321) = 
54570773902028 and \
+             pr_perm(2500000000000000, 1000000000000000, 54321) = 
771082311307468)
 }
        });
 
@@ -740,6 +769,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
        [
                'bad boolean',                     2,
                [qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
+       ],
+       [
+               'invalid pr_perm size',                         2,
+               [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 
0)}
        ],);
 
 
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl 
b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 696c378..7c03ef2 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -306,6 +306,16 @@ my @script_tests = (
                'double overflow 3',
                [qr{double constant overflow}],
                { 'overflow-3.sql' => "\\set d .1E310\n" }
+       ],
+       [
+               'not enough arguments for pr_perm',
+               [qr{unexpected number of arguments \(pr_perm\)}],
+               { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" }
+       ],
+       [
+               'too many arguments for pr_perm',
+               [qr{unexpected number of arguments \(pr_perm\)}],
+               { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" }
        ],);
 
 for my $t (@script_tests)
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 9798bd2..4cf3750 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -734,6 +734,9 @@
 /* Define to 1 if your compiler understands __builtin_bswap64. */
 #undef HAVE__BUILTIN_BSWAP64
 
+/* Define to 1 if your compiler understands __builtin_clzll. */
+#undef HAVE__BUILTIN_CLZLL
+
 /* Define to 1 if your compiler understands __builtin_constant_p. */
 #undef HAVE__BUILTIN_CONSTANT_P
 

Reply via email to