On Sun, 14 Mar 2021 at 16:08, Fabien COELHO <coe...@cri.ensmp.fr> wrote: > > > My main question on this now is, do you have a scholar reference for > > this algorithm? > > Nope, otherwise I would have put a reference. I'm a scholar though, if > it helps:-) > > I could not find any algorithm that fitted the bill. The usual approach > (eg benchmarking designs) is too use some hash function and assume that it > is not a permutation, too bad. > > Basically the algorithm mimics a few rounds of cryptographic encryption > adapted to any size and simple operators, whereas encryption function are > restricted to power of two blocks (eg the Feistel network). The structure > is the same AES with its AddRoundKey the xor-ing stage (adapted to non > power of two in whiten above), MixColumns which does the scattering, and > for key expansion, I used Donald Knuth generator. Basically one could say > that permute is an inexpensive and insecure AES:-) >
I spent a little time playing around with this, trying to come up with a reasonable way to test it. It's apparent from the code and associated comments that the transformation is bijective and so will produce a permutation, but it's less obvious how random that permutation will be. Obviously the Fisher-Yates algorithm would give the most random permutation, but that's not really practical in this context. Any other algorithm will, in some sense, be less random than that, so I think it's really just a question of testing that it's random enough to satisfy the intended use case. The approach to testing I tried was to use the Kolmogorov-Smirnov test to test how uniformly random a couple of quantities are: 1). For a given size and fixed input value, and a large number of randomly chosen seed values, is the output uniformly random? 2). For a given size and a pair of nearby input values, are the two output values uniformly randomly spaced apart? This second test seems desirable to ensure sufficient shuffling so that inputs coming from some non-uniform random distribution are scattered sufficiently to distribute the maxima/minima (x -> x + rand mod size would pass test 1, so that by itself is insufficient). I tested this using the attached (somewhat ugly) stand-alone test C program, and for the most part these 2 tests seemed to pass. The program also tests that the output really is a permutation, just to be sure, and this was confirmed in all cases. However, I noticed that the results are less good when size is a power of 2. In this case, the results seem significantly less uniform, suggesting that for such sizes, there is an increased chance that the permuted output might still be skewed. Based on the above, I also experimented with a different permutation algorithm (permute2() in the test), which attempts to inject more randomness into the bijection, and between pairs of inputs, to make the output less predictable and less likely to be accidentally non-uniform. It's possible that it suffers from other problems, but it did do better in these 2 tests. Maybe some other tests might be useful, but I really don't know. As noted above, any algorithm is likely to lead to some pattern in the output (e.g., it looks like both these algorithms cause adjacent inputs to always end up separated by an odd number), so a judgement call will be required to decide what is random enough. Regards, Dean
#include <math.h> #include <stdio.h> #include <stdlib.h> typedef unsigned char uint8; typedef long int64; typedef unsigned long uint64; typedef unsigned __int128 uint128; typedef int64 (*permute_fn)(const int64, const int64, const int64); #define PRP_PRIMES 16 static uint64 primes[PRP_PRIMES] = { 8388617, 8912921, 9437189, 9961487, 10485767, 11010059, 11534351, 12058679, 12582917, 13107229, 13631489, 14155777, 14680067, 15204391, 15728681, 16252967 }; #define PRP_ROUNDS 4 static uint64 compute_mask(uint64 n) { n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; n |= n >> 32; return n; } static uint64 modular_multiply(uint64 x, uint64 y, const uint64 m) { return (uint128) x * (uint128) y % (uint128) m; } #define DK_LCG_MUL 6364136223846793005L #define DK_LCG_INC 1442695040888963407L #define LCG_SHIFT 13 static int64 permute(const int64 data, const int64 isize, const int64 seed) { uint64 size = (uint64) isize; uint64 v = (uint64) data % size; uint64 key = (uint64) seed; uint64 mask = compute_mask(size - 1) >> 1; if (isize == 1) return 0; for (unsigned int i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES) { uint64 t; key = key * DK_LCG_MUL + DK_LCG_INC; if (v <= mask) v ^= (key >> LCG_SHIFT) & mask; key = key * DK_LCG_MUL + DK_LCG_INC; t = size - 1 - v; if (t <= mask) { t ^= (key >> LCG_SHIFT) & mask; v = size - 1 - t; } while (size % primes[p] == 0) p = (p + 1) % PRP_PRIMES; key = key * DK_LCG_MUL + DK_LCG_INC; if ((v & 0xffffffffffL) == v) v = (primes[p] * v + (key >> LCG_SHIFT)) % size; else v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size; } return (int64) v; } static int64 permute2(const int64 data, const int64 isize, const int64 seed) { unsigned short eseed[] = { (seed >> 32) & 0xffff, (seed >> 16) &0xffff, seed & 0xffff }; uint64 size = (uint64) isize; uint64 v = (uint64) data % size; uint64 mask = compute_mask(size - 1) >> 1; int i; if (isize == 1) return 0; for (i = 0; i < 4; i++) { uint64 t; uint64 m; int p; uint64 offset; // Try to separate nearby inputs by a random amount. Note: the // multiplier must be odd to get a bijection, and we must consume // the erand48() value from the sequence unconditionally, even if // it isn't used, so that later values aren't dependent on v, // ensuring that this remains a bijection. m = (uint64) (erand48(eseed) * mask) | 1; if (v <= mask) { t = v & mask; t = (t * m) & mask; v = (v & ~mask) | t; } // LCG with random 24-bit prime and random offset p = (int) (erand48(eseed) * PRP_PRIMES); while (size % primes[p] == 0) p++; offset = (uint64) (erand48(eseed) * size); if ((v & 0xffffffffff) == v) v = (primes[p] * v + offset) % size; else v = (modular_multiply(primes[p], v, size) + offset) % size; } return (int64) v; } static int int64_cmp(const void *a, const void *b) { int64 x = *((int64 *) a); int64 y = *((int64 *) b); return x - y; } int main() { permute_fn permute_fn = &permute; unsigned short seed[3] = { 1234, 5678, 9012 }; int s[] = { 256, 720, 1023, 1024, 1025, 4096, 9973, 10000, 10001, 10007, (1<<14)-3, (1<<14)-2, (1<<14)-1, 1<<14, (1<<14)+1, (1<<14)+1, 1<<16, 1<<17, 1<<18, 1<<19, 1<<20, 1<<21, 1<<22, -1 }; int x; int y; int64 size; int64 seed64; int i; for (x = 0, size = s[x]; size > 0; size = s[++x]) { for (y = 1; y <= 4; y++) { int N = 10000 + size * 2; int64 *rvals = malloc(N * sizeof(int64)); int64 *pvals = malloc(N * sizeof(int64)); int64 *dvals = malloc(N * sizeof(int64)); double Dr = 0; double Dp = 0; double Dd = 0; double Kr; double Kp; double Kd; double D_alpha; seed64 = erand48(seed) * (1L<<62); for (i = 0; i < size; i++) { pvals[i] = (*permute_fn)(i, size, seed64); } qsort(pvals, size, sizeof(int64), int64_cmp); for (i = 0; i < size; i++) { if (pvals[i] != i) { printf("permute() failed (size=%ld, seed=%ld)\n", size, seed64); for (int j = 0; j < size && j < 100; j++) printf(" %ld", pvals[j]); printf("\npvals[%ld] = %ld\n", i, pvals[i]); return 1; } } for (i = 0; i < N; i++) { rvals[i] = (int64) (erand48(seed) * size); seed64 = erand48(seed) * (1L<<62); pvals[i] = (*permute_fn)(y-1, size, seed64); dvals[i] = pvals[i] - (*permute_fn)(y*2-1, size, seed64); if (dvals[i] < 0) dvals[i] += size; } qsort(rvals, N, sizeof(int64), int64_cmp); qsort(pvals, N, sizeof(int64), int64_cmp); qsort(dvals, N, sizeof(int64), int64_cmp); /* if (size == (1<<10) && y == 1) { int64 last=0; int c=0; printf("dvals:\n"); for (i = 0; i < N; i++) if (dvals[i] != last) { printf("%ld,%d\n", last, c); last = dvals[i]; c=1; } else c++; } */ for (i = 1; i <= N; i++) { double D; D = (double) i / N - (double) rvals[i-1] / size; if (D > Dr) Dr = D; D = (double) rvals[i-1] / size - (double) (i-1) / N; if (D > Dr) Dr = D; D = (double) i / N - (double) pvals[i-1] / size; if (D > Dp) Dp = D; D = (double) pvals[i-1] / size - (double) (i-1) / N; if (D > Dp) Dp = D; D = (double) i / N - (double) dvals[i-1] / size; if (D > Dd) Dd = D; D = (double) dvals[i-1] / size - (double) (i-1) / N; if (D > Dd) Dd = D; } free(rvals); free(pvals); free(dvals); Kr = Dr * sqrt(N); Kp = Dp * sqrt(N); Kd = Dd * sqrt(N); // Critical value by confidence level // 0.001 1.94947 // 0.01 1.62762 // 0.02 1.51743 // 0.05 1.35810 // 0.1 1.22385 // 0.15 1.13795 // 0.2 1.07275 D_alpha = 1.94947 / sqrt(N); printf("size=%ld, y=%d, N=%d:\n" " Dr=%f, Kr=%f %s\n" " Dp=%f, Kp=%f %s\n" " Dd=%f, Kd=%f %s\n", size, y, N, Dr, Kr, Dr > D_alpha ? "non-uniform" : "uniform", Dp, Kp, Dp > D_alpha ? "non-uniform" : "uniform", Dd, Kd, Dd > D_alpha ? "non-uniform" : "uniform"); } } return 0; }