On 05/02/11 14:13, Jim Meyering wrote:
> Pádraig Brady wrote:
>> Though on trying shuf to generate the numbers I notice:
>>
>> $ shuf -i0-$((2**31)) -n1
>> 1241475845
>> $ shuf -i0-$((2**31)) -n2
>> shuf: memory exhausted
>
> For small N and large B-A, that code should be improved.
> It certainly doesn't need to allocate so much space.
I had a quick look this morning, and did a simple
sparse array implementation (attached) using the hash module.
It could be improved to have less dependence on malloc(),
but it gives a large improvement for the above case at least.
I tested with this:
for n in $(seq 2 32); do
for h in $(seq 2 32); do
test $h -gt $n && continue
for s in o n; do
test $s = o && shuf=shuf || shuf=./shuf
num=$(env time -f "$s:${h},${n} = %e,%M" \
$shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
done
done
done
A interpretation of a part of the output is:
log2(h) log2(n) time(s) mem(KiB) notes
old 2 20 0.01 17728
new 2 20 0.00 2272 less mem, less time
old 13 20 0.01 17744
new 13 20 0.01 3024 time equivalent
old 14 20 0.02 17744
new 14 20 0.03 5344
old 15 20 0.03 17728
new 15 20 0.06 9360 half mem, half speed
old 16 20 0.05 17728
new 16 20 0.05 17792 back to non sparse
cheers,
Pádraig.
diff --git a/gl/lib/randperm.c b/gl/lib/randperm.c
index 97c8d9a..9b0f03d 100644
--- a/gl/lib/randperm.c
+++ b/gl/lib/randperm.c
@@ -22,6 +22,7 @@
#include "randperm.h"
#include <limits.h>
+#include <stdlib.h>
#include "xalloc.h"
@@ -57,6 +58,73 @@ randperm_bound (size_t h, size_t n)
return bound;
}
+#include "hash.h"
+
+/* Use this to suppress gcc's `...may be used before initialized' warnings. */
+#ifdef lint
+# define IF_LINT(Code) Code
+#else
+# define IF_LINT(Code) /* empty */
+#endif
+
+struct val_ent
+{
+ size_t index;
+ size_t val;
+};
+
+static size_t
+val_hash (void const *x, size_t table_size)
+{
+ struct val_ent const *ent = x;
+ return ent->index % table_size;
+}
+
+static bool
+val_cmp (void const *x, void const *y)
+{
+ struct val_ent const *ent1 = x;
+ struct val_ent const *ent2 = y;
+ return ent1->index == ent2->index;
+}
+
+static void
+sparse_swap (Hash_table *v, size_t* rv, size_t i, size_t j)
+{
+ struct val_ent *v1 = hash_delete (v, &(struct val_ent) {i,0});
+ struct val_ent *v2 = hash_delete (v, &(struct val_ent) {j,0});
+ if (!v1)
+ {
+ v1 = xmalloc (sizeof *v1);
+ v1->index = v1->val = i;
+ }
+ if (!v2)
+ {
+ v2 = xmalloc (sizeof *v2);
+ v2->index = v2->val = j;
+ }
+
+ size_t t = v1->val;
+ v1->val = v2->val;
+ v2->val = t;
+ if (!hash_insert (v, v1))
+ xalloc_die();
+ if (!hash_insert (v, v2))
+ xalloc_die();
+
+ /* As an optimization, keep the array of values we're returning,
+ updated here. */
+ rv[i] = v1->val;
+}
+
+static void
+swap (size_t *v, size_t i, size_t j)
+{
+ size_t t = v[i];
+ v[i] = v[j];
+ v[j] = t;
+}
+
/* From R, allocate and return a malloc'd array of the first H elements
of a random permutation of N elements. H must not exceed N.
Return NULL if H is zero. */
@@ -79,21 +147,64 @@ randperm_new (struct randint_source *r, size_t h, size_t n)
default:
{
+ /* The algorithm is essentially the same in both
+ the sparse and non sparse case. In the sparse case we use
+ a hash to implement sparse storage for the set of n numbers
+ we're shuffling. When to use the sparse method was
+ determined with the help of this script:
+
+ #!/bin/sh
+ for n in $(seq 2 32); do
+ for h in $(seq 2 32); do
+ test $h -gt $n && continue
+ for s in o n; do
+ test $s = o && shuf=shuf || shuf=./shuf
+ num=$(env time -f "$s:${h},${n} = %e,%M" \
+ $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
+ test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
+ done
+ done
+ done
+
+ This showed that if sparseness = n/h, then:
+
+ sparseness = 128 => .125 mem used, and about same speed
+ sparseness = 64 => .25 mem used, but 1.5 times slower
+ sparseness = 32 => .5 mem used, but 2 times slower
+
+ Also the memory usage was only significant when n > 128Ki
+ */
+ bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
+
size_t i;
+ Hash_table *sparse_v;
- v = xnmalloc (n, sizeof *v);
- for (i = 0; i < n; i++)
- v[i] = i;
+ if (sparse)
+ {
+ sparse_v = hash_initialize (h, NULL, val_hash, val_cmp, free);
+ v = xnmalloc (h, sizeof *v);
+ }
+ else
+ {
+ IF_LINT (sparse_v = NULL);
+ v = xnmalloc (n, sizeof *v);
+ for (i = 0; i < n; i++)
+ v[i] = i;
+ }
for (i = 0; i < h; i++)
{
size_t j = i + randint_choose (r, n - i);
- size_t t = v[i];
- v[i] = v[j];
- v[j] = t;
+ if (sparse)
+ sparse_swap (sparse_v, v, i, j);
+ else
+ swap (v, i, j);
}
- v = xnrealloc (v, h, sizeof *v);
+ if (sparse)
+ hash_free (sparse_v);
+ else
+ v = xnrealloc (v, h, sizeof *v);
}
break;
}