On 05/02/11 14:13, Jim Meyering wrote:
> Pádraig Brady wrote:

>> Though on trying shuf to generate the numbers I notice:
>>
>> $ shuf -i0-$((2**31)) -n1
>> 1241475845
>> $ shuf -i0-$((2**31)) -n2
>> shuf: memory exhausted
> 
> For small N and large B-A, that code should be improved.
> It certainly doesn't need to allocate so much space.

I had a quick look this morning, and did a simple
sparse array implementation (attached) using the hash module.
It could be improved to have less dependence on malloc(),
but it gives a large improvement for the above case at least.

I tested with this:

for n in $(seq 2 32); do
  for h in $(seq 2 32); do
    test $h -gt $n && continue
    for s in o n; do
      test $s = o && shuf=shuf || shuf=./shuf
      num=$(env time -f "$s:${h},${n} = %e,%M" \
            $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
      test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
    done
  done
done

A interpretation of a part of the output is:

        log2(h) log2(n) time(s) mem(KiB)  notes
old     2       20      0.01    17728
new     2       20      0.00    2272      less mem, less time
old     13      20      0.01    17744
new     13      20      0.01    3024      time equivalent
old     14      20      0.02    17744
new     14      20      0.03    5344
old     15      20      0.03    17728
new     15      20      0.06    9360      half mem, half speed
old     16      20      0.05    17728
new     16      20      0.05    17792     back to non sparse

cheers,
Pádraig.
diff --git a/gl/lib/randperm.c b/gl/lib/randperm.c
index 97c8d9a..9b0f03d 100644
--- a/gl/lib/randperm.c
+++ b/gl/lib/randperm.c
@@ -22,6 +22,7 @@
 #include "randperm.h"
 
 #include <limits.h>
+#include <stdlib.h>
 
 #include "xalloc.h"
 
@@ -57,6 +58,73 @@ randperm_bound (size_t h, size_t n)
   return bound;
 }
 
+#include "hash.h"
+
+/* Use this to suppress gcc's `...may be used before initialized' warnings. */
+#ifdef lint
+# define IF_LINT(Code) Code
+#else
+# define IF_LINT(Code) /* empty */
+#endif
+
+struct val_ent
+{
+   size_t index;
+   size_t val;
+};
+
+static size_t
+val_hash (void const *x, size_t table_size)
+{
+  struct val_ent const *ent = x;
+  return ent->index % table_size;
+}
+
+static bool
+val_cmp (void const *x, void const *y)
+{
+  struct val_ent const *ent1 = x;
+  struct val_ent const *ent2 = y;
+  return ent1->index == ent2->index;
+}
+
+static void
+sparse_swap (Hash_table *v, size_t* rv, size_t i, size_t j)
+{
+  struct val_ent *v1 = hash_delete (v, &(struct val_ent) {i,0});
+  struct val_ent *v2 = hash_delete (v, &(struct val_ent) {j,0});
+  if (!v1)
+    {
+      v1 = xmalloc (sizeof *v1);
+      v1->index = v1->val = i;
+    }
+  if (!v2)
+    {
+      v2 = xmalloc (sizeof *v2);
+      v2->index = v2->val = j;
+    }
+
+  size_t t = v1->val;
+  v1->val = v2->val;
+  v2->val = t;
+  if (!hash_insert (v, v1))
+    xalloc_die();
+  if (!hash_insert (v, v2))
+    xalloc_die();
+
+  /* As an optimization, keep the array of values we're returning,
+     updated here.  */
+  rv[i] = v1->val;
+}
+
+static void
+swap (size_t *v, size_t i, size_t j)
+{
+  size_t t = v[i];
+  v[i] = v[j];
+  v[j] = t;
+}
+
 /* From R, allocate and return a malloc'd array of the first H elements
    of a random permutation of N elements.  H must not exceed N.
    Return NULL if H is zero.  */
@@ -79,21 +147,64 @@ randperm_new (struct randint_source *r, size_t h, size_t n)
 
     default:
       {
+        /* The algorithm is essentially the same in both
+           the sparse and non sparse case.  In the sparse case we use
+           a hash to implement sparse storage for the set of n numbers
+           we're shuffling.  When to use the sparse method was
+           determined with the help of this script:
+
+           #!/bin/sh
+           for n in $(seq 2 32); do
+             for h in $(seq 2 32); do
+               test $h -gt $n && continue
+               for s in o n; do
+                 test $s = o && shuf=shuf || shuf=./shuf
+                 num=$(env time -f "$s:${h},${n} = %e,%M" \
+                       $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
+                 test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
+               done
+             done
+           done
+
+           This showed that if sparseness = n/h, then:
+
+           sparseness = 128 => .125 mem used, and about same speed
+           sparseness =  64 => .25  mem used, but 1.5 times slower
+           sparseness =  32 => .5   mem used, but 2 times slower
+
+           Also the memory usage was only significant when n > 128Ki
+        */
+        bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
+
         size_t i;
+        Hash_table *sparse_v;
 
-        v = xnmalloc (n, sizeof *v);
-        for (i = 0; i < n; i++)
-          v[i] = i;
+        if (sparse)
+          {
+            sparse_v = hash_initialize (h, NULL, val_hash, val_cmp, free);
+            v = xnmalloc (h, sizeof *v);
+          }
+        else
+          {
+            IF_LINT (sparse_v = NULL);
+            v = xnmalloc (n, sizeof *v);
+            for (i = 0; i < n; i++)
+              v[i] = i;
+          }
 
         for (i = 0; i < h; i++)
           {
             size_t j = i + randint_choose (r, n - i);
-            size_t t = v[i];
-            v[i] = v[j];
-            v[j] = t;
+            if (sparse)
+              sparse_swap (sparse_v, v, i, j);
+            else
+              swap (v, i, j);
           }
 
-        v = xnrealloc (v, h, sizeof *v);
+        if (sparse)
+          hash_free (sparse_v);
+        else
+          v = xnrealloc (v, h, sizeof *v);
       }
       break;
     }

Reply via email to