---
 .gitignore       |  1 +
 Makefile         |  2 ++
 README           |  1 +
 libutil/random.c | 46 +++++++++++++++++++++++++++++
 shuf.1           | 43 +++++++++++++++++++++++++++
 shuf.c           | 77 ++++++++++++++++++++++++++++++++++++++++++++++++
 util.h           |  3 ++
 7 files changed, 173 insertions(+)
 create mode 100644 libutil/random.c
 create mode 100644 shuf.1
 create mode 100644 shuf.c

diff --git a/.gitignore b/.gitignore
index 001b055..e789e24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /sha512sum
 /sha512-224sum
 /sha512-256sum
+/shuf
 /sleep
 /sort
 /split
diff --git a/Makefile b/Makefile
index 7bd9626..8e8732a 100644
--- a/Makefile
+++ b/Makefile
@@ -86,6 +86,7 @@ LIBUTILOBJ =\
        libutil/strsep.o\
        libutil/strnsubst.o\
        libutil/strtonum.o\
+       libutil/random.o\
        libutil/unescape.o\
        libutil/writeall.o
 
@@ -160,6 +161,7 @@ BIN =\
        sha512sum\
        sha512-224sum\
        sha512-256sum\
+       shuf\
        sleep\
        sort\
        split\
diff --git a/README b/README
index 5555f8b..9b287d9 100644
--- a/README
+++ b/README
@@ -112,6 +112,7 @@ The following tools are implemented:
 0=*|x sha512sum       .
 0=* x sha512-224sum   .
 0=* x sha512-256sum   .
+0=* x shuf            (-o, -z, -e, -i)
 0=*|o sleep           .
 0#*|o sort            .
 0=*|o split           .
diff --git a/libutil/random.c b/libutil/random.c
new file mode 100644
index 0000000..48eeb79
--- /dev/null
+++ b/libutil/random.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+/*
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound).  This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ *
+ * Copied off OpenBSD (original is arc4random_uniform)
+ */
+uint32_t
+random_uniform(uint32_t upper_bound)
+{
+       uint32_t r, min;
+
+       if (upper_bound < 2)
+               return 0;
+
+       /* 2**32 % x == (2**32 - x) % x */
+       min = -upper_bound % upper_bound;
+
+       /*
+        * This could theoretically loop forever but each retry has
+        * p > 0.5 (worst case, usually far better) of selecting a
+        * number inside the range we need, so it should rarely need
+        * to re-roll.
+        */
+       for (;;) {
+               r = random(); /* arc4random() is better, but we don't always 
have it */
+               if (r >= min)
+                       break;
+       }
+
+       return r % upper_bound;
+}
+
+void
+random_seed(void)
+{
+       struct timespec ts;
+       clock_gettime(CLOCK_REALTIME, &ts);
+       srandom(ts.tv_nsec); /* not a good source of randomness, but eh */
+}
diff --git a/shuf.1 b/shuf.1
new file mode 100644
index 0000000..1f744a7
--- /dev/null
+++ b/shuf.1
@@ -0,0 +1,43 @@
+.Dd 2024-02-25
+.Dt SHUF 1
+.Os
+.Sh NAME
+.Nm shuf
+.Nd randomly permute input lines
+.Sh SYNOPSIS
+.Nm
+.Op Fl r
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+is a utility that outputs a random permutation of its input lines.
+By default,
+.Nm
+reads from stdin and outputs to stdout.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl r
+Do not permute.
+Instead, choose lines at random, with replacement.
+By default, it repeats indefinitely.
+.El
+.Sh NOTES
+This
+.Nm
+misses a few options.
+.Pp
+.Fl n
+can be reproduced using
+.Xr head 1 .
+.Pp
+.Fl i
+can be reproduced using
+.Xr seq 1 .
+.Pp
+.Fl e
+can be reproduced using
+.Xr printf 1 .
+.Sh SEE ALSO
+.Xr sort 1 ,
+.Xr seq 1 ,
+.Xr head 1
diff --git a/shuf.c b/shuf.c
new file mode 100644
index 0000000..36b3898
--- /dev/null
+++ b/shuf.c
@@ -0,0 +1,77 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "text.h"
+#include "util.h"
+
+static void
+usage(void)
+{
+       eprintf("usage: %s [-r] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+       FILE *fp;
+       int i, j, rflag = 0, ret = 0;
+       struct linebuf buf = EMPTY_LINEBUF;
+       struct line line;
+
+       ARGBEGIN {
+       case 'r':
+               rflag = 1;
+               break;
+       default:
+               usage();
+       } ARGEND
+
+       if (!argc) {
+               getlines(stdin, &buf);
+       } else {
+               for (; *argv; argc--, argv++) {
+                       if (!strcmp(*argv, "-")) {
+                               *argv = "<stdin>";
+                               fp = stdin;
+                       } else if (!(fp = fopen(*argv, "r"))) {
+                               weprintf("fopen %s:", *argv);
+                               ret = 1;
+                               continue;
+                       }
+                       getlines(fp, &buf);
+                       if (fp != stdin && fshut(fp, *argv))
+                               ret = 1;
+               }
+       }
+
+       random_seed();
+
+       if (!rflag) {
+               for (i = buf.nlines - 1; i > 0; i--) {
+                       j = random_uniform(i+1);
+                       line = buf.lines[j];
+                       buf.lines[j] = buf.lines[i];
+                       buf.lines[i] = line;
+               }
+
+               for (i = 0; i < buf.nlines; i++) {
+                       line = buf.lines[i];
+                       fwrite(line.data, 1, line.len, stdout);
+               }
+       } else {
+               for (;;) {
+                       j = random_uniform(buf.nlines);
+                       line = buf.lines[j];
+                       fwrite(line.data, 1, line.len, stdout);
+               }
+       }
+       for (i = 0; i < buf.nlines; i++)
+               free(buf.lines[i].data);
+
+       ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
+
+       return ret;
+}
diff --git a/util.h b/util.h
index 346f6ca..4c9b5b2 100644
--- a/util.h
+++ b/util.h
@@ -3,6 +3,7 @@
 
 #include <regex.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 
 #include "arg.h"
@@ -91,3 +92,5 @@ int mkdirp(const char *, mode_t, mode_t);
 #undef memmem
 #define memmem xmemmem
 void *memmem(const void *, size_t, const void *, size_t);
+void random_seed(void);
+uint32_t random_uniform(uint32_t);
-- 
2.44.0


Reply via email to