bug#37859: [PATCH] shuf: fix bug with ‘-r -n 0’

Paul Eggert Mon, 21 Oct 2019 13:22:41 -0700

‘shuf -r -n 0 file’ would mistakenly read from standard input.
Problem reported by my student Jingnong Qu while reimplementing a
shuf subset in Python as an exercise in UCLA Computer Science 35L:
https://web.cs.ucla.edu/classes/fall19/cs35L/assign/assign3.html
* NEWS: Mention the fix.  Also, ASCIIfy a previous item.
* src/shuf.c (main): Fix bug.
* tests/misc/shuf.sh: Add a test case for the bug.
---
 NEWS               |  5 ++++-
 src/shuf.c         | 53 +++++++++++++++++++++++++---------------------
 tests/misc/shuf.sh |  4 ++++
 3 files changed, 37 insertions(+), 25 deletions(-)


diff --git a/NEWS b/NEWS
index fe38e80d4..476c02aed 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,9 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   (like Solaris 10 and Solaris 11).
   [bug introduced in coreutils-8.31]
 
+  'shuf -r -n 0 file' no longer mistakenly reads from standard input.
+  [bug introduced with the --repeat feature in coreutils-8.22]
+
   split no longer reports a "output file suffixes exhausted" error
   when the specified number of files is evenly divisible by 10, 16, 26,
   for --numeric, --hex, or default alphabetic suffixes respectively.
@@ -210,7 +213,7 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   'mv -n A B' no longer suffers from a race condition that can
   overwrite a simultaneously-created B.  This bug fix requires
   platform support for the renameat2 or renameatx_np syscalls, found
-  in recent Linux and macOS kernels.  As a side effect, ‘mv -n A A’
+  in recent Linux and macOS kernels.  As a side effect, 'mv -n A A'
   now silently does nothing if A exists.
   [bug introduced with coreutils-7.1]
 
diff --git a/src/shuf.c b/src/shuf.c
index 968d3641c..6a1aa0158 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -493,7 +493,12 @@ main (int argc, char **argv)
     }
 
   /* Prepare input.  */
-  if (echo)
+  if (head_lines == 0)
+    {
+      n_lines = 0;
+      line = NULL;
+    }
+  else if (echo)
     {
       input_from_argv (operand, n_operands, eolbyte);
       n_lines = n_operands;
@@ -507,54 +512,54 @@ main (int argc, char **argv)
   else
     {
       /* If an input file is specified, re-open it as stdin.  */
-      if (n_operands == 1)
-        if (! (STREQ (operand[0], "-") || ! head_lines
-               || freopen (operand[0], "r", stdin)))
-          die (EXIT_FAILURE, errno, "%s", quotef (operand[0]));
+      if (n_operands == 1
+          && ! (STREQ (operand[0], "-")
+                || freopen (operand[0], "r", stdin)))
+        die (EXIT_FAILURE, errno, "%s", quotef (operand[0]));
 
       fadvise (stdin, FADVISE_SEQUENTIAL);
 
-      if (! repeat && head_lines != SIZE_MAX
-          && (! head_lines || input_size () > RESERVOIR_MIN_INPUT))
+      if (repeat || head_lines == SIZE_MAX
+          || input_size () <= RESERVOIR_MIN_INPUT)
         {
-          use_reservoir_sampling = true;
-          n_lines = SIZE_MAX;   /* unknown number of input lines, for now.  */
+          n_lines = read_input (stdin, eolbyte, &input_lines);
+          line = input_lines;
         }
       else
         {
-          n_lines = read_input (stdin, eolbyte, &input_lines);
-          line = input_lines;
+          use_reservoir_sampling = true;
+          n_lines = SIZE_MAX;   /* unknown number of input lines, for now.  */
         }
     }
 
-  if (! repeat)
-    head_lines = MIN (head_lines, n_lines);
+  /* The adjusted head line count; can be less than HEAD_LINES if the
+     input is small and if not repeating.  */
+  size_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines;
 
   randint_source = randint_all_new (random_source,
                                     (use_reservoir_sampling || repeat
                                      ? SIZE_MAX
-                                     : randperm_bound (head_lines, n_lines)));
+                                     : randperm_bound (ahead_lines, n_lines)));
   if (! randint_source)
     die (EXIT_FAILURE, errno, "%s", quotef (random_source));
 
   if (use_reservoir_sampling)
     {
       /* Instead of reading the entire file into 'line',
-         use reservoir-sampling to store just "head_lines" random lines.  */
-      n_lines = read_input_reservoir_sampling (stdin, eolbyte, head_lines,
+         use reservoir-sampling to store just AHEAD_LINES random lines.  */
+      n_lines = read_input_reservoir_sampling (stdin, eolbyte, ahead_lines,
                                                randint_source, &reservoir);
-      head_lines = n_lines;
+      ahead_lines = n_lines;
     }
 
   /* Close stdin now, rather than earlier, so that randint_all_new
      doesn't have to worry about opening something other than
      stdin.  */
-  if (! (echo || input_range)
-      && (fclose (stdin) != 0))
+  if (! (head_lines == 0 || echo || input_range || fclose (stdin) == 0))
     die (EXIT_FAILURE, errno, _("read error"));
 
   if (!repeat)
-    permutation = randperm_new (randint_source, head_lines, n_lines);
+    permutation = randperm_new (randint_source, ahead_lines, n_lines);
 
   if (outfile && ! freopen (outfile, "w", stdout))
     die (EXIT_FAILURE, errno, "%s", quotef (outfile));
@@ -569,10 +574,10 @@ main (int argc, char **argv)
           if (n_lines == 0)
             die (EXIT_FAILURE, 0, _("no lines to repeat"));
           if (input_range)
-            i = write_random_numbers (randint_source, head_lines,
+            i = write_random_numbers (randint_source, ahead_lines,
                                       lo_input, hi_input, eolbyte);
           else
-            i = write_random_lines (randint_source, head_lines, line, n_lines);
+            i = write_random_lines (randint_source, ahead_lines, line, 
n_lines);
         }
     }
   else
@@ -580,10 +585,10 @@ main (int argc, char **argv)
       if (use_reservoir_sampling)
         i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
       else if (input_range)
-        i = write_permuted_numbers (head_lines, lo_input,
+        i = write_permuted_numbers (ahead_lines, lo_input,
                                     permutation, eolbyte);
       else
-        i = write_permuted_lines (head_lines, line, permutation);
+        i = write_permuted_lines (ahead_lines, line, permutation);
     }
 
   if (i != 0)
diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index bbc54a9d2..c866f5de5 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -39,6 +39,10 @@ compare in out > /dev/null && { fail=1; echo "not random?" 
1>&2; }
 sort -n out > out1
 compare in out1 || { fail=1; echo "not a permutation" 1>&2; }
 
+# Exercize shuf's -r -n 0 options, with no standard input.
+shuf -r -n 0 in <&- >out || fail=1
+compare /dev/null out || fail=1
+
 # Exercise shuf's -e option.
 t=$(shuf -e a b c d e | sort | fmt)
 test "$t" = 'a b c d e' || { fail=1; echo "not a permutation" 1>&2; }
-- 
2.21.0

bug#37859: [PATCH] shuf: fix bug with ‘-r -n 0’

Reply via email to