‘shuf -r -n 0 file’ would mistakenly read from standard input. Problem reported by my student Jingnong Qu while reimplementing a shuf subset in Python as an exercise in UCLA Computer Science 35L: https://web.cs.ucla.edu/classes/fall19/cs35L/assign/assign3.html * NEWS: Mention the fix. Also, ASCIIfy a previous item. * src/shuf.c (main): Fix bug. * tests/misc/shuf.sh: Add a test case for the bug. --- NEWS | 5 ++++- src/shuf.c | 53 +++++++++++++++++++++++++--------------------- tests/misc/shuf.sh | 4 ++++ 3 files changed, 37 insertions(+), 25 deletions(-)
diff --git a/NEWS b/NEWS index fe38e80d4..476c02aed 100644 --- a/NEWS +++ b/NEWS @@ -29,6 +29,9 @@ GNU coreutils NEWS -*- outline -*- (like Solaris 10 and Solaris 11). [bug introduced in coreutils-8.31] + 'shuf -r -n 0 file' no longer mistakenly reads from standard input. + [bug introduced with the --repeat feature in coreutils-8.22] + split no longer reports a "output file suffixes exhausted" error when the specified number of files is evenly divisible by 10, 16, 26, for --numeric, --hex, or default alphabetic suffixes respectively. @@ -210,7 +213,7 @@ GNU coreutils NEWS -*- outline -*- 'mv -n A B' no longer suffers from a race condition that can overwrite a simultaneously-created B. This bug fix requires platform support for the renameat2 or renameatx_np syscalls, found - in recent Linux and macOS kernels. As a side effect, ‘mv -n A A’ + in recent Linux and macOS kernels. As a side effect, 'mv -n A A' now silently does nothing if A exists. [bug introduced with coreutils-7.1] diff --git a/src/shuf.c b/src/shuf.c index 968d3641c..6a1aa0158 100644 --- a/src/shuf.c +++ b/src/shuf.c @@ -493,7 +493,12 @@ main (int argc, char **argv) } /* Prepare input. */ - if (echo) + if (head_lines == 0) + { + n_lines = 0; + line = NULL; + } + else if (echo) { input_from_argv (operand, n_operands, eolbyte); n_lines = n_operands; @@ -507,54 +512,54 @@ main (int argc, char **argv) else { /* If an input file is specified, re-open it as stdin. */ - if (n_operands == 1) - if (! (STREQ (operand[0], "-") || ! head_lines - || freopen (operand[0], "r", stdin))) - die (EXIT_FAILURE, errno, "%s", quotef (operand[0])); + if (n_operands == 1 + && ! (STREQ (operand[0], "-") + || freopen (operand[0], "r", stdin))) + die (EXIT_FAILURE, errno, "%s", quotef (operand[0])); fadvise (stdin, FADVISE_SEQUENTIAL); - if (! repeat && head_lines != SIZE_MAX - && (! head_lines || input_size () > RESERVOIR_MIN_INPUT)) + if (repeat || head_lines == SIZE_MAX + || input_size () <= RESERVOIR_MIN_INPUT) { - use_reservoir_sampling = true; - n_lines = SIZE_MAX; /* unknown number of input lines, for now. */ + n_lines = read_input (stdin, eolbyte, &input_lines); + line = input_lines; } else { - n_lines = read_input (stdin, eolbyte, &input_lines); - line = input_lines; + use_reservoir_sampling = true; + n_lines = SIZE_MAX; /* unknown number of input lines, for now. */ } } - if (! repeat) - head_lines = MIN (head_lines, n_lines); + /* The adjusted head line count; can be less than HEAD_LINES if the + input is small and if not repeating. */ + size_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines; randint_source = randint_all_new (random_source, (use_reservoir_sampling || repeat ? SIZE_MAX - : randperm_bound (head_lines, n_lines))); + : randperm_bound (ahead_lines, n_lines))); if (! randint_source) die (EXIT_FAILURE, errno, "%s", quotef (random_source)); if (use_reservoir_sampling) { /* Instead of reading the entire file into 'line', - use reservoir-sampling to store just "head_lines" random lines. */ - n_lines = read_input_reservoir_sampling (stdin, eolbyte, head_lines, + use reservoir-sampling to store just AHEAD_LINES random lines. */ + n_lines = read_input_reservoir_sampling (stdin, eolbyte, ahead_lines, randint_source, &reservoir); - head_lines = n_lines; + ahead_lines = n_lines; } /* Close stdin now, rather than earlier, so that randint_all_new doesn't have to worry about opening something other than stdin. */ - if (! (echo || input_range) - && (fclose (stdin) != 0)) + if (! (head_lines == 0 || echo || input_range || fclose (stdin) == 0)) die (EXIT_FAILURE, errno, _("read error")); if (!repeat) - permutation = randperm_new (randint_source, head_lines, n_lines); + permutation = randperm_new (randint_source, ahead_lines, n_lines); if (outfile && ! freopen (outfile, "w", stdout)) die (EXIT_FAILURE, errno, "%s", quotef (outfile)); @@ -569,10 +574,10 @@ main (int argc, char **argv) if (n_lines == 0) die (EXIT_FAILURE, 0, _("no lines to repeat")); if (input_range) - i = write_random_numbers (randint_source, head_lines, + i = write_random_numbers (randint_source, ahead_lines, lo_input, hi_input, eolbyte); else - i = write_random_lines (randint_source, head_lines, line, n_lines); + i = write_random_lines (randint_source, ahead_lines, line, n_lines); } } else @@ -580,10 +585,10 @@ main (int argc, char **argv) if (use_reservoir_sampling) i = write_permuted_output_reservoir (n_lines, reservoir, permutation); else if (input_range) - i = write_permuted_numbers (head_lines, lo_input, + i = write_permuted_numbers (ahead_lines, lo_input, permutation, eolbyte); else - i = write_permuted_lines (head_lines, line, permutation); + i = write_permuted_lines (ahead_lines, line, permutation); } if (i != 0) diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh index bbc54a9d2..c866f5de5 100755 --- a/tests/misc/shuf.sh +++ b/tests/misc/shuf.sh @@ -39,6 +39,10 @@ compare in out > /dev/null && { fail=1; echo "not random?" 1>&2; } sort -n out > out1 compare in out1 || { fail=1; echo "not a permutation" 1>&2; } +# Exercize shuf's -r -n 0 options, with no standard input. +shuf -r -n 0 in <&- >out || fail=1 +compare /dev/null out || fail=1 + # Exercise shuf's -e option. t=$(shuf -e a b c d e | sort | fmt) test "$t" = 'a b c d e' || { fail=1; echo "not a permutation" 1>&2; } -- 2.21.0