Dear maintainers,

Since coreutils v8.9 I have been using a customized 'stat' that adds the
command line options and the format sequence shown in the help output
below:

  $ stat --help
  Usage: stat [OPTION]... [FILE]...
     or: stat [OPTION]... --files-from=F
  Display file or file system status.
  ...
        --digest-type=WORD
                          when computing file content sums use specified
                          message digest algorithm: md5, sha1, sha224, sha256,
                          sha384 or sha512;  when the option is not specified
                          compute sha1 digests
  ...
        --files-from=F    display status of files specified by names in file F;
                            If F is - then read names from standard input
  ...
        --quoting-style=WORD
                          use quoting style WORD for file names:
                            literal, locale, shell, shell-always, c, escape
                            c-maybe, clocale;
                          when no option is given, use literal style
  ...
  The valid format sequences for files (without --file-system):
  ...
    %S   file content digest sum
  ...
  $

These features were used as essential composing parts in the following scenario:

  (1)  $ find EXPR -print|sort|stat --files-from=- ... > STAT_SRC
  (1b) $ find EXPR -print0|sort --null|stat --files0-from=- ... > STAT_SRC

  (2)  $ find EXPR -print0|du --files0-from=- ...
  (2b) $ find EXPR -print0|sort --null|du --files0-from=- ...

  (3)  $ mkisofs -exclude-list EXCL -o ISO_FILE ...

  (4)  $ find ISO_LOOP -not -type d -print|sort|stat --files-from=- ... > STAT_ISO
  (4b) $ find ISO_LOOP -not -type d -print0|sort --null|stat --files0-from=- ... > STAT_ISO

  (5)  $ diff -u <(cut out inode column from STAT_SRC) <(cut out inode column from STAT_ISO)
       $ wdiff STAT_SRC STAT_ISO

Here EXPR and EXCL are generated from a (potentially elaborate) list of
file globbing patterns. Zooming into the details of the 'find|stat' commands
above would show something like:

  $ eval find $(GEN-FIND-EXPR)|sort|stat \
  --printf '%S\t%f\t%a\t%u\t%g\t%h\t%i\t%s\t%W\t%X\t%Y\t%Z\t%n\n' \
  --quoting-style=escape \
  --digest-type=sha1 \
  --files-from=- \
  > files-stat.output \
  2>files-stat.error

I use steps (1) through (5) as shown, because my 'stat' implements
'--files-from'. The (b) alternatives -- (1b), (2b) and (4b) -- assume a
hypothetical feature of 'sort': a '-0|--null' option that makes the program
read lines terminated by a NUL char instead of by a newline.

Please look into the attached patch files. They implement the features
advertised above, relative to the latest released source tarball (version
8.22, from December 13, 2013). They also apply to the current Coreutils git
repository.

The 'files0-from' patch modifies only 'stat.c', while the other one,
'files-from', additionally modifies two modules belonging to 'gnulib'
('argv-iter' and 'readtokens0'):

  $ lsdiff stat-files0-from-digest-type-quoting-style.patch 
  src/stat.c

  $ lsdiff stat-files-from-digest-type-quoting-style.patch 
  lib/argv-iter.c
  lib/argv-iter.h
  lib/readtokens0.c
  lib/readtokens0.h
  src/stat.c

There is not much to add, except to note that the modifications applied to
'stat.c' follow the general style of the other tools in the Coreutils package.
For the computation of digest sums I used a logic which extends that found in
'md5sum.c': see functions 'out_hex_string', 'digest_{regular,symlink}_file'
and 'out_file_digest'. The other changes were applied using a logic very similar
to that in 'du.c' and 'wc.c'. The source files 'argv-iter.[hc]' and
'readtokens0.[hc]' were modified such that their original behavior is preserved
while adding functionality for reading and tokenizing input separated by a given
delimiter char -- which for 'stat.c' with '--files-from' is the newline char.
Both patch sets pass the regression tests included in the package. Note that
these patch sets touch neither the documentation nor the regression test code
of the package yet: they are only an initial proposal.

Sincerely,

Stefan Vargyas.
--- src/stat.c	2013-12-13 16:12:46.000000000 +0200
+++ src/stat.c	2014-05-22 08:50:42.000000000 +0300
@@ -55,6 +55,7 @@
 # include <fs_info.h>
 #endif
 #include <selinux/selinux.h>
+#include <assert.h>
 
 #include "system.h"
 
@@ -72,6 +73,17 @@
 #include "strftime.h"
 #include "find-mount-point.h"
 #include "xvasprintf.h"
+#include "readtokens0.h"
+#include "argv-iter.h"
+#include "physmem.h"
+#include "argmatch.h"
+#include "quotearg.h"
+#include "quote.h"
+#include "md5.h"
+#include "sha1.h"
+#include "sha256.h"
+#include "sha512.h"
+#include "fadvise.h"
 
 #if USE_STATVFS
 # define STRUCT_STATVFS struct statvfs
@@ -174,15 +186,21 @@
 
 enum
 {
-  PRINTF_OPTION = CHAR_MAX + 1
+  PRINTF_OPTION = CHAR_MAX + 1,
+  QUOTING_STYLE_OPTION,
+  FILES0_FROM_OPTION,
+  DIGEST_TYPE_OPTION,
 };
 
 static struct option const long_options[] =
 {
   {"dereference", no_argument, NULL, 'L'},
+  {"digest-type", required_argument, NULL, DIGEST_TYPE_OPTION},
   {"file-system", no_argument, NULL, 'f'},
+  {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
   {"format", required_argument, NULL, 'c'},
   {"printf", required_argument, NULL, PRINTF_OPTION},
+  {"quoting-style", required_argument, NULL, QUOTING_STYLE_OPTION},
   {"terse", no_argument, NULL, 't'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
@@ -204,6 +222,118 @@
 static char const *decimal_point;
 static size_t decimal_point_len;
 
+/* Quoting style for file name output.  */
+static enum quoting_style filename_quoting_style = literal_quoting_style;
+
+enum digest_type  /* Indexes digest_descs[]; keep the order in sync.  */
+  {
+    md5_digest_type,
+    sha1_digest_type,
+    sha224_digest_type,
+    sha256_digest_type,
+    sha384_digest_type,
+    sha512_digest_type,
+  };
+
+struct digest_desc
+{
+  const char *type;       /* Algorithm name, e.g. "MD5".  */
+  const char *reference;  /* Defining document, e.g. "RFC 1321".  */
+  int (*stream) (FILE*, void*);  /* Digest a stream; nonzero on error.  */
+  void* (*buffer) (const char*, size_t, void*);  /* Digest a memory buffer.  */
+  size_t bits;            /* Digest size, in bits.  */
+  size_t align;           /* Alignment used for the result buffer.  */
+};
+
+static const struct digest_desc digest_descs[] =
+{
+  /* md5 digest; the entries are indexed by enum digest_type */
+  {
+    .type      = "MD5",
+    .reference = "RFC 1321",
+    .stream    = md5_stream,
+    .buffer    = md5_buffer,
+    .bits      = 128,
+    .align     = 4
+  },
+
+  /* sha1 digest */
+  {
+    .type      = "SHA1",
+    .reference = "FIPS-180-1",
+    .stream    = sha1_stream,
+    .buffer    = sha1_buffer,
+    .bits      = 160,
+    .align     = 4
+  },
+
+  /* sha224 digest */
+  {
+    .type      = "SHA224",
+    .reference = "RFC 3874",
+    .stream    = sha224_stream,
+    .buffer    = sha224_buffer,
+    .bits      = 224,
+    .align     = 4
+  },
+
+  /* sha256 digest */
+  {
+    .type      = "SHA256",
+    .reference = "FIPS-180-2",
+    .stream    = sha256_stream,
+    .buffer    = sha256_buffer,
+    .bits      = 256,
+    .align     = 4
+  },
+
+  /* sha384 digest */
+  {
+    .type      = "SHA384",
+    .reference = "FIPS-180-2",
+    .stream    = sha384_stream,
+    .buffer    = sha384_buffer,
+    .bits      = 384,
+    .align     = 8
+  },
+
+  /* sha512 digest */
+  {
+    .type      = "SHA512",
+    .reference = "FIPS-180-2",
+    .stream    = sha512_stream,
+    .buffer    = sha512_buffer,
+    .bits      = 512,
+    .align     = 8
+  },
+};
+
+#define MAX_DIGEST_ALIGN 8
+#define MAX_DIGEST_BITS  512
+#define MAX_DIGEST_BYTES (MAX_DIGEST_BITS / CHAR_BIT)
+
+static char const* const digest_type_args[] = {  /* --digest-type words */
+  "md5",
+  "sha1",
+  "sha224",
+  "sha256",
+  "sha384",
+  "sha512",
+  NULL
+};
+static const enum digest_type digest_type_vals[] = {  /* parallel values */
+  md5_digest_type,
+  sha1_digest_type,
+  sha224_digest_type,
+  sha256_digest_type,
+  sha384_digest_type,
+  sha512_digest_type,
+};
+ARGMATCH_VERIFY (digest_type_args, digest_type_vals);
+
+/* Type of computed digest sums.  */
+static enum digest_type file_digest_type = sha1_digest_type;
+
 /* Return the type of the specified file system.
    Some systems have statfvs.f_basetype[FSTYPSZ] (AIX, HP-UX, and Solaris).
    Others have statvfs.f_fstypename[_VFS_NAMELEN] (NetBSD 3.0).
@@ -592,6 +722,23 @@
   return printf (pformat, -0.25);
 }
 
+/* Print the ARG_SIZE bytes at ARG as hexadecimal text, formatting each
+   byte with printf's %02x, by handing the result to out_string.  */
+static void
+out_hex_string (char *pformat, size_t prefix_len, char const *arg,
+                size_t arg_size)
+{
+  char *hex = alloca (2 * arg_size + 1);
+  size_t i;
+
+  /* Two hex digits per input byte; sprintf keeps HEX NUL-terminated.  */
+  hex[0] = '\0';
+  for (i = 0; i < arg_size; i++)
+    sprintf (hex + 2 * i, "%02x", (unsigned char) arg[i]);
+
+  out_string (pformat, prefix_len, hex);
+}
+
 /* Output the number of seconds since the Epoch, using a format that
    acts like printf's %f format.  */
 static void
@@ -909,6 +1056,112 @@
   return z;
 }
 
+/* Compute the digest of regular file FILENAME by running STREAM, a
+   digest function such as md5_stream, over the file's contents.
+
+   Put the checksum in *BIN_RESULT, which must be properly aligned.
+   Return true if successful; else diagnose the failure, return false.  */
+static bool
+digest_regular_file (int (*stream) (FILE*, void*), const char *filename,
+                     unsigned char *bin_result)
+{
+  FILE *fp;
+  int err;
+
+  fp = fopen (filename, "rb");
+  if (fp == NULL)
+    {
+      error (0, errno, "%s", quote (filename));
+      return false;
+    }
+
+  fadvise (fp, FADVISE_SEQUENTIAL);
+
+  err = stream (fp, bin_result);
+  if (err)
+    {
+      error (0, errno, "%s", quote (filename));
+      fclose (fp);
+      return false;
+    }
+
+  if (fclose (fp) != 0)
+    {
+      error (0, errno, "%s", quote (filename));
+      return false;
+    }
+
+  return true;
+}
+
+/* Compute the digest of the target name of symlink FILENAME by running
+   BUFFER, a digest function such as md5_buffer, over that name.
+   LINK_SIZE is handed to areadlink_with_size as its size argument.
+   Put the checksum in *BIN_RESULT, which must be properly aligned.
+   Return true if successful; else diagnose the failure, return false.  */
+static bool
+digest_symlink_file (void* (*buffer) (const char*, size_t, void*),
+                     const char *filename, size_t link_size,
+                     unsigned char *bin_result)
+{
+  char *link_name = areadlink_with_size (filename, link_size);
+  if (link_name == NULL)
+    {
+      error (0, errno, _("cannot read symbolic link %s"),
+             quote (filename));
+      return false;
+    }
+
+  /* Hash the bytes actually read: LINK_SIZE may disagree with them.  */
+  (void) buffer (link_name, strlen (link_name), bin_result);
+  free (link_name);
+
+  return true;
+}
+
+/* Print the digest sum of FILENAME (file data if regular, target name if
+   a symlink) in hex; print "-" for other file types, "?" upon failure.
+   Return false upon success, true upon failure.  */
+static bool ATTRIBUTE_WARN_UNUSED_RESULT
+out_file_digest (char *pformat, size_t prefix_len, char const *filename,
+                 struct stat *statbuf)
+{
+  static unsigned char raw_buffer[MAX_DIGEST_BYTES + MAX_DIGEST_ALIGN];
+  const struct digest_desc *desc = &digest_descs[file_digest_type];
+  unsigned char *bin_buffer;
+  size_t n_bin_buffer;
+  bool digest = false;
+  bool fail = false;
+
+  assert (desc->bits % CHAR_BIT == 0);
+  assert (desc->bits / CHAR_BIT <= MAX_DIGEST_BYTES);
+  assert (desc->align <= MAX_DIGEST_ALIGN);
+
+  n_bin_buffer = desc->bits / CHAR_BIT;  /* Digest size, in bytes.  */
+
+  /* Make sure bin_buffer is properly aligned.  */
+  bin_buffer = ptr_align (raw_buffer, desc->align);
+
+  assert (bin_buffer >= raw_buffer);
+  assert (bin_buffer + n_bin_buffer <= raw_buffer + sizeof (raw_buffer));
+
+  /* FIXME: maybe we shouldn't compute the digest sum
+     from one call to the other when filenames are identical.  */
+  if ((digest = S_ISREG (statbuf->st_mode)))
+    fail = !digest_regular_file (desc->stream, filename, bin_buffer);
+  else if ((digest = S_ISLNK (statbuf->st_mode)))
+    fail = !digest_symlink_file (desc->buffer, filename, statbuf->st_size,
+                                 bin_buffer);
+
+  if (digest && !fail)
+    out_hex_string (pformat, prefix_len, (char const *) bin_buffer,
+                    n_bin_buffer);
+  else
+    out_string (pformat, prefix_len, fail ? "?" : "-");
+
+  return fail;
+}
+
 /* Print stat info.  Return zero upon success, nonzero upon failure.  */
 static bool
 print_stat (char *pformat, size_t prefix_len, unsigned int m,
@@ -922,7 +1175,8 @@
   switch (m)
     {
     case 'n':
-      out_string (pformat, prefix_len, filename);
+      out_string (pformat, prefix_len, quotearg_style (filename_quoting_style,
+                                                       filename));
       break;
     case 'N':
       out_string (pformat, prefix_len, quote (filename));
@@ -992,6 +1246,9 @@
     case 's':
       out_int (pformat, prefix_len, statbuf->st_size);
       break;
+    case 'S':
+      fail |= out_file_digest (pformat, prefix_len, filename, statbuf);
+      break;
     case 'B':
       out_uint (pformat, prefix_len, ST_NBLOCKSIZE);
       break;
@@ -1357,7 +1614,11 @@
     emit_try_help ();
   else
     {
-      printf (_("Usage: %s [OPTION]... FILE...\n"), program_name);
+      printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+   or: %s [OPTION]... --files0-from=F\n\
+"), program_name, program_name);
+
       fputs (_("\
 Display file or file system status.\n\
 "), stdout);
@@ -1366,14 +1627,29 @@
 
       fputs (_("\
   -L, --dereference     follow links\n\
+      --digest-type=WORD\n\
+                        when computing file content sums use specified\n\
+                        message digest algorithm: md5, sha1, sha224, sha256,\n\
+                        sha384 or sha512;  when the option is not specified\n\
+                        compute sha1 digests\n\
   -f, --file-system     display file system status instead of file status\n\
 "), stdout);
+       fputs (_("\
+      --files0-from=F   display status of files specified by\n\
+                          NUL-terminated names in file F;\n\
+                          If F is - then read names from standard input\n\
+"), stdout);
       fputs (_("\
   -c  --format=FORMAT   use the specified FORMAT instead of the default;\n\
                           output a newline after each use of FORMAT\n\
       --printf=FORMAT   like --format, but interpret backslash escapes,\n\
                           and do not output a mandatory trailing newline;\n\
                           if you want a newline, include \\n in FORMAT\n\
+      --quoting-style=WORD\n\
+                        use quoting style WORD for file names:\n\
+                          literal, locale, shell, shell-always, c, escape\n\
+                          c-maybe, clocale;\n\
+                        when no option is given, use literal style\n\
   -t, --terse           print the information in terse form\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -1404,6 +1680,7 @@
   %N   quoted file name with dereference if symbolic link\n\
   %o   optimal I/O transfer size hint\n\
   %s   total size, in bytes\n\
+  %S   file content digest sum\n\
   %t   major device type in hex, for character/block device special files\n\
   %T   minor device type in hex, for character/block device special files\n\
 "), stdout);
@@ -1455,6 +1732,9 @@
   char *format = NULL;
   char *format2;
   bool ok = true;
+  FILE *stream = NULL;
+  char *files_from = NULL;
+  struct Tokens tok;
 
   initialize_main (&argc, &argv);
   set_program_name (argv[0]);
@@ -1496,6 +1776,22 @@
           terse = true;
           break;
 
+        case DIGEST_TYPE_OPTION:
+          file_digest_type = XARGMATCH ("--digest-type", optarg,
+                                        digest_type_args,
+                                        digest_type_vals);
+          break;
+
+        case FILES0_FROM_OPTION:
+          files_from = optarg;
+          break;
+
+        case QUOTING_STYLE_OPTION:
+          filename_quoting_style = XARGMATCH ("--quoting-style", optarg,
+                                              quoting_style_args,
+                                              quoting_style_vals);
+          break;
+
         case_GETOPT_HELP_CHAR;
 
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -1505,12 +1801,6 @@
         }
     }
 
-  if (argc == optind)
-    {
-      error (0, 0, _("missing operand"));
-      usage (EXIT_FAILURE);
-    }
-
   if (format)
     format2 = format;
   else
@@ -1519,10 +1809,113 @@
       format2 = default_format (fs, terse, true);
     }
 
-  for (i = optind; i < argc; i++)
-    ok &= (fs
-           ? do_statfs (argv[i], format)
-           : do_stat (argv[i], format, format2));
+  bool read_tokens = false;
+  struct argv_iterator *ai;
+  if (files_from)
+    {
+      /* When using --files0-from=F, you may not specify any files
+         on the command-line.  */
+      if (optind < argc)
+        {
+          error (0, 0, _("extra operand %s"), quote (argv[optind]));
+          fprintf (stderr, "%s\n",
+                   _("file operands cannot be combined with --files0-from"));
+          usage (EXIT_FAILURE);
+        }
+
+      if (STREQ (files_from, "-"))
+        stream = stdin;
+      else
+        {
+          stream = fopen (files_from, "r");
+          if (stream == NULL)
+            error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
+                   quote (files_from));
+        }
+
+      /* Read the file list into RAM if we can detect its size and that
+         size is reasonable.  Otherwise, we'll read a name at a time.  */
+      struct stat st;
+      if (fstat (fileno (stream), &st) == 0
+          && S_ISREG (st.st_mode)
+          && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
+        {
+          read_tokens = true;
+          readtokens0_init (&tok);
+          if (! readtokens0 (stream, &tok))
+            error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
+                   quote (files_from));
+          ai = argv_iter_init_argv (tok.tok);
+        }
+      else
+        {
+          ai = argv_iter_init_stream (stream);
+        }
+    }
+  else if (argc > optind)
+    {
+      ai = argv_iter_init_argv (argv + optind);
+    }
+  else
+    {
+      error (0, 0, _("missing operand"));
+      usage (EXIT_FAILURE);
+    }
+
+  if (!ai)
+    xalloc_die ();
+
+  while (true)
+    {
+      enum argv_iter_err ai_err;
+      char *file_name = argv_iter (ai, &ai_err);
+      if (!file_name)
+        {
+          switch (ai_err)
+            {
+            case AI_ERR_EOF:
+              goto argv_iter_done;
+            case AI_ERR_READ:
+              error (0, errno, _("%s: read error"),
+                     quotearg_colon (files_from));
+              ok = false;
+              goto argv_iter_done;
+            case AI_ERR_MEM:
+              xalloc_die ();
+            default:
+              assert (!"unexpected error code from argv_iter");
+            }
+        }
+
+      /* Silently ignore input empty lines when
+         given --files0-from=FILE.  */
+      if (files_from && !file_name[0])
+        continue;
+
+      if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
+        {
+          /* Give a better diagnostic in an unusual case:
+             printf - | stat --files0-from=- */
+          error (0, 0, _("when reading file names from stdin, "
+                         "no file name of %s allowed"),
+                 quote (file_name));
+          ok = false;
+          continue;
+        }
+
+      ok &= (fs
+             ? do_statfs (file_name, format)
+             : do_stat (file_name, format, format2));
+    }
+ argv_iter_done:
+
+  if (read_tokens)
+    readtokens0_free (&tok);
+
+  argv_iter_free (ai);
+
+  if (files_from && (ferror (stream) || fclose (stream) != 0))
+    error (EXIT_FAILURE, 0, _("error reading %s"), quote (files_from));
 
   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
--- lib/argv-iter.c	2013-12-04 16:53:33.000000000 +0200
+++ lib/argv-iter.c	2014-05-20 22:59:57.000000000 +0300
@@ -27,6 +27,7 @@
   /* Test FP to determine whether in read-mode or argv-mode. */
   /* file-mode: fp records position */
   FILE *fp;
+  int delim;
   size_t item_idx;
   char *tok;
   size_t buf_len;
@@ -43,6 +44,7 @@
   if (!ai)
     return NULL;
   ai->fp = NULL;
+  ai->delim = 0;
   ai->arg_list = argv;
   ai->p = argv;
   return ai;
@@ -53,10 +55,19 @@
 struct argv_iterator *
 argv_iter_init_stream (FILE *fp)
 {
+  return argv_iter_init_stream_delim (fp, '\0');
+}
+
+/* Initialize to read from the stream, FP.
+   The input is expected to contain a list of DELIM-delimited tokens.  */
+struct argv_iterator *
+argv_iter_init_stream_delim (FILE *fp, int delim)
+{
   struct argv_iterator *ai = malloc (sizeof *ai);
   if (!ai)
     return NULL;
   ai->fp = fp;
+  ai->delim = delim;
   ai->tok = NULL;
   ai->buf_len = 0;
 
@@ -70,12 +81,23 @@
 {
   if (ai->fp)
     {
-      ssize_t len = getdelim (&ai->tok, &ai->buf_len, '\0', ai->fp);
+      ssize_t len = getdelim (&ai->tok, &ai->buf_len, ai->delim, ai->fp);
       if (len < 0)
         {
           *err = feof (ai->fp) ? AI_ERR_EOF : AI_ERR_READ;
           return NULL;
         }
+      else if (len > 0)
+        {
+          if (ai->delim)
+            {
+              char *p = &ai->tok[len - 1];
+              if (*p == ai->delim)
+                *p = 0;
+            }
+        }
+      else
+        abort ();
 
       *err = AI_ERR_OK;
       ai->item_idx++;
--- lib/argv-iter.h	2013-12-04 16:53:33.000000000 +0200
+++ lib/argv-iter.h	2014-05-20 21:17:42.000000000 +0300
@@ -34,6 +34,8 @@
   _GL_ARG_NONNULL ((1));
 struct argv_iterator *argv_iter_init_stream (FILE *fp)
   _GL_ARG_NONNULL ((1));
+struct argv_iterator *argv_iter_init_stream_delim (FILE *fp, int delim)
+  _GL_ARG_NONNULL ((1));
 char *argv_iter (struct argv_iterator *, enum argv_iter_err *)
   _GL_ARG_NONNULL ((1, 2));
 size_t argv_iter_n_args (struct argv_iterator const *)
--- lib/readtokens0.c	2013-12-04 16:53:33.000000000 +0200
+++ lib/readtokens0.c	2014-05-20 23:22:41.000000000 +0300
@@ -65,7 +65,16 @@
 bool
 readtokens0 (FILE *in, struct Tokens *t)
 {
+  return readtokens0_delim (in, t, '\0');
+}
 
+/* Read DELIM-separated tokens from stream IN into T until EOF or error.
+   The final DELIM is optional.  Always append a NULL pointer to the
+   resulting list of token pointers, but that pointer isn't counted
+   via t->n_tok.  Return true if successful.  */
+bool
+readtokens0_delim (FILE *in, struct Tokens *t, int delim)
+{
   while (1)
     {
       int c = fgetc (in);
@@ -84,6 +93,8 @@
           break;
         }
 
+      if (c == delim)
+        c = '\0';
       obstack_1grow (&t->o_data, c);
       if (c == '\0')
         save_token (t);
--- lib/readtokens0.h	2013-12-04 16:53:33.000000000 +0200
+++ lib/readtokens0.h	2014-05-20 23:23:02.000000000 +0300
@@ -38,5 +38,6 @@
 void readtokens0_init (struct Tokens *t);
 void readtokens0_free (struct Tokens *t);
 bool readtokens0 (FILE *in, struct Tokens *t);
+bool readtokens0_delim (FILE *in, struct Tokens *t, int delim);
 
 #endif
--- src/stat.c	2013-12-13 16:12:46.000000000 +0200
+++ src/stat.c	2014-05-22 08:50:42.000000000 +0300
@@ -55,6 +55,7 @@
 # include <fs_info.h>
 #endif
 #include <selinux/selinux.h>
+#include <assert.h>
 
 #include "system.h"
 
@@ -72,6 +73,17 @@
 #include "strftime.h"
 #include "find-mount-point.h"
 #include "xvasprintf.h"
+#include "readtokens0.h"
+#include "argv-iter.h"
+#include "physmem.h"
+#include "argmatch.h"
+#include "quotearg.h"
+#include "quote.h"
+#include "md5.h"
+#include "sha1.h"
+#include "sha256.h"
+#include "sha512.h"
+#include "fadvise.h"
 
 #if USE_STATVFS
 # define STRUCT_STATVFS struct statvfs
@@ -174,15 +186,21 @@
 
 enum
 {
-  PRINTF_OPTION = CHAR_MAX + 1
+  PRINTF_OPTION = CHAR_MAX + 1,
+  QUOTING_STYLE_OPTION,
+  FILES_FROM_OPTION,
+  DIGEST_TYPE_OPTION,
 };
 
 static struct option const long_options[] =
 {
   {"dereference", no_argument, NULL, 'L'},
+  {"digest-type", required_argument, NULL, DIGEST_TYPE_OPTION},
   {"file-system", no_argument, NULL, 'f'},
+  {"files-from", required_argument, NULL, FILES_FROM_OPTION},
   {"format", required_argument, NULL, 'c'},
   {"printf", required_argument, NULL, PRINTF_OPTION},
+  {"quoting-style", required_argument, NULL, QUOTING_STYLE_OPTION},
   {"terse", no_argument, NULL, 't'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
@@ -204,6 +222,118 @@
 static char const *decimal_point;
 static size_t decimal_point_len;
 
+/* Quoting style for file name output.  */
+static enum quoting_style filename_quoting_style = literal_quoting_style;
+
+enum digest_type  /* Indexes digest_descs[]; keep the order in sync.  */
+  {
+    md5_digest_type,
+    sha1_digest_type,
+    sha224_digest_type,
+    sha256_digest_type,
+    sha384_digest_type,
+    sha512_digest_type,
+  };
+
+struct digest_desc
+{
+  const char *type;       /* Algorithm name, e.g. "MD5".  */
+  const char *reference;  /* Defining document, e.g. "RFC 1321".  */
+  int (*stream) (FILE*, void*);  /* Digest a stream; nonzero on error.  */
+  void* (*buffer) (const char*, size_t, void*);  /* Digest a memory buffer.  */
+  size_t bits;            /* Digest size, in bits.  */
+  size_t align;           /* Alignment used for the result buffer.  */
+};
+
+static const struct digest_desc digest_descs[] =
+{
+  /* md5 digest; the entries are indexed by enum digest_type */
+  {
+    .type      = "MD5",
+    .reference = "RFC 1321",
+    .stream    = md5_stream,
+    .buffer    = md5_buffer,
+    .bits      = 128,
+    .align     = 4
+  },
+
+  /* sha1 digest */
+  {
+    .type      = "SHA1",
+    .reference = "FIPS-180-1",
+    .stream    = sha1_stream,
+    .buffer    = sha1_buffer,
+    .bits      = 160,
+    .align     = 4
+  },
+
+  /* sha224 digest */
+  {
+    .type      = "SHA224",
+    .reference = "RFC 3874",
+    .stream    = sha224_stream,
+    .buffer    = sha224_buffer,
+    .bits      = 224,
+    .align     = 4
+  },
+
+  /* sha256 digest */
+  {
+    .type      = "SHA256",
+    .reference = "FIPS-180-2",
+    .stream    = sha256_stream,
+    .buffer    = sha256_buffer,
+    .bits      = 256,
+    .align     = 4
+  },
+
+  /* sha384 digest */
+  {
+    .type      = "SHA384",
+    .reference = "FIPS-180-2",
+    .stream    = sha384_stream,
+    .buffer    = sha384_buffer,
+    .bits      = 384,
+    .align     = 8
+  },
+
+  /* sha512 digest */
+  {
+    .type      = "SHA512",
+    .reference = "FIPS-180-2",
+    .stream    = sha512_stream,
+    .buffer    = sha512_buffer,
+    .bits      = 512,
+    .align     = 8
+  },
+};
+
+#define MAX_DIGEST_ALIGN 8
+#define MAX_DIGEST_BITS  512
+#define MAX_DIGEST_BYTES (MAX_DIGEST_BITS / CHAR_BIT)
+
+static char const* const digest_type_args[] = {  /* --digest-type words */
+  "md5",
+  "sha1",
+  "sha224",
+  "sha256",
+  "sha384",
+  "sha512",
+  NULL
+};
+static const enum digest_type digest_type_vals[] = {  /* parallel values */
+  md5_digest_type,
+  sha1_digest_type,
+  sha224_digest_type,
+  sha256_digest_type,
+  sha384_digest_type,
+  sha512_digest_type,
+};
+ARGMATCH_VERIFY (digest_type_args, digest_type_vals);
+
+/* Type of computed digest sums.  */
+static enum digest_type file_digest_type = sha1_digest_type;
+
 /* Return the type of the specified file system.
    Some systems have statfvs.f_basetype[FSTYPSZ] (AIX, HP-UX, and Solaris).
    Others have statvfs.f_fstypename[_VFS_NAMELEN] (NetBSD 3.0).
@@ -592,6 +722,23 @@
   return printf (pformat, -0.25);
 }
 
+/* Print the ARG_SIZE bytes at ARG as hexadecimal text, formatting each
+   byte with printf's %02x, by handing the result to out_string.  */
+static void
+out_hex_string (char *pformat, size_t prefix_len, char const *arg,
+                size_t arg_size)
+{
+  char *hex = alloca (2 * arg_size + 1);
+  size_t i;
+
+  /* Two hex digits per input byte; sprintf keeps HEX NUL-terminated.  */
+  hex[0] = '\0';
+  for (i = 0; i < arg_size; i++)
+    sprintf (hex + 2 * i, "%02x", (unsigned char) arg[i]);
+
+  out_string (pformat, prefix_len, hex);
+}
+
 /* Output the number of seconds since the Epoch, using a format that
    acts like printf's %f format.  */
 static void
@@ -909,6 +1056,112 @@
   return z;
 }
 
+/* Compute the digest of regular file FILENAME by running STREAM, a
+   digest function such as md5_stream, over the file's contents.
+
+   Put the checksum in *BIN_RESULT, which must be properly aligned.
+   Return true if successful; else diagnose the failure, return false.  */
+static bool
+digest_regular_file (int (*stream) (FILE*, void*), const char *filename,
+                     unsigned char *bin_result)
+{
+  FILE *fp;
+  int err;
+
+  fp = fopen (filename, "rb");
+  if (fp == NULL)
+    {
+      error (0, errno, "%s", quote (filename));
+      return false;
+    }
+
+  fadvise (fp, FADVISE_SEQUENTIAL);
+
+  err = stream (fp, bin_result);
+  if (err)
+    {
+      error (0, errno, "%s", quote (filename));
+      fclose (fp);
+      return false;
+    }
+
+  if (fclose (fp) != 0)
+    {
+      error (0, errno, "%s", quote (filename));
+      return false;
+    }
+
+  return true;
+}
+
+/* Compute the digest of the target name of symlink FILENAME by running
+   BUFFER, a digest function such as md5_buffer, over that name.
+   LINK_SIZE is handed to areadlink_with_size as its size argument.
+   Put the checksum in *BIN_RESULT, which must be properly aligned.
+   Return true if successful; else diagnose the failure, return false.  */
+static bool
+digest_symlink_file (void* (*buffer) (const char*, size_t, void*),
+                     const char *filename, size_t link_size,
+                     unsigned char *bin_result)
+{
+  char *link_name = areadlink_with_size (filename, link_size);
+  if (link_name == NULL)
+    {
+      error (0, errno, _("cannot read symbolic link %s"),
+             quote (filename));
+      return false;
+    }
+
+  /* Hash the bytes actually read: LINK_SIZE may disagree with them.  */
+  (void) buffer (link_name, strlen (link_name), bin_result);
+  free (link_name);
+
+  return true;
+}
+
+/* Print the digest sum of FILENAME (file data if regular, target name if
+   a symlink) in hex; print "-" for other file types, "?" upon failure.
+   Return false upon success, true upon failure.  */
+static bool ATTRIBUTE_WARN_UNUSED_RESULT
+out_file_digest (char *pformat, size_t prefix_len, char const *filename,
+                 struct stat *statbuf)
+{
+  static unsigned char raw_buffer[MAX_DIGEST_BYTES + MAX_DIGEST_ALIGN];
+  const struct digest_desc *desc = &digest_descs[file_digest_type];
+  unsigned char *bin_buffer;
+  size_t n_bin_buffer;
+  bool digest = false;
+  bool fail = false;
+
+  assert (desc->bits % CHAR_BIT == 0);
+  assert (desc->bits / CHAR_BIT <= MAX_DIGEST_BYTES);
+  assert (desc->align <= MAX_DIGEST_ALIGN);
+
+  n_bin_buffer = desc->bits / CHAR_BIT;  /* Digest size, in bytes.  */
+
+  /* Make sure bin_buffer is properly aligned.  */
+  bin_buffer = ptr_align (raw_buffer, desc->align);
+
+  assert (bin_buffer >= raw_buffer);
+  assert (bin_buffer + n_bin_buffer <= raw_buffer + sizeof (raw_buffer));
+
+  /* FIXME: maybe we shouldn't compute the digest sum
+     from one call to the other when filenames are identical.  */
+  if ((digest = S_ISREG (statbuf->st_mode)))
+    fail = !digest_regular_file (desc->stream, filename, bin_buffer);
+  else if ((digest = S_ISLNK (statbuf->st_mode)))
+    fail = !digest_symlink_file (desc->buffer, filename, statbuf->st_size,
+                                 bin_buffer);
+
+  if (digest && !fail)
+    out_hex_string (pformat, prefix_len, (char const *) bin_buffer,
+                    n_bin_buffer);
+  else
+    out_string (pformat, prefix_len, fail ? "?" : "-");
+
+  return fail;
+}
+
 /* Print stat info.  Return zero upon success, nonzero upon failure.  */
 static bool
 print_stat (char *pformat, size_t prefix_len, unsigned int m,
@@ -922,7 +1175,8 @@
   switch (m)
     {
     case 'n':
-      out_string (pformat, prefix_len, filename);
+      out_string (pformat, prefix_len, quotearg_style (filename_quoting_style,
+                                                       filename));
       break;
     case 'N':
       out_string (pformat, prefix_len, quote (filename));
@@ -992,6 +1246,9 @@
     case 's':
       out_int (pformat, prefix_len, statbuf->st_size);
       break;
+    case 'S':
+      fail |= out_file_digest (pformat, prefix_len, filename, statbuf);
+      break;
     case 'B':
       out_uint (pformat, prefix_len, ST_NBLOCKSIZE);
       break;
@@ -1357,7 +1614,11 @@
     emit_try_help ();
   else
     {
-      printf (_("Usage: %s [OPTION]... FILE...\n"), program_name);
+      printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+   or: %s [OPTION]... --files-from=F\n\
+"), program_name, program_name);
+
       fputs (_("\
 Display file or file system status.\n\
 "), stdout);
@@ -1366,14 +1627,28 @@
 
       fputs (_("\
   -L, --dereference     follow links\n\
+      --digest-type=WORD\n\
+                        when computing file content sums use specified\n\
+                        message digest algorithm: md5, sha1, sha224, sha256,\n\
+                        sha384 or sha512;  when the option is not specified\n\
+                        compute sha1 digests\n\
   -f, --file-system     display file system status instead of file status\n\
 "), stdout);
+       fputs (_("\
+      --files-from=F    display status of files specified by names in file F;\n\
+                          If F is - then read names from standard input\n\
+"), stdout);
       fputs (_("\
   -c  --format=FORMAT   use the specified FORMAT instead of the default;\n\
                           output a newline after each use of FORMAT\n\
       --printf=FORMAT   like --format, but interpret backslash escapes,\n\
                           and do not output a mandatory trailing newline;\n\
                           if you want a newline, include \\n in FORMAT\n\
+      --quoting-style=WORD\n\
+                        use quoting style WORD for file names:\n\
+                          literal, locale, shell, shell-always, c, escape\n\
+                          c-maybe, clocale;\n\
+                        when no option is given, use literal style\n\
   -t, --terse           print the information in terse form\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -1404,6 +1679,7 @@
   %N   quoted file name with dereference if symbolic link\n\
   %o   optimal I/O transfer size hint\n\
   %s   total size, in bytes\n\
+  %S   file content digest sum\n\
   %t   major device type in hex, for character/block device special files\n\
   %T   minor device type in hex, for character/block device special files\n\
 "), stdout);
@@ -1455,6 +1731,9 @@
   char *format = NULL;
   char *format2;
   bool ok = true;
+  FILE *stream = NULL;
+  char *files_from = NULL;
+  struct Tokens tok;
 
   initialize_main (&argc, &argv);
   set_program_name (argv[0]);
@@ -1496,6 +1775,22 @@
           terse = true;
           break;
 
+        case DIGEST_TYPE_OPTION:
+          file_digest_type = XARGMATCH ("--digest-type", optarg,
+                                        digest_type_args,
+                                        digest_type_vals);
+          break;
+
+        case FILES_FROM_OPTION:
+          files_from = optarg;
+          break;
+
+        case QUOTING_STYLE_OPTION:
+          filename_quoting_style = XARGMATCH ("--quoting-style", optarg,
+                                              quoting_style_args,
+                                              quoting_style_vals);
+          break;
+
         case_GETOPT_HELP_CHAR;
 
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -1505,12 +1800,6 @@
         }
     }
 
-  if (argc == optind)
-    {
-      error (0, 0, _("missing operand"));
-      usage (EXIT_FAILURE);
-    }
-
   if (format)
     format2 = format;
   else
@@ -1519,10 +1808,113 @@
       format2 = default_format (fs, terse, true);
     }
 
-  for (i = optind; i < argc; i++)
-    ok &= (fs
-           ? do_statfs (argv[i], format)
-           : do_stat (argv[i], format, format2));
+  bool read_tokens = false;
+  struct argv_iterator *ai;
+  if (files_from)
+    {
+      /* When using --files-from=F, you may not specify any files
+         on the command-line.  */
+      if (optind < argc)
+        {
+          error (0, 0, _("extra operand %s"), quote (argv[optind]));
+          fprintf (stderr, "%s\n",
+                   _("file operands cannot be combined with --files-from"));
+          usage (EXIT_FAILURE);
+        }
+
+      if (STREQ (files_from, "-"))
+        stream = stdin;
+      else
+        {
+          stream = fopen (files_from, "r");
+          if (stream == NULL)
+            error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
+                   quote (files_from));
+        }
+
+      /* Read the file list into RAM if we can detect its size and that
+         size is reasonable.  Otherwise, we'll read a name at a time.  */
+      struct stat st;
+      if (fstat (fileno (stream), &st) == 0
+          && S_ISREG (st.st_mode)
+          && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
+        {
+          read_tokens = true;
+          readtokens0_init (&tok);
+          if (! readtokens0_delim (stream, &tok, '\n'))
+            error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
+                   quote (files_from));
+          ai = argv_iter_init_argv (tok.tok);
+        }
+      else
+        {
+          ai = argv_iter_init_stream_delim (stream, '\n');
+        }
+    }
+  else if (argc > optind)
+    {
+      ai = argv_iter_init_argv (argv + optind);
+    }
+  else
+    {
+      error (0, 0, _("missing operand"));
+      usage (EXIT_FAILURE);
+    }
+
+  if (!ai)
+    xalloc_die ();
+
+  while (true)
+    {
+      enum argv_iter_err ai_err;
+      char *file_name = argv_iter (ai, &ai_err);
+      if (!file_name)
+        {
+          switch (ai_err)
+            {
+            case AI_ERR_EOF:
+              goto argv_iter_done;
+            case AI_ERR_READ:
+              error (0, errno, _("%s: read error"),
+                     quotearg_colon (files_from));
+              ok = false;
+              goto argv_iter_done;
+            case AI_ERR_MEM:
+              xalloc_die ();
+            default:
+              assert (!"unexpected error code from argv_iter");
+            }
+        }
+
+      /* Silently ignore empty input lines when
+         given --files-from=FILE.  */
+      if (files_from && !file_name[0])
+        continue;
+
+      if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
+        {
+          /* Give a better diagnostic in an unusual case:
+             printf - | stat --files-from=- */
+          error (0, 0, _("when reading file names from stdin, "
+                         "no file name of %s allowed"),
+                 quote (file_name));
+          ok = false;
+          continue;
+        }
+
+      ok &= (fs
+             ? do_statfs (file_name, format)
+             : do_stat (file_name, format, format2));
+    }
+ argv_iter_done:
+
+  if (read_tokens)
+    readtokens0_free (&tok);
+
+  argv_iter_free (ai);
+
+  if (files_from && (ferror (stream) || fclose (stream) != 0))
+    error (EXIT_FAILURE, 0, _("error reading %s"), quote (files_from));
 
   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }

Reply via email to