Alright, I've redone it so that the utilities now both create and expect GNU
escaping in both the BSD and GNU output formats.
For now, I left the --tag format's indication of text-mode files as a space
before the algorithm name, but I can change that if needed.
Cheers,
 Ondrej

----- Original Message -----
From: "Pádraig Brady" <[email protected]>
To: "Ondrej Oprala" <[email protected]>
Cc: [email protected], "D Yu Bolkhovityanov" <[email protected]>
Sent: Thursday, July 26, 2012 5:50:48 PM
Subject: Re: RFE: hash-type in sum utils

On 07/26/2012 03:06 PM, Ondrej Oprala wrote:
> Sorry, but I can't seem to create a filename that would cause md5sum --tag to
> output escaping different from BSD's md5. Could you be more specific about
> the BSD escaping problem, please?
> Also thanks for the formatting patch.

Oops, I assumed BSD's utils would at least escape '\n'
but looking at their code they just output it directly.

So therefore BSD and old GNU don't support checksum files
in BSD format when any file names contain a '\n'. Fair enough.

So then, I suppose we should use the existing GNU escape mechanism
in this case too. I.e., if there is a '\n' character in the name,
then add a '\' at the start of the line and replace any
'\' with "\\" and '\n' with "\" + "n".

BTW, I'm wondering why we enable escaping on output when
there are '\' characters in the name, but no '\n' chars?
Avoiding that would increase compatibility with the BSDs.

cheers,
Pádraig.
diff -up coreutils-8.17/src/md5sum.c.prefix coreutils-8.17/src/md5sum.c
--- coreutils-8.17/src/md5sum.c.prefix	2012-07-24 11:25:07.132521741 +0200
+++ coreutils-8.17/src/md5sum.c	2012-07-30 14:16:40.469463863 +0200
@@ -48,6 +48,7 @@
 # define DIGEST_BITS 128
 # define DIGEST_REFERENCE "RFC 1321"
 # define DIGEST_ALIGN 4
+# define PREFIX_INDEX 0
 #elif HASH_ALGO_SHA1
 # define PROGRAM_NAME "sha1sum"
 # define DIGEST_TYPE_STRING "SHA1"
@@ -55,6 +56,7 @@
 # define DIGEST_BITS 160
 # define DIGEST_REFERENCE "FIPS-180-1"
 # define DIGEST_ALIGN 4
+# define PREFIX_INDEX 1
 #elif HASH_ALGO_SHA256
 # define PROGRAM_NAME "sha256sum"
 # define DIGEST_TYPE_STRING "SHA256"
@@ -62,6 +64,7 @@
 # define DIGEST_BITS 256
 # define DIGEST_REFERENCE "FIPS-180-2"
 # define DIGEST_ALIGN 4
+# define PREFIX_INDEX 2
 #elif HASH_ALGO_SHA224
 # define PROGRAM_NAME "sha224sum"
 # define DIGEST_TYPE_STRING "SHA224"
@@ -69,6 +72,7 @@
 # define DIGEST_BITS 224
 # define DIGEST_REFERENCE "RFC 3874"
 # define DIGEST_ALIGN 4
+# define PREFIX_INDEX 3
 #elif HASH_ALGO_SHA512
 # define PROGRAM_NAME "sha512sum"
 # define DIGEST_TYPE_STRING "SHA512"
@@ -76,6 +80,7 @@
 # define DIGEST_BITS 512
 # define DIGEST_REFERENCE "FIPS-180-2"
 # define DIGEST_ALIGN 8
+# define PREFIX_INDEX 4
 #elif HASH_ALGO_SHA384
 # define PROGRAM_NAME "sha384sum"
 # define DIGEST_TYPE_STRING "SHA384"
@@ -83,6 +88,7 @@
 # define DIGEST_BITS 384
 # define DIGEST_REFERENCE "FIPS-180-2"
 # define DIGEST_ALIGN 8
+# define PREFIX_INDEX 5
 #else
 # error "Can't decide which hash algorithm to compile."
 #endif
@@ -135,7 +141,8 @@ enum
 {
   STATUS_OPTION = CHAR_MAX + 1,
   QUIET_OPTION,
-  STRICT_OPTION
+  STRICT_OPTION,
+  TAG_OPTION
 };
 
 static struct option const long_options[] =
@@ -147,11 +154,22 @@ static struct option const long_options[
   { "text", no_argument, NULL, 't' },
   { "warn", no_argument, NULL, 'w' },
   { "strict", no_argument, NULL, STRICT_OPTION },
+  { "tag", no_argument, NULL, TAG_OPTION },
   { GETOPT_HELP_OPTION_DECL },
   { GETOPT_VERSION_OPTION_DECL },
   { NULL, 0, NULL, 0 }
 };
 
+static const char *const prefixes[] =
+{
+  "MD5 ",
+  "SHA1 ",
+  "SHA256 ",
+  "SHA224 ",
+  "SHA512 ",
+  "SHA384 "
+};
+
 void
 usage (int status)
 {
@@ -215,21 +233,67 @@ space for text), and name for each FILE.
 
 #define ISWHITE(c) ((c) == ' ' || (c) == '\t')
 
+static bool 
+filename_escape (char *s, int s_len, char **file_name)
+{
+      /* Translate each '\n' string in the file name to a NEWLINE,
+         and each '\\' string to a backslash.  */
+
+  *file_name = s;
+  char *dst = s;
+  int i = 0;
+
+  while (i < s_len)
+    {
+      switch (s[i])
+        {
+        case '\\':
+          if (i == s_len - 1)
+            {
+              /* A valid line does not end with a backslash.  */
+              return false;
+            }
+          ++i;
+          switch (s[i++])
+            {
+            case 'n':
+              *dst++ = '\n';
+              break;
+            case '\\':
+              *dst++ = '\\';
+              break;
+            default:
+              /* Only '\' or 'n' may follow a backslash.  */
+              return false;
+            }
+          break;
+
+        case '\0':
+          /* The file name may not contain a NUL.  */
+          return false;
+          break;
+
+        default:
+          *dst++ = s[i++];
+          break;
+        }
+    }
+  *dst = '\0';
+  return true;
+}
+
 /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
    'sha1' command into two parts: a hexadecimal digest, and the file
    name.  S is modified.  Return true if successful.  */
 
 static bool
 bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
-             char **file_name)
+             char **file_name, bool escaped_filename)
 {
   size_t i;
 
   if (s_len == 0)
     return false;
-
-  *file_name = s;
-
   /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape
      filenames, so search backwards for the last ')'. */
   i = s_len - 1;
@@ -239,6 +303,12 @@ bsd_split_3 (char *s, size_t s_len, unsi
   if (s[i] != ')')
     return false;
 
+  *file_name = s;
+  
+  if (escaped_filename)
+    if (!filename_escape (s, i, file_name))
+      return false;
+
   s[i++] = '\0';
 
   while (ISWHITE (s[i]))
@@ -271,7 +341,16 @@ split_3 (char *s, size_t s_len,
   while (ISWHITE (s[i]))
     ++i;
 
+  if (s[i] == '\\')
+    {
+      ++i;
+      escaped_filename = true;
+    }
+
   /* Check for BSD-style checksum line. */
+  if (s[i] == ' ')
+    ++i;
+
   algo_name_len = strlen (DIGEST_TYPE_STRING);
   if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
     {
@@ -282,9 +361,12 @@ split_3 (char *s, size_t s_len,
           *binary = 0;
           return bsd_split_3 (s +      i + algo_name_len + 1,
                               s_len - (i + algo_name_len + 1),
-                              hex_digest, file_name);
+                              hex_digest, file_name, escaped_filename);
         }
     }
+  else if (escaped_filename && (s[i] == ' ' || s[i + 1] == ' '))
+    return false;
+
 
   /* Ignore this line if it is too short.
      Each line must have at least 'min_digest_line_length - 1' (or one more, if
@@ -293,12 +375,7 @@ split_3 (char *s, size_t s_len,
   if (s_len - i < min_digest_line_length + (s[i] == '\\'))
     return false;
 
-  if (s[i] == '\\')
-    {
-      ++i;
-      escaped_filename = true;
-    }
-  *hex_digest = (unsigned char *) &s[i];
+    *hex_digest = (unsigned char *) &s[i];
 
   /* The first field has to be the n-character hexadecimal
      representation of the message digest.  If it is not followed
@@ -333,49 +410,8 @@ split_3 (char *s, size_t s_len,
   *file_name = &s[i];
 
   if (escaped_filename)
-    {
-      /* Translate each '\n' string in the file name to a NEWLINE,
-         and each '\\' string to a backslash.  */
-
-      char *dst = &s[i];
-
-      while (i < s_len)
-        {
-          switch (s[i])
-            {
-            case '\\':
-              if (i == s_len - 1)
-                {
-                  /* A valid line does not end with a backslash.  */
-                  return false;
-                }
-              ++i;
-              switch (s[i++])
-                {
-                case 'n':
-                  *dst++ = '\n';
-                  break;
-                case '\\':
-                  *dst++ = '\\';
-                  break;
-                default:
-                  /* Only '\' or 'n' may follow a backslash.  */
-                  return false;
-                }
-              break;
-
-            case '\0':
-              /* The file name may not contain a NUL.  */
-              return false;
-              break;
+    return filename_escape (&s[i], s_len - i, file_name);
 
-            default:
-              *dst++ = s[i++];
-              break;
-            }
-        }
-      *dst = '\0';
-    }
   return true;
 }
 
@@ -636,6 +672,31 @@ digest_check (const char *checkfile_name
           && (!strict || n_improperly_formatted_lines == 0));
 }
 
+static void
+print_filename (char *file)
+{
+  size_t i;
+  /* Translate each NEWLINE byte to the string, "\\n",
+     and each backslash to "\\\\".  */
+  for (i = 0; i < strlen (file); ++i)
+    {
+      switch (file[i])
+        {
+        case '\n':
+          fputs ("\\n", stdout);
+          break;
+
+        case '\\':
+          fputs ("\\\\", stdout);
+          break;
+
+        default:
+          putchar (file[i]);
+          break;
+        }
+    }
+}
+
 int
 main (int argc, char **argv)
 {
@@ -646,6 +707,7 @@ main (int argc, char **argv)
   int opt;
   bool ok = true;
   int binary = -1;
+  bool prefix_tag = false;
 
   /* Setting values of global variables.  */
   initialize_main (&argc, &argv);
@@ -690,6 +752,9 @@ main (int argc, char **argv)
       case STRICT_OPTION:
         strict = true;
         break;
+      case TAG_OPTION:
+        prefix_tag = true;
+        break;
       case_GETOPT_HELP_CHAR;
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
       default:
@@ -754,41 +819,40 @@ main (int argc, char **argv)
             ok = false;
           else
             {
+              if (prefix_tag)
+                {
+                  if (strchr (file, '\n') || strchr (file, '\\'))
+                    putchar ('\\');
+                  if (!file_is_binary)
+                    putchar (' ');
+                  fputs (prefixes[PREFIX_INDEX], stdout);
+                  putchar ('(');
+                  print_filename (file);
+                  fputs (") = ", stdout);
+                }
+
               size_t i;
 
               /* Output a leading backslash if the file name contains
                  a newline or backslash.  */
-              if (strchr (file, '\n') || strchr (file, '\\'))
+              if (!prefix_tag && (strchr (file, '\n') || strchr (file, '\\')))
                 putchar ('\\');
 
               for (i = 0; i < (digest_hex_bytes / 2); ++i)
                 printf ("%02x", bin_buffer[i]);
 
-              putchar (' ');
-              if (file_is_binary)
-                putchar ('*');
-              else
-                putchar (' ');
-
-              /* Translate each NEWLINE byte to the string, "\\n",
-                 and each backslash to "\\\\".  */
-              for (i = 0; i < strlen (file); ++i)
+              if (!prefix_tag)
                 {
-                  switch (file[i])
-                    {
-                    case '\n':
-                      fputs ("\\n", stdout);
-                      break;
-
-                    case '\\':
-                      fputs ("\\\\", stdout);
-                      break;
-
-                    default:
-                      putchar (file[i]);
-                      break;
-                    }
+                  putchar (' ');
+
+                  if (file_is_binary)
+                    putchar ('*');
+                  else
+                    putchar (' ');
+
+                  print_filename (file);
                 }
+
               putchar ('\n');
             }
         }

Reply via email to