Ok, I've made all the changes(though I had to change print_filename contents
around a bit, it wouldn't
print out the file name properly unless the name consisted of '\n' and '\'
chars only), added a NEWS entry
and a simple test to md5sum-bsd. Syntax-check was also fine with my changes.
Cheers,
Ondrej
----- Original Message -----
From: "Jim Meyering" <[email protected]>
To: "Ondrej Oprala" <[email protected]>
Cc: "Pádraig Brady" <[email protected]>, "D Yu Bolkhovityanov"
<[email protected]>, [email protected]
Sent: Monday, July 30, 2012 3:43:45 PM
Subject: Re: RFE: hash-type in sum utils
Ondrej Oprala wrote:
> Alright, I've redone it so now the hashes create and expect GNU escaping
> in both the BSD and GNU output format.
> I left the --tag format indication of text files to be a space before
> the algo. name
> so far, but I can change that if needed.
...
> +static const char *const prefixes[] =
> +{
> + "MD5 ",
> + "SHA1 ",
> + "SHA256 ",
> + "SHA224 ",
> + "SHA512 ",
> + "SHA384 "
> +};
> +
...
> + fputs (prefixes[PREFIX_INDEX], stdout);
It looks like you can remove that "prefixes" array and replace
the sole use with this, no?
fputs (DIGEST_TYPE_STRING, stdout);
putchar (' ');
> +static bool
> +filename_escape (char *s, int s_len, char **file_name)
s/int/size_t/
> +{
> + /* Translate each '\n' string in the file name to a NEWLINE,
> + and each '\\' string to a backslash. */
Please put the function-describing comment before the function
definition. It should mention the meaning of the S_LEN
and **FILE_NAME parameters and should explain the meaning of
the return value.
/* Translate each '\n' string in the file name, S, to a NEWLINE,
and each '\\' string to a backslash. ... */
Please change the name to e.g. filename_unescape, since it is not
escaping special characters, but performing the reverse operation.
It is unescaping them.
> + *file_name = s;
I would set this only at the end, just before returning "true".
> + char *dst = s;
> + int i = 0;
"i" is always non-negative and indexes a string, so please use size_t
in place of that "int".
> + while (i < s_len)
> + {
> + switch (s[i])
> + {
> + case '\\':
> + if (i == s_len - 1)
> + {
> + /* A valid line does not end with a backslash. */
> + return false;
> + }
> + ++i;
> + switch (s[i++])
> + {
> + case 'n':
> + *dst++ = '\n';
> + break;
> + case '\\':
> + *dst++ = '\\';
> + break;
> + default:
> + /* Only '\' or 'n' may follow a backslash. */
> + return false;
> + }
> + break;
> +
> + case '\0':
> + /* The file name may not contain a NUL. */
> + return false;
> + break;
The "break" after return may be classified as unreachable,
so please remove it.
> + default:
> + *dst++ = s[i++];
> + break;
> + }
> + }
> + *dst = '\0';
> + return true;
> +}
> +static void
> +print_filename (char *file)
Please make that parameter const:
print_filename (char const *file)
> +{
> + size_t i;
> + /* Translate each NEWLINE byte to the string, "\\n",
> + and each backslash to "\\\\". */
Please use the following instead.
Otherwise, the code traverses FILE twice.
first via strlen, and again via the loop.
while (*file)
{
switch (*file++)
[and s/file[i]/*file/ in the putchar]
> + for (i = 0; i < strlen (file); ++i)
> + {
> + switch (file[i])
> + {
> + case '\n':
> + fputs ("\\n", stdout);
> + break;
> +
> + case '\\':
> + fputs ("\\\\", stdout);
> + break;
> +
> + default:
> + putchar (file[i]);
> + break;
> + }
> + }
> +}
Once you've done the above, consider adding tests to exercise the change
as well as a NEWS entry.
Finally, please run "make syntax-check".
Among other things, that will ensure that your change does
not add trailing blanks.
+ if (file_is_binary)
+ putchar ('*');
+ else
+ putchar (' ');
Please replace those four lines with this one:
putchar (file_is_binary ? '*' : ' ');
you could slightly more concise, but I would not suggest this :-)
putchar (" *"[!!file_is_binary]);
diff -up coreutils-8.17/src/md5sum.c.hashtag coreutils-8.17/src/md5sum.c
--- coreutils-8.17/src/md5sum.c.hashtag 2012-07-31 11:49:26.325612606 +0200
+++ coreutils-8.17/src/md5sum.c 2012-07-31 12:57:16.598535788 +0200
@@ -135,7 +135,8 @@ enum
{
STATUS_OPTION = CHAR_MAX + 1,
QUIET_OPTION,
- STRICT_OPTION
+ STRICT_OPTION,
+ TAG_OPTION
};
static struct option const long_options[] =
@@ -147,6 +148,7 @@ static struct option const long_options[
{ "text", no_argument, NULL, 't' },
{ "warn", no_argument, NULL, 'w' },
{ "strict", no_argument, NULL, STRICT_OPTION },
+ { "tag", no_argument, NULL, TAG_OPTION },
{ GETOPT_HELP_OPTION_DECL },
{ GETOPT_VERSION_OPTION_DECL },
{ NULL, 0, NULL, 0 }
@@ -215,21 +217,72 @@ space for text), and name for each FILE.
#define ISWHITE(c) ((c) == ' ' || (c) == '\t')
+/* Translate each '\n' string in the file name beginning
+ at string S (of length S_LEN) to a NEWLINE,
+ and each '\\' string to a backslash; FILE_NAME becoming
+ the pointer used to print the actual file name. Return
+ true unless file name is invalid. */
+
+static bool
+filename_unescape (char *s, size_t s_len, char **file_name)
+{
+
+ char *dst = s;
+ size_t i = 0;
+
+ while (i < s_len)
+ {
+ switch (s[i])
+ {
+ case '\\':
+ if (i == s_len - 1)
+ {
+ /* A valid line does not end with a backslash. */
+ return false;
+ }
+ ++i;
+ switch (s[i++])
+ {
+ case 'n':
+ *dst++ = '\n';
+ break;
+ case '\\':
+ *dst++ = '\\';
+ break;
+ default:
+ /* Only '\' or 'n' may follow a backslash. */
+ return false;
+ }
+ break;
+
+ case '\0':
+ /* The file name may not contain a NUL. */
+ return false;
+
+ default:
+ *dst++ = s[i++];
+ break;
+ }
+ }
+ *dst = '\0';
+
+ *file_name = s;
+
+ return true;
+}
+
/* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
'sha1' command into two parts: a hexadecimal digest, and the file
name. S is modified. Return true if successful. */
static bool
bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
- char **file_name)
+ char **file_name, bool escaped_filename)
{
size_t i;
if (s_len == 0)
return false;
-
- *file_name = s;
-
/* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape
filenames, so search backwards for the last ')'. */
i = s_len - 1;
@@ -239,6 +292,12 @@ bsd_split_3 (char *s, size_t s_len, unsi
if (s[i] != ')')
return false;
+ *file_name = s;
+
+ if (escaped_filename)
+ if (!filename_unescape (s, i, file_name))
+ return false;
+
s[i++] = '\0';
while (ISWHITE (s[i]))
@@ -271,7 +330,16 @@ split_3 (char *s, size_t s_len,
while (ISWHITE (s[i]))
++i;
+ if (s[i] == '\\')
+ {
+ ++i;
+ escaped_filename = true;
+ }
+
/* Check for BSD-style checksum line. */
+ if (s[i] == ' ')
+ ++i;
+
algo_name_len = strlen (DIGEST_TYPE_STRING);
if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
{
@@ -282,9 +350,12 @@ split_3 (char *s, size_t s_len,
*binary = 0;
return bsd_split_3 (s + i + algo_name_len + 1,
s_len - (i + algo_name_len + 1),
- hex_digest, file_name);
+ hex_digest, file_name, escaped_filename);
}
}
+ else if (escaped_filename && (s[i] == ' ' || s[i + 1] == ' '))
+ return false;
+
/* Ignore this line if it is too short.
Each line must have at least 'min_digest_line_length - 1' (or one more, if
@@ -293,12 +364,7 @@ split_3 (char *s, size_t s_len,
if (s_len - i < min_digest_line_length + (s[i] == '\\'))
return false;
- if (s[i] == '\\')
- {
- ++i;
- escaped_filename = true;
- }
- *hex_digest = (unsigned char *) &s[i];
+ *hex_digest = (unsigned char *) &s[i];
/* The first field has to be the n-character hexadecimal
representation of the message digest. If it is not followed
@@ -333,49 +399,8 @@ split_3 (char *s, size_t s_len,
*file_name = &s[i];
if (escaped_filename)
- {
- /* Translate each '\n' string in the file name to a NEWLINE,
- and each '\\' string to a backslash. */
-
- char *dst = &s[i];
-
- while (i < s_len)
- {
- switch (s[i])
- {
- case '\\':
- if (i == s_len - 1)
- {
- /* A valid line does not end with a backslash. */
- return false;
- }
- ++i;
- switch (s[i++])
- {
- case 'n':
- *dst++ = '\n';
- break;
- case '\\':
- *dst++ = '\\';
- break;
- default:
- /* Only '\' or 'n' may follow a backslash. */
- return false;
- }
- break;
-
- case '\0':
- /* The file name may not contain a NUL. */
- return false;
- break;
+ return filename_unescape (&s[i], s_len - i, file_name);
- default:
- *dst++ = s[i++];
- break;
- }
- }
- *dst = '\0';
- }
return true;
}
@@ -636,6 +661,31 @@ digest_check (const char *checkfile_name
&& (!strict || n_improperly_formatted_lines == 0));
}
+static void
+print_filename (char const *file)
+{
+ /* Translate each NEWLINE byte to the string, "\\n",
+ and each backslash to "\\\\". */
+ while (*file)
+ {
+ switch (*file)
+ {
+ case '\n':
+ fputs ("\\n", stdout);
+ break;
+
+ case '\\':
+ fputs ("\\\\", stdout);
+ break;
+
+ default:
+ putchar (*file);
+ break;
+ }
+ file++;
+ }
+}
+
int
main (int argc, char **argv)
{
@@ -646,6 +696,7 @@ main (int argc, char **argv)
int opt;
bool ok = true;
int binary = -1;
+ bool prefix_tag = false;
/* Setting values of global variables. */
initialize_main (&argc, &argv);
@@ -690,6 +741,9 @@ main (int argc, char **argv)
case STRICT_OPTION:
strict = true;
break;
+ case TAG_OPTION:
+ prefix_tag = true;
+ break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
@@ -754,41 +808,37 @@ main (int argc, char **argv)
ok = false;
else
{
+ if (prefix_tag)
+ {
+ if (strchr (file, '\n') || strchr (file, '\\'))
+ putchar ('\\');
+ if (!file_is_binary)
+ putchar (' ');
+ fputs (DIGEST_TYPE_STRING, stdout);
+ fputs(" (", stdout);
+ print_filename (file);
+ fputs (") = ", stdout);
+ }
+
size_t i;
/* Output a leading backslash if the file name contains
a newline or backslash. */
- if (strchr (file, '\n') || strchr (file, '\\'))
+ if (!prefix_tag && (strchr (file, '\n') || strchr (file, '\\')))
putchar ('\\');
for (i = 0; i < (digest_hex_bytes / 2); ++i)
printf ("%02x", bin_buffer[i]);
- putchar (' ');
- if (file_is_binary)
- putchar ('*');
- else
- putchar (' ');
-
- /* Translate each NEWLINE byte to the string, "\\n",
- and each backslash to "\\\\". */
- for (i = 0; i < strlen (file); ++i)
+ if (!prefix_tag)
{
- switch (file[i])
- {
- case '\n':
- fputs ("\\n", stdout);
- break;
-
- case '\\':
- fputs ("\\\\", stdout);
- break;
-
- default:
- putchar (file[i]);
- break;
- }
+ putchar (' ');
+
+ putchar (file_is_binary ? '*' : ' ');
+
+ print_filename (file);
}
+
putchar ('\n');
}
}
diff -up coreutils-8.17/tests/misc/md5sum-bsd.hashtag coreutils-8.17/tests/misc/md5sum-bsd
--- coreutils-8.17/tests/misc/md5sum-bsd.hashtag 2012-07-31 12:44:27.729009885 +0200
+++ coreutils-8.17/tests/misc/md5sum-bsd 2012-07-31 12:49:49.547002374 +0200
@@ -38,4 +38,15 @@ md5sum --strict -c check.md5 || fail=1
# an option to avoid the ambiguity.
tail -n+2 check.md5 | md5sum --strict -c && fail=1
+#--tag option test
+
+for i in 'a' ' b' '*c' 'dd' ' '; do
+ echo "$i" > "$i"
+ md5sum --tag "$i" >> check.md5sum
+done
+sed 's/ / /' check.md5sum > check.md5
+
+md5sum --strict -c check.md5sum || fail=1
+md5sum --strict -c check.md5 || fail=1
+
Exit $fail