From c33d69ef76f66411006f36abb2f3a309ddec7c86 Mon Sep 17 00:00:00 2001 From: Ondrej Oprala Date: Tue, 31 Jul 2012 13:57:48 +0200 Subject: [PATCH] The patch adds the --tag option to *sum utils for BSD-like output --- NEWS | 70 +---------------- src/md5sum.c | 206 +++++++++++++++++++++++++++++++------------------- tests/misc/md5sum-bsd | 11 +++ 3 files changed, 143 insertions(+), 144 deletions(-) diff --git a/NEWS b/NEWS index f1255ea..17855ce 100644 --- a/NEWS +++ b/NEWS @@ -1,67 +1,5 @@ GNU coreutils NEWS -*- outline -*- -* Noteworthy changes in release ?.? (????-??-??) [?] - -** Bug fixes - - cksum now prints checksums atomically so that concurrent - processes will not intersperse their output. - [the bug dates back to the initial implementation] - - date -d "$(printf '\xb0')" would print 00:00:00 with today's date - rather than diagnosing the invalid input. Now it reports this: - date: invalid date '\260' - [This bug was present in "the beginning".] - - df no longer outputs control characters present in the mount point name. - Such characters are replaced with '?', so for example, scripts consuming - lines output by df, can work reliably. - [This bug was present in "the beginning".] - - head --lines=-N (-n-N) now resets the read pointer of a seekable input file. - This means that "head -n-3" no longer consumes all of its input, and lines - not output by head may be processed by other programs. For example, this - command now prints the final line, 2, while before it would print nothing: - seq 2 > k; (head -n-1 > /dev/null; cat) < k - [This bug was present in "the beginning".] - - ls --color would mis-color relative-named symlinks in / - [bug introduced in coreutils-8.17] - - split now ensures it doesn't overwrite the input file with generated output. - [the bug dates back to the initial implementation] - - stat and df now report the correct file system usage, - in all situations on GNU/Linux, by correctly determining the block size. 
- [df bug since coreutils-5.0.91, stat bug since the initial implementation] - - tail -f no longer tries to use inotify on AUFS or PanFS file systems - [you might say this was introduced in coreutils-7.5, along with inotify - support, but even now, its magic number isn't in the usual place.] - -** New features - - stat -f recognizes the new remote file system types: aufs, panfs. - -** Changes in behavior - - su: this program has been removed. We stopped installing "su" by - default with the release of coreutils-6.9.90 on 2007-12-01. Now, - that the util-linux package has the union of the Suse and Fedora - patches as well as enough support to build on the Hurd, we no longer - have any reason to include it here. - -** Improvements - - sort avoids redundant processing in the presence of inaccessible inputs, - or unwritable output. Sort now diagnoses certain errors at start-up, - rather than after potentially expensive processing. - - sort now allocates no more than 75% of physical memory by default, - to better share system resources, and thus operate more efficiently. - [The default max memory usage changed from 50% to 100% in coreutils-8.16] - - * Noteworthy changes in release 8.17 (2012-05-10) [stable] ** Bug fixes @@ -96,6 +34,10 @@ GNU coreutils NEWS -*- outline -*- stat -f recognizes new file system types: bdevfs, inodefs, qnx6 + md5sum now accepts the --tag option for BSD-style output with GNU filename + escaping. This also affects sha1sum, sha224sum, sha256sum, sha384sum + and sha512sum. + ** Changes in behavior cp,mv,install,cat,split: now read and write a minimum of 64KiB at a time. @@ -477,10 +419,6 @@ GNU coreutils NEWS -*- outline -*- join --header now skips the ordering check for the first line even if the other file is empty. [bug introduced in coreutils-8.5] - join -v2 now ensures the default output format prints the match field - at the start of the line when it is different to the match field for - the first file. [bug present in "the beginning".] 
- rm -f no longer fails for EINVAL or EILSEQ on file systems that reject file names invalid for that file system. diff --git a/src/md5sum.c b/src/md5sum.c index f7e0849..eecf2c7 100644 --- a/src/md5sum.c +++ b/src/md5sum.c @@ -135,7 +135,8 @@ enum { STATUS_OPTION = CHAR_MAX + 1, QUIET_OPTION, - STRICT_OPTION + STRICT_OPTION, + TAG_OPTION }; static struct option const long_options[] = @@ -147,6 +148,7 @@ static struct option const long_options[] = { "text", no_argument, NULL, 't' }, { "warn", no_argument, NULL, 'w' }, { "strict", no_argument, NULL, STRICT_OPTION }, + { "tag", no_argument, NULL, TAG_OPTION }, { GETOPT_HELP_OPTION_DECL }, { GETOPT_VERSION_OPTION_DECL }, { NULL, 0, NULL, 0 } @@ -215,21 +217,72 @@ space for text), and name for each FILE.\n"), #define ISWHITE(c) ((c) == ' ' || (c) == '\t') +/* Translate each '\n' string in the file name beginning + at string S (of length S_LEN) to a NEWLINE, + and each '\\' string to a backslash; FILE_NAME becoming + the pointer used to print the actual file name. Return + true unless file name is invalid. */ + +static bool +filename_unescape (char *s, size_t s_len, char **file_name) +{ + + char *dst = s; + size_t i = 0; + + while (i < s_len) + { + switch (s[i]) + { + case '\\': + if (i == s_len - 1) + { + /* A valid line does not end with a backslash. */ + return false; + } + ++i; + switch (s[i++]) + { + case 'n': + *dst++ = '\n'; + break; + case '\\': + *dst++ = '\\'; + break; + default: + /* Only '\' or 'n' may follow a backslash. */ + return false; + } + break; + + case '\0': + /* The file name may not contain a NUL. */ + return false; + + default: + *dst++ = s[i++]; + break; + } + } + *dst = '\0'; + + *file_name = s; + + return true; +} + /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or 'sha1' command into two parts: a hexadecimal digest, and the file name. S is modified. Return true if successful. 
*/ static bool bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, - char **file_name) + char **file_name, bool escaped_filename) { size_t i; if (s_len == 0) return false; - - *file_name = s; - /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape filenames, so search backwards for the last ')'. */ i = s_len - 1; @@ -239,6 +292,12 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, if (s[i] != ')') return false; + *file_name = s; + + if (escaped_filename) + if (!filename_unescape (s, i, file_name)) + return false; + s[i++] = '\0'; while (ISWHITE (s[i])) @@ -271,7 +330,16 @@ split_3 (char *s, size_t s_len, while (ISWHITE (s[i])) ++i; + if (s[i] == '\\') + { + ++i; + escaped_filename = true; + } + /* Check for BSD-style checksum line. */ + if (s[i] == ' ') + ++i; + algo_name_len = strlen (DIGEST_TYPE_STRING); if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len)) { @@ -282,9 +350,12 @@ split_3 (char *s, size_t s_len, *binary = 0; return bsd_split_3 (s + i + algo_name_len + 1, s_len - (i + algo_name_len + 1), - hex_digest, file_name); + hex_digest, file_name, escaped_filename); } } + else if (escaped_filename && (s[i] == ' ' || s[i + 1] == ' ')) + return false; + /* Ignore this line if it is too short. Each line must have at least 'min_digest_line_length - 1' (or one more, if @@ -293,12 +364,7 @@ split_3 (char *s, size_t s_len, if (s_len - i < min_digest_line_length + (s[i] == '\\')) return false; - if (s[i] == '\\') - { - ++i; - escaped_filename = true; - } - *hex_digest = (unsigned char *) &s[i]; + *hex_digest = (unsigned char *) &s[i]; /* The first field has to be the n-character hexadecimal representation of the message digest. If it is not followed @@ -333,49 +399,8 @@ split_3 (char *s, size_t s_len, *file_name = &s[i]; if (escaped_filename) - { - /* Translate each '\n' string in the file name to a NEWLINE, - and each '\\' string to a backslash. 
*/ - - char *dst = &s[i]; - - while (i < s_len) - { - switch (s[i]) - { - case '\\': - if (i == s_len - 1) - { - /* A valid line does not end with a backslash. */ - return false; - } - ++i; - switch (s[i++]) - { - case 'n': - *dst++ = '\n'; - break; - case '\\': - *dst++ = '\\'; - break; - default: - /* Only '\' or 'n' may follow a backslash. */ - return false; - } - break; - - case '\0': - /* The file name may not contain a NUL. */ - return false; - break; + return filename_unescape (&s[i], s_len - i, file_name); - default: - *dst++ = s[i++]; - break; - } - } - *dst = '\0'; - } return true; } @@ -636,6 +661,31 @@ digest_check (const char *checkfile_name) && (!strict || n_improperly_formatted_lines == 0)); } +static void +print_filename (char const *file) +{ + /* Translate each NEWLINE byte to the string, "\\n", + and each backslash to "\\\\". */ + while (*file) + { + switch (*file) + { + case '\n': + fputs ("\\n", stdout); + break; + + case '\\': + fputs ("\\\\", stdout); + break; + + default: + putchar (*file); + break; + } + file++; + } +} + int main (int argc, char **argv) { @@ -646,6 +696,7 @@ main (int argc, char **argv) int opt; bool ok = true; int binary = -1; + bool prefix_tag = false; /* Setting values of global variables. */ initialize_main (&argc, &argv); @@ -690,6 +741,9 @@ main (int argc, char **argv) case STRICT_OPTION: strict = true; break; + case TAG_OPTION: + prefix_tag = true; + break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: @@ -754,41 +808,37 @@ main (int argc, char **argv) ok = false; else { + if (prefix_tag) + { + if (strchr (file, '\n') || strchr (file, '\\')) + putchar ('\\'); + if (!file_is_binary) + putchar (' '); + fputs (DIGEST_TYPE_STRING, stdout); + fputs(" (", stdout); + print_filename (file); + fputs (") = ", stdout); + } + size_t i; /* Output a leading backslash if the file name contains a newline or backslash. 
*/ - if (strchr (file, '\n') || strchr (file, '\\')) + if (!prefix_tag && (strchr (file, '\n') || strchr (file, '\\'))) putchar ('\\'); for (i = 0; i < (digest_hex_bytes / 2); ++i) printf ("%02x", bin_buffer[i]); - putchar (' '); - if (file_is_binary) - putchar ('*'); - else - putchar (' '); - - /* Translate each NEWLINE byte to the string, "\\n", - and each backslash to "\\\\". */ - for (i = 0; i < strlen (file); ++i) + if (!prefix_tag) { - switch (file[i]) - { - case '\n': - fputs ("\\n", stdout); - break; - - case '\\': - fputs ("\\\\", stdout); - break; - - default: - putchar (file[i]); - break; - } + putchar (' '); + + putchar (file_is_binary ? '*' : ' '); + + print_filename (file); } + putchar ('\n'); } } diff --git a/tests/misc/md5sum-bsd b/tests/misc/md5sum-bsd index 8226d7a..6497933 100755 --- a/tests/misc/md5sum-bsd +++ b/tests/misc/md5sum-bsd @@ -38,4 +38,15 @@ md5sum --strict -c check.md5 || fail=1 # an option to avoid the ambiguity. tail -n+2 check.md5 | md5sum --strict -c && fail=1 +#--tag option test + +for i in 'a' ' b' '*c' 'dd' ' '; do + echo "$i" > "$i" + md5sum --tag "$i" >> check.md5sum +done +sed 's/ / /' check.md5sum > check.md5 + +md5sum --strict -c check.md5sum || fail=1 +md5sum --strict -c check.md5 || fail=1 + Exit $fail -- 1.7.11.2