Hi Paul, > > 'patch' does not break through this additional lines. It simply ignores > > them. > > True, hence the proposed patch can be an intermediate one on > the way to building a better one.
OK. Can I thus commit the patch that introduces --no-dereference? > But I suppose that we should > document that the output format is likely to change, so that > people are less likely to depend on it. It depends whether you want to work on that before the next diffutils release. > > But -P currently is equivalent to --unidirectional-new-file. > > Yeouch. I forgot about that. You're right, we'd need to take the > next step in deprecating -P. (The first step was taken in 2002, when > it was removed from the documentation, precisely for this reason -- > can you tell that this has been on the TODO list for some time?...) > > We also would need to deprecate -H and -L. -H has no effect anyway Rather, -H and -L are also undocumented, like -P. > Given the large set of changes, and the likely use of -P and -L in scripts, > I expect that the first deprecation step should just warn on stderr and > continue, > rather than having a fatal error. I don't think that would be good. A warning is too easily overlooked. When a user has overlooked the warning, the change in semantics will hit him just a like a silent change. And I don't think many people use these options -P, -H, -L: They are undocumented. > and probably --speed-large-files should be removed from the documentation > too (though still a no-op for backward compatibility). Really?? --speed-large-files is essential for users who need to reduce the running time from O(N²) to O(N). When you diff files of 100 MB or so, it becomes important. > As far as the long option names go, the GNU utilities aren't > consistent here, but perhaps we should use this for diffutils: > > -P == --no-dereference (as in cp, du) > -H == --dereference-command-line (as in ls) > -L == --dereference (as in cp, du, ls, stat) > > omitting the short options for now, as the old meanings are being deprecated. I agree. Can I commit the --no-dereference patch and leave the rest to you? Find attached the updated proposals. Bruno
>From 5c1cfb7ad0793d91455a5783431657cba3bfffd5 Mon Sep 17 00:00:00 2001 From: Bruno Haible <[email protected]> Date: Sat, 7 Jan 2012 00:57:29 +0100 Subject: [PATCH 1/4] New option --no-dereference. * src/diff.h (no_dereference_symlinks): New variable. * src/diff.c: Include xreadlink.h. (longopts): Add --no-dereference option. (main): Accept --no-dereference option. (option_help_msgid): Mention the --no-dereference option. (compare_files): If no_dereference_symlinks is true, use lstat() instead of stat(). Compare symbolic links by comparing their values. * bootstrap.conf (gnulib_modules): Add lstat, stat, xreadlink. * doc/diffutils.texi (Comparing Directories, diff Options): Mention the --no-dereference option. * tests/no-dereference: New file. * tests/Makefile.am (TESTS): Add it. --- bootstrap.conf | 3 + doc/diffutils.texi | 10 +++ src/diff.c | 74 +++++++++++++++++++++- src/diff.h | 4 + tests/Makefile.am | 1 + tests/no-dereference | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 257 insertions(+), 4 deletions(-) create mode 100644 tests/no-dereference diff --git a/bootstrap.conf b/bootstrap.conf index 2399d08..51da0c0 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -48,6 +48,7 @@ hard-locale inttostr inttypes largefile +lstat maintainer-makefile manywarnings mbrtowc @@ -60,6 +61,7 @@ regex sh-quote signal sigprocmask +stat stat-macros stat-time stdint @@ -77,6 +79,7 @@ version-etc-fsf wcwidth xalloc xfreopen +xreadlink xstrtoumax ' diff --git a/doc/diffutils.texi b/doc/diffutils.texi index 5aff3c3..58b140c 100644 --- a/doc/diffutils.texi +++ b/doc/diffutils.texi @@ -1846,6 +1846,13 @@ is specified while the @option{--ignore-file-name-case} option is in effect, case is ignored when excluding file names matching the specified patterns. +To avoid that @command{diff} follows symbolic links, use the +@c later: @option{--no-dereference} (@option{-P}). +@option{--no-dereference}. +When this option is in use, +symbolic links will be treated like a special kind of files, rather than +comparing the target of each symbolic link. + @node Adjusting Output @chapter Making @command{diff} Output Prettier @@ -3848,6 +3855,9 @@ file in if-then-else format. @xref{Line Group Formats}. Use @var{format} to output a line taken from just the second file in if-then-else format. @xref{Line Formats}. +@item --no-dereference +Act on symbolic links themselves instead of what they point to. + @item --old-group-format=@var{format} Use @var{format} to output a group of lines taken from just the first file in if-then-else format. @xref{Line Group Formats}. diff --git a/src/diff.c b/src/diff.c index 004654e..112c8c5 100644 --- a/src/diff.c +++ b/src/diff.c @@ -39,6 +39,7 @@ #include <timespec.h> #include <version-etc.h> #include <xalloc.h> +#include <xreadlink.h> #include <binary-io.h> /* The official name of this program (e.g., no `g' prefix). */ @@ -120,6 +121,7 @@ enum INHIBIT_HUNK_MERGE_OPTION, LEFT_COLUMN_OPTION, LINE_FORMAT_OPTION, + NO_DEREFERENCE_OPTION, NO_IGNORE_FILE_NAME_CASE_OPTION, NORMAL_OPTION, SDIFF_MERGE_ASSIST_OPTION, @@ -188,6 +190,7 @@ static struct option const longopts[] = {"new-file", 0, 0, 'N'}, {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION}, {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION}, + {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION}, {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION}, {"normal", 0, 0, NORMAL_OPTION}, {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION}, @@ -564,6 +567,10 @@ main (int argc, char **argv) specify_value (&line_format[i], optarg, "--line-format"); break; + case NO_DEREFERENCE_OPTION: + no_dereference_symlinks = true; + break; + case NO_IGNORE_FILE_NAME_CASE_OPTION: ignore_file_name_case = false; break; @@ -872,6 +879,7 @@ static char const * const option_help_msgid[] = { N_("-l, --paginate pass output through `pr' to paginate it"), "", N_("-r, --recursive recursively compare any subdirectories found"), + N_(" --no-dereference don't follow symbolic links"), N_("-N, --new-file treat absent files as empty"), N_(" --unidirectional-new-file treat absent first files as empty"), N_(" --ignore-file-name-case ignore case when comparing file names"), @@ -1128,7 +1136,10 @@ compare_files (struct comparison const *parent, set_mtime_to_now (&cmp.file[f].stat); } } - else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0) + else if ((no_dereference_symlinks + ? lstat (cmp.file[f].name, &cmp.file[f].stat) + : stat (cmp.file[f].name, &cmp.file[f].stat)) + != 0) cmp.file[f].desc = ERRNO_ENCODE (errno); } } @@ -1182,7 +1193,10 @@ compare_files (struct comparison const *parent, if (STREQ (fnm, "-")) fatal ("cannot compare `-' to a directory"); - if (stat (filename, &cmp.file[dir_arg].stat) != 0) + if ((no_dereference_symlinks + ? lstat (filename, &cmp.file[dir_arg].stat) + : stat (filename, &cmp.file[dir_arg].stat)) + != 0) { perror_with_name (filename); status = EXIT_TROUBLE; @@ -1229,8 +1243,10 @@ compare_files (struct comparison const *parent, } else if ((DIR_P (0) | DIR_P (1)) || (parent - && (! S_ISREG (cmp.file[0].stat.st_mode) - || ! S_ISREG (cmp.file[1].stat.st_mode)))) + && !((S_ISREG (cmp.file[0].stat.st_mode) + || S_ISLNK (cmp.file[0].stat.st_mode)) + && (S_ISREG (cmp.file[1].stat.st_mode) + || S_ISLNK (cmp.file[1].stat.st_mode))))) { if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) { @@ -1271,6 +1287,56 @@ compare_files (struct comparison const *parent, status = EXIT_FAILURE; } } + else if (S_ISLNK (cmp.file[0].stat.st_mode) + || S_ISLNK (cmp.file[1].stat.st_mode)) + { + /* We get here only if we use lstat(), not stat(). */ + assert (no_dereference_symlinks); + + if (S_ISLNK (cmp.file[0].stat.st_mode) + && S_ISLNK (cmp.file[1].stat.st_mode)) + { + /* Compare the values of the symbolic links. */ + char *link_value[2] = { NULL, NULL }; + + for (f = 0; f < 2; f++) + { + link_value[f] = xreadlink (cmp.file[f].name); + if (link_value[f] == NULL) + { + perror_with_name (cmp.file[f].name); + status = EXIT_TROUBLE; + break; + } + } + if (status == EXIT_SUCCESS) + { + if (strcmp (link_value[0], link_value[1]) != 0) + { + message ("Symbolic links %s and %s differ\n", + cmp.file[0].name, cmp.file[1].name); + /* This is a difference. */ + status = EXIT_FAILURE; + } + } + for (f = 0; f < 2; f++) + free (link_value[f]); + } + else + { + /* We have two files that are not to be compared, because + one of them is a symbolic link and the other one is not. */ + + message5 ("File %s is a %s while file %s is a %s\n", + file_label[0] ? file_label[0] : cmp.file[0].name, + file_type (&cmp.file[0].stat), + file_label[1] ? file_label[1] : cmp.file[1].name, + file_type (&cmp.file[1].stat)); + + /* This is a difference. */ + status = EXIT_FAILURE; + } + } else if (files_can_be_treated_as_binary && S_ISREG (cmp.file[0].stat.st_mode) && S_ISREG (cmp.file[1].stat.st_mode) diff --git a/src/diff.h b/src/diff.h index 795cc0c..05029a3 100644 --- a/src/diff.h +++ b/src/diff.h @@ -135,6 +135,10 @@ XTERN bool ignore_case; /* Ignore differences in case of letters in file names. */ XTERN bool ignore_file_name_case; +/* Act on symbolic links themselves rather than on their target + (--no-dereference). */ +XTERN bool no_dereference_symlinks; + /* File labels for `-c' output headers (--label). */ XTERN char *file_label[2]; diff --git a/tests/Makefile.am b/tests/Makefile.am index 9952d67..2f6ad53 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -8,6 +8,7 @@ TESTS = \ help-version \ function-line-vs-leading-space \ label-vs-func \ + no-dereference \ no-newline-at-eof \ stdin diff --git a/tests/no-dereference b/tests/no-dereference new file mode 100644 index 0000000..1426beb --- /dev/null +++ b/tests/no-dereference @@ -0,0 +1,169 @@ +#!/bin/sh +# Test the --no-dereference option. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +echo 'Simple contents' > regular1 +echo 'Sample contents' > regular2 +echo 'Sample contents' > regular3 +ln -s regular1 symlink1 +ln -s regular1 symlink1bis +ln -s regular2 symlink2 +ln -s regular3 symlink3 + +# Non-recursive comparisons. + +# Test case 3: Compare regular file with regular file. +diff --no-dereference regular1 regular2 > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +1c1 +< Simple contents +--- +> Sample contents +EOF +compare expected out || fail=1 + +# Test case 4: Compare regular file with symbolic link. +diff --no-dereference regular1 symlink1 > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +File regular1 is a regular file while file symlink1 is a symbolic link +EOF +compare expected out || fail=1 + +# Test case 5: Compare symbolic link with regular file. +diff --no-dereference symlink1 regular1 > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +File symlink1 is a symbolic link while file regular1 is a regular file +EOF +compare expected out || fail=1 + +# Test case 6: Compare symbolic links with same value. +diff --no-dereference symlink1 symlink1bis > out +test $? = 0 || fail=1 +compare /dev/null out || fail=1 + +# Test case 7: Compare symbolic links with different value and different target +# contents. +diff --no-dereference symlink1 symlink2 > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +Symbolic links symlink1 and symlink2 differ +EOF +compare expected out || fail=1 + +# Test case 8: Compare symbolic links with different value and same target +# contents. +diff --no-dereference symlink2 symlink3 > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +Symbolic links symlink2 and symlink3 differ +EOF +compare expected out || fail=1 + +# Recursive comparisons. + +# Test case 1: Compare symbolic link with nonexistent file. +mkdir subdir1a +mkdir subdir1b +ln -s nonexistent subdir1a/foo +ln -s ../regular1 subdir1a/bar +diff -r --no-dereference subdir1a subdir1b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +Only in subdir1a: bar +Only in subdir1a: foo +EOF +compare expected out || fail=1 + +# Test case 1: Compare nonexistent file with symbolic link. +mkdir subdir2a +mkdir subdir2b +ln -s nonexistent subdir2b/foo +ln -s ../regular1 subdir2b/bar +diff -r --no-dereference subdir2a subdir2b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +Only in subdir2b: bar +Only in subdir2b: foo +EOF +compare expected out || fail=1 + +# Test case 3: Compare regular file with regular file. +mkdir subdir3a +mkdir subdir3b +cp regular1 subdir3a/foo +cp regular2 subdir3b/foo +diff -r --no-dereference subdir3a subdir3b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +diff -r --no-dereference subdir3a/foo subdir3b/foo +1c1 +< Simple contents +--- +> Sample contents +EOF +compare expected out || fail=1 + +# Test case 4: Compare regular file with symbolic link. +mkdir subdir4a +mkdir subdir4b +cp regular1 subdir4a/foo +ln -s ../regular1 subdir4b/foo +diff -r --no-dereference subdir4a subdir4b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +File subdir4a/foo is a regular file while file subdir4b/foo is a symbolic link +EOF +compare expected out || fail=1 + +# Test case 5: Compare symbolic link with regular file. +mkdir subdir5a +mkdir subdir5b +ln -s ../regular1 subdir5a/foo +cp regular1 subdir5b/foo +diff -r --no-dereference subdir5a subdir5b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +File subdir5a/foo is a symbolic link while file subdir5b/foo is a regular file +EOF +compare expected out || fail=1 + +# Test case 6: Compare symbolic links with same value. +mkdir subdir6a +mkdir subdir6b +ln -s ../regular1 subdir6a/foo +ln -s ../regular1 subdir6b/foo +diff -r --no-dereference subdir6a subdir6b > out +test $? = 0 || fail=1 +compare /dev/null out || fail=1 + +# Test case 7: Compare symbolic links with different value and different target +# contents. +mkdir subdir7a +mkdir subdir7b +ln -s ../regular1 subdir7a/foo +ln -s ../regular2 subdir7b/foo +diff -r --no-dereference subdir7a subdir7b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +Symbolic links subdir7a/foo and subdir7b/foo differ +EOF +compare expected out || fail=1 + +# Test case 8: Compare symbolic links with different value and same target +# contents. +mkdir subdir8a +mkdir subdir8b +ln -s ../regular2 subdir8a/foo +ln -s ../regular3 subdir8b/foo +diff -r --no-dereference subdir8a subdir8b > out +test $? = 1 || fail=1 +cat <<EOF > expected || framework_failure_ +Symbolic links subdir8a/foo and subdir8b/foo differ +EOF +compare expected out || fail=1 + +Exit $fail -- 1.6.3.2
>From 0c75d97f72af7f1764657632a303b043ee6cf790 Mon Sep 17 00:00:00 2001 From: Bruno Haible <[email protected]> Date: Sun, 8 Jan 2012 02:29:24 +0100 Subject: [PATCH 2/4] Deprecate old option -P. * src/diff.c (UNIDIRECTIONAL_NEW_FILE_OPTION): New enumeration item. (longopts): Use it instead of '-P'. (main): Give an error message if option -P is used. --- src/diff.c | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/diff.c b/src/diff.c index 112c8c5..67b3109 100644 --- a/src/diff.c +++ b/src/diff.c @@ -130,6 +130,7 @@ enum SUPPRESS_COMMON_LINES_OPTION, TABSIZE_OPTION, TO_FILE_OPTION, + UNIDIRECTIONAL_NEW_FILE_OPTION, /* These options must be in sequence. */ UNCHANGED_LINE_FORMAT_OPTION, @@ -213,7 +214,7 @@ static struct option const longopts[] = {"to-file", 1, 0, TO_FILE_OPTION}, {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, - {"unidirectional-new-file", 0, 0, 'P'}, + {"unidirectional-new-file", 0, 0, UNIDIRECTIONAL_NEW_FILE_OPTION}, {"unified", 2, 0, 'U'}, {"version", 0, 0, 'v'}, {"width", 1, 0, 'W'}, @@ -455,7 +456,7 @@ main (int argc, char **argv) break; case 'P': - unidirectional_new_file = true; + fatal ("option -P no longer supported, use --unidirectional-new-file for the old meaning"); break; case 'q': @@ -612,6 +613,10 @@ main (int argc, char **argv) specify_value (&to_file, optarg, "--to-file"); break; + case UNIDIRECTIONAL_NEW_FILE_OPTION: + unidirectional_new_file = true; + break; + case UNCHANGED_LINE_FORMAT_OPTION: case OLD_LINE_FORMAT_OPTION: case NEW_LINE_FORMAT_OPTION: -- 1.6.3.2
>From 71c89a405eb88d13f5edf9511bd082f9f4c01325 Mon Sep 17 00:00:00 2001 From: Bruno Haible <[email protected]> Date: Sun, 8 Jan 2012 19:39:41 +0100 Subject: [PATCH 3/4] Deprecate old option -H. * src/diff.c (SPEED_LARGE_FILES_OPTION): New enumeration item. (longopts): Use it instead of '-H'. (main): Give an error message if option -H is used. Suggested by Paul Eggert. --- src/diff.c | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/diff.c b/src/diff.c index 67b3109..2d364e9 100644 --- a/src/diff.c +++ b/src/diff.c @@ -125,6 +125,7 @@ enum NO_IGNORE_FILE_NAME_CASE_OPTION, NORMAL_OPTION, SDIFF_MERGE_ASSIST_OPTION, + SPEED_LARGE_FILES_OPTION, STRIP_TRAILING_CR_OPTION, SUPPRESS_BLANK_EMPTY_OPTION, SUPPRESS_COMMON_LINES_OPTION, @@ -204,7 +205,7 @@ static struct option const longopts[] = {"show-c-function", 0, 0, 'p'}, {"show-function-line", 1, 0, 'F'}, {"side-by-side", 0, 0, 'y'}, - {"speed-large-files", 0, 0, 'H'}, + {"speed-large-files", 0, 0, SPEED_LARGE_FILES_OPTION}, {"starting-file", 1, 0, 'S'}, {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION}, @@ -411,7 +412,7 @@ main (int argc, char **argv) break; case 'H': - speed_large_files = true; + fatal ("option -H no longer supported, use --speed-large-files for the old meaning"); break; case 'i': @@ -585,6 +586,10 @@ main (int argc, char **argv) sdiff_merge_assist = true; break; + case SPEED_LARGE_FILES_OPTION: + speed_large_files = true; + break; + case STRIP_TRAILING_CR_OPTION: strip_trailing_cr = true; break; -- 1.6.3.2
>From e379f978114fed8ba653a9c63a5acd968a46825e Mon Sep 17 00:00:00 2001 From: Bruno Haible <[email protected]> Date: Sun, 8 Jan 2012 19:42:40 +0100 Subject: [PATCH 4/4] Deprecate old option -L. * src/diff.c (LABEL_OPTION): New enumeration item. (longopts): Use it instead of '-L'. (main): Give an error message if option -L is used. Suggested by Paul Eggert. --- src/diff.c | 19 ++++++++++++------- 1 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/diff.c b/src/diff.c index 2d364e9..4f372ac 100644 --- a/src/diff.c +++ b/src/diff.c @@ -119,6 +119,7 @@ enum HORIZON_LINES_OPTION, IGNORE_FILE_NAME_CASE_OPTION, INHIBIT_HUNK_MERGE_OPTION, + LABEL_OPTION, LEFT_COLUMN_OPTION, LINE_FORMAT_OPTION, NO_DEREFERENCE_OPTION, @@ -185,7 +186,7 @@ static struct option const longopts[] = {"ignore-trailing-space", 0, 0, 'Z'}, {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, {"initial-tab", 0, 0, 'T'}, - {"label", 1, 0, 'L'}, + {"label", 1, 0, LABEL_OPTION}, {"left-column", 0, 0, LEFT_COLUMN_OPTION}, {"line-format", 1, 0, LINE_FORMAT_OPTION}, {"minimal", 0, 0, 'd'}, @@ -435,12 +436,7 @@ main (int argc, char **argv) break; case 'L': - if (!file_label[0]) - file_label[0] = optarg; - else if (!file_label[1]) - file_label[1] = optarg; - else - fatal ("too many file label options"); + fatal ("option -L no longer supported, use --label for the old meaning"); break; case 'n': @@ -559,6 +555,15 @@ main (int argc, char **argv) compatibility. */ break; + case LABEL_OPTION: + if (!file_label[0]) + file_label[0] = optarg; + else if (!file_label[1]) + file_label[1] = optarg; + else + fatal ("too many file label options"); + break; + case LEFT_COLUMN_OPTION: left_column = true; break; -- 1.6.3.2
