Savannah bug #23920: warn about -path arguments ending in /. * find/parser.c (insert_path_check): new function; abstracts the common functionality of parse_path, parse_ipath, parse_wholename, parse_iwholename. Also check for a trailing slash and warn about it (unless $POSIXLY_CORRECT is set). Don't warn if the pattern also happens to be a start_point, because those could be matched. (parse_path, parse_ipath, parse_wholename, parse_iwholename): Use insert_path_check. * find/tree.c (matches_start_point): new function; returns true if the indicated string matches a start point. (num_start_points, start_points): new variables, storing the starting points listed on the command line. (build_expression_tree): Set num_start_points and start_points. * find/defs.h: Declare matches_start_point. * doc/find.texi (Full Name Patterns): Explain that candidate names for -path, -regex etc. will never end in a slash, so "-path /tmp/" will never match anything. * find/find.1: Likewise. --- ChangeLog | 19 ++++++++++ NEWS | 2 + doc/find.texi | 28 +++++++++++---- find/defs.h | 1 + find/find.1 | 8 ++++ find/parser.c | 112 +++++++++++++++++++++++++++++++++++++-------------------- find/tree.c | 40 +++++++++++++++++++- 7 files changed, 162 insertions(+), 48 deletions(-)
diff --git a/ChangeLog b/ChangeLog index 22b68f2..2231089 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,24 @@ 2009-07-12 James Youngman <[email protected]> + Savannah bug #23920: warn about -path arguments ending in /. + * find/parser.c (insert_path_check): new function; abstracts the + common functionality of parse_path, parse_ipath, parse_wholename, + parse_iwholename. Also check for a trailing slash and warn about + it (unless $POSIXLY_CORRECT is set). Don't warn if the pattern + also happens to be a start_point, because those could be matched. + (parse_path, parse_ipath, parse_wholename, parse_iwholename): Use + insert_path_check. + * find/tree.c (matches_start_point): new function; returns true if the + indicated string matches a start point. + (num_start_points, start_points): new variables, storing the + starting points listed on the command line. + (build_expression_tree): Set num_start_points and start_points. + * find/defs.h: Declare matches_start_point. + * doc/find.texi (Full Name Patterns): Explain that candidate names + for -path, -regex etc. will never end in a slash, so "-path /tmp/" + will never match anything. + * find/find.1: Likewise. + Fix Savannah bug #26327: xargs man page is vague about the number of times command is executed. * xargs/xargs.1 (DESCRIPTION): Explain more clearly that the diff --git a/NEWS b/NEWS index b7ae0af..e6cc825 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout) #24873: Duplicate fprint option corrupts output +#23920: warn about un-matchable -path arguments ending in /. + #19120: Patch to fix single quotes in man page find(1) ** Documentation Changes diff --git a/doc/find.texi b/doc/find.texi index 6a11564..b592fb4 100644 --- a/doc/find.texi +++ b/doc/find.texi @@ -419,9 +419,18 @@ checking every file in the tree (@pxref{Directories}). 
The ``entire file name'' as used by @code{find} starts with the starting-point specified on the command line, and is not converted to an absolute pathname, so for example @code{cd /; find tmp -wholename /tmp} will -never match anything. The name @samp{-wholename} is GNU-specific, -but @samp{-path} is more portable; it is supported by HP-UX -@code{find} and will soon be part of POSIX. +never match anything. + +Find compares the @samp{-path} argument with the concatenation of a +directory name and the base name of the file it's considering. +Since the concatenation will never end with a slash, @samp{-path} +arguments ending in @samp{/} will match nothing (except perhaps a +start point specified on the command line). + +The name @samp{-wholename} is GNU-specific, but @samp{-path} is more +portable; it is supported by HP-UX @code{find} and will soon be part +of POSIX. + @end deffn @deffn Test -ipath pattern @@ -469,10 +478,15 @@ match a file named @file{./fubar3}, you can use the regular expression @samp{.*bar.} or @samp{.*b.*3}, but not @samp{f.*r3}. @xref{Regexps, , Syntax of Regular Expressions, emacs, The GNU Emacs Manual}, for a description of the syntax of regular expressions. For @samp{-iregex}, -the match is case-insensitive. There are several varieties of regular -expressions; by default this test uses POSIX basic regular -expressions, but this can be changed with the option -@samp{-regextype}. +the match is case-insensitive. + +As for @samp{-path}, the candidate file name never ends with a slash, +so regular expressions which only match something that ends in slash +will always fail. + +There are several varieties of regular expressions; by default this +test uses POSIX basic regular expressions, but this can be changed +with the option @samp{-regextype}. 
@end deffn @deffn Option -regextype name diff --git a/find/defs.h b/find/defs.h index 692328a..37e0e1e 100644 --- a/find/defs.h +++ b/find/defs.h @@ -482,6 +482,7 @@ void show_success_rates(const struct predicate *node); /* tree.c */ +boolean matches_start_point(const char * glob, boolean foldcase); struct predicate * build_expression_tree PARAMS((int argc, char *argv[], int end_of_leading_options)); struct predicate * get_eval_tree PARAMS((void)); struct predicate *get_new_pred PARAMS((const struct parser_table *entry)); diff --git a/find/find.1 b/find/find.1 index e2c1aaa..b27311a 100644 --- a/find/find.1 +++ b/find/find.1 @@ -743,6 +743,14 @@ command will never match anything: find bar \-path /foo/bar/myfile \-print .br .in -1i +Find compares the +.B \-path +argument with the concatenation of a directory name and the base name +of the file it's examining. Since the concatenation will never end +with a slash, +.B \-path +arguments ending in a slash will match nothing (except perhaps a start +point specified on the command line). The predicate .B \-path is also supported by HP-UX diff --git a/find/parser.c b/find/parser.c index 427c14a..973efac 100644 --- a/find/parser.c +++ b/find/parser.c @@ -1258,34 +1258,6 @@ parse_inum (const struct parser_table* entry, char **argv, int *arg_ptr) } } -/* -ipath is deprecated (at RMS's request) in favour of - * -iwholename. 
See the node "GNU Manuals" in standards.texi - * for the rationale for this (basically, GNU prefers the use - * of the phrase "file name" to "path name" - */ -static boolean -parse_ipath (const struct parser_table* entry, char **argv, int *arg_ptr) -{ - const char *name; - - fnmatch_sanitycheck (); - if (collect_arg (argv, arg_ptr, &name)) - { - struct predicate *our_pred = insert_primary_withpred (entry, pred_ipath); - our_pred->need_stat = our_pred->need_type = false; - our_pred->args.str = name; - our_pred->est_success_rate = estimate_pattern_match_rate (name, 0); - return true; - } - return false; -} - -static boolean -parse_iwholename (const struct parser_table* entry, char **argv, int *arg_ptr) -{ - return parse_ipath (entry, argv, arg_ptr); -} - static boolean parse_iregex (const struct parser_table* entry, char **argv, int *arg_ptr) { @@ -1743,6 +1715,59 @@ parse_or (const struct parser_table* entry, char **argv, int *arg_ptr) return true; } +static boolean +is_feasible_path_argument(const char *arg, boolean foldcase) +{ + const char *last = strrchr (arg, '/'); + if (last && !last[1]) + { + /* The name ends with "/". */ + if (matches_start_point (arg, foldcase)) + { + /* "-path foo/" can succeed if one of the start points is "foo/". 
*/ + return true; + } + else + { + return false; + } + } + return true; +} + + +static boolean +insert_path_check (const struct parser_table* entry, char **argv, int *arg_ptr, + const char *pred_name, PREDICATEFUNCTION pred) +{ + const char *name; + boolean foldcase = false; + + if (pred == pred_ipath) + foldcase = true; + + fnmatch_sanitycheck (); + + if (collect_arg (argv, arg_ptr, &name)) + { + struct predicate *our_pred = insert_primary_withpred (entry, pred); + our_pred->need_stat = our_pred->need_type = false; + our_pred->args.str = name; + our_pred->est_success_rate = estimate_pattern_match_rate (name, 0); + + if (!options.posixly_correct + && !is_feasible_path_argument(name, foldcase)) + { + error (0, 0, _("warning: -%s %s will not match anything " + "because it ends with /."), + pred_name, name); + our_pred->est_success_rate = 1.0e-8; + } + return true; + } + return false; +} + /* For some time, -path was deprecated (at RMS's request) in favour of * -iwholename. See the node "GNU Manuals" in standards.texi for the * rationale for this (basically, GNU prefers the use of the phrase @@ -1756,22 +1781,31 @@ parse_or (const struct parser_table* entry, char **argv, int *arg_ptr) static boolean parse_path (const struct parser_table* entry, char **argv, int *arg_ptr) { - const char *name; - if (collect_arg(argv, arg_ptr, &name)) - { - struct predicate *our_pred = insert_primary_withpred (entry, pred_path); - our_pred->need_stat = our_pred->need_type = false; - our_pred->args.str = name; - our_pred->est_success_rate = estimate_pattern_match_rate (name, 0); - return true; - } - return false; + return insert_path_check (entry, argv, arg_ptr, "path", pred_path); } static boolean parse_wholename (const struct parser_table* entry, char **argv, int *arg_ptr) { - return parse_path (entry, argv, arg_ptr); + return insert_path_check (entry, argv, arg_ptr, "wholename", pred_path); +} + +/* -ipath was deprecated (at RMS's request) in favour of + * -iwholename. 
See the node "GNU Manuals" in standards.texi + * for the rationale for this (basically, GNU prefers the use + * of the phrase "file name" to "path name"). + * However, -path is now standardised so I un-deprecated -ipath. + */ +static boolean +parse_ipath (const struct parser_table* entry, char **argv, int *arg_ptr) +{ + return insert_path_check (entry, argv, arg_ptr, "ipath", pred_ipath); +} + +static boolean +parse_iwholename (const struct parser_table* entry, char **argv, int *arg_ptr) +{ + return insert_path_check (entry, argv, arg_ptr, "iwholename", pred_ipath); } static void diff --git a/find/tree.c b/find/tree.c index 929c5f6..3941532 100644 --- a/find/tree.c +++ b/find/tree.c @@ -20,6 +20,7 @@ #include <assert.h> #include <stdlib.h> +#include <fnmatch.h> #include "xalloc.h" #include "error.h" @@ -49,6 +50,11 @@ static struct predicate *eval_tree = NULL; /* The last predicate allocated. */ static struct predicate *last_pred = NULL; +/* The starting points. */ +static char **start_points; +static size_t num_start_points = 0; + + static struct predicate *scan_rest PARAMS((struct predicate **input, struct predicate *head, @@ -58,6 +64,34 @@ static struct predicate *set_new_parent PARAMS((struct predicate *curr, enum pre static const char *cost_name PARAMS((enum EvaluationCost cost)); +/* Return true if the glob pattern GLOB matches one of the + start points. If no start points were given on the + command line, GLOB is matched against "." instead. +*/ +boolean +matches_start_point(const char *glob, bool foldcase) +{ + int fnmatch_flags = 0; + if (foldcase) + fnmatch_flags |= FNM_CASEFOLD; + + if (num_start_points) + { + size_t i; + for (i=0; i<num_start_points; i++) + { + if (fnmatch (glob, start_points[i], fnmatch_flags) == 0) + return true; + } + return false; + } + else + { + return fnmatch (glob, ".", fnmatch_flags) == 0; + } +} + + /* Return a pointer to a tree that represents the expression prior to non-unary operator *INPUT. 
Set *INPUT to point at the next input predicate node. @@ -1204,11 +1238,13 @@ build_expression_tree(int argc, char *argv[], int end_of_leading_options) predicates = NULL; /* Find where in ARGV the predicates begin by skipping the list of - * start points. + * start points. As a side effect, also figure out which is the + * first and last start point. */ + start_points = argv + end_of_leading_options; for (i = end_of_leading_options; i < argc && !looks_like_expression(argv[i], true); i++) { - /* Do nothing. */ ; + ++num_start_points; } /* Enclose the expression in `( ... )' so a default -print will -- 1.5.6.5
