On Thu, Dec 07, 2017 at 11:23:51AM -0700, Theo de Raadt wrote:
> seems fairly simple to me, but a few comments
>
> - Could mflag be removed and replaced by overloading mcount = -1
> as the no -m condition
>
> - What do other systems do with "-m 0"
GNU grep's behaviour is somewhat inconsistent:
%-----------------------------------------------------------
root@roslunar:~# echo foo > foo
root@roslunar:~# grep -m-1 foo foo
foo
root@roslunar:~# grep -m0 foo foo
root@roslunar:~# grep -m5 foo foo
foo
%-----------------------------------------------------------
even though it parses the count as a signed integer:
%-----------------------------------------------------------
switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
{
case LONGINT_OK:
case LONGINT_OVERFLOW:
break;
default:
die (EXIT_TROUBLE, 0, _("invalid max count"));
}
...
if ((max_count == 0
|| (keycc == 0 && out_invert && !match_lines && !match_words))
&& list_files != LISTFILES_NONMATCHING)
return EXIT_FAILURE;
%-----------------------------------------------------------
FreeBSD treats 0 as an error:
%-----------------------------------------------------------
mlimit = mcount = strtoll(optarg, &ep, 10);
if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
((errno == EINVAL) && (mcount == 0)))
err(2, NULL);
else if (ep[0] != '\0') {
errno = EINVAL;
err(2, NULL);
}
%-----------------------------------------------------------
but I think it accepts negative numbers and treats them as -m1
%-----------------------------------------------------------
/* Count the matches if we have a match limit */
if (t == 0 && mflag) {
--mcount;
if (mflag && mcount <= 0)
break;
}
%-----------------------------------------------------------
NetBSD is the only implementation I have found that uses unsigned
integers; it also errors out on -m0:
%-----------------------------------------------------------
mcount = strtoull(optarg, &ep, 10);
if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
((errno == EINVAL) && (mcount == 0)))
err(2, NULL);
else if (ep[0] != '\0') {
errno = EINVAL;
err(2, NULL);
}
%-----------------------------------------------------------
So there is no consistent behaviour to follow and I think we can pick
whatever we like, but I would prefer the current implementation.
>
> > + errno = 0;
> > + mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
> - you should not touch errno like that, this isn't the insane strtol
> interface
>
> > + errx(2, "mcount %s", errstr);
> - that is a poor error message
Here is an updated diff with what I hope is a better error message:
%-----------------------------------------------------------
grep: number of matches too small
$ obj/grep -m3.1
grep: number of matches invalid
$ obj/grep -m999999999999999999999999999999999999999999999
grep: number of matches too large
%-----------------------------------------------------------
I also added -m to the list of non-standard flags in the man page, as
suggested by jmc@.
Index: grep.1
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.1,v
retrieving revision 1.43
diff -u -p -u -p -r1.43 grep.1
--- grep.1 13 Jan 2015 04:45:34 -0000 1.43
+++ grep.1 8 Dec 2017 21:34:24 -0000
@@ -44,6 +44,7 @@
.Op Fl C Ns Op Ar num
.Op Fl e Ar pattern
.Op Fl f Ar file
+.Op Fl m Ar num
.Op Fl -binary-files Ns = Ns Ar value
.Op Fl -context Ns Op = Ns Ar num
.Op Fl -line-buffered
@@ -216,6 +217,10 @@ Pathnames are listed once per file searc
If the standard input is searched, the string
.Dq (standard input)
is written.
+.It Fl m Ar num
+Stop after
+.Ar num
+matches.
.It Fl n
Each output line is preceded by its relative line number in the file,
starting at line 1.
@@ -354,7 +359,7 @@ utility is compliant with the
specification.
.Pp
The flags
-.Op Fl AaBbCGHhILoRUVwZ
+.Op Fl AaBbCGHhILmoRUVwZ
are extensions to that specification, and the behaviour of the
.Fl f
flag when used with an empty pattern file is left undefined.
Index: grep.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.c,v
retrieving revision 1.55
diff -u -p -u -p -r1.55 grep.c
--- grep.c 28 Nov 2015 01:17:12 -0000 1.55
+++ grep.c 8 Dec 2017 21:34:24 -0000
@@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
int hflag; /* -h: don't print filename headers */
int iflag; /* -i: ignore case */
int lflag; /* -l: only show names of files with matches */
+int mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
int nflag; /* -n: show line numbers in front of matching lines */
int oflag; /* -o: print each match */
int qflag; /* -q: quiet mode (don't output anything) */
@@ -111,15 +114,16 @@ usage(void)
#else
"usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
#endif
- "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
- "\t[--line-buffered] [pattern] [file ...]\n", __progname);
+ "\t[-e pattern] [-f file] [-m num] [--binary-files=value]\n"
+ "\t[--context[=num]] [--line-buffered] [pattern] [file ...]\n",
+ __progname);
exit(2);
}
#ifdef NOZ
-static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
#else
-static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
#endif
static const struct option long_options[] =
@@ -147,6 +151,7 @@ static const struct option long_options[
{"ignore-case", no_argument, NULL, 'i'},
{"files-without-match", no_argument, NULL, 'L'},
{"files-with-matches", no_argument, NULL, 'l'},
+ {"max-count", required_argument, NULL, 'm'},
{"line-number", no_argument, NULL, 'n'},
{"quiet", no_argument, NULL, 'q'},
{"silent", no_argument, NULL, 'q'},
@@ -375,6 +380,14 @@ main(int argc, char *argv[])
case 'l':
Lflag = 0;
lflag = qflag = 1;
+ break;
+ case 'm':
+ mflag = 1;
+ errno = 0;
+ mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
+ &errstr);
+ if (errstr != NULL)
+ errx(2, "number of matches %s", errstr);
break;
case 'n':
nflag = 1;
Index: grep.h
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.h,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 grep.h
--- grep.h 14 Dec 2015 20:02:07 -0000 1.24
+++ grep.h 8 Dec 2017 21:34:24 -0000
@@ -66,14 +66,17 @@ extern int cflags, eflags;
/* Command line flags */
extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
Rflag, Zflag,
- bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
- vflag, wflag, xflag;
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
+ sflag, vflag, wflag, xflag;
extern int binbehave;
extern int first, matchall, patterns, tail, file_err;
extern char **pattern;
extern fastgrep_t *fg_pattern;
extern regex_t *r_pattern;
+
+/* For -m max-count */
+extern long long mcount, mlimit;
/* For regex errors */
#define RE_ERROR_BUF 512
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 util.c
--- util.c 3 Apr 2017 16:18:35 -0000 1.57
+++ util.c 8 Dec 2017 21:34:24 -0000
@@ -97,6 +97,8 @@ procfile(char *fn)
file_t *f;
int c, t, z, nottext;
+ mcount = mlimit;
+
if (fn == NULL) {
fn = "(standard input)";
f = grep_fdopen(STDIN_FILENO, "r");
@@ -140,6 +142,8 @@ procfile(char *fn)
linesqueued++;
}
c += t;
+ if (mflag && mcount <= 0)
+ break;
}
if (Bflag > 0)
clearqueue();
@@ -223,6 +227,10 @@ redo:
print:
if (vflag)
c = !c;
+
+ /* Count the matches if we have a match limit */
+ if (mflag)
+ mcount -= c;
if (c && binbehave == BIN_FILE_BIN && nottext)
return c; /* Binary file */