Author: emaste
Date: Sat May 20 03:51:31 2017
New Revision: 318571
URL: https://svnweb.freebsd.org/changeset/base/318571

Log:
  bsdgrep: emit more than MAX_LINE_MATCHES per line
  
  We should not set an arbitrary cap on the number of matches on a line,
  and in any case MAX_LINE_MATCHES of 32 is much too low.  Instead, if we
  match more than MAX_LINE_MATCHES, keep processing and matching from the
  last match until all are found.
  
  For the regression test, we produce 4096 matches (larger than we expect
  we'll ever set MAX_LINE_MATCHES) and make sure we actually get 4096
  lines of output with the -o flag.
  
  We'll also make sure that every distinct line is getting its own line
  number to detect line metadata not being printed as appropriate along
  the way.
  
  PR:           218811
  Submitted by: Kyle Evans <kevan...@ksu.edu>
  Reported by:  jbeich
  Reviewed by:  cem
  Differential Revision:        https://reviews.freebsd.org/D10577

Modified:
  head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
  head/usr.bin/grep/util.c

Modified: head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
==============================================================================
--- head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh    Sat May 20 01:04:47 2017        (r318570)
+++ head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh    Sat May 20 03:51:31 2017        (r318571)
@@ -413,6 +413,26 @@ wflag_emptypat_body()
        atf_check -o file:test4 grep -w -e "" test4
 }
 
+atf_test_case excessive_matches
+excessive_matches_head()
+{
+       atf_set "descr" "Check for proper handling of lines with excessive matches (PR 218811)"
+}
+excessive_matches_body()
+{
+       grep_type
+       if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
+               atf_expect_fail "this test does not pass with GNU grep in base"
+       fi
+
+       for i in $(jot 4096); do
+               printf "x" >> test.in
+       done
+
+       atf_check -s exit:0 -x '[ $(grep -o x test.in | wc -l) -eq 4096 ]'
+       #atf_check -s exit:1 -x 'grep -on x test.in | grep -v "1:x"'
+}
+
 atf_test_case fgrep_sanity
 fgrep_sanity_head()
 {
@@ -603,6 +623,7 @@ atf_init_test_cases()
        atf_add_test_case egrep_empty_invalid
        atf_add_test_case zerolen
        atf_add_test_case wflag_emptypat
+       atf_add_test_case excessive_matches
        atf_add_test_case wv_combo_break
        atf_add_test_case fgrep_sanity
        atf_add_test_case egrep_sanity

Modified: head/usr.bin/grep/util.c
==============================================================================
--- head/usr.bin/grep/util.c    Sat May 20 01:04:47 2017        (r318570)
+++ head/usr.bin/grep/util.c    Sat May 20 03:51:31 2017        (r318571)
@@ -63,6 +63,7 @@ static bool    first_match = true;
 struct parsec {
        regmatch_t matches[MAX_LINE_MATCHES];   /* Matches made */
        struct str ln;                          /* Current line */
+       size_t lnstart;                         /* Start of line processing */
        size_t matchidx;                        /* Latest used match index */
        bool binary;                            /* Binary file? */
 };
@@ -247,8 +248,9 @@ procfile(const char *fn)
        mcount = mlimit;
 
        for (c = 0;  c == 0 || !(lflag || qflag); ) {
-               /* Reset match count for every line processed */
+               /* Reset match count and line start for every line processed */
                pc.matchidx = 0;
+               pc.lnstart = 0;
                pc.ln.off += pc.ln.len + 1;
                if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
                    pc.ln.len == 0) {
@@ -288,6 +290,14 @@ procfile(const char *fn)
                /* Print the matching line, but only if not quiet/binary */
                if (t == 0 && printmatch) {
                        printline(&pc, ':');
+                       while (pc.matchidx >= MAX_LINE_MATCHES) {
+                               /* Reset matchidx and try again */
+                               pc.matchidx = 0;
+                               if (procline(&pc) == 0)
+                                       printline(&pc, ':');
+                               else
+                                       break;
+                       }
                        first_match = false;
                        same_file = true;
                        last_outed = 0;
@@ -356,11 +366,11 @@ procline(struct parsec *pc)
 {
        regmatch_t pmatch, lastmatch, chkmatch;
        wchar_t wbegin, wend;
-       size_t st = 0, nst = 0;
+       size_t st, nst;
        unsigned int i;
        int c = 0, r = 0, lastmatches = 0, leflags = eflags;
        size_t startm = 0, matchidx;
-       int retry;
+       unsigned int retry;
 
        matchidx = pc->matchidx;
 
@@ -376,6 +386,8 @@ procline(struct parsec *pc)
        } else if (matchall)
                return (0);
 
+       st = pc->lnstart;
+       nst = 0;
        /* Initialize to avoid a false positive warning from GCC. */
        lastmatch.rm_so = lastmatch.rm_eo = 0;
 
@@ -432,12 +444,12 @@ procline(struct parsec *pc)
                                 * still match a whole word.
                                 */
                                if (r == REG_NOMATCH &&
-                                   (retry == 0 || pmatch.rm_so + 1 < retry))
+                                   (retry == pc->lnstart ||
+                                   pmatch.rm_so + 1 < retry))
                                        retry = pmatch.rm_so + 1;
                                if (r == REG_NOMATCH)
                                        continue;
                        }
-
                        lastmatches++;
                        lastmatch = pmatch;
 
@@ -466,8 +478,11 @@ procline(struct parsec *pc)
                        }
                        /* avoid excessive matching - skip further patterns */
                        if ((color == NULL && !oflag) || qflag || lflag ||
-                           matchidx >= MAX_LINE_MATCHES)
+                           matchidx >= MAX_LINE_MATCHES) {
+                               pc->lnstart = nst;
+                               lastmatches = 0;
                                break;
+                       }
                }
 
                /*
@@ -475,7 +490,7 @@ procline(struct parsec *pc)
                 * again just in case we still have a chance to match later in
                 * the string.
                 */
-               if (lastmatches == 0 && retry > 0) {
+               if (lastmatches == 0 && retry > pc->lnstart) {
                        st = retry;
                        continue;
                }
@@ -497,6 +512,7 @@ procline(struct parsec *pc)
 
                /* Advance st based on previous matches */
                st = nst;
+               pc->lnstart = st;
        }
 
        /* Reflect the new matchidx in the context */
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to