From c1603508a20b262539b4a48297a729b4b1b29a14 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <noritnk@kcn.ne.jp>
Date: Sun, 20 Sep 2020 15:39:13 +0900
Subject: [PATCH 1/2] grep: avoid unnecessary regex compilation

Grep resorts to using the regex engine when the precision of either
-o or --color is required, or when the pattern is not supported by
our DFA engine (e.g., backref). Otherwise, grep would perform regex
compilation solely to check the syntax. This change makes grep skip
that compilation in the common case for which it is unnecessary.

The compilation we are avoiding is quite costly, consuming O(N^2)
RSS for N regular expressions.

* src/dfasearch.c (GEAcompile): Add new argument, and avoid unneeded
compilation of regex.
* src/grep.c (compile_fp_t): Update prototype.
(main): Update caller.
* src/kwsearch.c (Fcompile): Update caller and add new argument.
* src/pcresearch.c (Pcompile): Add new argument.
* src/search.h (GEAcompile, Fcompile, Pcompile): Update prototype.
---
 src/dfasearch.c  | 30 +++++++++++++++++-------------
 src/grep.c       |  7 ++++---
 src/kwsearch.c   |  6 +++---
 src/pcresearch.c |  2 +-
 src/search.h     |  6 +++---
 5 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 4d3f4b2..a2feda7 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -179,7 +179,8 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
    Return a description of the compiled pattern.  */

 void *
-GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
+GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
+            bool exact)
 {
   char *motif;
   struct dfa_comp *dc = xcalloc (1, sizeof (*dc));
@@ -274,18 +275,6 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
         }
     }

-  if (buf != NULL)
-    {
-      dc->patterns--;
-      dc->pcount++;
-
-      if (!regex_compile (dc, buf, buflen, 0, -1, false))
-        abort ();
-
-      if (buf != pattern)
-        free (buf);
-    }
-
   /* In the match_words and match_lines cases, we use a different pattern
      for the DFA matcher that will quickly throw out cases that won't work.
      Then if DFA succeeds we do some hairy stuff using the regex matcher
@@ -321,6 +310,21 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
   kwsmusts (dc);
   dfacomp (NULL, 0, dc->dfa, 1);

+  if (buf != NULL)
+    {
+      if (exact || !dfasupported (dc->dfa))
+        {
+          dc->patterns--;
+          dc->pcount++;
+
+          if (!regex_compile (dc, buf, buflen, 0, -1, false))
+            abort ();
+        }
+
+      if (buf != pattern)
+        free (buf);
+    }
+
   free (motif);

   return dc;
diff --git a/src/grep.c b/src/grep.c
index 497780e..ca295a5 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -635,7 +635,7 @@ static bool seek_failed;
 static bool seek_data_failed;

 /* Functions we'll use to search. */
-typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t);
+typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
 typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
                                 char const *);
 static execute_fp_t execute;
@@ -2969,8 +2969,9 @@ main (int argc, char **argv)
     matcher = try_fgrep_pattern (matcher, keys, &keycc);

   execute = matchers[matcher].execute;
-  compiled_pattern = matchers[matcher].compile (keys, keycc,
-                                                matchers[matcher].syntax);
+  compiled_pattern =
+    matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,
+                               only_matching | color_option);
   /* We need one byte prior and one after.  */
   char eolbytes[3] = { 0, eolbyte, 0 };
   size_t match_size;
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 7081060..1174dbc 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -47,7 +47,7 @@ struct kwsearch
    followed by '\n'.  Return a description of the compiled pattern.  */

 void *
-Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
+Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
 {
   kwset_t kwset;
   char *buf = NULL;
@@ -178,7 +178,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
             {
               fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size);
               kwsearch->re = GEAcompile (kwsearch->pattern, kwsearch->size,
-                                         RE_SYNTAX_GREP);
+                                         RE_SYNTAX_GREP, !!start_ptr);
             }
           return EGexecute (kwsearch->re, buf, size, match_size, start_ptr);
         }
@@ -245,7 +245,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                     fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size);
                     kwsearch->re = GEAcompile (kwsearch->pattern,
                                                kwsearch->size,
-                                               RE_SYNTAX_GREP);
+                                               RE_SYNTAX_GREP, !!start_ptr);
                   }
                 if (beg + len < buf + size)
                   {
diff --git a/src/pcresearch.c b/src/pcresearch.c
index a668c45..6798967 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -113,7 +113,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
    followed by '\n'.  Return a description of the compiled pattern.  */

 void *
-Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
+Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
 {
   int e;
   char const *ep;
diff --git a/src/search.h b/src/search.h
index 1d85a7e..af81dcb 100644
--- a/src/search.h
+++ b/src/search.h
@@ -56,15 +56,15 @@ extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
                             char const *);

 /* dfasearch.c */
-extern void *GEAcompile (char *, size_t, reg_syntax_t);
+extern void *GEAcompile (char *, size_t, reg_syntax_t, bool);
 extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *);

 /* kwsearch.c */
-extern void *Fcompile (char *, size_t, reg_syntax_t);
+extern void *Fcompile (char *, size_t, reg_syntax_t, bool);
 extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *);

 /* pcresearch.c */
-extern void *Pcompile (char *, size_t, reg_syntax_t);
+extern void *Pcompile (char *, size_t, reg_syntax_t, bool);
 extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *);

 /* grep.c */
-- 
2.28.0.546.g385c171a01

