Norihiro Tanaka wrote:
> I rebased this patch because of confliction.

I fixed the degration in rebase.
From f046449f2ee29adb9ab73402dc72eae8ff3d6f0b Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 12 Apr 2014 14:20:11 +0900
Subject: [PATCH] grep: waste of a lot of memory for a large pattern in dfamust

* src/dfa.c (struct must): New member `prev'.  It has the pointer to
previous must.
(allocmust): New function.
(freemust): New function.
(dfamust): Use it.
---
 src/dfa.c | 115 ++++++++++++++++++++++++++++++--------------------------------
 1 file changed, 55 insertions(+), 60 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 90cf4a9..1658ec9 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3821,13 +3821,28 @@ inboth (char **left, char **right)
   return both;
 }
 
-typedef struct
+typedef struct must must;
+
+struct must
 {
   char **in;
   char *left;
   char *right;
   char *is;
-} must;
+  must *prev;
+};
+
+static must *
+allocmust (must *prev)
+{
+  must *mp = xzalloc (sizeof *prev);
+  mp->in = xzalloc (sizeof *mp->in);
+  mp->left = xzalloc (2);
+  mp->right = xzalloc (2);
+  mp->is = xzalloc (2);
+  mp->prev = prev;
+  return mp;
+}
 
 static void
 resetmust (must * mp)
@@ -3838,32 +3853,26 @@ resetmust (must * mp)
 }
 
 static void
+freemust (must *mp)
+{
+  freelist (mp->in);
+  free (mp->in);
+  free (mp->left);
+  free (mp->right);
+  free (mp->is);
+  free (mp);
+}
+
+static void
 dfamust (struct dfa *d)
 {
-  must *musts = xnmalloc (d->tindex + 1, sizeof *musts);
-  must *mp = musts;
+  must *mp = NULL;
   char const *result = "";
   size_t ri;
   size_t i;
   bool exact = false;
   struct dfamust *dm;
 
-  for (i = 0; i <= d->tindex; ++i)
-    {
-      mp[i].in = xzalloc (sizeof *mp[i].in);
-      mp[i].left = xzalloc (2);
-      mp[i].right = xzalloc (2);
-      mp[i].is = xzalloc (2);
-    }
-#ifdef DEBUG
-  fprintf (stderr, "dfamust:\n");
-  for (i = 0; i < d->tindex; ++i)
-    {
-      fprintf (stderr, " %zd:", i);
-      prtok (d->tokens[i]);
-    }
-  putc ('\n', stderr);
-#endif
   for (ri = 0; ri < d->tindex; ++ri)
     {
       token t = d->tokens[ri];
@@ -3873,11 +3882,6 @@ dfamust (struct dfa *d)
         case RPAREN:
           assert (!"neither LPAREN nor RPAREN may appear here");
 
-        case STAR:
-        case QMARK:
-          assert (musts < mp);
-          --mp;
-          /* Fall through.  */
         case EMPTY:
         case BEGLINE:
         case ENDLINE:
@@ -3888,19 +3892,24 @@ dfamust (struct dfa *d)
         case BACKREF:
         case ANYCHAR:
         case MBCSET:
+          mp = allocmust (mp);
+          /* Fall through.  */
+        case STAR:
+        case QMARK:
+          assert (mp != NULL);
           resetmust (mp);
           break;
 
         case OR:
-          assert (&musts[2] <= mp);
+          assert (mp && mp->prev);
           {
             char **new;
             must *lmp;
             must *rmp;
             size_t j, ln, rn, n;
 
-            rmp = --mp;
-            lmp = --mp;
+            rmp = mp;
+            lmp = mp = mp->prev;
             /* Guaranteed to be.  Unlikely, but ...  */
             if (!STREQ (lmp->is, rmp->is))
               lmp->is[0] = '\0';
@@ -3925,32 +3934,35 @@ dfamust (struct dfa *d)
             freelist (lmp->in);
             free (lmp->in);
             lmp->in = new;
+            freemust (rmp);
           }
           break;
 
         case PLUS:
-          assert (musts < mp);
-          --mp;
+          assert (mp != NULL);
           mp->is[0] = '\0';
           break;
 
         case END:
-          assert (mp == &musts[1]);
-          for (i = 0; musts[0].in[i] != NULL; ++i)
-            if (strlen (musts[0].in[i]) > strlen (result))
-              result = musts[0].in[i];
-          if (STREQ (result, musts[0].is))
-            exact = true;
+          assert (!mp || !mp->prev);
+          if (mp)
+            {
+              for (i = 0; mp->in[i] != NULL; ++i)
+                if (strlen (mp->in[i]) > strlen (result))
+                  result = mp->in[i];
+              if (STREQ (result, mp->is))
+                exact = true;
+            }
           goto done;
 
         case CAT:
-          assert (&musts[2] <= mp);
+          assert (mp && mp->prev);
           {
             must *lmp;
             must *rmp;
 
-            rmp = --mp;
-            lmp = --mp;
+            rmp = mp;
+            lmp = mp = mp->prev;
             /* In.  Everything in left, plus everything in
                right, plus concatenation of
                left's right and right's left.  */
@@ -3977,6 +3989,7 @@ dfamust (struct dfa *d)
               lmp->is = icatalloc (lmp->is, rmp->is);
             else
               lmp->is[0] = '\0';
+            freemust (rmp);
           }
           break;
 
@@ -3985,6 +3998,7 @@ dfamust (struct dfa *d)
           goto done;
 
         default:
+          mp = allocmust (mp);
           resetmust (mp);
           if (CSET <= t)
             {
@@ -4014,17 +4028,6 @@ dfamust (struct dfa *d)
           mp->in = enlist (mp->in, mp->is, 1);
           break;
         }
-#ifdef DEBUG
-      fprintf (stderr, " node: %zd:", ri);
-      prtok (d->tokens[ri]);
-      fprintf (stderr, "\n  in:");
-      for (i = 0; mp->in[i]; ++i)
-        fprintf (stderr, " \"%s\"", mp->in[i]);
-      fprintf (stderr, "\n  is: \"%s\"\n", mp->is);
-      fprintf (stderr, "  left: \"%s\"\n", mp->left);
-      fprintf (stderr, "  right: \"%s\"\n", mp->right);
-#endif
-      ++mp;
     }
 done:
   if (*result)
@@ -4035,16 +4038,8 @@ done:
       dm->next = d->musts;
       d->musts = dm;
     }
-  mp = musts;
-  for (i = 0; i <= d->tindex; ++i)
-    {
-      freelist (mp[i].in);
-      free (mp[i].in);
-      free (mp[i].left);
-      free (mp[i].right);
-      free (mp[i].is);
-    }
-  free (mp);
+  if (mp)
+    freemust (mp);
 }
 
 struct dfa *
-- 
1.9.2

Reply via email to