Norihiro Tanaka wrote:
> I rebased this patch because of confliction.
I fixed the degration in rebase.
From f046449f2ee29adb9ab73402dc72eae8ff3d6f0b Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 12 Apr 2014 14:20:11 +0900
Subject: [PATCH] grep: waste of a lot of memory for a large pattern in dfamust
* src/dfa.c (struct must): New member `prev'. It has the pointer to
previous must.
(allocmust): New function.
(freemust): New function.
(dfamust): Use it.
---
src/dfa.c | 115 ++++++++++++++++++++++++++++++--------------------------------
1 file changed, 55 insertions(+), 60 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 90cf4a9..1658ec9 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3821,13 +3821,28 @@ inboth (char **left, char **right)
return both;
}
-typedef struct
+typedef struct must must;
+
+struct must
{
char **in;
char *left;
char *right;
char *is;
-} must;
+ must *prev;
+};
+
+static must *
+allocmust (must *prev)
+{
+ must *mp = xzalloc (sizeof *prev);
+ mp->in = xzalloc (sizeof *mp->in);
+ mp->left = xzalloc (2);
+ mp->right = xzalloc (2);
+ mp->is = xzalloc (2);
+ mp->prev = prev;
+ return mp;
+}
static void
resetmust (must * mp)
@@ -3838,32 +3853,26 @@ resetmust (must * mp)
}
static void
+freemust (must *mp)
+{
+ freelist (mp->in);
+ free (mp->in);
+ free (mp->left);
+ free (mp->right);
+ free (mp->is);
+ free (mp);
+}
+
+static void
dfamust (struct dfa *d)
{
- must *musts = xnmalloc (d->tindex + 1, sizeof *musts);
- must *mp = musts;
+ must *mp = NULL;
char const *result = "";
size_t ri;
size_t i;
bool exact = false;
struct dfamust *dm;
- for (i = 0; i <= d->tindex; ++i)
- {
- mp[i].in = xzalloc (sizeof *mp[i].in);
- mp[i].left = xzalloc (2);
- mp[i].right = xzalloc (2);
- mp[i].is = xzalloc (2);
- }
-#ifdef DEBUG
- fprintf (stderr, "dfamust:\n");
- for (i = 0; i < d->tindex; ++i)
- {
- fprintf (stderr, " %zd:", i);
- prtok (d->tokens[i]);
- }
- putc ('\n', stderr);
-#endif
for (ri = 0; ri < d->tindex; ++ri)
{
token t = d->tokens[ri];
@@ -3873,11 +3882,6 @@ dfamust (struct dfa *d)
case RPAREN:
assert (!"neither LPAREN nor RPAREN may appear here");
- case STAR:
- case QMARK:
- assert (musts < mp);
- --mp;
- /* Fall through. */
case EMPTY:
case BEGLINE:
case ENDLINE:
@@ -3888,19 +3892,24 @@ dfamust (struct dfa *d)
case BACKREF:
case ANYCHAR:
case MBCSET:
+ mp = allocmust (mp);
+ /* Fall through. */
+ case STAR:
+ case QMARK:
+ assert (mp != NULL);
resetmust (mp);
break;
case OR:
- assert (&musts[2] <= mp);
+ assert (mp && mp->prev);
{
char **new;
must *lmp;
must *rmp;
size_t j, ln, rn, n;
- rmp = --mp;
- lmp = --mp;
+ rmp = mp;
+ lmp = mp = mp->prev;
/* Guaranteed to be. Unlikely, but ... */
if (!STREQ (lmp->is, rmp->is))
lmp->is[0] = '\0';
@@ -3925,32 +3934,35 @@ dfamust (struct dfa *d)
freelist (lmp->in);
free (lmp->in);
lmp->in = new;
+ freemust (rmp);
}
break;
case PLUS:
- assert (musts < mp);
- --mp;
+ assert (mp != NULL);
mp->is[0] = '\0';
break;
case END:
- assert (mp == &musts[1]);
- for (i = 0; musts[0].in[i] != NULL; ++i)
- if (strlen (musts[0].in[i]) > strlen (result))
- result = musts[0].in[i];
- if (STREQ (result, musts[0].is))
- exact = true;
+ assert (!mp || !mp->prev);
+ if (mp)
+ {
+ for (i = 0; mp->in[i] != NULL; ++i)
+ if (strlen (mp->in[i]) > strlen (result))
+ result = mp->in[i];
+ if (STREQ (result, mp->is))
+ exact = true;
+ }
goto done;
case CAT:
- assert (&musts[2] <= mp);
+ assert (mp && mp->prev);
{
must *lmp;
must *rmp;
- rmp = --mp;
- lmp = --mp;
+ rmp = mp;
+ lmp = mp = mp->prev;
/* In. Everything in left, plus everything in
right, plus concatenation of
left's right and right's left. */
@@ -3977,6 +3989,7 @@ dfamust (struct dfa *d)
lmp->is = icatalloc (lmp->is, rmp->is);
else
lmp->is[0] = '\0';
+ freemust (rmp);
}
break;
@@ -3985,6 +3998,7 @@ dfamust (struct dfa *d)
goto done;
default:
+ mp = allocmust (mp);
resetmust (mp);
if (CSET <= t)
{
@@ -4014,17 +4028,6 @@ dfamust (struct dfa *d)
mp->in = enlist (mp->in, mp->is, 1);
break;
}
-#ifdef DEBUG
- fprintf (stderr, " node: %zd:", ri);
- prtok (d->tokens[ri]);
- fprintf (stderr, "\n in:");
- for (i = 0; mp->in[i]; ++i)
- fprintf (stderr, " \"%s\"", mp->in[i]);
- fprintf (stderr, "\n is: \"%s\"\n", mp->is);
- fprintf (stderr, " left: \"%s\"\n", mp->left);
- fprintf (stderr, " right: \"%s\"\n", mp->right);
-#endif
- ++mp;
}
done:
if (*result)
@@ -4035,16 +4038,8 @@ done:
dm->next = d->musts;
d->musts = dm;
}
- mp = musts;
- for (i = 0; i <= d->tindex; ++i)
- {
- freelist (mp[i].in);
- free (mp[i].in);
- free (mp[i].left);
- free (mp[i].right);
- free (mp[i].is);
- }
- free (mp);
+ if (mp)
+ freemust (mp);
}
struct dfa *
--
1.9.2