I rebased this patch because of confliction again.
From 14f4fe9be5c2267f05d0d8a79afe5cfd3083b189 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 12 Apr 2014 14:20:11 +0900
Subject: [PATCH] grep: waste of a lot of memory for a large pattern in dfamust
* src/dfa.c (struct must): New member `prev'. It has the pointer to
previous must.
(allocmust): New function.
(freemust): New function.
(dfamust): Use it.
---
src/dfa.c | 127 ++++++++++++++++++++++++++++++--------------------------------
1 file changed, 62 insertions(+), 65 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 65fc03d..24a7d44 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3821,7 +3821,9 @@ inboth (char **left, char **right)
return both;
}
-typedef struct
+typedef struct must must;
+
+struct must
{
char **in;
char *left;
@@ -3829,21 +3831,46 @@ typedef struct
char *is;
bool begline;
bool endline;
-} must;
+ must *prev;
+};
+
+static must *
+allocmust (must *mp)
+{
+ must *new_mp = xzalloc (sizeof *new_mp);
+ new_mp->in = xzalloc (sizeof *new_mp->in);
+ new_mp->left = xzalloc (2);
+ new_mp->right = xzalloc (2);
+ new_mp->is = xzalloc (2);
+ new_mp->prev = mp;
+ return new_mp;
+}
static void
-resetmust (must * mp)
+resetmust (must *mp)
{
freelist (mp->in);
mp->in[0] = NULL;
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
+ mp->begline = false;
+ mp->endline = false;
+}
+
+static void
+freemust (must *mp)
+{
+ freelist (mp->in);
+ free (mp->in);
+ free (mp->left);
+ free (mp->right);
+ free (mp->is);
+ free (mp);
}
static void
dfamust (struct dfa *d)
{
- must *musts = xnmalloc (d->tindex + 1, sizeof *musts);
- must *mp = musts;
+ must *mp = NULL;
char const *result = "";
size_t ri;
size_t i;
@@ -3852,34 +3879,18 @@ dfamust (struct dfa *d)
bool endline = false;
struct dfamust *dm;
- for (i = 0; i <= d->tindex; ++i)
- {
- mp[i].in = xzalloc (sizeof *mp[i].in);
- mp[i].left = xzalloc (2);
- mp[i].right = xzalloc (2);
- mp[i].is = xzalloc (2);
- mp[i].begline = false;
- mp[i].endline = false;
- }
-#ifdef DEBUG
- fprintf (stderr, "dfamust:\n");
- for (i = 0; i < d->tindex; ++i)
- {
- fprintf (stderr, " %zd:", i);
- prtok (d->tokens[i]);
- }
- putc ('\n', stderr);
-#endif
for (ri = 0; ri < d->tindex; ++ri)
{
token t = d->tokens[ri];
switch (t)
{
case BEGLINE:
+ mp = allocmust (mp);
resetmust (mp);
mp->begline = true;
break;
case ENDLINE:
+ mp = allocmust (mp);
resetmust (mp);
mp->endline = true;
break;
@@ -3887,11 +3898,6 @@ dfamust (struct dfa *d)
case RPAREN:
assert (!"neither LPAREN nor RPAREN may appear here");
- case STAR:
- case QMARK:
- assert (musts < mp);
- --mp;
- /* Fall through. */
case EMPTY:
case BEGWORD:
case ENDWORD:
@@ -3900,19 +3906,24 @@ dfamust (struct dfa *d)
case BACKREF:
case ANYCHAR:
case MBCSET:
+ mp = allocmust (mp);
+ /* Fall through. */
+ case STAR:
+ case QMARK:
+ assert (mp != NULL);
resetmust (mp);
break;
case OR:
- assert (&musts[2] <= mp);
+ assert (mp && mp->prev);
{
char **new;
must *lmp;
must *rmp;
size_t j, ln, rn, n;
- rmp = --mp;
- lmp = --mp;
+ rmp = mp;
+ lmp = mp = mp->prev;
/* Guaranteed to be. Unlikely, but ... */
if (!STREQ (lmp->is, rmp->is))
lmp->is[0] = '\0';
@@ -3939,36 +3950,39 @@ dfamust (struct dfa *d)
freelist (lmp->in);
free (lmp->in);
lmp->in = new;
+ freemust (rmp);
}
break;
case PLUS:
- assert (musts < mp);
- --mp;
+ assert (mp != NULL);
mp->is[0] = '\0';
break;
case END:
- assert (mp == &musts[1]);
- for (i = 0; musts[0].in[i] != NULL; ++i)
- if (strlen (musts[0].in[i]) > strlen (result))
- result = musts[0].in[i];
- if (STREQ (result, musts[0].is))
+ assert (!mp || !mp->prev);
+ if (mp)
{
- exact = true;
- begline = musts[0].begline;
- endline = musts[0].endline;
+ for (i = 0; mp->in[i] != NULL; ++i)
+ if (strlen (mp->in[i]) > strlen (result))
+ result = mp->in[i];
+ if (STREQ (result, mp->is))
+ {
+ exact = true;
+ begline = mp->begline;
+ endline = mp->endline;
+ }
}
goto done;
case CAT:
- assert (&musts[2] <= mp);
+ assert (mp && mp->prev);
{
must *lmp;
must *rmp;
- rmp = --mp;
- lmp = --mp;
+ rmp = mp;
+ lmp = mp = mp->prev;
/* In. Everything in left, plus everything in
right, plus concatenation of
left's right and right's left. */
@@ -4003,6 +4017,7 @@ dfamust (struct dfa *d)
lmp->begline = false;
lmp->endline = false;
}
+ freemust (rmp);
}
break;
@@ -4011,6 +4026,7 @@ dfamust (struct dfa *d)
goto done;
default:
+ mp = allocmust (mp);
resetmust (mp);
if (CSET <= t)
{
@@ -4040,17 +4056,6 @@ dfamust (struct dfa *d)
mp->in = enlist (mp->in, mp->is, 1);
break;
}
-#ifdef DEBUG
- fprintf (stderr, " node: %zd:", ri);
- prtok (d->tokens[ri]);
- fprintf (stderr, "\n in:");
- for (i = 0; mp->in[i]; ++i)
- fprintf (stderr, " \"%s\"", mp->in[i]);
- fprintf (stderr, "\n is: \"%s\"\n", mp->is);
- fprintf (stderr, " left: \"%s\"\n", mp->left);
- fprintf (stderr, " right: \"%s\"\n", mp->right);
-#endif
- ++mp;
}
done:
if (*result)
@@ -4063,16 +4068,8 @@ done:
dm->next = d->musts;
d->musts = dm;
}
- mp = musts;
- for (i = 0; i <= d->tindex; ++i)
- {
- freelist (mp[i].in);
- free (mp[i].in);
- free (mp[i].left);
- free (mp[i].right);
- free (mp[i].is);
- }
- free (mp);
+ if (mp)
+ freemust (mp);
}
struct dfa *
--
1.9.2