Module Name: src
Committed By: christos
Date: Sun Oct 9 18:23:00 UTC 2011
Modified Files:
src/lib/libc/regex: engine.c regcomp.c regex2.h
Log Message:
Prevent regcomp/regexec DoS attacks by limiting the amount of memory used
and the level of recursion. Thanks to Maksymilian Arciemowicz for discovery
and help with the implementation.
To generate a diff of this commit:
cvs rdiff -u -r1.22 -r1.23 src/lib/libc/regex/engine.c
cvs rdiff -u -r1.29 -r1.30 src/lib/libc/regex/regcomp.c
cvs rdiff -u -r1.12 -r1.13 src/lib/libc/regex/regex2.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libc/regex/engine.c
diff -u src/lib/libc/regex/engine.c:1.22 src/lib/libc/regex/engine.c:1.23
--- src/lib/libc/regex/engine.c:1.22 Thu Feb 12 00:06:54 2009
+++ src/lib/libc/regex/engine.c Sun Oct 9 14:23:00 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: engine.c,v 1.22 2009/02/12 05:06:54 lukem Exp $ */
+/* $NetBSD: engine.c,v 1.23 2011/10/09 18:23:00 christos Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994
@@ -212,8 +212,8 @@ matcher(
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] && stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
+ memcmp(dp, g->must, g->mlen) == 0)
break;
if (dp == stop) /* we didn't find g->must */
return(REG_NOMATCH);
Index: src/lib/libc/regex/regcomp.c
diff -u src/lib/libc/regex/regcomp.c:1.29 src/lib/libc/regex/regcomp.c:1.30
--- src/lib/libc/regex/regcomp.c:1.29 Thu Feb 12 00:06:54 2009
+++ src/lib/libc/regex/regcomp.c Sun Oct 9 14:23:00 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: regcomp.c,v 1.29 2009/02/12 05:06:54 lukem Exp $ */
+/* $NetBSD: regcomp.c,v 1.30 2011/10/09 18:23:00 christos Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994
@@ -76,7 +76,7 @@
#if 0
static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
#else
-__RCSID("$NetBSD: regcomp.c,v 1.29 2009/02/12 05:06:54 lukem Exp $");
+__RCSID("$NetBSD: regcomp.c,v 1.30 2011/10/09 18:23:00 christos Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
@@ -125,11 +125,11 @@ extern "C" {
#endif
/* === regcomp.c === */
-static void p_ere(struct parse *p, int stop);
-static void p_ere_exp(struct parse *p);
+static void p_ere(struct parse *p, int stop, size_t reclimit);
+static void p_ere_exp(struct parse *p, size_t reclimit);
static void p_str(struct parse *p);
-static void p_bre(struct parse *p, int end1, int end2);
-static int p_simp_re(struct parse *p, int starordinary);
+static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
+static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
static int p_count(struct parse *p);
static void p_bracket(struct parse *p);
static void p_b_term(struct parse *p, cset *cs);
@@ -141,7 +141,7 @@ static int othercase(int ch);
static void bothcases(struct parse *p, int ch);
static void ordinary(struct parse *p, int ch);
static void nonnewline(struct parse *p);
-static void repeat(struct parse *p, sopno start, int from, int to);
+static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
static int seterr(struct parse *p, int e);
static cset *allocset(struct parse *p);
static void freeset(struct parse *p, cset *cs);
@@ -163,7 +163,7 @@ static sopno dupl(struct parse *p, sopno
static void doemit(struct parse *p, sop op, sopno opnd);
static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
static void dofwd(struct parse *p, sopno pos, sopno value);
-static void enlarge(struct parse *p, sopno size);
+static int enlarge(struct parse *p, sopno size);
static void stripsnug(struct parse *p, struct re_guts *g);
static void findmust(struct parse *p, struct re_guts *g);
static sopno pluscount(struct parse *p, struct re_guts *g);
@@ -211,6 +211,13 @@ static int never = 0; /* for use in ass
#define never 0 /* some <assert.h>s have bugs too */
#endif
+#define MEMLIMIT 0x8000000
+#define MEMSIZE(p) \
+ ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
+ (p)->ncsalloc * sizeof(cset) + \
+ (p)->ssize * sizeof(sop))
+#define RECLIMIT 256
+
/*
- regcomp - interface for parser and compilation
= extern int regcomp(regex_t *, const char *, int);
@@ -260,7 +267,7 @@ regcomp(
if (g == NULL)
return(REG_ESPACE);
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
- p->strip = (sop *)malloc(p->ssize * sizeof(sop));
+ p->strip = malloc(p->ssize * sizeof(sop));
p->slen = 0;
if (p->strip == NULL) {
free(g);
@@ -297,11 +304,11 @@ regcomp(
EMIT(OEND, 0);
g->firststate = THERE();
if (cflags®_EXTENDED)
- p_ere(p, OUT);
+ p_ere(p, OUT, 0);
else if (cflags®_NOSPEC)
p_str(p);
else
- p_bre(p, OUT, OUT);
+ p_bre(p, OUT, OUT, 0);
EMIT(OEND, 0);
g->laststate = THERE();
@@ -328,12 +335,13 @@ regcomp(
/*
- p_ere - ERE parser top level, concatenation and alternation
- == static void p_ere(struct parse *p, int stop);
+ == static void p_ere(struct parse *p, int stop, size_t reclimit);
*/
static void
p_ere(
struct parse *p,
- int stop) /* character this ERE should end at */
+ int stop, /* character this ERE should end at */
+ size_t reclimit)
{
char c;
sopno prevback = 0; /* pacify gcc */
@@ -343,11 +351,16 @@ p_ere(
_DIAGASSERT(p != NULL);
+ if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+ p->error = REG_ESPACE;
+ return;
+ }
+
for (;;) {
/* do a bunch of concatenated expressions */
conc = HERE();
while (MORE() && (c = PEEK()) != '|' && c != stop)
- p_ere_exp(p);
+ p_ere_exp(p, reclimit);
REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
if (!EAT('|'))
@@ -376,11 +389,12 @@ p_ere(
/*
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
- == static void p_ere_exp(struct parse *p);
+ == static void p_ere_exp(struct parse *p, size_t reclimit);
*/
static void
p_ere_exp(
- struct parse *p)
+ struct parse *p,
+ size_t reclimit)
{
char c;
sopno pos;
@@ -404,7 +418,7 @@ p_ere_exp(
p->pbegin[subno] = HERE();
EMIT(OLPAREN, subno);
if (!SEE(')'))
- p_ere(p, ')');
+ p_ere(p, ')', reclimit);
if (subno < NPAREN) {
p->pend[subno] = HERE();
assert(p->pend[subno] != 0);
@@ -506,7 +520,7 @@ p_ere_exp(
count2 = INFINITY;
} else /* just a single number */
count2 = count;
- repeat(p, pos, count, count2);
+ repeat(p, pos, count, count2, 0);
if (!EAT('}')) { /* error heuristics */
while (MORE() && PEEK() != '}')
NEXT();
@@ -544,7 +558,7 @@ p_str(
/*
- p_bre - BRE parser top level, anchoring and concatenation
== static void p_bre(struct parse *p, int end1, \
- == int end2);
+ == int end2, size_t reclimit);
* Giving end1 as OUT essentially eliminates the end1/end2 check.
*
* This implementation is a bit of a kludge, in that a trailing $ is first
@@ -557,7 +571,8 @@ static void
p_bre(
struct parse *p,
int end1, /* first terminating character */
- int end2) /* second terminating character */
+ int end2, /* second terminating character */
+ size_t reclimit)
{
sopno start;
int first = 1; /* first subexpression? */
@@ -565,6 +580,11 @@ p_bre(
_DIAGASSERT(p != NULL);
+ if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+ p->error = REG_ESPACE;
+ return;
+ }
+
start = HERE();
if (EAT('^')) {
@@ -573,7 +593,7 @@ p_bre(
p->g->nbol++;
}
while (MORE() && !SEETWO(end1, end2)) {
- wasdollar = p_simp_re(p, first);
+ wasdollar = p_simp_re(p, first, reclimit);
first = 0;
}
if (wasdollar) { /* oops, that was a trailing anchor */
@@ -588,12 +608,13 @@ p_bre(
/*
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(struct parse *p, int starordinary);
+ == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
*/
static int /* was the simple RE an unbackslashed $? */
p_simp_re(
struct parse *p,
- int starordinary) /* is a leading * an ordinary character? */
+ int starordinary, /* is a leading * an ordinary character? */
+ size_t reclimit)
{
int c;
int count;
@@ -634,7 +655,7 @@ p_simp_re(
EMIT(OLPAREN, subno);
/* the MORE here is an error heuristic */
if (MORE() && !SEETWO('\\', ')'))
- p_bre(p, '\\', ')');
+ p_bre(p, '\\', ')', reclimit);
if (subno < NPAREN) {
p->pend[subno] = HERE();
assert(p->pend[subno] != 0);
@@ -693,7 +714,7 @@ p_simp_re(
count2 = INFINITY;
} else /* just a single number */
count2 = count;
- repeat(p, pos, count, count2);
+ repeat(p, pos, count, count2, 0);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
NEXT();
@@ -745,6 +766,8 @@ p_bracket(
_DIAGASSERT(p != NULL);
cs = allocset(p);
+ if (cs == NULL)
+ return;
/* Dept of Truly Sickening Special-Case Kludges */
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]",
@@ -1101,14 +1124,16 @@ nonnewline(
/*
- repeat - generate code for a bounded repetition, recursively if needed
- == static void repeat(struct parse *p, sopno start, int from, int to);
+ == static void repeat(struct parse *p, sopno start, int from, int to,
+ == size_t reclimit);
*/
static void
repeat(
struct parse *p,
sopno start, /* operand from here to end of strip */
int from, /* repeated from this number */
- int to) /* to this number of times (maybe INFINITY) */
+ int to, /* to this number of times (maybe INFINITY) */
+ size_t reclimit)
{
sopno finish;
# define N 2
@@ -1119,11 +1144,13 @@ repeat(
_DIAGASSERT(p != NULL);
- finish = HERE();
-
- if (p->error != 0) /* head off possible runaway recursion */
+ if (reclimit++ > RECLIMIT)
+ p->error = REG_ESPACE;
+ if (p->error)
return;
+ finish = HERE();
+
assert(from <= to);
switch (REP(MAP(from), MAP(to))) {
@@ -1135,7 +1162,7 @@ repeat(
case REP(0, INF): /* as x{1,}? */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start); /* offset is wrong... */
- repeat(p, start+1, 1, to);
+ repeat(p, start+1, 1, to, reclimit);
ASTERN(OOR1, start);
AHEAD(start); /* ... fix it */
EMIT(OOR2, 0);
@@ -1155,7 +1182,7 @@ repeat(
ASTERN(O_CH, THERETHERE());
copy = dupl(p, start+1, finish+1);
assert(copy == finish+4);
- repeat(p, copy, 1, to-1);
+ repeat(p, copy, 1, to-1, reclimit);
break;
case REP(1, INF): /* as x+ */
INSERT(OPLUS_, start);
@@ -1163,11 +1190,11 @@ repeat(
break;
case REP(N, N): /* as xx{m-1,n-1} */
copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to-1);
+ repeat(p, copy, from-1, to-1, reclimit);
break;
case REP(N, INF): /* as xx{n-1,INF} */
copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to);
+ repeat(p, copy, from-1, to, reclimit);
break;
default: /* "can't happen" */
SETERROR(REG_ASSERT); /* just in case */
@@ -1218,6 +1245,8 @@ allocset(
nc = p->ncsalloc;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT * css;
+ if (MEMSIZE(p) > MEMLIMIT)
+ goto oomem;
if (p->g->sets == NULL)
p->g->sets = malloc(nc * sizeof(cset));
else
@@ -1234,13 +1263,14 @@ allocset(
(void) memset((char *)p->g->setbits + (nbytes - css),
0, css);
else {
+oomem:
no = 0;
SETERROR(REG_ESPACE);
/* caller's responsibility not to do set ops */
+ return NULL;
}
}
- assert(p->g->sets != NULL); /* xxx */
cs = &p->g->sets[no];
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
cs->mask = 1 << ((no) % CHAR_BIT);
@@ -1620,8 +1650,8 @@ dupl(
assert(finish >= start);
if (len == 0)
return(ret);
- enlarge(p, p->ssize + len); /* this many unexpected additions */
- assert(p->ssize >= p->slen + len);
+ if (!enlarge(p, p->ssize + len))/* this many unexpected additions */
+ return ret;
(void)memcpy(p->strip + p->slen, p->strip + start,
(size_t)len * sizeof(sop));
p->slen += len;
@@ -1654,8 +1684,8 @@ doemit(
/* deal with undersized strip */
if (p->slen >= p->ssize)
- enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
- assert(p->slen < p->ssize);
+ if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
+ return;
/* finally, it's all reduced to the easy case */
p->strip[p->slen++] = SOP(op, opnd);
@@ -1727,25 +1757,32 @@ dofwd(
- enlarge - enlarge the strip
== static void enlarge(struct parse *p, sopno size);
*/
-static void
+static int
enlarge(
struct parse *p,
sopno size)
{
sop *sp;
+ sopno osize;
_DIAGASSERT(p != NULL);
if (p->ssize >= size)
- return;
+ return 1;
- sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ osize = p->ssize;
+ p->ssize = size;
+ if (MEMSIZE(p) > MEMLIMIT)
+ goto oomem;
+ sp = realloc(p->strip, p->ssize * sizeof(sop));
if (sp == NULL) {
+oomem:
+ p->ssize = osize;
SETERROR(REG_ESPACE);
- return;
+ return 0;
}
p->strip = sp;
- p->ssize = size;
+ return 1;
}
/*
Index: src/lib/libc/regex/regex2.h
diff -u src/lib/libc/regex/regex2.h:1.12 src/lib/libc/regex/regex2.h:1.13
--- src/lib/libc/regex/regex2.h:1.12 Thu Feb 12 00:06:54 2009
+++ src/lib/libc/regex/regex2.h Sun Oct 9 14:23:00 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: regex2.h,v 1.12 2009/02/12 05:06:54 lukem Exp $ */
+/* $NetBSD: regex2.h,v 1.13 2011/10/09 18:23:00 christos Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994
@@ -110,7 +110,7 @@
* immediately *preceding* "execution" of that operator.
*/
typedef u_int32_t sop; /* strip operator */
-typedef int sopno;
+typedef size_t sopno;
#define OPRMASK ((u_int32_t)0xf8000000UL)
#define OPDMASK ((u_int32_t)0x07ffffffUL)
#define OPSHIFT ((unsigned)27)
@@ -179,8 +179,8 @@ struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
- int csetsize; /* number of bits in a cset vector */
- int ncsets; /* number of csets in use */
+ size_t csetsize; /* number of bits in a cset vector */
+ size_t ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of regcomp() cflags argument */
@@ -191,12 +191,12 @@ struct re_guts {
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define BAD 04 /* something wrong */
- int nbol; /* number of ^ used */
- int neol; /* number of $ used */
- int ncategories; /* how many character categories */
+ size_t nbol; /* number of ^ used */
+ size_t neol; /* number of $ used */
+ size_t ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
- int mlen; /* length of must */
+ size_t mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */