Module Name: src Committed By: christos Date: Sun Oct 9 18:23:00 UTC 2011
Modified Files: src/lib/libc/regex: engine.c regcomp.c regex2.h Log Message: Prevent regcomp/regexec DoS attacks by limiting the amount of memory used and the level of recursion. Thanks to Maksymilian Arciemowicz for discovery and help with the implementation. To generate a diff of this commit: cvs rdiff -u -r1.22 -r1.23 src/lib/libc/regex/engine.c cvs rdiff -u -r1.29 -r1.30 src/lib/libc/regex/regcomp.c cvs rdiff -u -r1.12 -r1.13 src/lib/libc/regex/regex2.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libc/regex/engine.c diff -u src/lib/libc/regex/engine.c:1.22 src/lib/libc/regex/engine.c:1.23 --- src/lib/libc/regex/engine.c:1.22 Thu Feb 12 00:06:54 2009 +++ src/lib/libc/regex/engine.c Sun Oct 9 14:23:00 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: engine.c,v 1.22 2009/02/12 05:06:54 lukem Exp $ */ +/* $NetBSD: engine.c,v 1.23 2011/10/09 18:23:00 christos Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 @@ -212,8 +212,8 @@ matcher( /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { for (dp = start; dp < stop; dp++) - if (*dp == g->must[0] && stop - dp >= g->mlen && - memcmp(dp, g->must, (size_t)g->mlen) == 0) + if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen && + memcmp(dp, g->must, g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ return(REG_NOMATCH); Index: src/lib/libc/regex/regcomp.c diff -u src/lib/libc/regex/regcomp.c:1.29 src/lib/libc/regex/regcomp.c:1.30 --- src/lib/libc/regex/regcomp.c:1.29 Thu Feb 12 00:06:54 2009 +++ src/lib/libc/regex/regcomp.c Sun Oct 9 14:23:00 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: regcomp.c,v 1.29 2009/02/12 05:06:54 lukem Exp $ */ +/* $NetBSD: regcomp.c,v 1.30 2011/10/09 18:23:00 christos Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 @@ -76,7 +76,7 @@ #if 0 static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; #else -__RCSID("$NetBSD: regcomp.c,v 1.29 2009/02/12 05:06:54 lukem Exp $"); +__RCSID("$NetBSD: regcomp.c,v 1.30 2011/10/09 18:23:00 christos Exp $"); #endif #endif /* LIBC_SCCS and not lint */ @@ -125,11 +125,11 @@ extern "C" { #endif /* === regcomp.c === */ -static void p_ere(struct parse *p, int stop); -static void p_ere_exp(struct parse *p); +static void p_ere(struct parse *p, int stop, size_t reclimit); +static void p_ere_exp(struct parse *p, size_t reclimit); static void p_str(struct parse *p); -static void p_bre(struct parse *p, int end1, int end2); -static int p_simp_re(struct parse *p, int starordinary); +static void p_bre(struct parse *p, int end1, int end2, size_t reclimit); +static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); static int p_count(struct parse *p); static void p_bracket(struct parse *p); static void p_b_term(struct parse *p, cset *cs); @@ -141,7 +141,7 @@ static int othercase(int ch); static void bothcases(struct parse *p, int ch); static void ordinary(struct parse *p, int ch); static void nonnewline(struct parse *p); -static void repeat(struct parse *p, sopno start, int from, int to); +static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit); static int seterr(struct parse *p, int e); static cset *allocset(struct parse *p); static void freeset(struct parse *p, cset *cs); @@ -163,7 +163,7 @@ static sopno dupl(struct parse *p, sopno static void doemit(struct parse *p, sop op, sopno opnd); static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos); static void dofwd(struct parse *p, sopno pos, sopno value); -static void enlarge(struct parse *p, sopno size); +static int enlarge(struct parse *p, sopno size); static void stripsnug(struct parse *p, struct re_guts *g); static void findmust(struct parse *p, struct re_guts *g); static sopno pluscount(struct parse *p, struct re_guts *g); @@ -211,6 +211,13 @@ static int never = 0; /* for use in ass #define never 0 /* some <assert.h>s have bugs too */ #endif +#define MEMLIMIT 0x8000000 +#define MEMSIZE(p) \ + ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \ + (p)->ncsalloc * sizeof(cset) + \ + (p)->ssize * sizeof(sop)) +#define RECLIMIT 256 + /* - regcomp - interface for parser and compilation = extern int regcomp(regex_t *, const char *, int); @@ -260,7 +267,7 @@ regcomp( if (g == NULL) return(REG_ESPACE); p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ - p->strip = (sop *)malloc(p->ssize * sizeof(sop)); + p->strip = malloc(p->ssize * sizeof(sop)); p->slen = 0; if (p->strip == NULL) { free(g); @@ -297,11 +304,11 @@ regcomp( EMIT(OEND, 0); g->firststate = THERE(); if (cflags®_EXTENDED) - p_ere(p, OUT); + p_ere(p, OUT, 0); else if (cflags®_NOSPEC) p_str(p); else - p_bre(p, OUT, OUT); + p_bre(p, OUT, OUT, 0); EMIT(OEND, 0); g->laststate = THERE(); @@ -328,12 +335,13 @@ regcomp( /* - p_ere - ERE parser top level, concatenation and alternation - == static void p_ere(struct parse *p, int stop); + == static void p_ere(struct parse *p, int stop, size_t reclimit); */ static void p_ere( struct parse *p, - int stop) /* character this ERE should end at */ + int stop, /* character this ERE should end at */ + size_t reclimit) { char c; sopno prevback = 0; /* pacify gcc */ @@ -343,11 +351,16 @@ p_ere( _DIAGASSERT(p != NULL); + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + for (;;) { /* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) - p_ere_exp(p); + p_ere_exp(p, reclimit); REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) @@ -376,11 +389,12 @@ p_ere( /* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op - == static void p_ere_exp(struct parse *p); + == static void p_ere_exp(struct parse *p, size_t reclimit); */ static void p_ere_exp( - struct parse *p) + struct parse *p, + size_t reclimit) { char c; sopno pos; @@ -404,7 +418,7 @@ p_ere_exp( p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) - p_ere(p, ')'); + p_ere(p, ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -506,7 +520,7 @@ p_ere_exp( count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); @@ -544,7 +558,7 @@ p_str( /* - p_bre - BRE parser top level, anchoring and concatenation == static void p_bre(struct parse *p, int end1, \ - == int end2); + == int end2, size_t reclimit); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -557,7 +571,8 @@ static void p_bre( struct parse *p, int end1, /* first terminating character */ - int end2) /* second terminating character */ + int end2, /* second terminating character */ + size_t reclimit) { sopno start; int first = 1; /* first subexpression? */ @@ -565,6 +580,11 @@ p_bre( _DIAGASSERT(p != NULL); + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + start = HERE(); if (EAT('^')) { @@ -573,7 +593,7 @@ p_bre( p->g->nbol++; } while (MORE() && !SEETWO(end1, end2)) { - wasdollar = p_simp_re(p, first); + wasdollar = p_simp_re(p, first, reclimit); first = 0; } if (wasdollar) { /* oops, that was a trailing anchor */ @@ -588,12 +608,13 @@ p_bre( /* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(struct parse *p, int starordinary); + == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); */ static int /* was the simple RE an unbackslashed $? */ p_simp_re( struct parse *p, - int starordinary) /* is a leading * an ordinary character? */ + int starordinary, /* is a leading * an ordinary character? */ + size_t reclimit) { int c; int count; @@ -634,7 +655,7 @@ p_simp_re( EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) - p_bre(p, '\\', ')'); + p_bre(p, '\\', ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -693,7 +714,7 @@ p_simp_re( count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); @@ -745,6 +766,8 @@ p_bracket( _DIAGASSERT(p != NULL); cs = allocset(p); + if (cs == NULL) + return; /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", @@ -1101,14 +1124,16 @@ nonnewline( /* - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(struct parse *p, sopno start, int from, int to); + == static void repeat(struct parse *p, sopno start, int from, int to, + == size_t reclimit); */ static void repeat( struct parse *p, sopno start, /* operand from here to end of strip */ int from, /* repeated from this number */ - int to) /* to this number of times (maybe INFINITY) */ + int to, /* to this number of times (maybe INFINITY) */ + size_t reclimit) { sopno finish; # define N 2 @@ -1119,11 +1144,13 @@ repeat( _DIAGASSERT(p != NULL); - finish = HERE(); - - if (p->error != 0) /* head off possible runaway recursion */ + if (reclimit++ > RECLIMIT) + p->error = REG_ESPACE; + if (p->error) return; + finish = HERE(); + assert(from <= to); switch (REP(MAP(from), MAP(to))) { @@ -1135,7 +1162,7 @@ repeat( case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ - repeat(p, start+1, 1, to); + repeat(p, start+1, 1, to, reclimit); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); @@ -1155,7 +1182,7 @@ repeat( ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); - repeat(p, copy, 1, to-1); + repeat(p, copy, 1, to-1, reclimit); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); @@ -1163,11 +1190,11 @@ repeat( break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to-1); + repeat(p, copy, from-1, to-1, reclimit); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to); + repeat(p, copy, from-1, to, reclimit); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ @@ -1218,6 +1245,8 @@ allocset( nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; if (p->g->sets == NULL) p->g->sets = malloc(nc * sizeof(cset)); else @@ -1234,13 +1263,14 @@ allocset( (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); else { +oomem: no = 0; SETERROR(REG_ESPACE); /* caller's responsibility not to do set ops */ + return NULL; } } - assert(p->g->sets != NULL); /* xxx */ cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); cs->mask = 1 << ((no) % CHAR_BIT); @@ -1620,8 +1650,8 @@ dupl( assert(finish >= start); if (len == 0) return(ret); - enlarge(p, p->ssize + len); /* this many unexpected additions */ - assert(p->ssize >= p->slen + len); + if (!enlarge(p, p->ssize + len))/* this many unexpected additions */ + return ret; (void)memcpy(p->strip + p->slen, p->strip + start, (size_t)len * sizeof(sop)); p->slen += len; @@ -1654,8 +1684,8 @@ doemit( /* deal with undersized strip */ if (p->slen >= p->ssize) - enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ - assert(p->slen < p->ssize); + if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ + return; /* finally, it's all reduced to the easy case */ p->strip[p->slen++] = SOP(op, opnd); @@ -1727,25 +1757,32 @@ dofwd( - enlarge - enlarge the strip == static void enlarge(struct parse *p, sopno size); */ -static void +static int enlarge( struct parse *p, sopno size) { sop *sp; + sopno osize; _DIAGASSERT(p != NULL); if (p->ssize >= size) - return; + return 1; - sp = (sop *)realloc(p->strip, size*sizeof(sop)); + osize = p->ssize; + p->ssize = size; + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; + sp = realloc(p->strip, p->ssize * sizeof(sop)); if (sp == NULL) { +oomem: + p->ssize = osize; SETERROR(REG_ESPACE); - return; + return 0; } p->strip = sp; - p->ssize = size; + return 1; } /* Index: src/lib/libc/regex/regex2.h diff -u src/lib/libc/regex/regex2.h:1.12 src/lib/libc/regex/regex2.h:1.13 --- src/lib/libc/regex/regex2.h:1.12 Thu Feb 12 00:06:54 2009 +++ src/lib/libc/regex/regex2.h Sun Oct 9 14:23:00 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: regex2.h,v 1.12 2009/02/12 05:06:54 lukem Exp $ */ +/* $NetBSD: regex2.h,v 1.13 2011/10/09 18:23:00 christos Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 @@ -110,7 +110,7 @@ * immediately *preceding* "execution" of that operator. */ typedef u_int32_t sop; /* strip operator */ -typedef int sopno; +typedef size_t sopno; #define OPRMASK ((u_int32_t)0xf8000000UL) #define OPDMASK ((u_int32_t)0x07ffffffUL) #define OPSHIFT ((unsigned)27) @@ -179,8 +179,8 @@ struct re_guts { int magic; # define MAGIC2 ((('R'^0200)<<8)|'E') sop *strip; /* malloced area for strip */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ + size_t csetsize; /* number of bits in a cset vector */ + size_t ncsets; /* number of csets in use */ cset *sets; /* -> cset [ncsets] */ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ int cflags; /* copy of regcomp() cflags argument */ @@ -191,12 +191,12 @@ struct re_guts { # define USEBOL 01 /* used ^ */ # define USEEOL 02 /* used $ */ # define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ - int ncategories; /* how many character categories */ + size_t nbol; /* number of ^ used */ + size_t neol; /* number of $ used */ + size_t ncategories; /* how many character categories */ cat_t *categories; /* ->catspace[-CHAR_MIN] */ char *must; /* match must contain this string */ - int mlen; /* length of must */ + size_t mlen; /* length of must */ size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ sopno nplus; /* how deep does it nest +s? */