a trivial fuzzer someone once wrote blew up on this input to regcomp() [passed directly to regcomp() after adding a trailing '\0']:
xxd ~~/Downloads/clusterfuzz-testcase-minimized-regexec_fuzzer-5459313584832512 00000000: 6a3a 5b5d 6a3a 5b5d 6a3a 5bd9 6a3a 5b5d j:[]j:[]j:[.j:[] here: ==2830==ERROR: AddressSanitizer: SEGV on unknown address 0x50f020000093 (pc 0x7354670e97dd bp 0x0000ffffffd9 sp 0x7fff0d906410 T0) ==2830==The signal is caused by a WRITE memory access. #0 0x7354670e97dd in CHadd bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:1769:30 #1 0x7354670e84be in p_b_term bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:1233:4 #2 0x7354670e84be in p_bracket bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:1128:3 #3 0x7354670e6492 in p_ere_exp bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:521:3 #4 0x7354670e7c8b in p_re bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:851:19 #5 0x7354670e5aec in regcomp_internal bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:379:3 #6 0x7354670e5aec in regcomp bionic/libc/upstream-netbsd/lib/libc/regex/regcomp.c:432:10 looking at the netbsd regex source, it seems like all accesses to `bmp` _do_ all have appropriate `< NC` range checks, but because wint_t is signed, the checks are wrong for negative values. i think you want something like this patch: diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index 47602b77f621..2312dbaa947c 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -1764,8 +1764,7 @@ CHadd(struct parse *p, cset *cs, wint_t ch) _DIAGASSERT(p != NULL); _DIAGASSERT(cs != NULL); - assert(ch >= 0); - if (ch < NC) + if ((unsigned)ch < NC) cs->bmp[(unsigned)ch >> 3] |= 1 << (ch & 7); else { newwides = reallocarray(cs->wides, cs->nwides + 1, @@ -1778,9 +1777,9 @@ CHadd(struct parse *p, cset *cs, wint_t ch) cs->wides[cs->nwides++] = ch; } if (cs->icase) { - if ((nch = towlower(ch)) < NC) + if ((unsigned)(nch = towlower(ch)) < NC) cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7); - if ((nch = towupper(ch)) < NC) + if ((unsigned)(nch = towupper(ch)) < NC) cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7); } } diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h index fbfff0daf0f8..ee37044defc9 100644 --- a/lib/libc/regex/regex2.h +++ b/lib/libc/regex/regex2.h @@ -135,8 +135,7 @@ CHIN1(cset *cs, wint_t ch) { unsigned int i; - assert(ch >= 0); - if (ch < NC) + if ((unsigned)ch < NC) return (((cs->bmp[(unsigned)ch >> 3] & (1 << (ch & 7))) != 0) ^ cs->invert); for (i = 0; i < cs->nwides; i++) { @@ -160,8 +159,7 @@ static __inline int CHIN(cset *cs, wint_t ch) { - assert(ch >= 0); - if (ch < NC) + if ((unsigned)ch < NC) return (((cs->bmp[(unsigned)ch >> 3] & (1 << (ch & 7))) != 0) ^ cs->invert); else if (cs->icase) you can also see i've also removed the assert()s since i don't think anyone's actually building this code with them enabled, and the false sense of security from reading that assert is quite likely what caused that this bug to be introduced in the first place...