For simple patterns, grep has an optimization that bypasses the regex engine and runs about 50% faster. The problem is that its idea of a simple pattern is too simple.
This diff switches the logic around from a whitelist to a blacklist. We only need to abort the fast path if we see a magic regex character.

Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.45
diff -u -p -r1.45 util.c
--- util.c	29 Dec 2012 01:32:44 -0000	1.45
+++ util.c	1 May 2013 00:00:30 -0000
@@ -348,15 +348,8 @@ fastcomp(fastgrep_t *fg, const char *pat
 
 	/* Look for ways to cheat...er...avoid the full regex engine. */
 	for (i = 0; i < fg->patternLen; i++) {
-		/* Can still cheat? */
-		if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) ||
-		    (fg->pattern[i] == '_') || (fg->pattern[i] == ',') ||
-		    (fg->pattern[i] == '=') || (fg->pattern[i] == '-') ||
-		    (fg->pattern[i] == ':') || (fg->pattern[i] == '/')) {
-			/* As long as it is good, upper case it for later. */
-			if (iflag)
-				fg->pattern[i] = toupper(fg->pattern[i]);
-		} else if (fg->pattern[i] == '.') {
+		switch (fg->pattern[i]) {
+		case '.':
 			hasDot = i;
 			if (i < fg->patternLen / 2) {
 				if (firstHalfDot < 0)
@@ -368,11 +361,23 @@ fastcomp(fastgrep_t *fg, const char *pat
 				if (firstLastHalfDot < 0)
 					firstLastHalfDot = i;
 			}
-		} else {
+			break;
+		case '\\':
+		case '[':
+		case '(':
+		case '{':
+		case '?':
+		case '*':
+		case '+':
+		case '|':
 			/* Free memory and let others know this is empty. */
 			free(fg->pattern);
 			fg->pattern = NULL;
 			return (-1);
+		default:
+			if (iflag)
+				fg->pattern[i] = toupper(fg->pattern[i]);
+			break;
 		}
 	}
 