Paul Eggert wrote: > I guess that grep needs a performance test suite, so that we can find > problems like this in a more-disciplined way.
I agree, but I have no idea to perform it. BTW, I retried them on Fedora 20, but I couldn't confirm significant slowdown. Intel(R) Core(TM)2 Duo CPU E8500 @ 3.16GHz Fedora 20 GCC 4.8.2 20131017 (Red Hat 4.8.2-1) (x86-64) $ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 >k $ yes j | head -100000000 >l [ before 0001-dfa-speed-up-at-initial-state.patch ] $ env LC_ALL=C src/grep '\(a\|b\)' k real 0.96 user 0.85 sys 0.10 $ env LC_ALL=C src/grep '\(a\|b\)' l real 0.64 user 0.60 sys 0.03 [ after 0001-dfa-speed-up-at-initial-state.patch ] $ env LC_ALL=C src/grep '\(a\|b\)' k real 0.58 user 0.52 sys 0.05 $ env LC_ALL=C src/grep '\(a\|b\)' l real 0.75 user 0.69 sys 0.05 However, I could confirm slowdown on RHEL6 GCC 4.4.7. According to the result, I see that GCC fails in inlination. In addition to 0001-dfa-speed-up-at-initial-state.patch, I applied an attached patch, and retried them. Then I confirmed speed-up. $ env LC_ALL=C src/grep '\(a\|b\)' k real 0.46 user 0.35 sys 0.10 $ env LC_ALL=C src/grep '\(a\|b\)' l real 0.54 user 0.49 sys 0.04 Thanks, Norihiro
From f527b4d3448a88fbc035eed2fe81df52aa8ba743 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka <nori...@kcn.ne.jp> Date: Sun, 25 May 2014 17:41:11 +0900 Subject: [PATCH] dfa: separate dfaexec function to help optimization by compiler * src/dfa.c (dfaexec_main): Rename from dfaexec, add inline attribute. (dfaexec_mb): New function. Run it when d->multibyte is true. For this function inlination must be avoided. (dfaexec_sb): New function. Run it when d->multibyte is false. For this function inlination must be avoided. (dfaexec): Call dfaexec_mb or dfaexec_sb accoding to d->multibyte. --- src/dfa.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index a41d002..519361f 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -3280,9 +3280,9 @@ skip_remains_mb (struct dfa *d, unsigned char const *p, Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we encountered a back-reference (1) or not (0). The caller may use this to decide whether to fall back on a backtracking matcher. */ -char * -dfaexec (struct dfa *d, char const *begin, char *end, - int allow_nl, size_t *count, int *backref) +static inline char * +dfaexec_main (struct dfa *d, char const *begin, char *end, + int allow_nl, size_t *count, int *backref, bool const multibyte) { state_num s, s1; /* Current state. */ unsigned char const *p, *mbp; /* Current input character. */ @@ -3301,7 +3301,7 @@ dfaexec (struct dfa *d, char const *begin, char *end, saved_end = *(unsigned char *) end; *end = eol; - if (d->multibyte) + if (multibyte) { memset (&d->mbs, 0, sizeof d->mbs); if (! d->mb_match_lens) @@ -3313,7 +3313,7 @@ dfaexec (struct dfa *d, char const *begin, char *end, for (;;) { - if (d->multibyte) + if (multibyte) { while ((t = trans[s]) != NULL) { @@ -3402,7 +3402,7 @@ dfaexec (struct dfa *d, char const *begin, char *end, } s1 = s; - if (d->multibyte) + if (multibyte) { /* Can match with a multibyte character (and multicharacter collating element). Transition table might be updated. */ @@ -3447,6 +3447,29 @@ dfaexec (struct dfa *d, char const *begin, char *end, return (char *) p; } +static char *__attribute__((noinline)) +dfaexec_mb (struct dfa *d, char const *begin, char *end, + int allow_nl, size_t *count, int *backref) +{ + return dfaexec_main (d, begin, end, allow_nl, count, backref, true); +} + +static char *__attribute__((noinline)) +dfaexec_sb (struct dfa *d, char const *begin, char *end, + int allow_nl, size_t *count, int *backref) +{ + return dfaexec_main (d, begin, end, allow_nl, count, backref, false); +} + +char * +dfaexec (struct dfa *d, char const *begin, char *end, + int allow_nl, size_t *count, int *backref) +{ + return (d->multibyte + ? dfaexec_mb (d, begin, end, allow_nl, count, backref) + : dfaexec_sb (d, begin, end, allow_nl, count, backref)); +} + struct dfa * dfasuperset (struct dfa const *d) { -- 1.9.3