Paul Eggert wrote:
> I guess that grep needs a performance test suite, so that we can find
> problems like this in a more-disciplined way.

I agree, but I have no idea to perform it.

BTW, I retried them on Fedora 20, but I couldn't confirm significant
slowdown.

  Intel(R) Core(TM)2 Duo CPU     E8500  @ 3.16GHz
  Fedora 20  GCC 4.8.2 20131017 (Red Hat 4.8.2-1) (x86-64)

$ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 >k
$ yes j | head -100000000 >l

[ before 0001-dfa-speed-up-at-initial-state.patch ]

$ env LC_ALL=C src/grep '\(a\|b\)' k
real 0.96
user 0.85
sys 0.10
$ env LC_ALL=C src/grep '\(a\|b\)' l
real 0.64
user 0.60
sys 0.03

[ after 0001-dfa-speed-up-at-initial-state.patch ]

$ env LC_ALL=C src/grep '\(a\|b\)' k
real 0.58
user 0.52
sys 0.05
$ env LC_ALL=C src/grep '\(a\|b\)' l
real 0.75
user 0.69
sys 0.05

However, I could confirm slowdown on RHEL6 GCC 4.4.7.  According to the
result, I see that GCC fails in inlination.  In addition to
0001-dfa-speed-up-at-initial-state.patch, I applied an attached patch,
and retried them.  Then I confirmed speed-up.

$ env LC_ALL=C src/grep '\(a\|b\)' k
real 0.46
user 0.35
sys 0.10
$ env LC_ALL=C src/grep '\(a\|b\)' l
real 0.54
user 0.49
sys 0.04

Thanks,
Norihiro
From f527b4d3448a88fbc035eed2fe81df52aa8ba743 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Sun, 25 May 2014 17:41:11 +0900
Subject: [PATCH] dfa: separate dfaexec function to help optimization by
 compiler

* src/dfa.c (dfaexec_main): Rename from dfaexec, add inline attribute.
(dfaexec_mb): New function.  Run it when d->multibyte is true.   For this
function inlination must be avoided.
(dfaexec_sb): New function.  Run it when d->multibyte is false.  For this
function inlination must be avoided.
(dfaexec): Call dfaexec_mb or dfaexec_sb accoding to d->multibyte.
---
 src/dfa.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index a41d002..519361f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3280,9 +3280,9 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
    Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
    encountered a back-reference (1) or not (0).  The caller may use this
    to decide whether to fall back on a backtracking matcher.  */
-char *
-dfaexec (struct dfa *d, char const *begin, char *end,
-         int allow_nl, size_t *count, int *backref)
+static inline char *
+dfaexec_main (struct dfa *d, char const *begin, char *end,
+             int allow_nl, size_t *count, int *backref, bool const multibyte)
 {
   state_num s, s1;              /* Current state.  */
   unsigned char const *p, *mbp; /* Current input character.  */
@@ -3301,7 +3301,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
   saved_end = *(unsigned char *) end;
   *end = eol;
 
-  if (d->multibyte)
+  if (multibyte)
     {
       memset (&d->mbs, 0, sizeof d->mbs);
       if (! d->mb_match_lens)
@@ -3313,7 +3313,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
 
   for (;;)
     {
-      if (d->multibyte)
+      if (multibyte)
         {
           while ((t = trans[s]) != NULL)
             {
@@ -3402,7 +3402,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
             }
 
           s1 = s;
-          if (d->multibyte)
+          if (multibyte)
             {
               /* Can match with a multibyte character (and multicharacter
                  collating element).  Transition table might be updated.  */
@@ -3447,6 +3447,29 @@ dfaexec (struct dfa *d, char const *begin, char *end,
   return (char *) p;
 }
 
+static char *__attribute__((noinline))
+dfaexec_mb (struct dfa *d, char const *begin, char *end,
+           int allow_nl, size_t *count, int *backref)
+{
+  return dfaexec_main (d, begin, end, allow_nl, count, backref, true);
+}
+
+static char *__attribute__((noinline))
+dfaexec_sb (struct dfa *d, char const *begin, char *end,
+           int allow_nl, size_t *count, int *backref)
+{
+  return dfaexec_main (d, begin, end, allow_nl, count, backref, false);
+}
+
+char *
+dfaexec (struct dfa *d, char const *begin, char *end,
+         int allow_nl, size_t *count, int *backref)
+{
+  return (d->multibyte
+    ? dfaexec_mb (d, begin, end, allow_nl, count, backref)
+    : dfaexec_sb (d, begin, end, allow_nl, count, backref));
+}
+
 struct dfa *
 dfasuperset (struct dfa const *d)
 {
-- 
1.9.3

Reply via email to