Norihiro Tanaka wrote:
> The test case "k" is 50% faster and "l" is also about 16% faster
> with GCC 4.8.2 on my platform by two changes.

Thanks, I finally got around to looking at this and got similar performance results to yours. That __attribute__((noinline)) bothers me, though, as it's not portable and is a bit inelegant. I figured out a different way to avoid the inlining, and tweaked the commentary a bit, and so installed the attached additional patch after installing your patches.
From ac757a34d6b93942732d1026c1e4e3c67f882d86 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 27 Sep 2014 20:55:13 -0700
Subject: [PATCH] dfa: minor tweaks, mostly to remove __attribute__
 ((noinline))

That attribute isn't portable, and I found a way to get similar
performance with standard C features.
* NEWS: Document the recently-installed performance improvement.
* src/dfa.c (struct dfa): New member dfaexec.
(dfaexec_main): Remove unnecessary 'const'.
(dfaexec_mb, dfaexec_sb): Remove __attribute__ ((noinline));
no longer needed.
(dfaexec): Use new dfaexec member.
(dfainit, dfaoptimize, dfassbuild): Initialize it.
---
 NEWS      |  3 +++
 src/dfa.c | 32 +++++++++++++++++++++++---------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/NEWS b/NEWS
index 5bc89c0..a146e9e 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,9 @@ GNU grep NEWS                                    -*- outline -*-
   Performance has been greatly improved for searching files containing
   holes, on platforms where lseek's SEEK_DATA flag works efficiently.
 
+  Performance has improved for rejecting data that cannot match even
+  the first part of a nontrivial pattern.
+
   Performance has improved for very long strings in patterns.
 
   If a file contains data improperly encoded for the current locale,
diff --git a/src/dfa.c b/src/dfa.c
index ff23c07..4f45fff 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -342,6 +342,9 @@ struct dfa
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
   mbstate_t mbs;               /* Multibyte conversion state.  */
 
+  /* dfaexec implementation.  */
+  char *(*dfaexec) (struct dfa *, char const *, char *, int, size_t *, int *);
+
   /* The following are valid only if MB_CUR_MAX > 1.  */
 
   /* The value of multibyte_prop[i] is defined by following rule.
@@ -3266,10 +3269,14 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
    If COUNT is non-NULL, increment *COUNT once for each newline processed.
    Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
    encountered a back-reference (1) or not (0).  The caller may use this
-   to decide whether to fall back on a backtracking matcher.  */
+   to decide whether to fall back on a backtracking matcher.
+
+   If MULTIBYTE, the input consists of multibyte characters and/or
+   encoding-error bytes.  Otherwise, the input consists of single-byte
+   characters.  */
 static inline char *
 dfaexec_main (struct dfa *d, char const *begin, char *end,
-             int allow_nl, size_t *count, int *backref, bool const multibyte)
+             int allow_nl, size_t *count, int *backref, bool multibyte)
 {
   state_num s, s1;              /* Current state.  */
   unsigned char const *p, *mbp; /* Current input character.  */
@@ -3432,27 +3439,31 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
   return (char *) p;
 }
 
-static char *__attribute__((noinline))
+/* Specialized versions of dfaexec_main for multibyte and single-byte
+   cases.  This is for performance.  */
+
+static char *
 dfaexec_mb (struct dfa *d, char const *begin, char *end,
-           int allow_nl, size_t *count, int *backref)
+            int allow_nl, size_t *count, int *backref)
 {
   return dfaexec_main (d, begin, end, allow_nl, count, backref, true);
 }
 
-static char *__attribute__((noinline))
+static char *
 dfaexec_sb (struct dfa *d, char const *begin, char *end,
-           int allow_nl, size_t *count, int *backref)
+            int allow_nl, size_t *count, int *backref)
 {
   return dfaexec_main (d, begin, end, allow_nl, count, backref, false);
 }
 
+/* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, BACKREF, D->multibyte),
+   but faster.  */
+
 char *
 dfaexec (struct dfa *d, char const *begin, char *end,
          int allow_nl, size_t *count, int *backref)
 {
-  return (d->multibyte
-    ? dfaexec_mb (d, begin, end, allow_nl, count, backref)
-    : dfaexec_sb (d, begin, end, allow_nl, count, backref));
+  return d->dfaexec (d, begin, end, allow_nl, count, backref);
 }
 
 struct dfa *
@@ -3504,6 +3515,7 @@ dfainit (struct dfa *d)
 {
   memset (d, 0, sizeof *d);
   d->multibyte = MB_CUR_MAX > 1;
+  d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
   d->fast = !d->multibyte;
 }
 
@@ -3544,6 +3556,7 @@ dfaoptimize (struct dfa *d)
 
   free_mbdata (d);
   d->multibyte = false;
+  d->dfaexec = dfaexec_sb;
 }
 
 static void
@@ -3557,6 +3570,7 @@ dfassbuild (struct dfa *d)
 
   *sup = *d;
   sup->multibyte = false;
+  sup->dfaexec = dfaexec_sb;
   sup->multibyte_prop = NULL;
   sup->mbcsets = NULL;
   sup->superset = NULL;
-- 
1.9.3

Reply via email to