Thanks much for this performance improvement. I puzzled for a while to figure out what was going on, and was confused by that tricky macro expansion. Nowadays functions are typically not significantly slower than macros, so unless there's a major performance difference I'd prefer to use functions. I installed the performance improvement patch and followed up with the attached patch, which uses C functions instead of macros and which coalesces some of the near-duplicate code. The followup patch also includes some comments to try to explain the new functions.
From a3109245243cead455efe93d768ba9200ab2f2cd Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Mon, 21 Apr 2014 17:15:56 -0700
Subject: [PATCH] kwset: simplify Boyer-Moore with unibyte -i

This change doesn't significantly affect performance on my platform,
and should make the code easier to maintain.
* src/kwset.c (BM_DELTA2_SEARCH, LAST_SHIFT, TRANS):
Remove these macros, in favor of ...
(tr, bm_delta2_search): New functions.  All uses changed.
The latter function is inline because this improves code size and
runtime CPU slightly on x86-64 with gcc -O2 (GCC 4.9.0).
(bmexec): Prefer tr when that's simpler.
---
 src/kwset.c | 173 +++++++++++++++++++++++-------------------------------------
 1 file changed, 65 insertions(+), 108 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index d212d59..69b0a55 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -464,75 +464,66 @@ kwsprep (kwset_t kwset)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
-#define BM_DELTA2_SEARCH                               \
-  if (TRANS(tp[-2]) == gc2)                            \
-    {                                                  \
-      for (i = 3; i <= len; ++i)                       \
-        if (TRANS(tp[-i]) != TRANS(sp[-i]))            \
-          break;                                       \
-      if (i > len)                                     \
-        return tp - len - text;                                \
-      d = kwset->shift[i - 2]; tp += d;                        \
-      if (tp > ep)                                     \
-        break;                                         \
-      if (TRANS(tp[-1]) != gc1)                                \
-        {                                              \
-          d = d1[U(tp[-1])]; LAST_SHIFT;               \
-          continue;                                    \
-        }                                              \
-      skip = i - 1;                                    \
-    }                                                  \
-  else                                                 \
-    {                                                  \
-      d = kwset->shift[0]; tp += d;                    \
-      if (tp > ep)                                     \
-        break;                                         \
-      if (TRANS(tp[-1]) != gc1)                                \
-        {                                              \
-          d = d1[U(tp[-1])]; LAST_SHIFT;               \
-          continue;                                    \
-        }                                              \
-      skip = 1;                                                \
-    }                                                  \
-  while (true)                                         \
-    {                                                  \
-      if (TRANS(tp[-2]) == gc2)                                \
-        {                                              \
-          for (i = 3; i <= d; ++i)                     \
-            if (TRANS(tp[-i]) != TRANS(sp[-i]))                \
-              break;                                   \
-          if (i > d)                                   \
-            {                                          \
-              for (i = d + skip + 1; i <= len; ++i)    \
-                if (TRANS(tp[-i]) != TRANS(sp[-i]))    \
-                  break;                               \
-              if (i > len)                             \
-                return tp - len - text;                        \
-            }                                          \
-          d = kwset->shift[i - 2]; tp += d;            \
-          if (tp > ep)                                 \
-            break;                                     \
-          if (TRANS(tp[-1]) != gc1)                    \
-            {                                          \
-              d = d1[U(tp[-1])]; LAST_SHIFT;           \
-              break;                                   \
-            }                                          \
-          skip = i - 1;                                        \
-        }                                              \
-      else                                             \
-        {                                              \
-          d = kwset->shift[0]; tp += d;                        \
-          if (tp > ep)                                 \
-            break;                                     \
-          if (TRANS(tp[-1]) != gc1)                    \
-            {                                          \
-              d = d1[U(tp[-1])]; LAST_SHIFT;           \
-              break;                                   \
-            }                                          \
-          skip = 1;                                    \
-        }                                              \
+/* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
+static char
+tr (char const *trans, char c)
+{
+  return trans ? trans[U(c)] : c;
+}
+
+/* Delta2 portion of a Boyer-Moore search.  *TP is the string text
+   pointer; it is updated in place.  EP is the end of the string text,
+   and SP the end of the pattern.  LEN is the pattern length; it must
+   be at least 2.  TRANS, if nonnull, is the input translation table.
+   GC1 and GC2 are the last and second-from last bytes of the pattern,
+   transliterated by TRANS; the caller precomputes them for
+   efficiency.  If D1 is nonnull, it is a delta1 table for shifting *TP
+   when failing.  KWSET->shift says how much to shift.  */
+static inline bool
+bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
+                  char const *trans, char gc1, char gc2,
+                  unsigned char const *d1, kwset_t kwset)
+{
+  char const *tp = *tpp;
+  int d = len, skip = 0;
+
+  while (true)
+    {
+      int i = 2;
+      if (tr (trans, tp[-2]) == gc2)
+        {
+          while (++i <= d)
+            if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+              break;
+          if (i > d)
+            {
+              for (i = d + skip + 1; i <= len; ++i)
+                if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+                  break;
+              if (i > len)
+                {
+                  *tpp = tp - len;
+                  return true;
+                }
+            }
+        }
+
+      tp += d = kwset->shift[i - 2];
+      if (tp > ep)
+        break;
+      if (tr (trans, tp[-1]) != gc1)
+        {
+          if (d1)
+            tp += d1[U(tp[-1])];
+          break;
+        }
+      skip = i - 1;
     }
 
+  *tpp = tp;
+  return false;
+}
+
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -540,8 +531,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
 {
   unsigned char const *d1;
   char const *ep, *sp, *tp;
-  int d, i, skip;
-  char gc1, gc2;
+  int d;
   int len = kwset->mind;
   char const *trans = kwset->trans;
 
@@ -568,17 +558,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   d1 = kwset->delta;
   sp = kwset->target + len;
   tp = text + len;
-  if (trans)
-    {
-      gc1 = trans[U(sp[-1])];
-      gc2 = trans[U(sp[-2])];
-    }
-  else
-    {
-      gc1 = sp[-1];
-      gc2 = sp[-2];
-    }
-  skip = 0;
+  char gc1 = tr (trans, sp[-1]);
+  char gc2 = tr (trans, sp[-2]);
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
@@ -606,20 +587,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
           }
         break;
       found:
-#undef LAST_SHIFT
-#define LAST_SHIFT tp += d
-        if (trans)
-          {
-#undef TRANS
-#define TRANS(ch) trans[U(ch)]
-            BM_DELTA2_SEARCH;
-          }
-        else
-          {
-#undef TRANS
-#define TRANS(ch) ch
-            BM_DELTA2_SEARCH;
-          }
+        if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
+          return tp - text;
       }
 
   /* Now we have only a few characters left to search.  We
@@ -631,20 +600,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
       d = d1[U((tp += d)[-1])];
       if (d != 0)
         continue;
-#undef LAST_SHIFT
-#define LAST_SHIFT
-      if (trans)
-        {
-#undef TRANS
-#define TRANS(ch) trans[U(ch)]
-          BM_DELTA2_SEARCH;
-        }
-      else
-        {
-#undef TRANS
-#define TRANS(ch) ch
-          BM_DELTA2_SEARCH;
-        }
+      if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
+        return tp - text;
     }
 
   return -1;
-- 
1.9.0

Reply via email to