I submit the patch to use memchr2() for case-insensitive matching in
bmexec().  It counts last characters of the patterns in kwsprep(), if
it's just two, use memchr2().  The new version uses memchr() still, if
last character of the pattern is non-alphabetical.

Norihiro
From 47d9e7c99cd37d16d13db05a7bf2bdba6723ae2e Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 26 Apr 2014 17:03:31 +0900
Subject: [PATCH 1/2] maint: Revert "kwset: simplify Boyer-Moore with unibyte
 -i"

This reverts commit c7ea5aea911b950b2398454ca89cce23cabd3a40.
It occurs slowdown in some cases.

Conflicts:
        src/kwset.c
---
 src/kwset.c | 173 +++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 108 insertions(+), 65 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index f86ee03..8a5fa23 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -464,66 +464,75 @@ kwsprep (kwset_t kwset)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
-/* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
-static char
-tr (char const *trans, char c)
-{
-  return trans ? trans[U(c)] : c;
-}
-
-/* Delta2 portion of a Boyer-Moore search.  *TP is the string text
-   pointer; it is updated in place.  EP is the end of the string text,
-   and SP the end of the pattern.  LEN is the pattern length; it must
-   be at least 2.  TRANS, if nonnull, is the input translation table.
-   GC1 and GC2 are the last and second-from last bytes of the pattern,
-   transliterated by TRANS; the caller precomputes them for
-   efficiency.  If D1 is nonnull, it is a delta1 table for shifting *TP
-   when failing.  KWSET->shift says how much to shift.  */
-static inline bool
-bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
-                  char const *trans, char gc1, char gc2,
-                  unsigned char const *d1, kwset_t kwset)
-{
-  char const *tp = *tpp;
-  int d = len, skip = 0;
-
-  while (true)
-    {
-      int i = 2;
-      if (tr (trans, tp[-2]) == gc2)
-        {
-          while (++i <= d)
-            if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
-              break;
-          if (i > d)
-            {
-              for (i = d + skip + 1; i <= len; ++i)
-                if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
-                  break;
-              if (i > len)
-                {
-                  *tpp = tp - len;
-                  return true;
-                }
-            }
-        }
-
-      tp += d = kwset->shift[i - 2];
-      if (tp > ep)
-        break;
-      if (tr (trans, tp[-1]) != gc1)
-        {
-          if (d1)
-            tp += d1[U(tp[-1])];
-          break;
-        }
-      skip = i - 1;
+#define BM_DELTA2_SEARCH                               \
+  if (TRANS(tp[-2]) == gc2)                            \
+    {                                                  \
+      for (i = 3; i <= len; ++i)                       \
+        if (TRANS(tp[-i]) != TRANS(sp[-i]))            \
+          break;                                       \
+      if (i > len)                                     \
+        return tp - len - text;                                \
+      d = kwset->shift[i - 2]; tp += d;                        \
+      if (tp > ep)                                     \
+        break;                                         \
+      if (TRANS(tp[-1]) != gc1)                                \
+        {                                              \
+          d = d1[U(tp[-1])]; LAST_SHIFT;               \
+          continue;                                    \
+        }                                              \
+      skip = i - 1;                                    \
+    }                                                  \
+  else                                                 \
+    {                                                  \
+      d = kwset->shift[0]; tp += d;                    \
+      if (tp > ep)                                     \
+        break;                                         \
+      if (TRANS(tp[-1]) != gc1)                                \
+        {                                              \
+          d = d1[U(tp[-1])]; LAST_SHIFT;               \
+          continue;                                    \
+        }                                              \
+      skip = 1;                                                \
+    }                                                  \
+  while (true)                                         \
+    {                                                  \
+      if (TRANS(tp[-2]) == gc2)                                \
+        {                                              \
+          for (i = 3; i <= d; ++i)                     \
+            if (TRANS(tp[-i]) != TRANS(sp[-i]))                \
+              break;                                   \
+          if (i > d)                                   \
+            {                                          \
+              for (i = d + skip + 1; i <= len; ++i)    \
+                if (TRANS(tp[-i]) != TRANS(sp[-i]))    \
+                  break;                               \
+              if (i > len)                             \
+                return tp - len - text;                        \
+            }                                          \
+          d = kwset->shift[i - 2]; tp += d;            \
+          if (tp > ep)                                 \
+            break;                                     \
+          if (TRANS(tp[-1]) != gc1)                    \
+            {                                          \
+              d = d1[U(tp[-1])]; LAST_SHIFT;           \
+              break;                                   \
+            }                                          \
+          skip = i - 1;                                        \
+        }                                              \
+      else                                             \
+        {                                              \
+          d = kwset->shift[0]; tp += d;                        \
+          if (tp > ep)                                 \
+            break;                                     \
+          if (TRANS(tp[-1]) != gc1)                    \
+            {                                          \
+              d = d1[U(tp[-1])]; LAST_SHIFT;           \
+              break;                                   \
+            }                                          \
+          skip = 1;                                    \
+        }                                              \
     }
 
-  *tpp = tp;
-  return false;
-}
-
 /* Return the address of the first byte in the buffer S that equals C.
    S contains N bytes.  If TRANS is nonnull, use it to transliterate
    S's bytes before comparing them.  */
@@ -545,7 +554,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
 {
   unsigned char const *d1;
   char const *ep, *sp, *tp;
-  int d;
+  int d, i, skip;
+  char gc1, gc2;
   int len = kwset->mind;
   char const *trans = kwset->trans;
 
@@ -562,8 +572,17 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   d1 = kwset->delta;
   sp = kwset->target + len;
   tp = text + len;
-  char gc1 = tr (trans, sp[-1]);
-  char gc2 = tr (trans, sp[-2]);
+  if (trans)
+    {
+      gc1 = trans[U(sp[-1])];
+      gc2 = trans[U(sp[-2])];
+    }
+  else
+    {
+      gc1 = sp[-1];
+      gc2 = sp[-2];
+    }
+  skip = 0;
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
@@ -595,8 +614,20 @@ bmexec (kwset_t kwset, char const *text, size_t size)
                   }
               }
           }
-        if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
-          return tp - text;
+#undef LAST_SHIFT
+#define LAST_SHIFT tp += d
+        if (trans)
+          {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+            BM_DELTA2_SEARCH;
+          }
+        else
+          {
+#undef TRANS
+#define TRANS(ch) ch
+            BM_DELTA2_SEARCH;
+          }
       }
 
   /* Now we have only a few characters left to search.  We
@@ -608,8 +639,20 @@ bmexec (kwset_t kwset, char const *text, size_t size)
       d = d1[U((tp += d)[-1])];
       if (d != 0)
         continue;
-      if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
-        return tp - text;
+#undef LAST_SHIFT
+#define LAST_SHIFT
+      if (trans)
+        {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+          BM_DELTA2_SEARCH;
+        }
+      else
+        {
+#undef TRANS
+#define TRANS(ch) ch
+          BM_DELTA2_SEARCH;
+        }
     }
 
   return -1;
-- 
1.9.2


From 64335dee2390182566c6be29db9d970c49d7f804 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 26 Apr 2014 17:25:54 +0900
Subject: [PATCH 2/2] kwset: speed up Boyer-Moore unibyte -i by using memchr2

* src/kwset.c (struct kwset): New members `lastchars' and `nlastchar'.
(kwsprep): Count last characters in the patterns.
(bmexec): When two last characters, use memchr2().
---
 src/kwset.c | 108 +++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 37 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index 8a5fa23..5bc5da5 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -40,6 +40,7 @@
 #include "system.h"
 #include "obstack.h"
 #include "xalloc.h"
+#include "memchr2.h"
 
 #define link kwset_link
 
@@ -91,6 +92,8 @@ struct kwset
   char *target;                        /* Target string if there's only one. */
   int *shift;                  /* Used in Boyer-Moore search for one string. */
   char const *trans;           /* Character translation table. */
+  char lastchars[2];           /* Last characters up to two in the patterns.  
*/
+  int nlastchar;               /* Number of last character in the patterns.  */
 };
 
 /* Allocate and initialize a keyword set object, returning an opaque
@@ -366,7 +369,7 @@ void
 kwsprep (kwset_t kwset)
 {
   char const *trans = kwset->trans;
-  int i;
+  int i, j;
   unsigned char deltabuf[NCHAR];
   unsigned char *delta = trans ? deltabuf : kwset->delta;
 
@@ -428,6 +431,21 @@ kwsprep (kwset_t kwset)
   struct trie **next = trans ? nextbuf : kwset->next;
   memset (next, 0, sizeof nextbuf);
   treenext (kwset->trie->links, next);
+  kwset->nlastchar = 0;
+  for (i = 0; i < NCHAR; ++i)
+    {
+      if (!next[i])
+        continue;
+      for (j = 0; j < NCHAR; ++j)
+        {
+          if ((trans && trans[i] == trans[j]) || i == j)
+            {
+              if (kwset->nlastchar < 2)
+                kwset->lastchars[kwset->nlastchar] = j;
+              kwset->nlastchar++;
+            }
+        }
+    }
   if (trans)
     for (i = 0; i < NCHAR; ++i)
       kwset->next[i] = next[U(trans[i])];
@@ -533,21 +551,6 @@ kwsprep (kwset_t kwset)
         }                                              \
     }
 
-/* Return the address of the first byte in the buffer S that equals C.
-   S contains N bytes.  If TRANS is nonnull, use it to transliterate
-   S's bytes before comparing them.  */
-static char const *
-memchr_trans (char const *s, char c, size_t n, char const *trans)
-{
-  if (! trans)
-    return memchr (s, c, n);
-  char const *slim = s + n;
-  for (; s < slim; s++)
-    if (trans[U(*s)] == c)
-      return s;
-  return NULL;
-}
-
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
 bmexec (kwset_t kwset, char const *text, size_t size)
@@ -558,6 +561,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   char gc1, gc2;
   int len = kwset->mind;
   char const *trans = kwset->trans;
+  char const *lastchars = kwset->lastchars;
+  int nlastchar = kwset->nlastchar;
 
   if (len == 0)
     return 0;
@@ -565,8 +570,20 @@ bmexec (kwset_t kwset, char const *text, size_t size)
     return -1;
   if (len == 1)
     {
-      tp = memchr_trans (text, kwset->target[0], size, trans);
-      return tp ? tp - text : -1;
+      switch (nlastchar)
+        {
+        case 1:
+          tp = memchr (text, lastchars[0], size);
+          return tp ? tp - text : -1;
+        case 2:
+          tp = memchr2 (text, lastchars[0], lastchars[1], size);
+          return tp ? tp - text : -1;
+        default:
+          for (tp = text; tp < text + size; tp++)
+            if (trans[U(*tp)] == kwset->target[0])
+              return tp - text;
+          return -1;
+        }
     }
 
   d1 = kwset->delta;
@@ -587,33 +604,50 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
     /* 11 is not a bug, the initial offset happens only once. */
-    for (ep = text + size - 11 * len; tp <= ep; )
+    for (ep = text + size - 11 * len;;)
       {
-        d = d1[U(tp[-1])], tp += d;
-        d = d1[U(tp[-1])], tp += d;
-        if (d != 0)
+        while (tp <= ep)
           {
             d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
+            if (d == 0)
+              goto found;
+            d = d1[U(tp[-1])], tp += d;
+            d = d1[U(tp[-1])], tp += d;
+            d = d1[U(tp[-1])], tp += d;
+            if (d == 0)
+              goto found;
+            d = d1[U(tp[-1])], tp += d;
+            d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            if (d != 0)
+            if (d == 0)
+              goto found;
+            /* Typically memchr is faster than seeking by delta1 when
+               there is no chance to match for a while.  */
+            switch (nlastchar)
               {
-                d = d1[U(tp[-1])], tp += d;
-                d = d1[U(tp[-1])], tp += d;
-                d = d1[U(tp[-1])], tp += d;
-                if (d != 0)
-                  {
-                    /* Typically memchr is faster than seeking by
-                       delta1 when there is no chance to match for
-                       a while.  */
-                    tp--;
-                    tp = memchr_trans (tp, gc1, text + size - tp, trans);
-                    if (! tp)
-                      return -1;
-                    tp++;
-                  }
+              case 1:
+                --tp;
+                tp = memchr (tp, lastchars[0], size + text - tp + 1);
+                if (!tp)
+                  return -1;
+                ++tp;
+                goto found;
+              case 2:
+                --tp;
+                tp = memchr2 (tp, lastchars[0], lastchars[1],
+                              size + text - tp + 1);
+                if (!tp)
+                  return -1;
+                ++tp;
+                goto found;
+              default:
+                d = d1[U(tp[-1])]; tp += d;
+                d = d1[U(tp[-1])]; tp += d;
               }
           }
+        break;
+      found:
 #undef LAST_SHIFT
 #define LAST_SHIFT tp += d
         if (trans)
-- 
1.9.2

Reply via email to