Paul Eggert wrote:
> That's odd, as I'm not observing this slowdown.

Though I apply 17230.diff and tests, I don't observed slowdown.  However,
I cann't find macros `BM_DELTA2_SEARCH' etc in 17230.diff.  Could you also
consider them, and run (A) instead of (B)?  It means that overheads by
`yes' and `head' should be ignored.

(A)
$ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 >k
$ grep -i jk

(B)
$ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk

I attach the difference between current master and my version.

Though I don't analyze detail still, don't seem that overhead by check
for `trans' in `tr' function, which is called each time the comparison
of a character, can be ignorable.

Norihiro
diff --git a/src/kwset.c b/src/kwset.c
index f86ee03..3c53e81 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -55,6 +55,35 @@
 
 #define U(c) (to_uchar (c))
 
+#if defined(__i386__) || defined(__x86_64__)
+#define SHIFT(p, d)    \
+  switch (d)           \
+    {                  \
+    default:           \
+      p += d;          \
+      break;           \
+    case 8:            \
+      ++p;             \
+    case 7:            \
+      ++p;             \
+    case 6:            \
+      ++p;             \
+    case 5:            \
+      ++p;             \
+    case 4:            \
+      ++p;             \
+    case 3:            \
+      ++p;             \
+    case 2:            \
+      ++p;             \
+    case 1:            \
+      ++p;             \
+    }
+#else
+#define SHIFT(p, d)    \
+  p += d;
+#endif
+
 /* Balanced tree of edges and labels leaving a given trie node. */
 struct tree
 {
@@ -464,80 +493,75 @@ kwsprep (kwset_t kwset)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
-/* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
-static char
-tr (char const *trans, char c)
-{
-  return trans ? trans[U(c)] : c;
-}
-
-/* Delta2 portion of a Boyer-Moore search.  *TP is the string text
-   pointer; it is updated in place.  EP is the end of the string text,
-   and SP the end of the pattern.  LEN is the pattern length; it must
-   be at least 2.  TRANS, if nonnull, is the input translation table.
-   GC1 and GC2 are the last and second-from last bytes of the pattern,
-   transliterated by TRANS; the caller precomputes them for
-   efficiency.  If D1 is nonnull, it is a delta1 table for shifting *TP
-   when failing.  KWSET->shift says how much to shift.  */
-static inline bool
-bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
-                  char const *trans, char gc1, char gc2,
-                  unsigned char const *d1, kwset_t kwset)
-{
-  char const *tp = *tpp;
-  int d = len, skip = 0;
-
-  while (true)
-    {
-      int i = 2;
-      if (tr (trans, tp[-2]) == gc2)
-        {
-          while (++i <= d)
-            if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
-              break;
-          if (i > d)
-            {
-              for (i = d + skip + 1; i <= len; ++i)
-                if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
-                  break;
-              if (i > len)
-                {
-                  *tpp = tp - len;
-                  return true;
-                }
-            }
-        }
-
-      tp += d = kwset->shift[i - 2];
-      if (tp > ep)
-        break;
-      if (tr (trans, tp[-1]) != gc1)
-        {
-          if (d1)
-            tp += d1[U(tp[-1])];
-          break;
-        }
-      skip = i - 1;
+#define BM_DELTA2_SEARCH                               \
+  if (TRANS(tp[-2]) == gc2)                            \
+    {                                                  \
+      for (i = 3; i <= len; ++i)                       \
+        if (TRANS(tp[-i]) != TRANS(sp[-i]))            \
+          break;                                       \
+      if (i > len)                                     \
+        return tp - len - text;                                \
+      d = kwset->shift[i - 2]; SHIFT(tp, d);           \
+      if (tp > ep)                                     \
+        break;                                         \
+      if (TRANS(tp[-1]) != gc1)                                \
+        {                                              \
+          d = d1[U(tp[-1])]; LAST_SHIFT;               \
+          continue;                                    \
+        }                                              \
+      skip = i - 1;                                    \
+    }                                                  \
+  else                                                 \
+    {                                                  \
+      d = kwset->shift[0]; SHIFT(tp, d);               \
+      if (tp > ep)                                     \
+        break;                                         \
+      if (TRANS(tp[-1]) != gc1)                                \
+        {                                              \
+          d = d1[U(tp[-1])]; LAST_SHIFT;               \
+          continue;                                    \
+        }                                              \
+      skip = 1;                                                \
+    }                                                  \
+  while (true)                                         \
+    {                                                  \
+      if (TRANS(tp[-2]) == gc2)                                \
+        {                                              \
+          for (i = 3; i <= d; ++i)                     \
+            if (TRANS(tp[-i]) != TRANS(sp[-i]))                \
+              break;                                   \
+          if (i > d)                                   \
+            {                                          \
+              for (i = d + skip + 1; i <= len; ++i)    \
+                if (TRANS(tp[-i]) != TRANS(sp[-i]))    \
+                  break;                               \
+              if (i > len)                             \
+                return tp - len - text;                        \
+            }                                          \
+          d = kwset->shift[i - 2]; SHIFT(tp, d);       \
+          if (tp > ep)                                 \
+            break;                                     \
+          if (TRANS(tp[-1]) != gc1)                    \
+            {                                          \
+              d = d1[U(tp[-1])]; LAST_SHIFT;           \
+              break;                                   \
+            }                                          \
+          skip = i - 1;                                        \
+        }                                              \
+      else                                             \
+        {                                              \
+          d = kwset->shift[0]; SHIFT(tp, d);           \
+          if (tp > ep)                                 \
+            break;                                     \
+          if (TRANS(tp[-1]) != gc1)                    \
+            {                                          \
+              d = d1[U(tp[-1])]; LAST_SHIFT;           \
+              break;                                   \
+            }                                          \
+          skip = 1;                                    \
+        }                                              \
     }
 
-  *tpp = tp;
-  return false;
-}
-
-/* Return the address of the first byte in the buffer S that equals C.
-   S contains N bytes.  If TRANS is nonnull, use it to transliterate
-   S's bytes before comparing them.  */
-static char const *
-memchr_trans (char const *s, char c, size_t n, char const *trans)
-{
-  if (! trans)
-    return memchr (s, c, n);
-  char const *slim = s + n;
-  for (; s < slim; s++)
-    if (trans[U(*s)] == c)
-      return s;
-  return NULL;
-}
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -545,7 +569,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
 {
   unsigned char const *d1;
   char const *ep, *sp, *tp;
-  int d;
+  int d, i, skip;
+  char gc1, gc2;
   int len = kwset->mind;
   char const *trans = kwset->trans;
 
@@ -555,48 +580,94 @@ bmexec (kwset_t kwset, char const *text, size_t size)
     return -1;
   if (len == 1)
     {
-      tp = memchr_trans (text, kwset->target[0], size, trans);
-      return tp ? tp - text : -1;
+      if (trans)
+        {
+          for (tp = text; tp < text + size; tp++)
+            if (trans[U(*tp)] == kwset->target[0])
+              return tp - text;
+          return -1;
+        }
+      else
+        {
+          tp = memchr (text, kwset->target[0], size);
+          return tp ? tp - text : -1;
+        }
     }
 
   d1 = kwset->delta;
   sp = kwset->target + len;
   tp = text + len;
-  char gc1 = tr (trans, sp[-1]);
-  char gc2 = tr (trans, sp[-2]);
+  if (trans)
+    {
+      gc1 = trans[U(sp[-1])];
+      gc2 = trans[U(sp[-2])];
+    }
+  else
+    {
+      gc1 = sp[-1];
+      gc2 = sp[-2];
+    }
+  skip = 0;
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
     /* 11 is not a bug, the initial offset happens only once. */
-    for (ep = text + size - 11 * len; tp <= ep; )
+    for (ep = text + size - 11 * len;;)
       {
-        d = d1[U(tp[-1])], tp += d;
-        d = d1[U(tp[-1])], tp += d;
-        if (d != 0)
+        while (tp <= ep)
           {
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d != 0)
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            if (d == 0)
+              goto found;
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            if (d == 0)
+              goto found;
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            d = d1[U(tp[-1])]; SHIFT(tp, d);
+            if (d == 0)
+              goto found;
+            /* memchar() of glibc is faster than seeking by delta1 on
+               some platforms.  When there is no chance to match for a
+               while, use it on them.  */
+#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
+            if (!trans)
               {
-                d = d1[U(tp[-1])], tp += d;
-                d = d1[U(tp[-1])], tp += d;
-                d = d1[U(tp[-1])], tp += d;
-                if (d != 0)
+                tp = memchr (tp - 1, gc1, size + text - tp + 1);
+                if (tp)
                   {
-                    /* Typically memchr is faster than seeking by
-                       delta1 when there is no chance to match for
-                       a while.  */
-                    tp--;
-                    tp = memchr_trans (tp, gc1, text + size - tp, trans);
-                    if (! tp)
-                      return -1;
-                    tp++;
+                    ++tp;
+                    goto found;
                   }
+                else
+                  return -1;
+              }
+            else
+#endif
+              {
+                d = d1[U(tp[-1])]; SHIFT(tp, d);
+                d = d1[U(tp[-1])]; SHIFT(tp, d);
               }
           }
-        if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
-          return tp - text;
+        break;
+      found:
+#undef LAST_SHIFT
+#define LAST_SHIFT SHIFT(tp, d);
+        if (trans)
+          {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+            BM_DELTA2_SEARCH;
+          }
+        else
+          {
+#undef TRANS
+#define TRANS(ch) ch
+            BM_DELTA2_SEARCH;
+          }
       }
 
   /* Now we have only a few characters left to search.  We
@@ -605,11 +676,24 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   d = d1[U(tp[-1])];
   while (d <= ep - tp)
     {
-      d = d1[U((tp += d)[-1])];
+      SHIFT(tp, d);
+      d = d1[U(tp[-1])];
       if (d != 0)
         continue;
-      if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
-        return tp - text;
+#undef LAST_SHIFT
+#define LAST_SHIFT
+      if (trans)
+        {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+          BM_DELTA2_SEARCH;
+        }
+      else
+        {
+#undef TRANS
+#define TRANS(ch) ch
+          BM_DELTA2_SEARCH;
+        }
     }
 
   return -1;
@@ -659,17 +743,19 @@ cwexec (kwset_t kwset, char const *text, size_t len, 
struct kwsmatch *kwsmatch)
     {
       if (qlim && end <= qlim)
         {
-          end += d - 1;
-          while ((d = delta[c = *end]) && end < qlim)
+          SHIFT(end, d);
+          while ((d = delta[c = end[-1]]) && end <= qlim)
             {
-              end += d;
-              end += delta[U(*end)];
-              end += delta[U(*end)];
+              SHIFT(end, d);
+              d = delta[U(end[-1])]; SHIFT(end, d);
+              d = delta[U(end[-1])]; SHIFT(end, d);
             }
-          ++end;
         }
       else
-        d = delta[c = (end += d)[-1]];
+        {
+          SHIFT(end, d);
+          d = delta[c = end[-1]];
+        }
       if (d)
         continue;
       beg = end - 1;
@@ -718,7 +804,8 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct 
kwsmatch *kwsmatch)
   d = 1;
   while (lim - end >= d)
     {
-      if ((d = delta[c = (end += d)[-1]]) != 0)
+      SHIFT(end, d);
+      if ((d = delta[c = end[-1]]) != 0)
         continue;
       beg = end - 1;
       if (!(trie = next[c]))

Reply via email to