I simplified the patch by macros to work delta2 in bmexec().
From 8eda8d8b76a0a3e395ca1a61715e6ab2590cd638 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Thu, 20 Mar 2014 08:07:25 +0900
Subject: [PATCH 1/3] grep: may also use Boyer-Moore algorithm for
 case-insensitive matching

* src/kwset.c (BM_DELTA2_SEARCH, LAST_SHIFT, TRANS): New macro.
(bmexec): Use character translation table.
(kwsexec): Call bmexec for case-insensitive matching.
(kwsprep): Change the `if' condition.
---
 src/kwset.c | 175 +++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 119 insertions(+), 56 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index d5db12f..d212d59 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -434,7 +434,7 @@ kwsprep (kwset_t kwset)
 
   /* Check if we can use the simple boyer-moore algorithm, instead
      of the hairy commentz-walter algorithm. */
-  if (!trans && kwset->words == 1)
+  if (kwset->words == 1)
     {
       /* Looking for just one string.  Extract it from the trie. */
       kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
@@ -464,6 +464,76 @@ kwsprep (kwset_t kwset)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
+#define BM_DELTA2_SEARCH                               \
+  if (TRANS(tp[-2]) == gc2)                            \
+    {                                                  \
+      for (i = 3; i <= len; ++i)                       \
+        if (TRANS(tp[-i]) != TRANS(sp[-i]))            \
+          break;                                       \
+      if (i > len)                                     \
+        return tp - len - text;                                \
+      d = kwset->shift[i - 2]; tp += d;                        \
+      if (tp > ep)                                     \
+        break;                                         \
+      if (TRANS(tp[-1]) != gc1)                                \
+        {                                              \
+          d = d1[U(tp[-1])]; LAST_SHIFT;               \
+          continue;                                    \
+        }                                              \
+      skip = i - 1;                                    \
+    }                                                  \
+  else                                                 \
+    {                                                  \
+      d = kwset->shift[0]; tp += d;                    \
+      if (tp > ep)                                     \
+        break;                                         \
+      if (TRANS(tp[-1]) != gc1)                                \
+        {                                              \
+          d = d1[U(tp[-1])]; LAST_SHIFT;               \
+          continue;                                    \
+        }                                              \
+      skip = 1;                                                \
+    }                                                  \
+  while (true)                                         \
+    {                                                  \
+      if (TRANS(tp[-2]) == gc2)                                \
+        {                                              \
+          for (i = 3; i <= d; ++i)                     \
+            if (TRANS(tp[-i]) != TRANS(sp[-i]))                \
+              break;                                   \
+          if (i > d)                                   \
+            {                                          \
+              for (i = d + skip + 1; i <= len; ++i)    \
+                if (TRANS(tp[-i]) != TRANS(sp[-i]))    \
+                  break;                               \
+              if (i > len)                             \
+                return tp - len - text;                        \
+            }                                          \
+          d = kwset->shift[i - 2]; tp += d;            \
+          if (tp > ep)                                 \
+            break;                                     \
+          if (TRANS(tp[-1]) != gc1)                    \
+            {                                          \
+              d = d1[U(tp[-1])]; LAST_SHIFT;           \
+              break;                                   \
+            }                                          \
+          skip = i - 1;                                        \
+        }                                              \
+      else                                             \
+        {                                              \
+          d = kwset->shift[0]; tp += d;                        \
+          if (tp > ep)                                 \
+            break;                                     \
+          if (TRANS(tp[-1]) != gc1)                    \
+            {                                          \
+              d = d1[U(tp[-1])]; LAST_SHIFT;           \
+              break;                                   \
+            }                                          \
+          skip = 1;                                    \
+        }                                              \
+    }
+
+
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
 bmexec (kwset_t kwset, char const *text, size_t size)
@@ -473,6 +543,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   int d, i, skip;
   char gc1, gc2;
   int len = kwset->mind;
+  char const *trans = kwset->trans;
 
   if (len == 0)
     return 0;
@@ -480,15 +551,33 @@ bmexec (kwset_t kwset, char const *text, size_t size)
     return -1;
   if (len == 1)
     {
-      tp = memchr (text, kwset->target[0], size);
-      return tp ? tp - text : -1;
+      if (trans)
+        {
+          for (tp = text; tp < text + size; tp++)
+            if (trans[U(*tp)] == kwset->target[0])
+              return tp - text;
+          return -1;
+        }
+      else
+        {
+          tp = memchr (text, kwset->target[0], size);
+          return tp ? tp - text : -1;
+        }
     }
 
   d1 = kwset->delta;
   sp = kwset->target + len;
-  gc1 = sp[-1];
-  gc2 = sp[-2];
   tp = text + len;
+  if (trans)
+    {
+      gc1 = trans[U(sp[-1])];
+      gc2 = trans[U(sp[-2])];
+    }
+  else
+    {
+      gc1 = sp[-1];
+      gc2 = sp[-2];
+    }
   skip = 0;
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
@@ -517,32 +606,19 @@ bmexec (kwset_t kwset, char const *text, size_t size)
           }
         break;
       found:
-        d = len;
-        while (true)
+#undef LAST_SHIFT
+#define LAST_SHIFT tp += d
+        if (trans)
           {
-            if (tp[-2] == gc2)
-              {
-                for (i = 3; i <= d && tp[-i] == sp[-i]; ++i)
-                  continue;
-                if (i > d)
-                  {
-                    for (i = d + skip + 1; i <= len && tp[-i] == sp[-i]; ++i)
-                      continue;
-                    if (i > len)
-                      return tp - len - text;
-                  }
-                d = kwset->shift[i - 2]; tp += d;
-                if (tp > ep || tp[-1] != gc1)
-                  break;
-                skip = i - 1;
-              }
-            else
-              {
-                d = kwset->shift[0]; tp += d;
-                if (tp > ep || tp[-1] != gc1)
-                  break;
-                skip = 1;
-              }
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+            BM_DELTA2_SEARCH;
+          }
+        else
+          {
+#undef TRANS
+#define TRANS(ch) ch
+            BM_DELTA2_SEARCH;
           }
       }
 
@@ -555,32 +631,19 @@ bmexec (kwset_t kwset, char const *text, size_t size)
       d = d1[U((tp += d)[-1])];
       if (d != 0)
         continue;
-      d = len;
-      while (true)
+#undef LAST_SHIFT
+#define LAST_SHIFT
+      if (trans)
         {
-          if (tp[-2] == gc2)
-            {
-              for (i = 3; i <= d && tp[-i] == sp[-i]; ++i)
-                continue;
-              if (i > d)
-                {
-                  for (i = d + skip + 1; i <= len && tp[-i] == sp[-i]; ++i)
-                    continue;
-                  if (i > len)
-                    return tp - len - text;
-                }
-              d = kwset->shift[i - 2]; tp += d;
-              if (tp > ep || tp[-1] != gc1)
-                break;
-              skip = i - 1;
-            }
-          else
-            {
-              d = kwset->shift[0]; tp += d;
-              if (tp > ep || tp[-1] != gc1)
-                break;
-              skip = 1;
-            }
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+          BM_DELTA2_SEARCH;
+        }
+      else
+        {
+#undef TRANS
+#define TRANS(ch) ch
+          BM_DELTA2_SEARCH;
         }
     }
 
@@ -752,7 +815,7 @@ size_t
 kwsexec (kwset_t kwset, char const *text, size_t size,
          struct kwsmatch *kwsmatch)
 {
-  if (kwset->words == 1 && kwset->trans == NULL)
+  if (kwset->words == 1)
     {
       size_t ret = bmexec (kwset, text, size);
       if (ret != (size_t) -1)
-- 
1.9.2

Reply via email to