Thanks for the improvement with memchr2(). It's very great!
Paul Eggert wrote:
> If there is a reason to use macros I'd like to see a patch that simply
> changes functions to macros without changing the algorithm, so that we
> can measure this effect separately from the algorithm change.
I tested with below.
yes jjjjjjjjjjjjjjjjjjjj | head -10000000 >k
env LC_ALL=C time -p src/grep kjjjjjjjjjjjjjjjjjjj k
The performance with the current master is as following.
real 2.95 user 2.44 sys 0.45
After changes function to macros.
real 1.09 user 0.59 sys 0.43
Could you try above cases?
Norihiro
From 9e21acaf268f680ebe4969f8b489ee2348bc5e95 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sun, 27 Apr 2014 16:38:53 +0900
Subject: [PATCH] kwset: changes bm_delta2_search function to macros
* src/kwset.c (bm_delta2_search): Remove it.
(BM_DELTA2_SEARCH, TRANS LAST_SHIFT): Define new macros.
(bmexec): Replace bm_delta2_search function to macros.
---
src/kwset.c | 153 ++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 97 insertions(+), 56 deletions(-)
diff --git a/src/kwset.c b/src/kwset.c
index 8270f05..2acad1a 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -512,59 +512,76 @@ kwsprep (kwset_t kwset)
kwset->delta[i] = delta[U(trans[i])];
}
-/* Delta2 portion of a Boyer-Moore search. *TP is the string text
- pointer; it is updated in place. EP is the end of the string text,
- and SP the end of the pattern. LEN is the pattern length; it must
- be at least 2. TRANS, if nonnull, is the input translation table.
- GC1 and GC2 are the last and second-from last bytes of the pattern,
- transliterated by TRANS; the caller precomputes them for
- efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP
- when failing. KWSET->shift says how much to shift. */
-static inline bool
-bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
- char const *trans, char gc1, char gc2,
- unsigned char const *d1, kwset_t kwset)
-{
- char const *tp = *tpp;
- int d = len, skip = 0;
-
- while (true)
- {
- int i = 2;
- if (tr (trans, tp[-2]) == gc2)
- {
- while (++i <= d)
- if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
- break;
- if (i > d)
- {
- for (i = d + skip + 1; i <= len; ++i)
- if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
- break;
- if (i > len)
- {
- *tpp = tp - len;
- return true;
- }
- }
- }
-
- tp += d = kwset->shift[i - 2];
- if (tp > ep)
- break;
- if (tr (trans, tp[-1]) != gc1)
- {
- if (d1)
- tp += d1[U(tp[-1])];
- break;
- }
- skip = i - 1;
+/* Delta2 portion of a Boyer-Moore search. */
+#define BM_DELTA2_SEARCH \
+ if (TRANS(tp[-2]) == gc2) \
+ { \
+ for (i = 3; i <= len; ++i) \
+ if (TRANS(tp[-i]) != TRANS(sp[-i])) \
+ break; \
+ if (i > len) \
+ return tp - len - text; \
+ d = kwset->shift[i - 2]; tp += d; \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ continue; \
+ } \
+ skip = i - 1; \
+ } \
+ else \
+ { \
+ d = kwset->shift[0]; tp += d; \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ continue; \
+ } \
+ skip = 1; \
+ } \
+ while (true) \
+ { \
+ if (TRANS(tp[-2]) == gc2) \
+ { \
+ for (i = 3; i <= d; ++i) \
+ if (TRANS(tp[-i]) != TRANS(sp[-i])) \
+ break; \
+ if (i > d) \
+ { \
+ for (i = d + skip + 1; i <= len; ++i) \
+ if (TRANS(tp[-i]) != TRANS(sp[-i])) \
+ break; \
+ if (i > len) \
+ return tp - len - text; \
+ } \
+ d = kwset->shift[i - 2]; tp += d; \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ break; \
+ } \
+ skip = i - 1; \
+ } \
+ else \
+ { \
+ d = kwset->shift[0]; tp += d; \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ break; \
+ } \
+ skip = 1; \
+ } \
}
- *tpp = tp;
- return false;
-}
-
/* Return the address of the first byte in the buffer S (of size N)
that matches the last byte specified by KWSET, a singleton. */
static char const *
@@ -590,7 +607,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
{
unsigned char const *d1;
char const *ep, *sp, *tp;
- int d;
+ int d, i, skip;
int len = kwset->mind;
char const *trans = kwset->trans;
@@ -646,8 +663,20 @@ bmexec (kwset_t kwset, char const *text, size_t size)
}
}
}
- if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
- return tp - text;
+#undef LAST_SHIFT
+#define LAST_SHIFT tp += d
+ if (trans)
+ {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+ BM_DELTA2_SEARCH;
+ }
+ else
+ {
+#undef TRANS
+#define TRANS(ch) ch
+ BM_DELTA2_SEARCH;
+ }
big_advance:;
}
@@ -660,8 +689,20 @@ bmexec (kwset_t kwset, char const *text, size_t size)
d = d1[U((tp += d)[-1])];
if (d != 0)
continue;
- if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
- return tp - text;
+#undef LAST_SHIFT
+#define LAST_SHIFT
+ if (trans)
+ {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+ BM_DELTA2_SEARCH;
+ }
+ else
+ {
+#undef TRANS
+#define TRANS(ch) ch
+ BM_DELTA2_SEARCH;
+ }
}
return -1;
--
1.9.2