Paul Eggert wrote:
> That's odd, as I'm not observing this slowdown.
Though I apply 17230.diff and tests, I don't observed slowdown. However,
I cann't find macros `BM_DELTA2_SEARCH' etc in 17230.diff. Could you also
consider them, and run (A) instead of (B)? It means that overheads by
`yes' and `head' should be ignored.
(A)
$ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 >k
$ grep -i jk
(B)
$ yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk
I attach the difference between current master and my version.
Though I don't analyze detail still, don't seem that overhead by check
for `trans' in `tr' function, which is called each time the comparison
of a character, can be ignorable.
Norihiro
diff --git a/src/kwset.c b/src/kwset.c
index f86ee03..3c53e81 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -55,6 +55,35 @@
#define U(c) (to_uchar (c))
+#if defined(__i386__) || defined(__x86_64__)
+#define SHIFT(p, d) \
+ switch (d) \
+ { \
+ default: \
+ p += d; \
+ break; \
+ case 8: \
+ ++p; \
+ case 7: \
+ ++p; \
+ case 6: \
+ ++p; \
+ case 5: \
+ ++p; \
+ case 4: \
+ ++p; \
+ case 3: \
+ ++p; \
+ case 2: \
+ ++p; \
+ case 1: \
+ ++p; \
+ }
+#else
+#define SHIFT(p, d) \
+ p += d;
+#endif
+
/* Balanced tree of edges and labels leaving a given trie node. */
struct tree
{
@@ -464,80 +493,75 @@ kwsprep (kwset_t kwset)
kwset->delta[i] = delta[U(trans[i])];
}
-/* Use TRANS to transliterate C. A null TRANS does no transliteration. */
-static char
-tr (char const *trans, char c)
-{
- return trans ? trans[U(c)] : c;
-}
-
-/* Delta2 portion of a Boyer-Moore search. *TP is the string text
- pointer; it is updated in place. EP is the end of the string text,
- and SP the end of the pattern. LEN is the pattern length; it must
- be at least 2. TRANS, if nonnull, is the input translation table.
- GC1 and GC2 are the last and second-from last bytes of the pattern,
- transliterated by TRANS; the caller precomputes them for
- efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP
- when failing. KWSET->shift says how much to shift. */
-static inline bool
-bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
- char const *trans, char gc1, char gc2,
- unsigned char const *d1, kwset_t kwset)
-{
- char const *tp = *tpp;
- int d = len, skip = 0;
-
- while (true)
- {
- int i = 2;
- if (tr (trans, tp[-2]) == gc2)
- {
- while (++i <= d)
- if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
- break;
- if (i > d)
- {
- for (i = d + skip + 1; i <= len; ++i)
- if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
- break;
- if (i > len)
- {
- *tpp = tp - len;
- return true;
- }
- }
- }
-
- tp += d = kwset->shift[i - 2];
- if (tp > ep)
- break;
- if (tr (trans, tp[-1]) != gc1)
- {
- if (d1)
- tp += d1[U(tp[-1])];
- break;
- }
- skip = i - 1;
+#define BM_DELTA2_SEARCH \
+ if (TRANS(tp[-2]) == gc2) \
+ { \
+ for (i = 3; i <= len; ++i) \
+ if (TRANS(tp[-i]) != TRANS(sp[-i])) \
+ break; \
+ if (i > len) \
+ return tp - len - text; \
+ d = kwset->shift[i - 2]; SHIFT(tp, d); \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ continue; \
+ } \
+ skip = i - 1; \
+ } \
+ else \
+ { \
+ d = kwset->shift[0]; SHIFT(tp, d); \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ continue; \
+ } \
+ skip = 1; \
+ } \
+ while (true) \
+ { \
+ if (TRANS(tp[-2]) == gc2) \
+ { \
+ for (i = 3; i <= d; ++i) \
+ if (TRANS(tp[-i]) != TRANS(sp[-i])) \
+ break; \
+ if (i > d) \
+ { \
+ for (i = d + skip + 1; i <= len; ++i) \
+ if (TRANS(tp[-i]) != TRANS(sp[-i])) \
+ break; \
+ if (i > len) \
+ return tp - len - text; \
+ } \
+ d = kwset->shift[i - 2]; SHIFT(tp, d); \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ break; \
+ } \
+ skip = i - 1; \
+ } \
+ else \
+ { \
+ d = kwset->shift[0]; SHIFT(tp, d); \
+ if (tp > ep) \
+ break; \
+ if (TRANS(tp[-1]) != gc1) \
+ { \
+ d = d1[U(tp[-1])]; LAST_SHIFT; \
+ break; \
+ } \
+ skip = 1; \
+ } \
}
- *tpp = tp;
- return false;
-}
-
-/* Return the address of the first byte in the buffer S that equals C.
- S contains N bytes. If TRANS is nonnull, use it to transliterate
- S's bytes before comparing them. */
-static char const *
-memchr_trans (char const *s, char c, size_t n, char const *trans)
-{
- if (! trans)
- return memchr (s, c, n);
- char const *slim = s + n;
- for (; s < slim; s++)
- if (trans[U(*s)] == c)
- return s;
- return NULL;
-}
/* Fast boyer-moore search. */
static size_t _GL_ATTRIBUTE_PURE
@@ -545,7 +569,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
{
unsigned char const *d1;
char const *ep, *sp, *tp;
- int d;
+ int d, i, skip;
+ char gc1, gc2;
int len = kwset->mind;
char const *trans = kwset->trans;
@@ -555,48 +580,94 @@ bmexec (kwset_t kwset, char const *text, size_t size)
return -1;
if (len == 1)
{
- tp = memchr_trans (text, kwset->target[0], size, trans);
- return tp ? tp - text : -1;
+ if (trans)
+ {
+ for (tp = text; tp < text + size; tp++)
+ if (trans[U(*tp)] == kwset->target[0])
+ return tp - text;
+ return -1;
+ }
+ else
+ {
+ tp = memchr (text, kwset->target[0], size);
+ return tp ? tp - text : -1;
+ }
}
d1 = kwset->delta;
sp = kwset->target + len;
tp = text + len;
- char gc1 = tr (trans, sp[-1]);
- char gc2 = tr (trans, sp[-2]);
+ if (trans)
+ {
+ gc1 = trans[U(sp[-1])];
+ gc2 = trans[U(sp[-2])];
+ }
+ else
+ {
+ gc1 = sp[-1];
+ gc2 = sp[-2];
+ }
+ skip = 0;
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
if (size > 12 * len)
/* 11 is not a bug, the initial offset happens only once. */
- for (ep = text + size - 11 * len; tp <= ep; )
+ for (ep = text + size - 11 * len;;)
{
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d != 0)
+ while (tp <= ep)
{
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d != 0)
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ if (d == 0)
+ goto found;
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ if (d == 0)
+ goto found;
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ if (d == 0)
+ goto found;
+ /* memchar() of glibc is faster than seeking by delta1 on
+ some platforms. When there is no chance to match for a
+ while, use it on them. */
+#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
+ if (!trans)
{
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d != 0)
+ tp = memchr (tp - 1, gc1, size + text - tp + 1);
+ if (tp)
{
- /* Typically memchr is faster than seeking by
- delta1 when there is no chance to match for
- a while. */
- tp--;
- tp = memchr_trans (tp, gc1, text + size - tp, trans);
- if (! tp)
- return -1;
- tp++;
+ ++tp;
+ goto found;
}
+ else
+ return -1;
+ }
+ else
+#endif
+ {
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
+ d = d1[U(tp[-1])]; SHIFT(tp, d);
}
}
- if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
- return tp - text;
+ break;
+ found:
+#undef LAST_SHIFT
+#define LAST_SHIFT SHIFT(tp, d);
+ if (trans)
+ {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+ BM_DELTA2_SEARCH;
+ }
+ else
+ {
+#undef TRANS
+#define TRANS(ch) ch
+ BM_DELTA2_SEARCH;
+ }
}
/* Now we have only a few characters left to search. We
@@ -605,11 +676,24 @@ bmexec (kwset_t kwset, char const *text, size_t size)
d = d1[U(tp[-1])];
while (d <= ep - tp)
{
- d = d1[U((tp += d)[-1])];
+ SHIFT(tp, d);
+ d = d1[U(tp[-1])];
if (d != 0)
continue;
- if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
- return tp - text;
+#undef LAST_SHIFT
+#define LAST_SHIFT
+ if (trans)
+ {
+#undef TRANS
+#define TRANS(ch) trans[U(ch)]
+ BM_DELTA2_SEARCH;
+ }
+ else
+ {
+#undef TRANS
+#define TRANS(ch) ch
+ BM_DELTA2_SEARCH;
+ }
}
return -1;
@@ -659,17 +743,19 @@ cwexec (kwset_t kwset, char const *text, size_t len,
struct kwsmatch *kwsmatch)
{
if (qlim && end <= qlim)
{
- end += d - 1;
- while ((d = delta[c = *end]) && end < qlim)
+ SHIFT(end, d);
+ while ((d = delta[c = end[-1]]) && end <= qlim)
{
- end += d;
- end += delta[U(*end)];
- end += delta[U(*end)];
+ SHIFT(end, d);
+ d = delta[U(end[-1])]; SHIFT(end, d);
+ d = delta[U(end[-1])]; SHIFT(end, d);
}
- ++end;
}
else
- d = delta[c = (end += d)[-1]];
+ {
+ SHIFT(end, d);
+ d = delta[c = end[-1]];
+ }
if (d)
continue;
beg = end - 1;
@@ -718,7 +804,8 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct
kwsmatch *kwsmatch)
d = 1;
while (lim - end >= d)
{
- if ((d = delta[c = (end += d)[-1]]) != 0)
+ SHIFT(end, d);
+ if ((d = delta[c = end[-1]]) != 0)
continue;
beg = end - 1;
if (!(trie = next[c]))