Replaced Rabin-Karp with the bndm algorithm as implemented by
Sascha. Seems to work.
Can people please test/review? =-)
(I'll take care of the formatting and the pre-compilation if
this looks good...) -- justin
Index: modules/filters/mod_include.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/filters/mod_include.c,v
retrieving revision 1.143
diff -u -r1.143 mod_include.c
--- modules/filters/mod_include.c 2001/09/04 02:13:58 1.143
+++ modules/filters/mod_include.c 2001/09/05 19:04:10
@@ -206,6 +206,86 @@
/* --------------------------- Parser functions --------------------------- */
+typedef struct {
+ unsigned int T[256];
+ unsigned int x;
+} bndm_t;
+
+static void bndm_compile(bndm_t *t, char *n, int nl)
+{
+ unsigned int x;
+ char *ne = n + nl;
+
+ memset(t->T, 0, sizeof(unsigned int) * 256);
+
+ for (x = 1; n < ne; x <<= 1)
+ t->T[(unsigned char) *n++] |= x;
+
+ t->x = x - 1;
+
+ return;
+}
+
+static int bndm(const char *n, int nl, const char *h, int hl, bndm_t *t)
+{
+ char *he = h + hl;
+ int skip;
+ unsigned int d;
+ char *p, *pi;
+ unsigned int *T = t->T;
+ unsigned int x = t->x << 1;
+
+ pi = h - 1; /* pi: p initial */
+ p = pi + nl; /* compare window right to left. point to the first char */
+
+ do {
+ skip = nl;
+ d = x;
+ do {
+ d = (d >> 1) & T[(unsigned char) *p--];
+ if ((d & 1)) {
+ if (p != pi) {
+ skip = p - pi;
+ } else {
+ return p - h + 1;
+ /* matches++; */
+ }
+ }
+ } while (d > 1);
+ p = (pi += skip) + nl;
+ } while (p < he);
+
+ return hl;
+}
+
+/* We've now found a start sequence tag... */
+static apr_bucket* found_start_sequence(apr_bucket *dptr,
+ include_ctx_t *ctx,
+ int tagStart)
+{
+ /* We want to split the bucket at the '<'. */
+ ctx->state = PARSE_DIRECTIVE;
+ ctx->tag_length = 0;
+ ctx->parse_pos = 0;
+ ctx->tag_start_bucket = dptr;
+ ctx->tag_start_index = tagStart;
+ if (ctx->head_start_index > 0) {
+ apr_bucket *tmp_bkt;
+
+ /* Split the bucket with the start of the tag in it */
+ apr_bucket_split(ctx->head_start_bucket, ctx->head_start_index);
+ tmp_bkt = APR_BUCKET_NEXT(ctx->head_start_bucket);
+ /* If it was a one bucket match */
+ if (dptr == ctx->head_start_bucket) {
+ ctx->tag_start_bucket = tmp_bkt;
+ ctx->tag_start_index = tagStart - ctx->head_start_index;
+ }
+ ctx->head_start_bucket = tmp_bkt;
+ ctx->head_start_index = 0;
+ }
+ return ctx->head_start_bucket;
+}
+
/* This function returns either a pointer to the split bucket containing the
* first byte of the BEGINNING_SEQUENCE (after finding a complete match) or it
* returns NULL if no match found.
@@ -217,9 +297,13 @@
const char *c;
const char *buf;
const char *str = STARTING_SEQUENCE;
- apr_bucket *tmp_bkt;
- apr_size_t start_index;
+ int slen = strlen(str);
+ bndm_t spat;
+ int pos;
+ /* compile the pattern */
+ bndm_compile(&spat, str, slen);
+
*do_cleanup = 0;
do {
@@ -269,8 +353,53 @@
if (len == 0) { /* end of pipe? */
break;
}
+
+ /* Set our buffer to use. */
+
c = buf;
- while (c < buf + len) {
+ /* The last bucket had a left over partial match that we need to
+ * complete.
+ */
+ if (ctx->state == PARSE_HEAD)
+ {
+ apr_size_t tmpLen;
+ tmpLen = (len > (slen - 1)) ? len : (slen - 1);
+
+ while (c < buf + tmpLen && *c == str[ctx->parse_pos])
+ {
+ c++;
+ ctx->parse_pos++;
+ }
+
+ if (str[ctx->parse_pos] == '\0')
+ {
+ ctx->bytes_parsed += c - buf;
+ return found_start_sequence(dptr, ctx, c - buf);
+ }
+
+ /* False alarm... */
+ ctx->state = PRE_HEAD;
+ }
+
+ if (len)
+ {
+ pos = bndm(str, slen, c, len, &spat);
+ if (pos != len)
+ {
+ ctx->head_start_bucket = dptr;
+ ctx->head_start_index = pos;
+ ctx->bytes_parsed += pos + slen;
+ return found_start_sequence(dptr, ctx, pos + slen);
+ }
+ }
+
+ /* Consider the case where we have <!-- at the end of the bucket. */
+ ctx->bytes_parsed += len - slen;
+ ctx->parse_pos = 0;
+
+ c = buf + len - slen + 1;
+ while (c < buf + len)
+ {
if (*c == str[ctx->parse_pos]) {
if (ctx->state == PRE_HEAD) {
ctx->state = PARSE_HEAD;
@@ -279,48 +408,24 @@
}
ctx->parse_pos++;
}
- else {
- if (str[ctx->parse_pos] == '\0') {
- /* We want to split the bucket at the '<'. */
- ctx->bytes_parsed++;
- ctx->state = PARSE_DIRECTIVE;
- ctx->tag_length = 0;
- ctx->parse_pos = 0;
- ctx->tag_start_bucket = dptr;
- ctx->tag_start_index = c - buf;
- if (ctx->head_start_index > 0) {
- start_index = (c - buf) - ctx->head_start_index;
- apr_bucket_split(ctx->head_start_bucket,
- ctx->head_start_index);
- tmp_bkt = APR_BUCKET_NEXT(ctx->head_start_bucket);
- if (dptr == ctx->head_start_bucket) {
- ctx->tag_start_bucket = tmp_bkt;
- ctx->tag_start_index = start_index;
- }
- ctx->head_start_bucket = tmp_bkt;
- ctx->head_start_index = 0;
- }
- return ctx->head_start_bucket;
+ else if (ctx->parse_pos != 0)
+ {
+ /* The reason for this, is that we need to make sure
+ * that we catch cases like <<!--#. This makes the
+ * second check after the original check fails.
+ * If parse_pos was already 0 then we already checked this.
+ */
+ /* FIXME: Why? */
+ *do_cleanup = 1;
+ if (*c == str[0]) {
+ ctx->parse_pos = 1;
+ ctx->head_start_index = c - buf;
}
- else if (ctx->parse_pos != 0) {
- /* The reason for this, is that we need to make sure
- * that we catch cases like <<!--#. This makes the
- * second check after the original check fails.
- * If parse_pos was already 0 then we already checked this.
- */
- *do_cleanup = 1;
- if (*c == str[0]) {
- ctx->parse_pos = 1;
- ctx->state = PARSE_HEAD;
- ctx->head_start_bucket = dptr;
- ctx->head_start_index = c - buf;
- }
- else {
- ctx->parse_pos = 0;
- ctx->state = PRE_HEAD;
- ctx->head_start_bucket = NULL;
- ctx->head_start_index = 0;
- }
+ else {
+ ctx->parse_pos = 0;
+ ctx->state = PRE_HEAD;
+ ctx->head_start_bucket = NULL;
+ ctx->head_start_index = 0;
}
}
c++;