Hi Dave!

Attached the implementation of the Boyer-Moore[1] string search algorithm for brand new textsearch infrastructure.

I know that this stuff isn't that related with netdev development but actually it was you who pushed forward this stuff to Linus and the current clients of it are just net stuff at the moment: Thomas' textsearch ematch and the iptables match that I'm going to pass to Harald asap. So please apply.

@Thomas: I did some little benchmarks[2] to compare Knuth-Pratt-Morris vs Boyer-Moore. I've used the iptables string match to add rules for the packet classification.

[1] http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
[2] http://people.netfilter.org/~pablo/textsearch/

--
Pablo
Index: netfilter-2.6.14/lib/Makefile
===================================================================
--- netfilter-2.6.14.orig/lib/Makefile	2005-08-16 18:14:27.000000000 +0200
+++ netfilter-2.6.14/lib/Makefile	2005-08-16 18:14:31.000000000 +0200
@@ -38,6 +38,7 @@
 
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
 obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
+obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 
 hostprogs-y	:= gen_crc32table
Index: netfilter-2.6.14/lib/Kconfig
===================================================================
--- netfilter-2.6.14.orig/lib/Kconfig	2005-08-16 18:14:27.000000000 +0200
+++ netfilter-2.6.14/lib/Kconfig	2005-08-16 18:14:31.000000000 +0200
@@ -72,6 +72,16 @@
 config TEXTSEARCH_KMP
 	tristate
 
+config TEXTSEARCH_BM
+        depends on TEXTSEARCH
+        tristate "Boyer-Moore"
+        help
+          Say Y here if you want to be able to search text using the
+          Boyer-Moore textsearch algorithm.
+
+          To compile this code as a module, choose M here: the
+          module will be called ts_bm.
+
 config TEXTSEARCH_FSM
 	tristate
 
Index: netfilter-2.6.14/lib/ts_bm.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ netfilter-2.6.14/lib/ts_bm.c	2005-08-16 18:14:31.000000000 +0200
@@ -0,0 +1,171 @@
+/*
+ * lib/ts_bm.c		Boyer-Moore text search implementation
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Pablo Neira Ayuso <[EMAIL PROTECTED]>
+ *
+ * ==========================================================================
+ * 
+ *   Implements Boyer-Moore string matching algorithm:
+ *
+ *   [1] A Fast String Searching Algorithm, R.S. Boyer and Moore.
+ *       Communications of the Association for Computing Machinery, 
+ *       20(10), 1977, pp. 762-772.
+ *       http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
+ *
+ *   [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
+ *       http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/textsearch.h>
+
+/* Alphabet size, use ASCII */
+#define ASIZE 256
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(args, format...)
+#endif
+
+struct ts_bm
+{
+	u8 *		pattern;
+	unsigned int	patlen;
+	unsigned int 	bad_shift[ASIZE];
+	unsigned int	good_shift[0];
+};
+
+static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
+{
+	struct ts_bm *bm = ts_config_priv(conf);
+	unsigned int i, text_len, consumed = state->offset;
+	const u8 *text;
+	int shift = bm->patlen, bs;
+
+	for (;;) {
+		text_len = conf->get_next_block(consumed, &text, conf, state);
+
+		if (unlikely(text_len == 0))
+			break;
+
+		while (shift < text_len) {
+			DEBUGP("Searching in position %d (%c)\n", 
+				shift, text[shift]);
+			for (i = 0; i < bm->patlen; i++) 
+			     if (text[shift-i] != bm->pattern[bm->patlen-1-i])
+				     goto next;
+
+			/* London calling... */
+			DEBUGP("found!\n");
+			return consumed += (shift-(bm->patlen-1));
+
+next:			bs = bm->bad_shift[text[shift-i]];
+
+			/* Now jumping to... */
+			shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]);
+		}
+		consumed += text_len;
+	}
+
+	return UINT_MAX;
+}
+
+static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
+			       unsigned int len)
+{
+	int i, j, ended, l[ASIZE];
+
+	for (i = 0; i < ASIZE; i++)
+		bm->bad_shift[i] = len;
+	for (i = 0; i < len - 1; i++)
+		bm->bad_shift[pattern[i]] = len - 1 - i;
+
+	/* Compute the good shift array, used to match reocurrences 
+	 * of a subpattern */
+	for (i = 1; i < bm->patlen; i++) {
+		for (j = 0; j < bm->patlen && bm->pattern[bm->patlen - 1 - j]
+				== bm->pattern[bm->patlen - 1 - i - j]; j++);
+		l[i] = j;
+	}  
+
+	bm->good_shift[0] = 1;
+	for (i = 1; i < bm->patlen; i++)
+		bm->good_shift[i] = bm->patlen;
+	for (i = bm->patlen - 1; i > 0; i--)
+		bm->good_shift[l[i]] = i;
+	ended = 0;
+	for (i = 0; i < bm->patlen; i++) {
+		if (l[i] == bm->patlen - 1 - i)
+			ended = i;
+		if (ended)
+			bm->good_shift[i] = ended;
+	}
+}
+
+static struct ts_config *bm_init(const void *pattern, unsigned int len,
+				 int gfp_mask)
+{
+	struct ts_config *conf;
+	struct ts_bm *bm;
+	unsigned int prefix_tbl_len = len * sizeof(unsigned int);
+	size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
+
+	conf = alloc_ts_config(priv_size, gfp_mask);
+	if (IS_ERR(conf))
+		return conf;
+
+	bm = ts_config_priv(conf);
+	bm->patlen = len;
+	bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
+	compute_prefix_tbl(bm, pattern, len);
+	memcpy(bm->pattern, pattern, len);
+
+	return conf;
+}
+
+static void *bm_get_pattern(struct ts_config *conf)
+{
+	struct ts_bm *bm = ts_config_priv(conf);
+	return bm->pattern;
+}
+
+static unsigned int bm_get_pattern_len(struct ts_config *conf)
+{
+	struct ts_bm *bm = ts_config_priv(conf);
+	return bm->patlen;
+}
+
+static struct ts_ops bm_ops = {
+	.name		  = "bm",
+	.find		  = bm_find,
+	.init		  = bm_init,
+	.get_pattern	  = bm_get_pattern,
+	.get_pattern_len  = bm_get_pattern_len,
+	.owner		  = THIS_MODULE,
+	.list		  = LIST_HEAD_INIT(bm_ops.list)
+};
+
+static int __init init_bm(void)
+{
+	return textsearch_register(&bm_ops);
+}
+
+static void __exit exit_bm(void)
+{
+	textsearch_unregister(&bm_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_bm);
+module_exit(exit_bm);

Reply via email to