The current libpcap optimizer relies heavily on recursion; when optimizing
very large filters, this can cause core dumps due to excessive stack usage
in some worst-case scenarios.
The attached patch replaces a few of the "deepest" recursive functions with
implementations that use iteration (or, where two branches must be followed,
a combination of iteration and recursion); this dramatically reduces stack
usage and allows much larger filters to be constructed without segmentation
violations.
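To make the shape of the change concrete before the patches: the trick is to
follow one of the two branch pointers (the false branch, JF) with a loop and
to recurse only on the other (JT), so stack depth is bounded by the longest
chain of JT edges rather than by the longest path through the whole graph.
Here's a minimal sketch of the pattern with a hypothetical node type (not the
actual libpcap code; it mirrors the count_blocks change in the patch below):

/* Hypothetical node type standing in for libpcap's struct block. */
struct node {
	struct node *jt, *jf;	/* true- and false-branch successors */
	int marked;
};

/* Fully recursive shape: stack depth grows with the longest
 * jt/jf path through the graph. */
static int
count_recursive(struct node *p)
{
	if (p == NULL || p->marked)
		return 0;
	p->marked = 1;
	return count_recursive(p->jt) + count_recursive(p->jf) + 1;
}

/* Hybrid shape: walk the jf chain with a loop, recursing only on
 * jt, so stack depth is bounded by the longest chain of jt edges. */
static int
count_hybrid(struct node *p)
{
	struct node *q;
	int i, n;

	/* Pass 1: mark along the jf chain, counting how far we get. */
	for (q = p, i = 0; q != NULL && !q->marked; i++) {
		q->marked = 1;
		q = q->jf;
	}
	/* Pass 2: revisit the chain, recursing only into jt subgraphs. */
	n = 0;
	for (q = p; i > 0; i--) {
		n += count_hybrid(q->jt) + 1;
		q = q->jf;
	}
	return n;
}

find_levels_r uses the same two passes with two extra wrinkles, both visible
in the patch: its second pass has to handle the JF chain deepest-first, so
that each node's JF successor already has its level computed when the MAX is
taken, and it therefore re-walks the chain from the head for each node,
trading some time for the stack savings.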
First, a bit of shell code to construct some large filters for testing:
$ cat genfilter.sh
#!/bin/sh
if [ $# -ne 1 ]; then
	echo 1>&2 "Usage: $0 filter-size"; exit 1
fi
echo -n "ip and ("
N=$1
while [ 1 -lt "$N" ]
do
	A4=$(( 2 * ( $N % 128 ) ))
	A3=$(( ( $N / 128 ) % 256 ))
	A2=$(( ( $N / ( 128 * 256 ) ) % 256 ))
	A1=$(( 10 + ( $N / 8388608 ) ))
	echo -n "host $A1.$A2.$A3.$A4 or "
	N=$(( $N - 1 ))
done
echo "host 10.0.0.1)"
(These filters don't actually exhibit the worst-case stack recursion that I
was seeing when I originally ran into this problem, perhaps because of the
relative lengths of the JT vs. JF chains they produce, but they do allow for
some illustrative scaling.)
Then a patch to tcpdump to allow it to read a filter from stdin (these large
filters exceed the maximum argument length on a BSD system):
RCS file: /tcpdump/master/tcpdump/util.c,v
retrieving revision 1.104
diff -u -r1.104 util.c
--- util.c 13 Dec 2005 08:47:10 -0000 1.104
+++ util.c 29 Dec 2005 21:13:42 -0000
@@ -495,31 +495,50 @@
register char *cp;
struct stat buf;
- fd = open(fname, O_RDONLY|O_BINARY);
- if (fd < 0)
- error("can't open %s: %s", fname, pcap_strerror(errno));
-
- if (fstat(fd, &buf) < 0)
- error("can't stat %s: %s", fname, pcap_strerror(errno));
-
- cp = malloc((u_int)buf.st_size + 1);
- if (cp == NULL)
- error("malloc(%d) for %s: %s", (u_int)buf.st_size + 1,
- fname, pcap_strerror(errno));
- cc = read(fd, cp, (u_int)buf.st_size);
- if (cc < 0)
- error("read %s: %s", fname, pcap_strerror(errno));
- if (cc != buf.st_size)
- error("short read %s (%d != %d)", fname, cc, (int)buf.st_size);
+ if (!strcmp(fname, "-")) {
+ i = 0;
+ fd = BUFSIZ + 1;
+ cp = malloc(fd);
+ if (cp == NULL)
+ error("malloc(%d) for stdin: %s", (u_int)fd,
+ pcap_strerror(errno));
+ while ((cc = read(0, &cp[i], fd - i - 1)) > 0) {
+ i += cc;
+ fd += cc;
+ cp = realloc(cp, fd);
+ if (cp == NULL)
+ error("realloc(%d) for stdin: %s", (u_int)fd,
+ pcap_strerror(errno));
+ }
+ /* NUL-terminate, and let the comment-stripping loop below */
+ /* see the full text (cc is 0 or -1 after the final read) */
+ cp[i] = '\0';
+ cc = i;
+ }
+ else {
+ fd = open(fname, O_RDONLY|O_BINARY);
+ if (fd < 0)
+ error("can't open %s: %s", fname, pcap_strerror(errno));
+
+ if (fstat(fd, &buf) < 0)
+ error("can't stat %s: %s", fname, pcap_strerror(errno));
+
+ cp = malloc((u_int)buf.st_size + 1);
+ if (cp == NULL)
+ error("malloc(%d) for %s: %s", (u_int)buf.st_size + 1,
+ fname, pcap_strerror(errno));
+ cc = read(fd, cp, (u_int)buf.st_size);
+ if (cc < 0)
+ error("read %s: %s", fname, pcap_strerror(errno));
+ if (cc != buf.st_size)
+ error("short read %s (%d != %d)", fname, cc,
(int)buf.st_size);
- close(fd);
+ close(fd);
+ cp[cc] = '\0';
+ }
/* replace "# comment" with spaces */
for (i = 0; i < cc; i++) {
if (cp[i] == '#')
while (i < cc && cp[i] != '\n')
cp[i++] = ' ';
}
- cp[cc] = '\0';
return (cp);
}
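With this applied, "tcpdump -d -F -" takes the filter expression on standard
input; that's how the measurements below were run.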
Here are the "before" stats:
bash-2.05b$ echo `./genfilter.sh 200 | timing ./tcpdump -d -F - | wc -l` BPF instructions
0.49 real 0.36 user 0.02 sys
133 average unshared stack size
552 BPF instructions
bash-2.05b$ echo `./genfilter.sh 2000 | timing ./tcpdump -d -F - | wc -l` BPF instructions
101.74 real 90.56 user 1.77 sys
769 average unshared stack size
7751 BPF instructions
And "after":
bash-2.05b$ echo `./genfilter.sh 200 | timing ./tcpdump.new -d -F - | wc -l` BPF instructions
0.49 real 0.39 user 0.00 sys
120 average unshared stack size
552 BPF instructions
bash-2.05b$ echo `./genfilter.sh 2000 | timing ./tcpdump.new -d -F - | wc -l` BPF instructions
154 average unshared stack size
7752 BPF instructions
(The one additional BPF instruction is a "ja" extended branch: the final
"ret #0" and "ret #96" instructions come out in a different order, leaving
one of them beyond the reach of a conditional branch's 8-bit offset field,
so one extra long jump is needed.)
The actual patch to optimize.c is attached.
Index: optimize.c
===================================================================
RCS file: /tcpdump/master/libpcap/optimize.c,v
retrieving revision 1.86
diff -u -w -r1.86 optimize.c
--- optimize.c 31 Jul 2005 17:58:24 -0000 1.86
+++ optimize.c 29 Dec 2005 22:58:21 -0000
@@ -217,23 +217,31 @@
find_levels_r(b)
struct block *b;
{
+ struct block *q;
+ int i, j;
int level;
- if (isMarked(b))
- return;
-
- Mark(b);
- b->link = 0;
+ /* iterate first, marking as we go, then re-iterate with recursion */
+ for (q = b, i = 0; q != NULL && !isMarked(q); i++) {
+ Mark(q);
+ q->link = 0;
+ q = JF(q);
+ }
- if (JT(b)) {
- find_levels_r(JT(b));
- find_levels_r(JF(b));
- level = MAX(JT(b)->level, JF(b)->level) + 1;
+ for (; i > 0; i--) {
+ q = b;
+ for (j = i - 1; j > 0; j--) {
+ q = JF(q);
+ }
+ if (JT(q)) {
+ find_levels_r(JT(q));
+ level = MAX(JT(q)->level, JF(q)->level) + 1;
} else
level = 0;
- b->level = level;
- b->link = levels[level];
- levels[level] = b;
+ q->level = level;
+ q->link = levels[level];
+ levels[level] = q;
+ }
}
/*
@@ -1906,32 +1914,47 @@
count_blocks(p)
struct block *p;
{
- if (p == 0 || isMarked(p))
- return 0;
- Mark(p);
- return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
+ struct block *q;
+ int i;
+ int n;
+
+ /* iterate first, marking as we go, then re-iterate with recursion */
+ for (q = p, i = 0; q != NULL && !isMarked(q); i++) {
+ Mark(q);
+ q = JF(q);
+ }
+ n = 0;
+ for (q = p; i > 0; i--) {
+ n += count_blocks(JT(q)) + 1;
+ q = JF(q);
+ }
+ return n;
}
/*
- * Do a depth first search on the flow graph, numbering the
- * the basic blocks, and entering them into the 'blocks' array.`
+ * Do a search on the flow graph, numbering the basic blocks,
+ * and entering them into the 'blocks' array.
*/
static void
number_blks_r(p)
struct block *p;
{
+ struct block *q;
+ int i;
int n;
- if (p == 0 || isMarked(p))
- return;
-
- Mark(p);
+ /* iterate first, marking as we go, then re-iterate with recursion */
+ for (q = p, i = 0; q != NULL && !isMarked(q); i++) {
n = n_blocks++;
- p->id = n;
- blocks[n] = p;
-
- number_blks_r(JT(p));
- number_blks_r(JF(p));
+ q->id = n;
+ blocks[n] = q;
+ Mark(q);
+ q = JF(q);
+ }
+ for (q = p; i > 0; i--) {
+ number_blks_r(JT(q));
+ q = JF(q);
+ }
}
/*
@@ -2235,6 +2258,25 @@
{
int n;
struct bpf_insn *fp;
+ int iter;
+
+ /*
+ * Recursion is simpler (and faster) than iteration but tends to blow
+ * out the stack if the depth is excessive; if the depth exceeds some
+ * threshold (empirically set to 500), we will use iteration instead.
+ */
+ iter = (n_blocks != 0 && root->level > 500);
+
+ if (iter) {
+ /*
+ * The number of levels is bounded by the number of nodes.
+ */
+ levels = (struct block **)malloc(n_blocks * sizeof(*levels));
+ if (levels == NULL)
+ root->level = 0;
+ else
+ find_levels(root);
+ }
/*
* Loop doing convert_code_r() until no branches remain
@@ -2252,11 +2294,33 @@
ftail = fp + n;
unMarkAll();
+ if (iter)
+ {
+ int i;
+ int x;
+ struct block *b;
+
+ for (i = 0; i < root->level; i++) {
+ x = 0;
+ for (b = levels[i]; b; b = b->link) {
+ if (!convert_code_r(b))
+ x = 1;
+ }
+ if (x) /* some offset too large */
+ break;
+ }
+
+ if (i != root->level) { /* didn't reach root level */
+ free(fp);
+ continue;
+ }
+ }
if (convert_code_r(root))
break;
free(fp);
}
+ if (iter)
+ free((void *)levels);
+
return fp;
}
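As an aside, the scaling is also easy to reproduce without patching tcpdump
at all: the sketch below (my own test harness, not part of the patch; the
16 MB buffer size is an arbitrary assumption) feeds stdin through
pcap_compile() and therefore through the optimizer:

/* compile_test.c: feed a filter expression from stdin through
 * pcap_compile() to exercise the optimizer. Hypothetical test
 * harness, not part of the patch. */
#include <stdio.h>
#include <pcap.h>

int
main(void)
{
	static char filter[16 * 1024 * 1024];	/* assumed big enough */
	struct bpf_program prog;
	pcap_t *p;
	size_t n;

	n = fread(filter, 1, sizeof(filter) - 1, stdin);
	filter[n] = '\0';

	/* a dead handle is enough to run the compiler/optimizer */
	p = pcap_open_dead(DLT_EN10MB, 65535);
	if (p == NULL) {
		fprintf(stderr, "pcap_open_dead failed\n");
		return 1;
	}
	/* optimize=1 exercises the code paths changed above */
	if (pcap_compile(p, &prog, filter, 1, 0) < 0) {
		fprintf(stderr, "pcap_compile: %s\n", pcap_geterr(p));
		return 1;
	}
	printf("%u BPF instructions\n", prog.bf_len);
	pcap_freecode(&prog);
	pcap_close(p);
	return 0;
}

Build with something like "cc -o compile_test compile_test.c -lpcap" and run
"./genfilter.sh 2000 | ./compile_test".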