Please test the attached diff with tcpdump and pflogd
filter expressions you commonly use to make sure it
does not break anything.

This is needed for Henning's work on pflog.


Instructions:

1. Apply the patch under /usr/src/lib/libpcap,
2. re-build libpcap and install it.
3. restart pflogd

The patch changes the way BPF code is generated from
filter expressions for the pflog interface, to allow
for variable-sized pflog headers.


Things to test for:

1. Run your favorite filter expressions on both
    pflog and non-pflog interfaces, and make sure
    the packets are filtered correctly.

2. Make sure pflogd still generates correct logs,
    especially if you are using a filter expression.

3. Try complex filter expressions on the pflog interface
    and see if they still fit the memory (there is an
    upper bound to the number of instructions, and this
    diff increases the size of the generated code).

4. dump the result of some tcpdump filter code before
    and after applying this patch for *non-pflog*
    interfaces. There should be *no change*.

You can use '-d' switch to tcpdump to dump the filter code.
You can use '-O' to disable optimization.

   Example: tcpdump -i em0 -d host 1.2.3.4 and port 80


Please let me know about your test results,
what you tested, and the architecture you tested on.



Background:

Years ago, when designing the new pflog header,
we assumed that the header would have to be
extended in the future. We did not want to switch
DLT numbers again, so we added a 'size' field
which also doubled as a 'version' field.

This would allow us to add fields to the pflog header
in the future and still be backwards compatible.

Unfortunately, our plans were foiled by the
mess that was libpcap. The bpf filtering language
and filter code generator in libpcap was designed
with the assumption that the link layer has a
fixed size (this is also the reason why there are
two 802_11 header types for WiFi).

Now that Henning has some real need to add to the
pflog header, he asked me to revisit that code and
try to get bpf filtering work on variable sized
pflog headers.



Technical details:

BPF uses a very simple language for testing packets
against a filter language. The filter expression
you specify in tcpdump is parsed and assembled into
a bytecode that the kernel can use to match packets.

Each interface has a specific link-type (DLT_EN10MB
for ethernet, DLT_PFLOG for pflog etc.) and each
link type has a header infront of the packet itself.

The code generation itself is done in libpcap(4)
for simplicity, the size of the link-header for
each link-type is hardcoded into the code generator.

The diff modifies the code generator to handle
variable-sized link-headers. For pflog, which has
the header length in the header itself, the generator
generates additional BPF code to parse the pflog
link-header to determine the size of the link header
instead of assuming a fixed size.

The two important offsets based on the link header
are 1) the start of the link payload (eg. IP header)
and 2) the start of the protocol header (eg. TCP header)
these values require a number of BPF instructions to
compute, and used extensively by most filter constructs.
To prevent the code size from increasing too much, the
offsets are computed once, at the start, and the results
are placed in 'registers'.

Let me know if you have any questions.

Thanks,

Can
Index: gencode.c
===================================================================
RCS file: /cvs/src/lib/libpcap/gencode.c,v
retrieving revision 1.33
diff -u -p -r1.33 gencode.c
--- gencode.c   26 Jun 2010 16:47:07 -0000      1.33
+++ gencode.c   1 Jul 2010 04:51:46 -0000
@@ -99,6 +99,13 @@ static void free_reg(int);
 
 static struct block *root;
 
+/* initialization code used for variable link header */
+static struct slist *init_code = NULL;
+
+/* Flags and registers for variable link type handling */
+static int variable_nl;
+static int nl_reg, iphl_reg;
+
 /*
  * We divy out chunks of memory rather than call malloc each time so
  * we don't have to worry about leaking memory.  It's probably
@@ -126,7 +133,9 @@ static void backpatch(struct block *, st
 static void merge(struct block *, struct block *);
 static struct block *gen_cmp(u_int, u_int, bpf_int32);
 static struct block *gen_cmp_gt(u_int, u_int, bpf_int32);
+static struct block *gen_cmp_nl(u_int, u_int, bpf_int32);
 static struct block *gen_mcmp(u_int, u_int, bpf_int32, bpf_u_int32);
+static struct block *gen_mcmp_nl(u_int, u_int, bpf_int32, bpf_u_int32);
 static struct block *gen_bcmp(u_int, u_int, const u_char *);
 static struct block *gen_uncond(int);
 static __inline struct block *gen_true(void);
@@ -296,6 +305,12 @@ pcap_compile(pcap_t *p, struct bpf_progr
        if (root == NULL)
                root = gen_retblk(snaplen);
 
+       /* prepend initialization code to root */
+       if (init_code != NULL && root != NULL) {
+               sappend(init_code, root->stmts);
+               root->stmts = init_code;
+               init_code = NULL;
+       }
        if (optimize && !no_optimize) {
                bpf_optimize(&root);
                if (root == NULL ||
@@ -344,6 +359,12 @@ pcap_compile_nopcap(int snaplen_arg, int
        if (root == NULL)
                root = gen_retblk(snaplen_arg);
 
+       /* prepend initialization code to root */
+       if (init_code != NULL && root != NULL) {
+               sappend(init_code, root->stmts);
+               root->stmts = init_code;
+               init_code = NULL;
+       }
        if (optimize) {
                bpf_optimize(&root);
                if (root == NULL ||
@@ -421,6 +442,15 @@ finish_parse(p)
        p->sense = !p->sense;
        backpatch(p, gen_retblk(0));
        root = p->head;
+
+       if (iphl_reg != -1) {
+               free_reg(iphl_reg);
+               iphl_reg = -1;
+       }
+       if (nl_reg != -1) {
+               free_reg(nl_reg);
+               nl_reg = -1;
+       }
 }
 
 void
@@ -507,6 +537,23 @@ gen_mcmp(offset, size, v, mask)
 }
 
 static struct block *
+gen_mcmp_nl(offset, size, v, mask)
+       u_int offset, size;
+       bpf_int32 v;
+       bpf_u_int32 mask;
+{
+       struct block *b = gen_cmp_nl(offset, size, v);
+       struct slist *s;
+
+       if (mask != 0xffffffff) {
+               s = new_stmt(BPF_ALU|BPF_AND|BPF_K);
+               s->s.k = mask;
+               b->stmts->next = s;
+       }
+       return b;
+}
+
+static struct block *
 gen_bcmp(offset, size, v)
        register u_int offset, size;
        register const u_char *v;
@@ -555,11 +602,83 @@ static u_int off_nl_nosnap;
 
 static int linktype;
 
+static struct slist *
+nl2X_stmt(void)
+{
+       struct slist *s, *tmp;
+
+       if (nl_reg == -1) {
+               switch (linktype) {
+               case DLT_PFLOG:
+                       /* The pflog header contains PFLOG_REAL_HDRLEN
+                          which does NOT include the padding. Round
+                          up to the nearest dword boundary */
+                       s = new_stmt(BPF_LD|BPF_B|BPF_ABS);
+                       s->s.k = 0;
+
+                       tmp = new_stmt(BPF_ALU|BPF_ADD|BPF_K);
+                       tmp->s.k = 3;
+                       sappend(s, tmp);
+
+                       tmp = new_stmt(BPF_ALU|BPF_AND|BPF_K);
+                       tmp->s.k = 0xf6;
+                       sappend(s, tmp);
+
+                       nl_reg = alloc_reg();
+                       tmp = new_stmt(BPF_ST);
+                       tmp->s.k = nl_reg;
+                       sappend(s, tmp);
+
+                       break;
+               default:
+                       bpf_error("Unknown header size for link type 0x%x",
+                                 linktype);
+               }
+
+               if (init_code == NULL)
+                       init_code = s;
+               else
+                       sappend(init_code, s);
+       }
+
+       s = new_stmt(BPF_LDX|BPF_MEM);
+       s->s.k = nl_reg;
+
+       return s;
+}
+
+
+static struct block *
+gen_cmp_nl(offset, size, v)
+       u_int offset, size;
+       bpf_int32 v;
+{
+       struct slist *s, *tmp;
+       struct block *b;
+
+       if (variable_nl) {
+               s = nl2X_stmt();
+               tmp = new_stmt(BPF_LD|BPF_IND|size);
+               tmp->s.k = offset;
+               sappend(s, tmp);
+       } else {
+               s = new_stmt(BPF_LD|BPF_ABS|size);
+               s->s.k = offset + off_nl;
+       }
+       b = new_block(JMP(BPF_JEQ));
+       b->stmts = s;
+       b->s.k = v;
+
+       return b;
+}
+
 static void
 init_linktype(type)
        int type;
 {
        linktype = type;
+       init_code = NULL;
+       nl_reg = iphl_reg = -1;
 
        switch (type) {
 
@@ -660,8 +779,8 @@ init_linktype(type)
 
        case DLT_PFLOG:
                off_linktype = 0;
-               /* XXX read from header? */
-               off_nl = PFLOG_HDRLEN;
+               variable_nl = 1;
+               off_nl = 0;
                return;
 
        case DLT_PFSYNC:
@@ -849,7 +968,7 @@ gen_hostop(addr, mask, dir, proto, src_o
                    linktype);
        }
        b0 = gen_linktype(proto);
-       b1 = gen_mcmp(offset, BPF_W, (bpf_int32)addr, mask);
+       b1 = gen_mcmp_nl(offset, BPF_W, (bpf_int32)addr, mask);
        gen_and(b0, b1);
        return b1;
 }
@@ -896,12 +1015,12 @@ gen_hostop6(addr, mask, dir, proto, src_
        /* this order is important */
        a = (u_int32_t *)addr;
        m = (u_int32_t *)mask;
-       b1 = gen_mcmp(offset + 12, BPF_W, ntohl(a[3]), ntohl(m[3]));
-       b0 = gen_mcmp(offset + 8, BPF_W, ntohl(a[2]), ntohl(m[2]));
+       b1 = gen_mcmp_nl(offset + 12, BPF_W, ntohl(a[3]), ntohl(m[3]));
+       b0 = gen_mcmp_nl(offset + 8, BPF_W, ntohl(a[2]), ntohl(m[2]));
        gen_and(b0, b1);
-       b0 = gen_mcmp(offset + 4, BPF_W, ntohl(a[1]), ntohl(m[1]));
+       b0 = gen_mcmp_nl(offset + 4, BPF_W, ntohl(a[1]), ntohl(m[1]));
        gen_and(b0, b1);
-       b0 = gen_mcmp(offset + 0, BPF_W, ntohl(a[0]), ntohl(m[0]));
+       b0 = gen_mcmp_nl(offset + 0, BPF_W, ntohl(a[0]), ntohl(m[0]));
        gen_and(b0, b1);
        b0 = gen_linktype(proto);
        gen_and(b0, b1);
@@ -1047,26 +1166,26 @@ gen_dnhostop(addr, dir, base_off)
        }
        b0 = gen_linktype(ETHERTYPE_DN);
        /* Check for pad = 1, long header case */
-       tmp = gen_mcmp(base_off + 2, BPF_H,
-           (bpf_int32)ntohs(0x0681), (bpf_int32)ntohs(0x07FF));
-       b1 = gen_cmp(base_off + 2 + 1 + offset_lh,
+       tmp = gen_mcmp_nl(base_off + 2, BPF_H,
+                      (bpf_int32)ntohs(0x0681), (bpf_int32)ntohs(0x07FF));
+       b1 = gen_cmp_nl(base_off + 2 + 1 + offset_lh,
            BPF_H, (bpf_int32)ntohs(addr));
        gen_and(tmp, b1);
        /* Check for pad = 0, long header case */
-       tmp = gen_mcmp(base_off + 2, BPF_B, (bpf_int32)0x06, (bpf_int32)0x7);
-       b2 = gen_cmp(base_off + 2 + offset_lh, BPF_H, (bpf_int32)ntohs(addr));
+       tmp = gen_mcmp_nl(base_off + 2, BPF_B, (bpf_int32)0x06, (bpf_int32)0x7);
+       b2 = gen_cmp_nl(base_off + 2 + offset_lh, BPF_H, 
(bpf_int32)ntohs(addr));
        gen_and(tmp, b2);
        gen_or(b2, b1);
        /* Check for pad = 1, short header case */
-       tmp = gen_mcmp(base_off + 2, BPF_H,
-           (bpf_int32)ntohs(0x0281), (bpf_int32)ntohs(0x07FF));
-       b2 = gen_cmp(base_off + 2 + 1 + offset_sh,
+       tmp = gen_mcmp_nl(base_off + 2, BPF_H,
+                      (bpf_int32)ntohs(0x0281), (bpf_int32)ntohs(0x07FF));
+       b2 = gen_cmp_nl(base_off + 2 + 1 + offset_sh,
            BPF_H, (bpf_int32)ntohs(addr));
        gen_and(tmp, b2);
        gen_or(b2, b1);
        /* Check for pad = 0, short header case */
-       tmp = gen_mcmp(base_off + 2, BPF_B, (bpf_int32)0x02, (bpf_int32)0x7);
-       b2 = gen_cmp(base_off + 2 + offset_sh, BPF_H, (bpf_int32)ntohs(addr));
+       tmp = gen_mcmp_nl(base_off + 2, BPF_B, (bpf_int32)0x02, (bpf_int32)0x7);
+       b2 = gen_cmp_nl(base_off + 2 + offset_sh, BPF_H, 
(bpf_int32)ntohs(addr));
        gen_and(tmp, b2);
        gen_or(b2, b1);
 
@@ -1096,15 +1215,15 @@ gen_host(addr, mask, proto, dir)
 
        case Q_IP:
                return gen_hostop(addr, mask, dir, ETHERTYPE_IP,
-                                 off_nl + 12, off_nl + 16);
+                                 12, 16);
 
        case Q_RARP:
                return gen_hostop(addr, mask, dir, ETHERTYPE_REVARP,
-                                 off_nl + 14, off_nl + 24);
+                                 14, 24);
 
        case Q_ARP:
                return gen_hostop(addr, mask, dir, ETHERTYPE_ARP,
-                                 off_nl + 14, off_nl + 24);
+                                 14, 24);
 
        case Q_TCP:
                bpf_error("'tcp' modifier applied to host");
@@ -1131,7 +1250,7 @@ gen_host(addr, mask, proto, dir)
                bpf_error("ATALK host filtering not implemented");
 
        case Q_DECNET:
-               return gen_dnhostop(addr, dir, off_nl);
+               return gen_dnhostop(addr, dir, 0);
 
        case Q_SCA:
                bpf_error("SCA host filtering not implemented");
@@ -1229,7 +1348,7 @@ gen_host6(addr, mask, proto, dir)
 
        case Q_IPV6:
                return gen_hostop6(addr, mask, dir, ETHERTYPE_IPV6,
-                                 off_nl + 8, off_nl + 24);
+                                  8, 24);
 
        case Q_ICMPV6:
                bpf_error("'icmp6' modifier applied to host");
@@ -1430,12 +1549,19 @@ gen_proto_abbrev(proto)
 static struct block *
 gen_ipfrag()
 {
-       struct slist *s;
+       struct slist *s, *tmp;
        struct block *b;
 
        /* not ip frag */
-       s = new_stmt(BPF_LD|BPF_H|BPF_ABS);
-       s->s.k = off_nl + 6;
+       if (variable_nl) {
+               s = nl2X_stmt();
+               tmp = new_stmt(BPF_LD|BPF_H|BPF_IND);
+               tmp->s.k = 6;
+               sappend(s, tmp);
+       } else {
+               s = new_stmt(BPF_LD|BPF_H|BPF_ABS);
+               s->s.k = off_nl + 6;
+       }
        b = new_block(JMP(BPF_JSET));
        b->s.k = 0x1fff;
        b->stmts = s;
@@ -1444,19 +1570,67 @@ gen_ipfrag()
        return b;
 }
 
+static struct slist *
+iphl_to_x(void)
+{
+       struct slist *s, *tmp;
+
+       /* XXX clobbers A if variable_nl*/
+       if (variable_nl) {
+               if (iphl_reg == -1) {
+                       /* X <- nl_off */
+                       s = nl2X_stmt();
+
+                       /* A = p[X+0] */
+                       tmp = new_stmt(BPF_LD|BPF_B|BPF_IND);
+                       tmp->s.k = 0;
+                       sappend(s, tmp);
+
+                       /* A = A & 0x0f */
+                       tmp = new_stmt(BPF_ALU|BPF_AND|BPF_K);
+                       tmp->s.k = 0x0f;
+                       sappend(s, tmp);
+
+                       /* A = A << 2 */
+                       tmp = new_stmt(BPF_ALU|BPF_LSH|BPF_K);
+                       tmp->s.k = 2;
+                       sappend(s, tmp);
+
+                       /* A = A + X (add off_nl again to compansate) */
+                       sappend(s, new_stmt(BPF_ALU|BPF_ADD|BPF_X));
+                       
+                       /* MEM[iphl_reg] = A */
+                       iphl_reg = alloc_reg();
+                       tmp = new_stmt(BPF_ST);
+                       tmp->s.k = iphl_reg;
+                       sappend(s, tmp);
+
+                       sappend(init_code, s);
+               }
+               s = new_stmt(BPF_LDX|BPF_MEM);
+               s->s.k = iphl_reg;
+
+       } else {
+               s = new_stmt(BPF_LDX|BPF_MSH|BPF_B);
+               s->s.k = off_nl;
+       }
+
+       return s;
+}
+
 static struct block *
 gen_portatom(off, v)
        int off;
        bpf_int32 v;
 {
-       struct slist *s;
+       struct slist *s, *tmp;
        struct block *b;
 
-       s = new_stmt(BPF_LDX|BPF_MSH|BPF_B);
-       s->s.k = off_nl;
+       s = iphl_to_x();
 
-       s->next = new_stmt(BPF_LD|BPF_IND|BPF_H);
-       s->next->s.k = off_nl + off;
+       tmp = new_stmt(BPF_LD|BPF_IND|BPF_H);
+       tmp->s.k = off_nl + off;        /* off_nl == 0 if variable_nl */
+       sappend(s, tmp);
 
        b = new_block(JMP(BPF_JEQ));
        b->stmts = s;
@@ -1471,7 +1645,7 @@ gen_portatom6(off, v)
        int off;
        bpf_int32 v;
 {
-       return gen_cmp(off_nl + 40 + off, BPF_H, v);
+       return gen_cmp_nl(40 + off, BPF_H, v);
 }
 #endif/*INET6*/
 
@@ -1482,7 +1656,7 @@ gen_portop(port, proto, dir)
        struct block *b0, *b1, *tmp;
 
        /* ip proto 'proto' */
-       tmp = gen_cmp(off_nl + 9, BPF_B, (bpf_int32)proto);
+       tmp = gen_cmp_nl(9, BPF_B, (bpf_int32)proto);
        b0 = gen_ipfrag();
        gen_and(tmp, b0);
 
@@ -1554,7 +1728,7 @@ gen_portop6(port, proto, dir)
        struct block *b0, *b1, *tmp;
 
        /* ip proto 'proto' */
-       b0 = gen_cmp(off_nl + 6, BPF_B, (bpf_int32)proto);
+       b0 = gen_cmp_nl(6, BPF_B, (bpf_int32)proto);
 
        switch (dir) {
        case Q_SRC:
@@ -1667,6 +1841,11 @@ gen_protochain(v, proto, dir)
        memset(s, 0, sizeof(s));
        fix2 = fix3 = fix4 = fix5 = 0;
 
+       if (variable_nl) {
+               bpf_error("'gen_protochain' not supported for variable DLTs");
+               /*NOTREACHED*/
+       }
+
        switch (proto) {
        case Q_IP:
        case Q_IPV6:
@@ -1966,7 +2145,7 @@ gen_proto(v, proto, dir)
        case Q_IP:
                b0 = gen_linktype(ETHERTYPE_IP);
 #ifndef CHASE_CHAIN
-               b1 = gen_cmp(off_nl + 9, BPF_B, (bpf_int32)v);
+               b1 = gen_cmp_nl(9, BPF_B, (bpf_int32)v);
 #else
                b1 = gen_protochain(v, Q_IP);
 #endif
@@ -2040,7 +2219,7 @@ gen_proto(v, proto, dir)
        case Q_IPV6:
                b0 = gen_linktype(ETHERTYPE_IPV6);
 #ifndef CHASE_CHAIN
-               b1 = gen_cmp(off_nl + 6, BPF_B, (bpf_int32)v);
+               b1 = gen_cmp_nl(6, BPF_B, (bpf_int32)v);
 #else
                b1 = gen_protochain(v, Q_IPV6);
 #endif
@@ -2560,9 +2739,16 @@ gen_load(proto, index, size)
        case Q_IPV6:
 #endif
                /* XXX Note that we assume a fixed link header here. */
-               s = xfer_to_x(index);
+               if (variable_nl) {
+                       s = nl2X_stmt();
+                       sappend(s, xfer_to_a(index));
+                       sappend(s, new_stmt(BPF_ALU|BPF_ADD|BPF_X));
+                       sappend(s, new_stmt(BPF_MISC|BPF_TAX));
+               } else {
+                       s = xfer_to_x(index);
+               }
                tmp = new_stmt(BPF_LD|BPF_IND|size);
-               tmp->s.k = off_nl;
+               tmp->s.k = off_nl;      /* off_nl == 0 for variable_nl */
                sappend(s, tmp);
                sappend(index->s, s);
 
@@ -2578,13 +2764,12 @@ gen_load(proto, index, size)
        case Q_IGMP:
        case Q_IGRP:
        case Q_PIM:
-               s = new_stmt(BPF_LDX|BPF_MSH|BPF_B);
-               s->s.k = off_nl;
+               s = iphl_to_x();
                sappend(s, xfer_to_a(index));
                sappend(s, new_stmt(BPF_ALU|BPF_ADD|BPF_X));
                sappend(s, new_stmt(BPF_MISC|BPF_TAX));
                sappend(s, tmp = new_stmt(BPF_LD|BPF_IND|size));
-               tmp->s.k = off_nl;
+               tmp->s.k = off_nl;      /* off_nl is 0 if variable_nl */
                sappend(index->s, s);
 
                gen_and(gen_proto_abbrev(proto), b = gen_ipfrag());
@@ -2874,8 +3059,8 @@ gen_broadcast(proto)
        case Q_IP:
                b0 = gen_linktype(ETHERTYPE_IP);
                hostmask = ~netmask;
-               b1 = gen_mcmp(off_nl + 16, BPF_W, (bpf_int32)0, hostmask);
-               b2 = gen_mcmp(off_nl + 16, BPF_W,
+               b1 = gen_mcmp_nl(16, BPF_W, (bpf_int32)0, hostmask);
+               b2 = gen_mcmp_nl(16, BPF_W,
                              (bpf_int32)(~0 & hostmask), hostmask);
                gen_or(b1, b2);
                gen_and(b0, b2);
@@ -2920,7 +3105,7 @@ gen_multicast(proto)
 
        case Q_IP:
                b0 = gen_linktype(ETHERTYPE_IP);
-               b1 = gen_cmp(off_nl + 16, BPF_B, (bpf_int32)224);
+               b1 = gen_cmp_nl(16, BPF_B, (bpf_int32)224);
                b1->s.code = JMP(BPF_JGE);
                gen_and(b0, b1);
                return b1;
@@ -2928,7 +3113,7 @@ gen_multicast(proto)
 #ifdef INET6
        case Q_IPV6:
                b0 = gen_linktype(ETHERTYPE_IPV6);
-               b1 = gen_cmp(off_nl + 24, BPF_B, (bpf_int32)255);
+               b1 = gen_cmp_nl(24, BPF_B, (bpf_int32)255);
                gen_and(b0, b1);
                return b1;
 #endif /* INET6 */
@@ -3163,6 +3348,11 @@ gen_vlan(vlan_num)
        int vlan_num;
 {
        struct  block   *b0;
+
+       if (variable_nl) {
+               bpf_error("'vlan' not supported for variable DLTs");
+               /*NOTREACHED*/
+       }
 
        /*
         * Change the offsets to point to the type and data fields within

Reply via email to