On Mon, Mar 04, 2013 at 11:37:32PM -0500, Lawrence Teo wrote:
> Brief background: divert(4) sockets can be used to send packets to a
> userspace program.  The program can inspect the packets and decide to
> either reinject them back into the kernel or drop them.
> 
> According to the divert(4) man page, "The packets' checksums are
> recalculated upon reinjection."  This makes sense, because the userspace
> program could have modified the packet.
> 
> However, in my tests, I found that the checksums are not actually
> recalculated upon reinjection.  I ran into this bug when trying to use
> divert-packet PF rules together with nat-to and rdr-to, e.g.:
> 
>     match out on em5 inet nat-to (em5:0)
>     pass out on em5 proto tcp to port 80 divert-packet port 700
>     pass in on em5 proto tcp to port 22 divert-packet port 700 rdr-to 
> 192.168.30.8
> 
> With the above rules, inbound packets would drop and "netstat -p ip -s"
> shows that "bad header checksums" consistently increments by one for
> every inbound packet.
> 
> The diff below fixes this bug by making divert(4) recalculate the IP(v4)
> and TCP/UDP/ICMP/ICMPv6 checksums of reinjected packets on both IPv4 and
> IPv6 (done with a ton of help and feedback from blambert@ who reviewed
> many versions of this diff, thank you!).
> 
> If you are using divert(4), could you please test this diff to ensure
> that it does not break your setup?  Note that this diff only applies to
> divert-packet PF rules, not divert-to/divert-reply.
> 
> I am also looking for feedback from developers to review the approach
> and code that was used to fix this bug.

This revised diff uses ip6_lasthdr() to walk the IPv6 header chain,
which makes the diff a lot shorter and simpler.

The IPv4 part remains unchanged from the original diff that I sent last
week.

Thank you to those who have tested so far; more tests and feedback
welcome!

Lawrence


Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.10
diff -u -p -r1.10 ip_divert.c
--- netinet/ip_divert.c 21 Oct 2012 13:06:03 -0000      1.10
+++ netinet/ip_divert.c 12 Mar 2013 03:50:15 -0000
@@ -37,6 +37,9 @@
 #include <netinet/ip_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_divert.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
 
 struct inpcbtable      divbtable;
 struct divstat         divstat;
@@ -83,8 +86,12 @@ divert_output(struct mbuf *m, ...)
        struct sockaddr_in *sin;
        struct socket *so;
        struct ifaddr *ifa;
-       int s, error = 0;
+       int s, error = 0, p_hdrlen = 0;
        va_list ap;
+       struct ip *ip;
+       u_int16_t off, csum = 0;
+       u_int8_t nxt;
+       size_t p_off = 0;
 
        va_start(ap, m);
        inp = va_arg(ap, struct inpcb *);
@@ -102,15 +109,68 @@ divert_output(struct mbuf *m, ...)
        sin = mtod(nam, struct sockaddr_in *);
        so = inp->inp_socket;
 
+       /* Do basic sanity checks. */
+       if (m->m_pkthdr.len < sizeof(struct ip))
+               goto fail;
+       if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
+               /* m_pullup() has freed the mbuf, so just return. */
+               divstat.divs_errors++;
+               return (ENOBUFS);
+       }
+       ip = mtod(m, struct ip *);
+       if (ip->ip_v != IPVERSION)
+               goto fail;
+       off = ip->ip_hl << 2;
+       if (off < sizeof(struct ip) || ntohs(ip->ip_len) < off ||
+           m->m_pkthdr.len < ntohs(ip->ip_len))
+               goto fail;
+
+       /*
+        * Recalculate IP and protocol checksums since the userspace application
+        * may have modified the packet prior to reinjection.
+        */
+       ip->ip_sum = 0;
+       ip->ip_sum = in_cksum(m, off);
+       nxt = ip->ip_p;
+       switch (ip->ip_p) {
+       case IPPROTO_TCP:
+               p_hdrlen = sizeof(struct tcphdr);
+               p_off = offsetof(struct tcphdr, th_sum);
+               break;
+       case IPPROTO_UDP:
+               p_hdrlen = sizeof(struct udphdr);
+               p_off = offsetof(struct udphdr, uh_sum);
+               break;
+       case IPPROTO_ICMP:
+               p_hdrlen = sizeof(struct icmp);
+               p_off = offsetof(struct icmp, icmp_cksum);
+               nxt = 0;
+               break;
+       default:
+               /* nothing */
+               break;
+       }
+       if (p_hdrlen) {
+               if (m->m_pkthdr.len < off + p_hdrlen)
+                       goto fail;
+
+               if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum, 
M_NOWAIT)))
+                       goto fail;
+               csum = in4_cksum(m, nxt, off, m->m_pkthdr.len - off);
+               if (ip->ip_p == IPPROTO_UDP && csum == 0)
+                       csum = 0xffff;
+               if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum, 
M_NOWAIT)))
+                       goto fail;
+       }
+
        m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
 
        if (sin->sin_addr.s_addr != INADDR_ANY) {
                ipaddr.sin_addr = sin->sin_addr;
                ifa = ifa_ifwithaddr(sintosa(&ipaddr), m->m_pkthdr.rdomain);
                if (ifa == NULL) {
-                       divstat.divs_errors++;
-                       m_freem(m);
-                       return (EADDRNOTAVAIL);
+                       error = EADDRNOTAVAIL;
+                       goto fail;
                }
                m->m_pkthdr.rcvif = ifa->ifa_ifp;
 
@@ -131,6 +191,11 @@ divert_output(struct mbuf *m, ...)
 
        divstat.divs_opackets++;
        return (error);
+
+fail:
+       m_freem(m);
+       divstat.divs_errors++;
+       return (error ? error : EINVAL);
 }
 
 int
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.8
diff -u -p -r1.8 ip6_divert.c
--- netinet6/ip6_divert.c       6 Nov 2012 12:32:42 -0000       1.8
+++ netinet6/ip6_divert.c       12 Mar 2013 15:36:52 -0000
@@ -38,8 +38,10 @@
 #include <netinet/in_pcb.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
-#include <netinet6/in6_var.h>
 #include <netinet6/ip6_divert.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/icmp6.h>
 
 struct inpcbtable      divb6table;
 struct div6stat        div6stat;
@@ -88,8 +90,11 @@ divert6_output(struct mbuf *m, ...)
        struct sockaddr_in6 *sin6;
        struct socket *so;
        struct ifaddr *ifa;
-       int s, error = 0;
+       int s, error = 0, p_hdrlen = 0, nxt = 0, off;
        va_list ap;
+       struct ip6_hdr *ip6;
+       u_int16_t csum = 0;
+       size_t p_off = 0;
 
        va_start(ap, m);
        inp = va_arg(ap, struct inpcb *);
@@ -107,15 +112,65 @@ divert6_output(struct mbuf *m, ...)
        sin6 = mtod(nam, struct sockaddr_in6 *);
        so = inp->inp_socket;
 
+       /* Do basic sanity checks. */
+       if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
+               goto fail;
+       if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
+               /* m_pullup() has freed the mbuf, so just return. */
+               div6stat.divs_errors++;
+               return (ENOBUFS);
+       }
+       ip6 = mtod(m, struct ip6_hdr *);
+       if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
+               goto fail;
+       if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
+               goto fail;
+
+       /*
+        * Recalculate the protocol checksum since the userspace application
+        * may have modified the packet prior to reinjection.
+        */
+       off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
+       if (off < sizeof(struct ip6_hdr))
+               goto fail;
+       switch (nxt) {
+       case IPPROTO_TCP:
+               p_hdrlen = sizeof(struct tcphdr);
+               p_off = offsetof(struct tcphdr, th_sum);
+               break;
+       case IPPROTO_UDP:
+               p_hdrlen = sizeof(struct udphdr);
+               p_off = offsetof(struct udphdr, uh_sum);
+               break;
+       case IPPROTO_ICMPV6:
+               p_hdrlen = sizeof(struct icmp6_hdr);
+               p_off = offsetof(struct icmp6_hdr, icmp6_cksum);
+               break;
+       default:
+               /* nothing */
+               break;
+       }
+       if (p_hdrlen) {
+               if (m->m_pkthdr.len < off + p_hdrlen)
+                       goto fail;
+
+               if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum, 
M_NOWAIT)))
+                       goto fail;
+               csum = in6_cksum(m, nxt, off, m->m_pkthdr.len - off);
+               if (nxt == IPPROTO_UDP && csum == 0)
+                       csum = 0xffff;
+               if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum, 
M_NOWAIT)))
+                       goto fail;
+       }
+
        m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
 
        if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
                ip6addr.sin6_addr = sin6->sin6_addr;
                ifa = ifa_ifwithaddr(sin6tosa(&ip6addr), m->m_pkthdr.rdomain);
                if (ifa == NULL) {
-                       div6stat.divs_errors++;
-                       m_freem(m);
-                       return (EADDRNOTAVAIL);
+                       error = EADDRNOTAVAIL;
+                       goto fail;
                }
                m->m_pkthdr.rcvif = ifa->ifa_ifp;
 
@@ -133,6 +188,11 @@ divert6_output(struct mbuf *m, ...)
 
        div6stat.divs_opackets++;
        return (error);
+
+fail:
+       div6stat.divs_errors++;
+       m_freem(m);
+       return (error ? error : EINVAL);
 }
 
 int

Reply via email to