this implements an ipe(4) driver that supports ip encapsulation largely as per rfc1241.
it is interesting because it has a Flow Id in the encapsulation header which identifies distinct ip tunnels between the same endpoints. this allows you to carry traffic for different routing domains over ip between the same machines for example. the only other way to do this currently is with vxlan, but it is relatively complicated compared to this code. another interesting feature of the encap header is that it has no field to identify the type of payload. the rfc was written when there was no such thing as ipv6, so it implicitly assumes everything is ipv4. the driver implements carrying ipv6 by looking at the first nibble in the payload to differentiate between v4 and v6 traffic. because of this you can carry both ipv4 and ipv6 traffic over ipe(4), which in turn can be carried by either address family. thoughts? Index: sys/net/if_ipe.c =================================================================== RCS file: sys/net/if_ipe.c diff -N sys/net/if_ipe.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/if_ipe.c 24 Jan 2018 02:51:39 -0000 @@ -0,0 +1,781 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2018 David Gwynne <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "bpfilter.h" +#include "pf.h" + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/timeout.h> +#include <sys/tree.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/if_var.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/ip_var.h> + +#ifdef INET6 +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> +#endif + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#if NPF > 0 +#include <net/pfvar.h> +#endif + +#include <net/if_ipe.h> + +struct ipe_tunnel { + struct ip_encap_header ipe_header; + unsigned int ipe_rtableid; + uint32_t ipe_src[4]; + uint32_t ipe_dst[4]; + sa_family_t ipe_af; +}; + +struct ipe_softc { + struct ifnet sc_if; + + struct ipe_tunnel sc_tunnel; + uint8_t sc_ttl; + + RBT_ENTRY(ipe_softc) + sc_entry; +}; + +static int ipe_clone_create(struct if_clone *, int); +static int ipe_clone_destroy(struct ifnet *); + +static struct if_clone ipe_cloner = IF_CLONE_INITIALIZER("ipe", + ipe_clone_create, ipe_clone_destroy); + +RBT_HEAD(ipe_tree, ipe_softc); + +static inline int + ipe_cmp(const struct ipe_softc *, const struct ipe_softc *); + +RBT_PROTOTYPE(ipe_tree, ipe_softc, sc_entry, ipe_cmp); + +struct ipe_tree ipe_softcs = RBT_INITIALIZER(); + +#define IPEHDR (MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) + \ + sizeof(struct ip_encap_header)) +#define IPEMTU (1500 - IPEHDR) + +static int ipe_ioctl(struct ifnet *, u_long, caddr_t); +static int ipe_up(struct ipe_softc *); +static int ipe_down(struct ipe_softc *); +static int ipe_set_vnetid(struct ipe_softc *, struct ifreq *); +static int ipe_get_vnetid(struct ipe_softc *, struct ifreq *); +static int ipe_set_tunnel(struct ipe_softc *, struct if_laddrreq *); +static int ipe_get_tunnel(struct ipe_softc *, struct if_laddrreq *); +static int ipe_del_tunnel(struct ipe_softc *); + +static 
int ipe_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); +static void ipe_start(struct ifnet *); +static int ipe_encap(struct ipe_softc *, struct mbuf *); +static int ipe_input(struct ipe_softc *, struct mbuf **, int); +static int ipe_encap4(struct ipe_softc *, struct mbuf *); +#ifdef INET6 +static int ipe_encap6(struct ipe_softc *, struct mbuf *); +#endif + +/* + * let's begin + */ + +void +ipeattach(int n) +{ + if_clone_attach(&ipe_cloner); +} + +int +ipe_clone_create(struct if_clone *ifc, int unit) +{ + struct ipe_softc *sc; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!sc) + return (ENOMEM); + + sc->sc_tunnel.ipe_rtableid = 0; + sc->sc_tunnel.ipe_af = AF_UNSPEC; + sc->sc_tunnel.ipe_header.ieh_flags = htons(IPE_FLAGS); + sc->sc_tunnel.ipe_header.ieh_cksum = 0; + sc->sc_tunnel.ipe_header.ieh_flowid = 0; + sc->sc_ttl = ip_defttl; + + snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d", + ifc->ifc_name, unit); + sc->sc_if.if_softc = sc; + sc->sc_if.if_type = IFT_TUNNEL; + sc->sc_if.if_addrlen = 0; + sc->sc_if.if_mtu = IPEMTU; + sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST; + sc->sc_if.if_output = ipe_output; + sc->sc_if.if_start = ipe_start; + sc->sc_if.if_ioctl = ipe_ioctl; + sc->sc_if.if_rtrequest = p2p_rtrequest; + + if_attach(&sc->sc_if); + if_alloc_sadl(&sc->sc_if); + +#if NBPFILTER > 0 + bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(uint32_t)); +#endif + + return (0); +} + +int +ipe_clone_destroy(struct ifnet *ifp) +{ + struct ipe_softc *sc = ifp->if_softc; + + if_detach(ifp); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +/* + * do a checksum of a header. + * + * assumes len is aligned correctly, and not an odd number of bytes. 
+ */ +static inline uint16_t +ipe_cksum(const void *buf, size_t len) +{ + const uint16_t *p = buf; + uint32_t sum = 0; + + do { + sum += bemtoh16(p++); + } while (len -= 2); + + /* end-around-carry */ + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + return (~sum); +} + +static inline int +ipe_cmp(const struct ipe_softc *a, const struct ipe_softc *b) +{ + return (memcmp(&a->sc_tunnel, &b->sc_tunnel, sizeof(a->sc_tunnel))); +} + +static int +ipe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + struct m_tag *mtag; + int error = 0; + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { + m_freem(m); + error = ENETDOWN; + goto end; + } + + switch (dst->sa_family) { + case AF_INET: + break; +#ifdef INET6 + case AF_INET6: + break; +#endif + default: + m_freem(m); + error = EAFNOSUPPORT; + goto end; + } + + /* Try to limit infinite recursion through misconfiguration. */ + for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; + mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { + if (memcmp(mtag + 1, &ifp->if_index, + sizeof(ifp->if_index)) == 0) { + m_freem(m); + error = EIO; + goto end; + } + } + + mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + error = ENOBUFS; + goto end; + } + memcpy(mtag + 1, &ifp->if_index, sizeof(ifp->if_index)); + m_tag_prepend(m, mtag); + + m->m_pkthdr.ph_family = dst->sa_family; + + error = if_enqueue(ifp, m); + end: + if (error) + ifp->if_oerrors++; + return (error); +} + +static void +ipe_start(struct ifnet *ifp) +{ + struct ipe_softc *sc = ifp->if_softc; + struct mbuf *m; + + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { +#if NBPFILTER > 0 + if (ifp->if_bpf) { + bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, + m, BPF_DIRECTION_OUT); + } +#endif + + if (ipe_encap(sc, m) != 0) + ifp->if_oerrors++; + } +} + +static int +ipe_encap(struct ipe_softc *sc, struct mbuf *m) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct ip_encap_header 
*ieh; + + m = m_prepend(m, sizeof(*ieh), M_DONTWAIT); + if (m == NULL) + return (ENOBUFS); + + ieh = mtod(m, struct ip_encap_header *); + *ieh = tunnel->ipe_header; + + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_rtableid = tunnel->ipe_rtableid; +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + +#ifdef INET6 + if (tunnel->ipe_af == AF_INET6) + return (ipe_encap6(sc, m)); +#endif + + return (ipe_encap4(sc, m)); +} + +static int +ipe_encap4(struct ipe_softc *sc, struct mbuf *m) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct ip *ip; + + m = m_prepend(m, sizeof(*ip), M_DONTWAIT); + if (m == NULL) + return (ENOBUFS); + + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_id = htons(ip_randomid()); + ip->ip_off = 0; + ip->ip_ttl = sc->sc_ttl; + ip->ip_p = IPPROTO_ENCAP; + ip->ip_sum = 0; + ip->ip_src.s_addr = tunnel->ipe_src[0]; + ip->ip_dst.s_addr = tunnel->ipe_dst[0]; + + ip_send(m); + + return (0); +} + +#ifdef INET6 +static int +ipe_encap6(struct ipe_softc *sc, struct mbuf *m) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct ip6_hdr *ip6; + uint16_t len = m->m_pkthdr.len; + + m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); + if (m == NULL) + return (ENOBUFS); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_ENCAP; + ip6->ip6_hlim = sc->sc_ttl; + ip6->ip6_plen = htons(len); + memcpy(&ip6->ip6_src, tunnel->ipe_src, sizeof(ip6->ip6_src)); + memcpy(&ip6->ip6_dst, tunnel->ipe_dst, sizeof(ip6->ip6_dst)); + + ip6_send(m); + + return (0); +} +#endif /* INET6 */ + +int +ipe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct ipe_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch(cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + /* FALLTHROUGH */ + case SIOCSIFFLAGS: + if 
(ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = ipe_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = ipe_down(sc); + } + break; + case SIOCSIFDSTADDR: + break; + case SIOCSIFMTU: + if (ifr->ifr_mtu < 576) { + error = EINVAL; + break; + } + ifp->if_mtu = ifr->ifr_mtu; + break; + case SIOCGIFMTU: + ifr->ifr_mtu = sc->sc_if.if_mtu; + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + break; + + case SIOCSVNETID: + error = ipe_set_vnetid(sc, ifr); + break; + case SIOCGVNETID: + error = ipe_get_vnetid(sc, ifr); + break; + case SIOCSLIFPHYADDR: + error = ipe_set_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCGLIFPHYADDR: + error = ipe_get_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCDIFPHYADDR: + error = ipe_del_tunnel(sc); + break; + + case SIOCSLIFPHYRTABLE: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX || + !rtable_exists(ifr->ifr_rdomainid)) { + error = EINVAL; + break; + } + sc->sc_tunnel.ipe_rtableid = ifr->ifr_rdomainid; + break; + case SIOCGLIFPHYRTABLE: + ifr->ifr_rdomainid = sc->sc_tunnel.ipe_rtableid; + break; + + case SIOCSLIFPHYTTL: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { + error = EINVAL; + break; + } + + /* commit */ + sc->sc_ttl = (uint8_t)ifr->ifr_ttl; + break; + + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)sc->sc_ttl; + break; + + default: + error = ENOTTY; + break; + } + + return (error); +} + +static int +ipe_up(struct ipe_softc *sc) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + uint16_t cksum; + + if (tunnel->ipe_af == AF_UNSPEC) + return (EDESTADDRREQ); + + tunnel->ipe_header.ieh_cksum = 0; + cksum = ipe_cksum(&tunnel->ipe_header, sizeof(&tunnel->ipe_header)); + htobem16(&tunnel->ipe_header.ieh_cksum, cksum); + + NET_ASSERT_LOCKED(); + if 
(RBT_INSERT(ipe_tree, &ipe_softcs, sc) != NULL) + return (EADDRINUSE); + + SET(sc->sc_if.if_flags, IFF_RUNNING); + + return (0); +} + +static int +ipe_down(struct ipe_softc *sc) +{ + NET_ASSERT_LOCKED(); + RBT_REMOVE(ipe_tree, &ipe_softcs, sc); + + CLR(sc->sc_if.if_flags, IFF_RUNNING); + + ifq_barrier(&sc->sc_if.if_snd); + + return (0); +} + +static int +ipe_set_vnetid(struct ipe_softc *sc, struct ifreq *ifr) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffffffff) + return (EINVAL); + + htobem32(&tunnel->ipe_header.ieh_flowid, ifr->ifr_vnetid); + + return (0); +} + +static int +ipe_get_vnetid(struct ipe_softc *sc, struct ifreq *ifr) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + + ifr->ifr_vnetid = bemtoh32(&tunnel->ipe_header.ieh_flowid); + + return (0); +} + +static int +ipe_set_tunnel(struct ipe_softc *sc, struct if_laddrreq *req) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *src4, *dst4; +#ifdef INET6 + struct sockaddr_in6 *src6, *dst6; + struct in6_addr srcin6, dstin6; + int error; +#endif + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* sa_family and sa_len must be equal */ + if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) + return (EINVAL); + + /* validate */ + switch (dst->sa_family) { + case AF_INET: + src4 = (struct sockaddr_in *)src; + if (in_nullhost(src4->sin_addr) || + IN_MULTICAST(src4->sin_addr.s_addr)) + return (EINVAL); + + dst4 = (struct sockaddr_in *)dst; + if (in_nullhost(dst4->sin_addr) || + IN_MULTICAST(dst4->sin_addr.s_addr)) + return (EINVAL); + + /* commit */ + memset(tunnel->ipe_src, 0, sizeof(tunnel->ipe_src)); + memset(tunnel->ipe_dst, 0, sizeof(tunnel->ipe_dst)); + tunnel->ipe_src[0] = src4->sin_addr.s_addr; + tunnel->ipe_dst[0] = 
dst4->sin_addr.s_addr; + tunnel->ipe_af = AF_INET; + + break; +#ifdef INET6 + case AF_INET6: + if (dst->sa_len != sizeof(*src6)) + return (EINVAL); + + src6 = (struct sockaddr_in6 *)src; + if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&src6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope(&srcin6, src6, NULL); + if (error != 0) + return (error); + + dst6 = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope(&dstin6, dst6, NULL); + if (error != 0) + return (error); + + /* commit */ + memcpy(tunnel->ipe_src, &srcin6, sizeof(tunnel->ipe_src)); + memcpy(tunnel->ipe_dst, &dstin6, sizeof(tunnel->ipe_dst)); + tunnel->ipe_af = AF_INET6; + + break; +#endif + default: + return (EAFNOSUPPORT); + } + + return (0); +} + +static int +ipe_get_tunnel(struct ipe_softc *sc, struct if_laddrreq *req) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 /* ifconfig already embeds the scopeid */ + struct sockaddr_in6 *sin6; +#endif + + switch (tunnel->ipe_af) { + case AF_UNSPEC: + return (EADDRNOTAVAIL); + case AF_INET: + sin = (struct sockaddr_in *)src; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = tunnel->ipe_src[0]; + + sin = (struct sockaddr_in *)dst; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = tunnel->ipe_dst[0]; + + break; + +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)src; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)tunnel->ipe_src); + + sin6 = (struct sockaddr_in6 *)dst; + memset(sin6, 0, sizeof(*sin6)); + 
sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)tunnel->ipe_dst); + + break; +#endif + default: + return (EAFNOSUPPORT); + } + + return (0); +} + +static int +ipe_del_tunnel(struct ipe_softc *sc) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + tunnel->ipe_af = AF_UNSPEC; + + return (0); +} + +int +ipe_input4(struct mbuf **mp, int *offp, int type, int af) +{ + struct ipe_softc key; + struct ip *ip; + int iphlen; + + ip = mtod(*mp, struct ip *); + iphlen = ip->ip_hl << 2; + + memset(&key, 0, sizeof(key)); + key.sc_tunnel.ipe_af = AF_INET; + key.sc_tunnel.ipe_src[0] = ip->ip_dst.s_addr; + key.sc_tunnel.ipe_dst[0] = ip->ip_src.s_addr; + + return (ipe_input(&key, mp, iphlen)); +} + +#ifdef INET6 +int +ipe_input6(struct mbuf **mp, int *offp, int type, int af) +{ + struct ipe_softc key; + struct ip6_hdr *ip6; + + ip6 = mtod(*mp, struct ip6_hdr *); + + memset(&key, 0, sizeof(key)); + key.sc_tunnel.ipe_af = AF_INET6; + memcpy(key.sc_tunnel.ipe_src, &ip6->ip6_dst, + sizeof(key.sc_tunnel.ipe_src)); + memcpy(key.sc_tunnel.ipe_dst, &ip6->ip6_src, + sizeof(key.sc_tunnel.ipe_dst)); + + return (ipe_input(&key, mp, sizeof(*ip6))); +} +#endif + +static int +ipe_input(struct ipe_softc *key, struct mbuf **mp, int iphlen) +{ + struct ipe_tunnel *tunnel = &key->sc_tunnel; + struct mbuf *m = *mp; + struct ifnet *ifp; + struct ipe_softc *sc; + struct ip_encap_header *ieh; + caddr_t hdr; + int hlen; + int af; + + hlen = iphlen + sizeof(*ieh); + + m = *mp = m_pullup(m, hlen); + if (m == NULL) + return (IPPROTO_DONE); + + hdr = mtod(m, caddr_t); + ieh = (struct ip_encap_header *)(hdr + iphlen); + + tunnel->ipe_rtableid = m->m_pkthdr.ph_rtableid; + tunnel->ipe_header = *ieh; + + /* NET_ASSERT_READ_LOCKED() */ + sc = RBT_FIND(ipe_tree, &ipe_softcs, key); + if (sc == NULL) + goto drop; + + m_adj(m, hlen); + + if (m->m_len == 0) + goto drop; + + switch 
(*mtod(m, uint8_t *) >> 4) { + case 4: + af = AF_INET; + break; +#ifdef INET6 + case 6: + af = AF_INET6; + break; +#endif + default: + goto drop; + } + + ifp = &sc->sc_if; + + CLR(m->m_flags, M_MCAST|M_BCAST); + SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK); + m->m_pkthdr.ph_ifidx = ifp->if_index; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap_af(ifp->if_bpf, af, m, BPF_DIRECTION_IN); +#endif + + switch (af) { + case AF_INET: + ipv4_input(ifp, m); + break; +#ifdef INET6 + case AF_INET6: + ipv6_input(ifp, m); + break; +#endif + } + + return (IPPROTO_DONE); + +drop: + m_freem(m); + return (IPPROTO_DONE); +} + +RBT_GENERATE(ipe_tree, ipe_softc, sc_entry, ipe_cmp); Index: sys/net/if_ipe.h =================================================================== RCS file: sys/net/if_ipe.h diff -N sys/net/if_ipe.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/if_ipe.h 24 Jan 2018 02:51:39 -0000 @@ -0,0 +1,47 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2018 David Gwynne <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NET_IF_IPE_H +#define _NET_IF_IPE_H + +struct ip_encap_header { + uint16_t ieh_flags; +#define IPE_VERS_MASK 0xf000 +#define IPE_VERS_1 0x1000 +#define IPE_HL_MASK 0x0f00 +#define IPE_HL_SHIFT 8 +#define IPE_MT_MASK 0x00f0 +#define IPE_MT_DATA 0x0010 +#define IPE_MT_ERROR 0x0020 +#define IPE_RC_MASK 0x000f +#define IPE_RC_UNK_FLOWID 0x0001 +#define IPE_RC_ICMP_RETURNED 0x0002 + uint16_t ieh_cksum; + uint32_t ieh_flowid; +}__packed __aligned(4); + +#define IPE_HL (sizeof(struct ip_encap_header) << IPE_HL_SHIFT) +#define IPE_FLAGS (IPE_VERS_1 | IPE_HL | IPE_MT_DATA) + +#ifdef _KERNEL +void ipeattach(int); +int ipe_input4(struct mbuf **, int *, int, int); +int ipe_input6(struct mbuf **, int *, int, int); +#endif /* _KERNEL */ + +#endif /* _NET_IF_IPE_H */ Index: sys/netinet/in_proto.c =================================================================== RCS file: /cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.88 diff -u -p -r1.88 in_proto.c --- sys/netinet/in_proto.c 23 Nov 2017 13:45:46 -0000 1.88 +++ sys/netinet/in_proto.c 24 Jan 2018 02:51:39 -0000 @@ -172,6 +172,11 @@ #include <net/if_etherip.h> #endif +#include "ipe.h" +#if NIPE > 0 +#include <net/if_ipe.h> +#endif + u_char ip_protox[IPPROTO_MAX]; const struct protosw inetsw[] = { @@ -417,6 +422,19 @@ const struct protosw inetsw[] = { .pr_sysctl = etherip_sysctl }, #endif /* NETHERIP */ +#if NIPE > 0 +{ + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_ENCAP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipe_input4, + .pr_ctloutput = rip_ctloutput, + .pr_usrreq = rip_usrreq, + .pr_attach = rip_attach, + .pr_detach = rip_detach, +}, +#endif /* NIPE */ { /* raw wildcard */ .pr_type = SOCK_RAW, Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.100 diff -u -p -r1.100 in6_proto.c --- sys/netinet6/in6_proto.c 23 Nov 2017 13:45:46 -0000 1.100 +++ 
sys/netinet6/in6_proto.c 24 Jan 2018 02:51:39 -0000 @@ -116,6 +116,11 @@ #include <net/if_etherip.h> #endif +#include "ipe.h" +#if NIPE > 0 +#include <net/if_ipe.h> +#endif + /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. */ @@ -313,6 +318,19 @@ const struct protosw inet6sw[] = { .pr_detach = rip6_detach, }, #endif /* NETHERIP */ +#if NIPE > 0 +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_ENCAP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipe_input6, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreq = rip6_usrreq, + .pr_attach = rip6_attach, + .pr_detach = rip6_detach, +}, +#endif /* NIPE */ { /* raw wildcard */ .pr_type = SOCK_RAW, Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.250 diff -u -p -r1.250 GENERIC --- sys/conf/GENERIC 25 Oct 2017 12:38:21 -0000 1.250 +++ sys/conf/GENERIC 24 Jan 2018 02:51:39 -0000 @@ -90,6 +90,7 @@ pseudo-device carp # CARP protocol supp pseudo-device etherip # EtherIP (RFC 3378) pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) pseudo-device gre # GRE encapsulation interface +pseudo-device ipe # RFC 1241 IP encapsulation interface pseudo-device loop # network loopback pseudo-device mpe # MPLS PE interface pseudo-device mpw # MPLS pseudowire support Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.656 diff -u -p -r1.656 files --- sys/conf/files 16 Nov 2017 18:12:27 -0000 1.656 +++ sys/conf/files 24 Jan 2018 02:51:40 -0000 @@ -551,6 +551,7 @@ pseudo-device carp: ifnet, ether pseudo-device sppp: ifnet pseudo-device gif: ifnet pseudo-device gre: ifnet +pseudo-device ipe: ifnet pseudo-device crypto: ifnet pseudo-device trunk: ifnet, ether, ifmedia pseudo-device mpe: ifnet, ether @@ -798,6 +799,7 @@ file net/rtsock.c file net/slcompress.c ppp file net/if_enc.c enc needs-count file net/if_gre.c 
gre needs-count +file net/if_ipe.c ipe needs-count file net/if_trunk.c trunk needs-count file net/trunklacp.c trunk file net/if_mpe.c mpe needs-count Index: share/man/man4/Makefile =================================================================== RCS file: /cvs/src/share/man/man4/Makefile,v retrieving revision 1.663 diff -u -p -r1.663 Makefile --- share/man/man4/Makefile 2 Jan 2018 22:56:01 -0000 1.663 +++ share/man/man4/Makefile 24 Jan 2018 02:51:40 -0000 @@ -31,7 +31,7 @@ MAN= aac.4 ac97.4 acphy.4 acrtc.4 \ hvn.4 hvs.4 hyperv.4 \ iatp.4 ichiic.4 ichwdt.4 icmp.4 icmp6.4 icsphy.4 ifmedia.4 \ iha.4 ihidev.4 iic.4 ikbd.4 ims.4 imt.4 inet.4 inet6.4 inphy.4 iophy.4 \ - ip.4 ip6.4 ipcomp.4 ipgphy.4 ipmi.4 ips.4 ipsec.4 ipw.4 \ + ip.4 ip6.4 ipcomp.4 ipe.4 ipgphy.4 ipmi.4 ips.4 ipsec.4 ipw.4 \ isa.4 isagpio.4 isapnp.4 it.4 itherm.4 iwi.4 iwn.4 iwm.4 \ ix.4 ixgb.4 jmb.4 jme.4 jmphy.4 \ kate.4 km.4 ksyms.4 kue.4 lc.4 lge.4 lii.4 lisa.4 lm.4 \ Index: share/man/man4/ipe.4 =================================================================== RCS file: share/man/man4/ipe.4 diff -N share/man/man4/ipe.4 --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ share/man/man4/ipe.4 24 Jan 2018 02:51:40 -0000 @@ -0,0 +1,172 @@ +.\" $OpenBSD$ +.\" +.\" Copyright (c) 2018 David Gwynne <[email protected]> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt IPE 4 +.Sh NAME +.Nm ipe +.Nd RFC 1241 Internet Encapsulation Protocol network device +.Sh SYNOPSIS +.Cd "pseudo-device ipe" +.Sh DESCRIPTION +The +.Nm +driver provides IP tunnel construction using +A Scheme for an Internet Encapsulation Protocol: Version 1 (RFC 1241). +.Pp +.Nm +datagrams (IP protocol number 98 as per RFC 1700) +are encapsulated into IP using a small encapsulation header. +Different tunnels between the same endpoints are distinguished by a +Flow Identifier in the header. +This protocol according to the RFC only supports encapsulating IPv4 +in IPv4, but the driver also supports IPv6. +.Pp +A +.Nm +interface can be created at runtime using the +.Ic ifconfig Nm Ns Ar N Ic create +command or by setting up a +.Xr hostname.if 5 +configuration file for +.Xr netstart 8 . +.Pp +The MTU is set to 1452 by default. +This may not be an optimal value +depending on the link between the two tunnel endpoints, +but it can be adjusted via +.Xr ifconfig 8 . +.Pp +For correct operation, the route to the tunnel destination must not +go over the interface itself. +This can be implemented by adding a distinct or a more specific +route to the tunnel destination than the hosts or networks routed +via the tunnel interface. +Alternatively, the tunnel traffic may be configured in a separate +routing table to the encapsulated traffic. +.Pp +.Nm +interfaces support the following +.Xr ioctl 2 Ns s +for configuring tunnel options: +.Bl -tag -width indent -offset 3n +.It Dv SIOCSLIFPHYADDR Fa "struct if_laddrreq *" +Set the addresses of the outer IP header. +The addresses may only be configured while the interface is down. 
+.It Dv SIOCGLIFPHYADDR Fa "struct if_laddrreq *" +Get the addresses of the outer IP header. +.It Dv SIOCDIFPHYADDR +Clear the outer IP header addresses. +The addresses may only be cleared while the interface is down. +.It Dv SIOCSVNETID Fa "struct ifreq *" +Set a 32 bit virtual network identifier used as the Flow Identifier +in the IP Encapsulation header. +The virtual network identifier may only be configured while the +interface is down. +.It Dv SIOCGVNETID Fa "struct ifreq *" +Get the virtual network identifier used as the Flow Identifier in +the IP Encapsulation header. +.It Dv SIOCSLIFPHYRTABLE Fa "struct ifreq *" +Set the routing table the encapsulated IP packets operate within. +The routing table may only be configured while the interface is down. +.It Dv SIOCGLIFPHYRTABLE Fa "struct ifreq *" +Get the routing table the encapsulated IP packets operate within. +.It Dv SIOCSLIFPHYTTL Fa "struct ifreq *" +Set the Time-To-Live field in IPv4 encapsulation headers, or the +Hop Limit field in IPv6 encapsulation headers. +.It Dv SIOCGLIFPHYTTL Fa "struct ifreq *" +Get the value used in the Time-To-Live field in an IPv4 encapsulation +header or the Hop Limit field in an IPv6 encapsulation header. 
+.El +.Sh EXAMPLES +Configuration example: +.Bd -literal +Host X --- Host A ----------- IP Encap ------------ Host D --- Host E + \e / + \e / + +------ Host B ------ Host C ------+ +.Ed +.Pp +On Host A +.Pq Ox : +.Bd -literal -offset indent +# route add default B +# ifconfig ipeN create +# ifconfig ipeN tunnel A D +# ifconfig ipeN A D netmask 255.255.255.255 +# route add E D +.Ed +.Pp +On Host D +.Pq Ox : +.Bd -literal -offset indent +# route add default C +# ifconfig ipeN create +# ifconfig ipeN tunnel D A +# ifconfig ipeN D A netmask 255.255.255.255 +# route add D E +.Ed +.Pp +The Flow Identifier may be set using +.Xr ifconfig 8 +and the vnetid argument: +.Bd -literal -offset indent +# ifconfig ipeN vnetid 128 +.Ed +.Pp +The route domain used for the encapsulated traffic may be set using +.Xr ifconfig 8 +and the tunneldomain argument: +.Bd -literal -offset indent +# ifconfig ipeN tunneldomain 1 +.Ed +.Sh SEE ALSO +.Xr inet 4 , +.Xr ip 4 , +.Xr netintro 4 , +.Xr options 4 , +.Xr hostname.if 5 , +.Xr protocols 5 , +.Xr ifconfig 8 , +.Xr netstart 8 +.Sh STANDARDS +.Rs +.%A R. Woodburn +.%A D. Mills +.%D July 1991 +.%R RFC 1241 +.%T A Scheme for an Internet Encapsulation Protocol: Version 1 +.Re +.Pp +.Rs +.%A J. Reynolds +.%A J. Postel +.%D October 1994 +.%R RFC 1700 +.%T Assigned Numbers +.Re +.Sh HISTORY +The +.Nm +driver first appeared in +.Ox 6.3 . +.Sh AUTHORS +.An David Gwynne Aq Mt [email protected] +.Sh CAVEATS +The +.Nm +driver only handles data packets within the protocol, it does not +implement support for error handling as described in the RFC.
