On Fri, Feb 11, 2022 at 03:13:25PM +1000, David Gwynne wrote: > On Fri, Mar 05, 2021 at 05:09:29PM +1000, David Gwynne wrote: > > On Thu, Mar 04, 2021 at 03:36:19PM +1000, David Gwynne wrote: > > > as the subject says, this is a rewrite of vxlan(4). > > > > > > vxlan(4) relies on bridge(4) to implement learning, but i want to be > > > able to remove bridge(4) one day. while working on veb(4), i wrote > > > the guts of a learning bridge implementation that is now used by veb(4), > > > bpe(4), and nvgre(4). that learning bridge code is now also used by > > > vxlan(4). > > > > > > this means that a few of the modes that the manpage talks about are > > > different now. because vxlan doesn't need a bridge for learning, there's > > > no "multicast mode" anymore, it just does "dynamic mode" out of the box > > > when configured with a multicast destination address. there's no > > > multipoint mode now either. > > > > > > another thing that's always bothered me about vxlan(4) is how it occupies > > > the "udp namespace" and how it steals packets from the udp stack. > > > the new code actually creates and binds udp sockets to handle the > > > vxlan packets. this means userland can't collide with a vxlan interface, > > > and you get to see that the port is in use in things like netstat. 
e.g.: > > > > > > dlg@ikkaku ~$ ifconfig vxlan0 > > > vxlan0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500 > > > lladdr fe:e1:ba:d1:17:2a > > > index 11 llprio 3 > > > encap: vnetid none parent aggr0 txprio 0 rxprio outer > > > groups: vxlan > > > tunnel: inet 192.0.2.36 port 4789 --> 239.0.0.1 ttl 1 nodf > > > Addresses (max cache: 100, timeout: 240): > > > inet 100.64.1.36 netmask 0xffffff00 broadcast 100.64.1.255 > > > dlg@ikkaku ~$ netstat -na -f inet -p udp > > > Active Internet connections (including servers) > > > Proto Recv-Q Send-Q Local Address Foreign Address > > > udp 0 0 130.102.96.36.29742 129.250.35.250.123 > > > udp 0 0 130.102.96.36.8965 162.159.200.123.123 > > > udp 0 0 130.102.96.36.13189 162.159.200.1.123 > > > udp 0 0 130.102.96.36.46580 220.158.215.20.123 > > > udp 0 0 130.102.96.36.23109 103.38.121.36.123 > > > udp 0 0 239.0.0.1.4789 *.* > > > udp 0 0 192.0.2.36.4789 *.* > > > > > > i've also added loop prevention, i.e., sending an interface's vxlan > > > packets over itself should fail rather than panic now. > > > > here's an updated diff with a few fixes. > > > > this diff better supports vxlan p2p and multicast vxlan configs that > share a UDP listener.
it doesn't look like anyone (else) cares about vxlan(4), so i'm going to commit this tomorrow unless anyone really objects. > Index: net/if_vxlan.c > =================================================================== > RCS file: /cvs/src/sys/net/if_vxlan.c,v > retrieving revision 1.83 > diff -u -p -r1.83 if_vxlan.c > --- net/if_vxlan.c 10 Jan 2022 14:07:59 -0000 1.83 > +++ net/if_vxlan.c 11 Feb 2022 05:11:13 -0000 > @@ -1,7 +1,7 @@ > -/* $OpenBSD: if_vxlan.c,v 1.83 2022/01/10 14:07:59 jan Exp $ */ > +/* $OpenBSD$ */ > > /* > - * Copyright (c) 2013 Reyk Floeter <r...@openbsd.org> > + * Copyright (c) 2021 David Gwynne <d...@openbsd.org> > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -17,475 +17,781 @@ > */ > > #include "bpfilter.h" > -#include "vxlan.h" > -#include "vlan.h" > #include "pf.h" > -#include "bridge.h" > > #include <sys/param.h> > #include <sys/systm.h> > +#include <sys/kernel.h> > #include <sys/mbuf.h> > #include <sys/socket.h> > -#include <sys/sockio.h> > #include <sys/ioctl.h> > +#include <sys/timeout.h> > +#include <sys/pool.h> > +#include <sys/tree.h> > +#include <sys/refcnt.h> > +#include <sys/smr.h> > + > +#include <sys/socket.h> > +#include <sys/socketvar.h> > > #include <net/if.h> > #include <net/if_var.h> > +#include <net/if_dl.h> > #include <net/if_media.h> > +#include <net/if_types.h> > #include <net/route.h> > - > -#if NBPFILTER > 0 > -#include <net/bpf.h> > -#endif > +#include <net/rtable.h> > > #include <netinet/in.h> > #include <netinet/in_var.h> > #include <netinet/if_ether.h> > #include <netinet/ip.h> > -#include <netinet/ip_var.h> > #include <netinet/udp.h> > -#include <netinet/udp_var.h> > #include <netinet/in_pcb.h> > +#include <netinet/ip_var.h> > > -#if NPF > 0 > -#include <net/pfvar.h> > +#ifdef INET6 > +#include <netinet/ip6.h> > +#include <netinet6/ip6_var.h> > +#include <netinet6/in6_var.h> > #endif > > -#if NBRIDGE > 
0 > +/* for bridge stuff */ > #include <net/if_bridge.h> > +#include <net/if_etherbridge.h> > + > +#if NBPFILTER > 0 > +#include <net/bpf.h> > #endif > > -#include <net/if_vxlan.h> > +/* > + * The protocol. > + */ > + > +#define VXLANMTU 1492 > +#define VXLAN_PORT 4789 > + > +struct vxlan_header { > + uint32_t vxlan_flags; > +#define VXLAN_F_I (1U << 27) > + uint32_t vxlan_id; > +#define VXLAN_VNI_SHIFT 8 > +#define VXLAN_VNI_MASK (0xffffffU << VXLAN_VNI_SHIFT) > +}; > + > +#define VXLAN_VNI_MAX 0x00ffffffU > +#define VXLAN_VNI_MIN 0x00000000U > + > +/* > + * The driver. > + */ > + > +union vxlan_addr { > + struct in_addr in4; > + struct in6_addr in6; > +}; > + > +struct vxlan_softc; > + > +struct vxlan_peer { > + RBT_ENTRY(vxlan_peer) p_entry; > + > + struct vxlan_header p_header; > + union vxlan_addr p_addr; > + > + struct vxlan_softc *p_sc; > +}; > + > +RBT_HEAD(vxlan_peers, vxlan_peer); > + > +struct vxlan_tep { > + TAILQ_ENTRY(vxlan_tep) vt_entry; > + > + sa_family_t vt_af; > + unsigned int vt_rdomain; > + union vxlan_addr vt_addr; > +#define vt_addr4 vt_addr.in4 > +#define vt_addr6 vt_addr.in6 > + in_port_t vt_port; > + > + struct socket *vt_so; > + > + struct mutex vt_mtx; > + struct vxlan_peers vt_peers; > +}; > + > +TAILQ_HEAD(vxlan_teps, vxlan_tep); > + > +enum vxlan_tunnel_mode { > + VXLAN_TMODE_UNSET, > + VXLAN_TMODE_P2P, /* unicast destination, no learning */ > + VXLAN_TMODE_LEARNING, /* multicast destination, learning */ > + VXLAN_TMODE_ENDPOINT, /* unset destination, no learning */ > +}; > > struct vxlan_softc { > struct arpcom sc_ac; > - struct ifmedia sc_media; > + struct etherbridge sc_eb; > + > + unsigned int sc_rdomain; > + sa_family_t sc_af; > + union vxlan_addr sc_src; > + union vxlan_addr sc_dst; > + in_port_t sc_port; > + struct vxlan_header sc_header; > + unsigned int sc_if_index0; > > - struct ip_moptions sc_imo; > - struct task sc_atask; > - struct task sc_ltask; > struct task sc_dtask; > + void *sc_inmulti; > + > + enum vxlan_tunnel_mode 
sc_mode; > + struct vxlan_peer *sc_ucast_peer; > + struct vxlan_peer *sc_mcast_peer; > + struct refcnt sc_refs; > > - struct sockaddr_storage sc_src; > - struct sockaddr_storage sc_dst; > - in_port_t sc_dstport; > - u_int sc_rdomain; > - int64_t sc_vnetid; > uint16_t sc_df; > - u_int8_t sc_ttl; > + int sc_ttl; > int sc_txhprio; > + int sc_rxhprio; > > - struct task sc_sendtask; > - > - LIST_ENTRY(vxlan_softc) sc_entry; > + struct task sc_send_task; > }; > > -void vxlanattach(int); > -int vxlanioctl(struct ifnet *, u_long, caddr_t); > -void vxlanstart(struct ifnet *); > -int vxlan_clone_create(struct if_clone *, int); > -int vxlan_clone_destroy(struct ifnet *); > -void vxlan_multicast_cleanup(struct ifnet *); > -int vxlan_multicast_join(struct ifnet *, struct sockaddr *, > - struct sockaddr *); > -int vxlan_media_change(struct ifnet *); > -void vxlan_media_status(struct ifnet *, struct ifmediareq *); > -int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); > -int vxlan_output(struct ifnet *, struct mbuf *); > -void vxlan_addr_change(void *); > -void vxlan_if_change(void *); > -void vxlan_link_change(void *); > -void vxlan_send_dispatch(void *); > +void vxlanattach(int); > + > +static int vxlan_clone_create(struct if_clone *, int); > +static int vxlan_clone_destroy(struct ifnet *); > + > +static int vxlan_output(struct ifnet *, struct mbuf *, > + struct sockaddr *, struct rtentry *); > +static int vxlan_enqueue(struct ifnet *, struct mbuf *); > +static void vxlan_start(struct ifqueue *); > +static void vxlan_send(void *); > + > +static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); > +static int vxlan_up(struct vxlan_softc *); > +static int vxlan_down(struct vxlan_softc *); > +static int vxlan_addmulti(struct vxlan_softc *, struct ifnet *); > +static void vxlan_delmulti(struct vxlan_softc *); > + > +static struct mbuf * > + vxlan_input(void *, struct mbuf *, > + struct ip *, struct ip6_hdr *, void *, int); > + > +static int 
vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *); > +static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *); > +static int vxlan_set_tunnel(struct vxlan_softc *, > + const struct if_laddrreq *); > +static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *); > +static int vxlan_del_tunnel(struct vxlan_softc *); > +static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *); > +static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *); > +static int vxlan_del_vnetid(struct vxlan_softc *); > +static int vxlan_set_parent(struct vxlan_softc *, > + const struct if_parent *); > +static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *); > +static int vxlan_del_parent(struct vxlan_softc *); > + > +static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *); > +static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *); > > -int vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *); > -uint16_t vxlan_sockaddr_port(struct sockaddr *); > +static void vxlan_detach_hook(void *); > > -struct if_clone vxlan_cloner = > +static struct if_clone vxlan_cloner = > IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); > > -int vxlan_enable = 0; > -u_long vxlan_tagmask; > +static int vxlan_eb_port_eq(void *, void *, void *); > +static void *vxlan_eb_port_take(void *, void *); > +static void vxlan_eb_port_rele(void *, void *); > +static size_t vxlan_eb_port_ifname(void *, char *, size_t, void *); > +static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); > + > +static const struct etherbridge_ops vxlan_etherbridge_ops = { > + vxlan_eb_port_eq, > + vxlan_eb_port_take, > + vxlan_eb_port_rele, > + vxlan_eb_port_ifname, > + vxlan_eb_port_sa, > +}; > + > +static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); > +static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); > +static struct pool vxlan_endpoint_pool; > > -#define 
VXLAN_TAGHASHSIZE 32 > -#define VXLAN_TAGHASH(tag) ((unsigned int)tag & vxlan_tagmask) > -LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh, vxlan_any; > +static inline int vxlan_peer_cmp(const struct vxlan_peer *, > + const struct vxlan_peer *); > + > +RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); > > void > vxlanattach(int count) > { > - /* Regular vxlan interfaces with a VNI */ > - if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, > - &vxlan_tagmask)) == NULL) > - panic("vxlanattach: hashinit"); > - > - /* multipoint-to-multipoint interfaces that accept any VNI */ > - LIST_INIT(&vxlan_any); > - > if_clone_attach(&vxlan_cloner); > } > > -int > +static int > vxlan_clone_create(struct if_clone *ifc, int unit) > { > - struct ifnet *ifp; > - struct vxlan_softc *sc; > + struct vxlan_softc *sc; > + struct ifnet *ifp; > + int error; > + > + if (vxlan_endpoint_pool.pr_size == 0) { > + pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), > + 0, IPL_SOFTNET, 0, "vxlanep", NULL); > + } > > - sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); > - sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, > - sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); > - sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; > - sc->sc_dstport = htons(VXLAN_PORT); > - sc->sc_vnetid = VXLAN_VNI_UNSET; > - sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */ > - sc->sc_df = htons(0); > - task_set(&sc->sc_atask, vxlan_addr_change, sc); > - task_set(&sc->sc_ltask, vxlan_link_change, sc); > - task_set(&sc->sc_dtask, vxlan_if_change, sc); > - task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc); > + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); > + if (sc == NULL) > + return (ENOMEM); > > ifp = &sc->sc_ac.ac_if; > - snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); > - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > - ether_fakeaddr(ifp); > > - ifp->if_softc = sc; > - ifp->if_ioctl = 
vxlanioctl; > - ifp->if_start = vxlanstart; > + snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", > + ifc->ifc_name, unit); > > - ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; > - ifp->if_capabilities = IFCAP_VLAN_MTU; > - ifp->if_xflags = IFXF_CLONED; > + error = etherbridge_init(&sc->sc_eb, ifp->if_xname, > + &vxlan_etherbridge_ops, sc); > + if (error == -1) { > + free(sc, M_DEVBUF, sizeof(*sc)); > + return (error); > + } > + > + sc->sc_af = AF_UNSPEC; > + sc->sc_txhprio = 0; > + sc->sc_rxhprio = IF_HDRPRIO_OUTER; > + sc->sc_df = 0; > + sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; > + > + task_set(&sc->sc_dtask, vxlan_detach_hook, sc); > + refcnt_init(&sc->sc_refs); > + task_set(&sc->sc_send_task, vxlan_send, sc); > > - ifmedia_init(&sc->sc_media, 0, vxlan_media_change, > - vxlan_media_status); > - ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); > - ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); > + ifp->if_softc = sc; > + ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; > + ifp->if_ioctl = vxlan_ioctl; > + ifp->if_output = vxlan_output; > + ifp->if_enqueue = vxlan_enqueue; > + ifp->if_qstart = vxlan_start; > + ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; > + ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; > + ether_fakeaddr(ifp); > > if_counters_alloc(ifp); > if_attach(ifp); > ether_ifattach(ifp); > > -#if 0 > - /* > - * Instead of using a decreased MTU of 1450 bytes, prefer > - * to use the default Ethernet-size MTU of 1500 bytes and to > - * increase the MTU of the outer transport interfaces to > - * at least 1550 bytes. The following is disabled by default. 
> - */ > - ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); > - ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly); > -#endif > - > - LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); > - vxlan_enable++; > - > return (0); > } > > -int > +static int > vxlan_clone_destroy(struct ifnet *ifp) > { > - struct vxlan_softc *sc = ifp->if_softc; > + struct vxlan_softc *sc = ifp->if_softc; > > NET_LOCK(); > - vxlan_multicast_cleanup(ifp); > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + vxlan_down(sc); > NET_UNLOCK(); > > - vxlan_enable--; > - LIST_REMOVE(sc, sc_entry); > - > - ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); > ether_ifdetach(ifp); > if_detach(ifp); > > - if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask)) > - taskq_barrier(net_tq(ifp->if_index)); > + etherbridge_destroy(&sc->sc_eb); > + > + refcnt_finalize(&sc->sc_refs, "vxlanfini"); > > - free(sc->sc_imo.imo_membership, M_IPMOPTS, > - sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); > free(sc, M_DEVBUF, sizeof(*sc)); > > return (0); > } > > -void > -vxlan_multicast_cleanup(struct ifnet *ifp) > +static struct vxlan_softc * > +vxlan_take(struct vxlan_softc *sc) > +{ > + refcnt_take(&sc->sc_refs); > + return (sc); > +} > + > +static void > +vxlan_rele(struct vxlan_softc *sc) > { > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > - struct ip_moptions *imo = &sc->sc_imo; > - struct ifnet *mifp; > + refcnt_rele_wake(&sc->sc_refs); > +} > > - mifp = if_get(imo->imo_ifidx); > - if (mifp != NULL) { > - if_addrhook_del(mifp, &sc->sc_atask); > - if_linkstatehook_del(mifp, &sc->sc_ltask); > - if_detachhook_del(mifp, &sc->sc_dtask); > +static struct mbuf * > +vxlan_encap(struct vxlan_softc *sc, struct mbuf *m, > + struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *, > + const union vxlan_addr *, uint8_t)) > +{ > + struct mbuf *m0; > + union vxlan_addr gateway; > + const union vxlan_addr *endpoint; > + struct vxlan_header *vh; > + struct 
udphdr *uh; > + int prio; > + uint8_t tos; > > - if_put(mifp); > - } > + if (sc->sc_mode == VXLAN_TMODE_UNSET) > + goto drop; > > - if (imo->imo_num_memberships > 0) { > - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); > - imo->imo_ifidx = 0; > + if (sc->sc_mode == VXLAN_TMODE_P2P) > + endpoint = &sc->sc_dst; > + else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */ > + struct ether_header *eh = mtod(m, struct ether_header *); > + > + smr_read_enter(); > + endpoint = etherbridge_resolve_ea(&sc->sc_eb, > + (struct ether_addr *)eh->ether_dhost); > + if (endpoint != NULL) { > + gateway = *endpoint; > + endpoint = &gateway; > + } > + smr_read_leave(); > + > + if (endpoint == NULL) { > + if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) > + goto drop; > + > + /* "flood" to unknown destinations */ > + endpoint = &sc->sc_dst; > + } > } > + > + /* force prepend mbuf because of payload alignment */ > + m0 = m_get(M_DONTWAIT, m->m_type); > + if (m0 == NULL) > + goto drop; > + > + m_align(m0, 0); > + m0->m_len = 0; > + > + M_MOVE_PKTHDR(m0, m); > + m0->m_next = m; > + > + m = m_prepend(m0, sizeof(*vh), M_DONTWAIT); > + if (m == NULL) > + return (NULL); > + > + vh = mtod(m, struct vxlan_header *); > + *vh = sc->sc_header; > + > + m = m_prepend(m, sizeof(*uh), M_DONTWAIT); > + if (m == NULL) > + return (NULL); > + > + uh = mtod(m, struct udphdr *); > + uh->uh_sport = sc->sc_port; /* XXX */ > + uh->uh_dport = sc->sc_port; > + htobem16(&uh->uh_ulen, m->m_pkthdr.len); > + uh->uh_sum = htons(0); > + > + SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); > + > + prio = sc->sc_txhprio; > + if (prio == IF_HDRPRIO_PACKET) > + prio = m->m_pkthdr.pf.prio; > + tos = IFQ_PRIO2TOS(prio); > + > + CLR(m->m_flags, M_BCAST|M_MCAST); > + m->m_pkthdr.ph_rtableid = sc->sc_rdomain; > + > +#if NPF > 0 > + pf_pkt_addr_changed(m); > +#endif > + > + return ((*ip_encap)(sc, m, endpoint, tos)); > +drop: > + m_freem(m); > + return (NULL); > } > > -int > -vxlan_multicast_join(struct ifnet *ifp, struct 
sockaddr *src, > - struct sockaddr *dst) > +static struct mbuf * > +vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m, > + const union vxlan_addr *endpoint, uint8_t tos) > { > - struct vxlan_softc *sc = ifp->if_softc; > - struct ip_moptions *imo = &sc->sc_imo; > - struct sockaddr_in *src4, *dst4; > -#ifdef INET6 > - struct sockaddr_in6 *dst6; > -#endif /* INET6 */ > - struct ifaddr *ifa; > - struct ifnet *mifp; > + struct ip *ip; > + > + m = m_prepend(m, sizeof(*ip), M_DONTWAIT); > + if (m == NULL) > + return (NULL); > + > + ip = mtod(m, struct ip *); > + ip->ip_v = IPVERSION; > + ip->ip_hl = sizeof(*ip) >> 2; > + ip->ip_off = sc->sc_df; > + ip->ip_tos = tos; > + ip->ip_len = htons(m->m_pkthdr.len); > + ip->ip_ttl = sc->sc_ttl; > + ip->ip_p = IPPROTO_UDP; > + ip->ip_src = sc->sc_src.in4; > + ip->ip_dst = endpoint->in4; > + > + return (m); > +} > > - switch (dst->sa_family) { > - case AF_INET: > - dst4 = satosin(dst); > - if (!IN_MULTICAST(dst4->sin_addr.s_addr)) > - return (0); > - break; > #ifdef INET6 > - case AF_INET6: > - dst6 = satosin6(dst); > - if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) > - return (0); > +static struct mbuf * > +vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m, > + const union vxlan_addr *endpoint, uint8_t tos) > +{ > + struct ip6_hdr *ip6; > + int len = m->m_pkthdr.len; > > - /* Multicast mode is currently not supported for IPv6 */ > - return (EAFNOSUPPORT); > + m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); > + if (m == NULL) > + return (NULL); > + > + ip6 = mtod(m, struct ip6_hdr *); > + ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ? 
> + htonl(m->m_pkthdr.ph_flowid) : 0; > + ip6->ip6_vfc |= IPV6_VERSION; > + ip6->ip6_flow |= htonl((uint32_t)tos << 20); > + ip6->ip6_plen = htons(len); > + ip6->ip6_nxt = IPPROTO_UDP; > + ip6->ip6_hlim = sc->sc_ttl; > + ip6->ip6_src = sc->sc_src.in6; > + ip6->ip6_dst = endpoint->in6; > + > + if (sc->sc_df) > + SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); > + > + return (m); > +} > #endif /* INET6 */ > - default: > - return (EAFNOSUPPORT); > + > +static int > +vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > + struct rtentry *rt) > +{ > + struct m_tag *mtag; > + int error = 0; > + > + mtag = NULL; > + while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) { > + if (memcmp((caddr_t)(mtag + 1), &ifp->if_index, > + sizeof(ifp->if_index)) == 0) { > + error = EIO; > + goto drop; > + } > } > > - src4 = satosin(src); > - dst4 = satosin(dst); > + mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); > + if (mtag == NULL) { > + error = ENOBUFS; > + goto drop; > + } > + memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index)); > + m_tag_prepend(m, mtag); > > - if (src4->sin_addr.s_addr == INADDR_ANY || > - IN_MULTICAST(src4->sin_addr.s_addr)) > - return (EINVAL); > - if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL || > - (mifp = ifa->ifa_ifp) == NULL || > - (mifp->if_flags & IFF_MULTICAST) == 0) > - return (EADDRNOTAVAIL); > + return (ether_output(ifp, m, dst, rt)); > > - if ((imo->imo_membership[0] = > - in_addmulti(&dst4->sin_addr, mifp)) == NULL) > - return (ENOBUFS); > +drop: > + m_freem(m); > + return (error); > +} > > - imo->imo_num_memberships++; > - imo->imo_ifidx = mifp->if_index; > - if (sc->sc_ttl > 0) > - imo->imo_ttl = sc->sc_ttl; > - else > - imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; > - imo->imo_loop = 0; > +static int > +vxlan_enqueue(struct ifnet *ifp, struct mbuf *m) > +{ > + struct vxlan_softc *sc = ifp->if_softc; > + struct ifqueue *ifq = &ifp->if_snd; > + > + if (ifq_enqueue(ifq, m) != 0) > + 
return (ENOBUFS); > > - /* > - * Use interface hooks to track any changes on the interface > - * that is used to send out the tunnel traffic as multicast. > - */ > - if_addrhook_add(mifp, &sc->sc_atask); > - if_linkstatehook_add(mifp, &sc->sc_ltask); > - if_detachhook_add(mifp, &sc->sc_dtask); > + task_add(ifq->ifq_softnet, &sc->sc_send_task); > > return (0); > } > > -void > -vxlanstart(struct ifnet *ifp) > +static void > +vxlan_start(struct ifqueue *ifq) > { > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > + struct ifnet *ifp = ifq->ifq_if; > + struct vxlan_softc *sc = ifp->if_softc; > > - task_add(net_tq(ifp->if_index), &sc->sc_sendtask); > + task_add(ifq->ifq_softnet, &sc->sc_send_task); > } > > -void > -vxlan_send_dispatch(void *xsc) > +static uint64_t > +vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml) > { > - struct vxlan_softc *sc = xsc; > - struct ifnet *ifp = &sc->sc_ac.ac_if; > - struct mbuf *m; > - struct mbuf_list ml; > - > - ml_init(&ml); > - for (;;) { > - m = ifq_dequeue(&ifp->if_snd); > - if (m == NULL) > - break; > - > -#if NBPFILTER > 0 > - if (ifp->if_bpf) > - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); > -#endif > - > - ml_enqueue(&ml, m); > - } > - > - if (ml_empty(&ml)) > - return; > + struct ip_moptions imo; > + struct mbuf *m; > + uint64_t oerrors = 0; > + > + imo.imo_ifidx = sc->sc_if_index0; > + imo.imo_ttl = sc->sc_ttl; > + imo.imo_loop = 0; > > NET_LOCK(); > - while ((m = ml_dequeue(&ml)) != NULL) { > - vxlan_output(ifp, m); > + while ((m = ml_dequeue(ml)) != NULL) { > + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0) > + oerrors++; > } > NET_UNLOCK(); > + > + return (oerrors); > } > > +#ifdef INET6 > +static uint64_t > +vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml) > +{ > + struct ip6_moptions im6o; > + struct mbuf *m; > + uint64_t oerrors = 0; > + > + im6o.im6o_ifidx = sc->sc_if_index0; > + im6o.im6o_hlim = sc->sc_ttl; > + im6o.im6o_loop = 0; > + > + NET_LOCK(); > + while 
((m = ml_dequeue(ml)) != NULL) { > + if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0) > + oerrors++; > + } > + NET_UNLOCK(); > + > + return (oerrors); > +} > +#endif /* INET6 */ > > -int > -vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) > +static void > +vxlan_send(void *arg) > { > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > - int reset = 0, error, af; > - socklen_t slen; > - in_port_t port; > - struct vxlan_taghash *tagh; > - > - if (src != NULL && dst != NULL) { > - if ((af = src->sa_family) != dst->sa_family) > - return (EAFNOSUPPORT); > - } else { > - /* Reset current configuration */ > - af = sc->sc_src.ss_family; > - src = sstosa(&sc->sc_src); > - dst = sstosa(&sc->sc_dst); > - reset = 1; > - } > + struct vxlan_softc *sc = arg; > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *, > + const union vxlan_addr *, uint8_t); > + uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *); > + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); > + struct mbuf *m; > + uint64_t oerrors; > + > + if (!ISSET(ifp->if_flags, IFF_RUNNING)) > + return; > > - switch (af) { > + switch (sc->sc_af) { > case AF_INET: > - slen = sizeof(struct sockaddr_in); > + ip_encap = vxlan_encap_ipv4; > + ip_send = vxlan_send_ipv4; > break; > #ifdef INET6 > case AF_INET6: > - slen = sizeof(struct sockaddr_in6); > + ip_encap = vxlan_encap_ipv6; > + ip_send = vxlan_send_ipv6; > break; > -#endif /* INET6 */ > +#endif > default: > - return (EAFNOSUPPORT); > + unhandled_af(sc->sc_af); > + /* NOTREACHED */ > } > > - if (src->sa_len != slen || dst->sa_len != slen) > - return (EINVAL); > + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { > +#if NBPFILTER > 0 > + caddr_t if_bpf = READ_ONCE(ifp->if_bpf); > + if (if_bpf != NULL) > + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > +#endif > + m = vxlan_encap(sc, m, ip_encap); > + if (m == NULL) > + continue; > > - vxlan_multicast_cleanup(ifp); > + 
ml_enqueue(&ml, m); > + } > > - /* returns without error if multicast is not configured */ > - if ((error = vxlan_multicast_join(ifp, src, dst)) != 0) > - return (error); > + oerrors = (*ip_send)(sc, &ml); > + > + counters_add(ifp->if_counters, ifc_oerrors, oerrors); > +} > + > +static struct mbuf * > +vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6, > + void *uhp, int hlen) > +{ > + struct vxlan_tep *vt = arg; > + union vxlan_addr addr; > + struct vxlan_peer key, *p; > + struct udphdr *uh; > + struct vxlan_header *vh; > + struct ether_header *eh; > + int vhlen = hlen + sizeof(*vh); > + struct mbuf *n; > + int off; > + in_port_t port; > + struct vxlan_softc *sc = NULL; > + struct ifnet *ifp; > + > + if (m->m_pkthdr.len < vhlen) > + goto drop; > + > + uh = uhp; > + port = uh->uh_sport; > > - if ((port = vxlan_sockaddr_port(dst)) != 0) > - sc->sc_dstport = port; > + if (ip != NULL) > + addr.in4 = ip->ip_src; > +#ifdef INET6 > + else > + addr.in6 = ip6->ip6_src; > +#endif > > - if (!reset) { > - bzero(&sc->sc_src, sizeof(sc->sc_src)); > - bzero(&sc->sc_dst, sizeof(sc->sc_dst)); > - memcpy(&sc->sc_src, src, src->sa_len); > - memcpy(&sc->sc_dst, dst, dst->sa_len); > + if (m->m_len < vhlen) { > + m = m_pullup(m, vhlen); > + if (m == NULL) > + return (NULL); > } > > - if (sc->sc_vnetid == VXLAN_VNI_ANY) { > - /* > - * If the interface accepts any VNI, put it into a separate > - * list that is not part of the main hash. 
> - */ > - tagh = &vxlan_any; > - } else > - tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)]; > + /* can't use ip/ip6/uh after this */ > > - LIST_REMOVE(sc, sc_entry); > - LIST_INSERT_HEAD(tagh, sc, sc_entry); > + vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); > > - return (0); > + memset(&key, 0, sizeof(key)); > + key.p_addr = addr; > + key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); > + key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); > + > + mtx_enter(&vt->vt_mtx); > + p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); > + if (p == NULL) { > + memset(&key.p_addr, 0, sizeof(key.p_addr)); > + p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); > + } > + if (p != NULL) > + sc = vxlan_take(p->p_sc); > + mtx_leave(&vt->vt_mtx); > + > + if (sc == NULL) > + goto drop; > + > + ifp = &sc->sc_ac.ac_if; > + if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) > + goto rele_drop; > + > + m_adj(m, vhlen); > + > + if (m->m_pkthdr.len < sizeof(*eh)) > + goto rele_drop; > + > + if (m->m_len < sizeof(*eh)) { > + m = m_pullup(m, sizeof(*eh)); > + if (m == NULL) > + goto rele; > + } > + > + n = m_getptr(m, sizeof(*eh), &off); > + if (n == NULL) > + goto rele_drop; > + > + if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { > + n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); > + m_freem(m); > + if (n == NULL) > + goto rele; > + m = n; > + } > + > + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { > + eh = mtod(m, struct ether_header *); > + etherbridge_map_ea(&sc->sc_eb, &addr, > + (struct ether_addr *)eh->ether_shost); > + } > + > + /* XXX prio */ > + > + if_vinput(ifp, m); > +rele: > + vxlan_rele(sc); > + return (NULL); > + > +rele_drop: > + vxlan_rele(sc); > +drop: > + m_freem(m); > + return (NULL); > } > > -int > -vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) > +static int > +vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) > { > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > - struct ifreq *ifr = (struct ifreq 
*)data; > - struct if_laddrreq *lifr = (struct if_laddrreq *)data; > - int error = 0; > + struct vxlan_softc *sc = ifp->if_softc; > + struct ifreq *ifr = (struct ifreq *)data; > + struct ifbrparam *bparam = (struct ifbrparam *)data; > + int error = 0; > > switch (cmd) { > case SIOCSIFADDR: > - ifp->if_flags |= IFF_UP; > - /* FALLTHROUGH */ > - > + break; > case SIOCSIFFLAGS: > - if (ifp->if_flags & IFF_UP) { > - ifp->if_flags |= IFF_RUNNING; > + if (ISSET(ifp->if_flags, IFF_UP)) { > + if (!ISSET(ifp->if_flags, IFF_RUNNING)) > + error = vxlan_up(sc); > + else > + error = 0; > } else { > - ifp->if_flags &= ~IFF_RUNNING; > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + error = vxlan_down(sc); > } > break; > > - case SIOCADDMULTI: > - case SIOCDELMULTI: > + case SIOCSLIFPHYRTABLE: > + error = vxlan_set_rdomain(sc, ifr); > break; > - > - case SIOCGIFMEDIA: > - case SIOCSIFMEDIA: > - error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); > + case SIOCGLIFPHYRTABLE: > + error = vxlan_get_rdomain(sc, ifr); > break; > > case SIOCSLIFPHYADDR: > - error = vxlan_config(ifp, > - sstosa(&lifr->addr), > - sstosa(&lifr->dstaddr)); > + error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); > + break; > + case SIOCGLIFPHYADDR: > + error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); > break; > - > case SIOCDIFPHYADDR: > - vxlan_multicast_cleanup(ifp); > - bzero(&sc->sc_src, sizeof(sc->sc_src)); > - bzero(&sc->sc_dst, sizeof(sc->sc_dst)); > - sc->sc_dstport = htons(VXLAN_PORT); > + error = vxlan_del_tunnel(sc); > break; > > - case SIOCGLIFPHYADDR: > - if (sc->sc_dst.ss_family == AF_UNSPEC) { > - error = EADDRNOTAVAIL; > - break; > - } > - bzero(&lifr->addr, sizeof(lifr->addr)); > - bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); > - memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); > - memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); > + case SIOCSVNETID: > + error = vxlan_set_vnetid(sc, ifr); > break; > - > - case SIOCSLIFPHYRTABLE: > - if (ifr->ifr_rdomainid < 0 
|| > - ifr->ifr_rdomainid > RT_TABLEID_MAX || > - !rtable_exists(ifr->ifr_rdomainid)) { > - error = EINVAL; > - break; > - } > - sc->sc_rdomain = ifr->ifr_rdomainid; > - (void)vxlan_config(ifp, NULL, NULL); > + case SIOCGVNETID: > + error = vxlan_get_vnetid(sc, ifr); > + break; > + case SIOCDVNETID: > + error = vxlan_del_vnetid(sc); > break; > > - case SIOCGLIFPHYRTABLE: > - ifr->ifr_rdomainid = sc->sc_rdomain; > + case SIOCSIFPARENT: > + error = vxlan_set_parent(sc, (struct if_parent *)data); > + break; > + case SIOCGIFPARENT: > + error = vxlan_get_parent(sc, (struct if_parent *)data); > + break; > + case SIOCDIFPARENT: > + error = vxlan_del_parent(sc); > break; > > - case SIOCSLIFPHYTTL: > - if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { > - error = EINVAL; > + case SIOCSTXHPRIO: > + error = if_txhprio_l2_check(ifr->ifr_hdrprio); > + if (error != 0) > break; > - } > - if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) > - break; > - sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); > - (void)vxlan_config(ifp, NULL, NULL); > + > + sc->sc_txhprio = ifr->ifr_hdrprio; > + break; > + case SIOCGTXHPRIO: > + ifr->ifr_hdrprio = sc->sc_txhprio; > break; > > - case SIOCGLIFPHYTTL: > - ifr->ifr_ttl = (int)sc->sc_ttl; > + case SIOCSRXHPRIO: > + error = if_rxhprio_l2_check(ifr->ifr_hdrprio); > + if (error != 0) > + break; > + > + sc->sc_rxhprio = ifr->ifr_hdrprio; > + break; > + case SIOCGRXHPRIO: > + ifr->ifr_hdrprio = sc->sc_rxhprio; > break; > > case SIOCSLIFPHYDF: > @@ -496,50 +802,45 @@ vxlanioctl(struct ifnet *ifp, u_long cmd > ifr->ifr_df = sc->sc_df ? 
1 : 0; > break; > > - case SIOCSTXHPRIO: > - if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) > - ; /* fall through */ > - else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN || > - ifr->ifr_hdrprio > IF_HDRPRIO_MAX) { > + case SIOCSLIFPHYTTL: > + if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { > error = EINVAL; > break; > } > > - sc->sc_txhprio = ifr->ifr_hdrprio; > + /* commit */ > + sc->sc_ttl = (uint8_t)ifr->ifr_ttl; > break; > - case SIOCGTXHPRIO: > - ifr->ifr_hdrprio = sc->sc_txhprio; > + case SIOCGLIFPHYTTL: > + ifr->ifr_ttl = (int)sc->sc_ttl; > break; > > - case SIOCSVNETID: > - if (sc->sc_vnetid == ifr->ifr_vnetid) > - break; > - > - if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) && > - (ifr->ifr_vnetid > VXLAN_VNI_MAX || > - ifr->ifr_vnetid < VXLAN_VNI_MIN)) { > - error = EINVAL; > - break; > - } > - > - sc->sc_vnetid = (int)ifr->ifr_vnetid; > - (void)vxlan_config(ifp, NULL, NULL); > + case SIOCBRDGSCACHE: > + error = etherbridge_set_max(&sc->sc_eb, bparam); > break; > - > - case SIOCGVNETID: > - if ((sc->sc_vnetid != VXLAN_VNI_ANY) && > - (sc->sc_vnetid > VXLAN_VNI_MAX || > - sc->sc_vnetid < VXLAN_VNI_MIN)) { > - error = EADDRNOTAVAIL; > - break; > - } > - > - ifr->ifr_vnetid = sc->sc_vnetid; > + case SIOCBRDGGCACHE: > + error = etherbridge_get_max(&sc->sc_eb, bparam); > + break; > + case SIOCBRDGSTO: > + error = etherbridge_set_tmo(&sc->sc_eb, bparam); > + break; > + case SIOCBRDGGTO: > + error = etherbridge_get_tmo(&sc->sc_eb, bparam); > break; > > - case SIOCDVNETID: > - sc->sc_vnetid = VXLAN_VNI_UNSET; > - (void)vxlan_config(ifp, NULL, NULL); > + case SIOCBRDGRTS: > + error = etherbridge_rtfind(&sc->sc_eb, > + (struct ifbaconf *)data); > + break; > + case SIOCBRDGFLUSH: > + etherbridge_flush(&sc->sc_eb, > + ((struct ifbreq *)data)->ifbr_ifsflags); > + break; > + case SIOCBRDGSADDR: > + error = vxlan_add_addr(sc, (struct ifbareq *)data); > + break; > + case SIOCBRDGDADDR: > + error = vxlan_del_addr(sc, (struct ifbareq *)data); > break; > > default: > @@ -550,465 +851,960 @@ 
vxlanioctl(struct ifnet *ifp, u_long cmd > return (error); > } > > -int > -vxlan_media_change(struct ifnet *ifp) > +static struct vxlan_tep * > +vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr) > { > - return (0); > -} > + struct vxlan_tep *vt; > > -void > -vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) > -{ > - imr->ifm_status = IFM_AVALID | IFM_ACTIVE; > + TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) { > + if (sc->sc_af == vt->vt_af && > + sc->sc_rdomain == vt->vt_rdomain && > + memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 && > + sc->sc_port == vt->vt_port) > + return (vt); > + } > + > + return (NULL); > } > > -int > -vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa) > +static int > +vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, > + struct vxlan_peer *p) > { > - struct sockaddr_in *src4, *dst4; > + struct mbuf m; > + struct vxlan_tep *vt; > + struct socket *so; > + struct sockaddr_in *sin; > #ifdef INET6 > - struct sockaddr_in6 *src6, *dst6; > -#endif /* INET6 */ > + struct sockaddr_in6 *sin6; > +#endif > + int error; > + int s; > > - if (srcsa->sa_family != dstsa->sa_family) > - return (1); > + vt = vxlan_tep_get(sc, addr); > + if (vt != NULL) { > + struct vxlan_peer *op; > + > + mtx_enter(&vt->vt_mtx); > + op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p); > + mtx_leave(&vt->vt_mtx); > + > + if (op != NULL) > + return (EADDRINUSE); > + > + return (0); > + } > > - switch (dstsa->sa_family) { > + vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO); > + if (vt == NULL) > + return (ENOMEM); > + > + vt->vt_af = sc->sc_af; > + vt->vt_rdomain = sc->sc_rdomain; > + vt->vt_addr = *addr; > + vt->vt_port = sc->sc_port; > + > + mtx_init(&vt->vt_mtx, IPL_SOFTNET); > + RBT_INIT(vxlan_peers, &vt->vt_peers); > + RBT_INSERT(vxlan_peers, &vt->vt_peers, p); > + > + error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP); > + if (error != 0) > + goto free; > + > + s = solock(so); > + > + 
sotoinpcb(so)->inp_upcall = vxlan_input; > + sotoinpcb(so)->inp_upcall_arg = vt; > + > + m_inithdr(&m); > + m.m_len = sizeof(vt->vt_rdomain); > + *mtod(&m, unsigned int *) = vt->vt_rdomain; > + error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m); > + if (error != 0) > + goto close; > + > + m_inithdr(&m); > + switch (vt->vt_af) { > case AF_INET: > - src4 = satosin(srcsa); > - dst4 = satosin(dstsa); > - if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr) > - return (0); > + sin = mtod(&m, struct sockaddr_in *); > + memset(sin, 0, sizeof(*sin)); > + sin->sin_len = sizeof(*sin); > + sin->sin_family = AF_INET; > + sin->sin_addr = addr->in4; > + sin->sin_port = vt->vt_port; > + > + m.m_len = sizeof(*sin); > break; > + > #ifdef INET6 > case AF_INET6: > - src6 = satosin6(srcsa); > - dst6 = satosin6(dstsa); > - if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) && > - src6->sin6_scope_id == dst6->sin6_scope_id) > - return (0); > + sin6 = mtod(&m, struct sockaddr_in6 *); > + sin6->sin6_len = sizeof(*sin6); > + sin6->sin6_family = AF_INET6; > + in6_recoverscope(sin6, &addr->in6); > + sin6->sin6_port = sc->sc_port; > + > + m.m_len = sizeof(*sin6); > break; > -#endif /* INET6 */ > +#endif > + default: > + unhandled_af(vt->vt_af); > } > > - return (1); > + error = sobind(so, &m, curproc); > + if (error != 0) > + goto close; > + > + sounlock(so, s); > + > + rw_assert_wrlock(&vxlan_lock); > + TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); > + > + vt->vt_so = so; > + > + return (0); > + > +close: > + sounlock(so, s); > + soclose(so, MSG_DONTWAIT); > +free: > + free(vt, M_DEVBUF, sizeof(*vt)); > + return (error); > } > > -uint16_t > -vxlan_sockaddr_port(struct sockaddr *sa) > +static void > +vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, > + struct vxlan_peer *p) > { > - struct sockaddr_in *sin4; > -#ifdef INET6 > - struct sockaddr_in6 *sin6; > -#endif /* INET6 */ > + struct vxlan_tep *vt; > + int empty; > > - switch (sa->sa_family) { > - case AF_INET: > 
- sin4 = satosin(sa); > - return (sin4->sin_port); > -#ifdef INET6 > - case AF_INET6: > - sin6 = satosin6(sa); > - return (sin6->sin6_port); > -#endif /* INET6 */ > - default: > - break; > - } > + vt = vxlan_tep_get(sc, addr); > + if (vt == NULL) > + panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); > + > + mtx_enter(&vt->vt_mtx); > + RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); > + empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); > + mtx_leave(&vt->vt_mtx); > > - return (0); > + if (!empty) > + return; > + > + rw_assert_wrlock(&vxlan_lock); > + TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); > + > + soclose(vt->vt_so, MSG_DONTWAIT); > + free(vt, M_DEVBUF, sizeof(*vt)); > } > > -int > -vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, > - struct sockaddr *srcsa, struct sockaddr *dstsa) > -{ > - struct vxlan_softc *sc = NULL, *sc_cand = NULL; > - struct vxlan_header v; > - int vni; > - struct ifnet *ifp; > - int skip; > -#if NBRIDGE > 0 > - struct bridge_tunneltag *brtag; > -#endif > - struct mbuf *n; > - int off; > - > - /* XXX Should verify the UDP port first before copying the packet */ > - skip = iphlen + sizeof(*uh); > - if (m->m_pkthdr.len - skip < sizeof(v)) > - return (0); > - m_copydata(m, skip, sizeof(v), &v); > - skip += sizeof(v); > +static int > +vxlan_tep_up(struct vxlan_softc *sc) > +{ > + struct vxlan_peer *up, *mp; > + int error; > + > + up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); > + if (up == NULL) > + return (ENOMEM); > > - if (v.vxlan_flags & htonl(VXLAN_RESERVED1) || > - v.vxlan_id & htonl(VXLAN_RESERVED2)) > + if (sc->sc_mode == VXLAN_TMODE_P2P) > + up->p_addr = sc->sc_dst; > + up->p_header = sc->sc_header; > + up->p_sc = vxlan_take(sc); > + > + error = vxlan_tep_add_addr(sc, &sc->sc_src, up); > + if (error != 0) > + goto freeup; > + > + sc->sc_ucast_peer = up; > + > + if (sc->sc_mode != VXLAN_TMODE_LEARNING) > return (0); > > - vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S; > - if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) { 
> - if (vni != 0) > - return (0); > + mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); > + if (mp == NULL) { > + error = ENOMEM; > + goto delup; > + } > + > + /* addr is multicast, leave it as 0s */ > + mp->p_header = sc->sc_header; > + mp->p_sc = vxlan_take(sc); > + > + /* destination address is a multicast group we want to join */ > + error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); > + if (error != 0) > + goto freemp; > + > + sc->sc_mcast_peer = mp; > > - vni = VXLAN_VNI_UNSET; > + return (0); > + > +freemp: > + vxlan_rele(mp->p_sc); > + free(mp, M_DEVBUF, sizeof(*mp)); > +delup: > + vxlan_tep_del_addr(sc, &sc->sc_src, up); > +freeup: > + vxlan_rele(up->p_sc); > + free(up, M_DEVBUF, sizeof(*up)); > + return (error); > +} > + > +static void > +vxlan_tep_down(struct vxlan_softc *sc) > +{ > + struct vxlan_peer *up = sc->sc_ucast_peer; > + > + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { > + struct vxlan_peer *mp = sc->sc_mcast_peer; > + vxlan_tep_del_addr(sc, &sc->sc_dst, mp); > + vxlan_rele(mp->p_sc); > + free(mp, M_DEVBUF, sizeof(*mp)); > } > > + vxlan_tep_del_addr(sc, &sc->sc_src, up); > + vxlan_rele(up->p_sc); > + free(up, M_DEVBUF, sizeof(*up)); > +} > + > +static int > +vxlan_up(struct vxlan_softc *sc) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + struct ifnet *ifp0 = NULL; > + int error; > + > + KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); > NET_ASSERT_LOCKED(); > - /* First search for a vxlan(4) interface with the packet's VNI */ > - LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) { > - if ((uh->uh_dport == sc->sc_dstport) && > - vni == sc->sc_vnetid && > - sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) { > - sc_cand = sc; > - if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0) > - goto found; > - } > + > + if (sc->sc_af == AF_UNSPEC) > + return (EDESTADDRREQ); > + KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); > + > + NET_UNLOCK(); > + > + error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); > + if (error != 0) > + goto netlock; > + 
> + NET_LOCK(); > + if (ISSET(ifp->if_flags, IFF_RUNNING)) { > + /* something else beat us */ > + rw_exit(&vxlan_lock); > + return (0); > } > + NET_UNLOCK(); > > - /* > - * Now loop through all the vxlan(4) interfaces that are configured > - * to accept any VNI and operating in multipoint-to-multipoint mode > - * that is used in combination with bridge(4) or switch(4). > - * If a vxlan(4) interface has been found for the packet's VNI, this > - * code is not reached as the other interface is more specific. > - */ > - LIST_FOREACH(sc, &vxlan_any, sc_entry) { > - if ((uh->uh_dport == sc->sc_dstport) && > - (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) { > - sc_cand = sc; > - goto found; > - } > + if (sc->sc_mode != VXLAN_TMODE_P2P) { > + error = etherbridge_up(&sc->sc_eb); > + if (error != 0) > + goto unlock; > } > > - if (sc_cand) { > - sc = sc_cand; > - goto found; > + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { > + ifp0 = if_get(sc->sc_if_index0); > + if (ifp0 == NULL) { > + error = ENXIO; > + goto down; > + } > + > + /* check again if multicast will work on top of the parent */ > + if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { > + error = EPROTONOSUPPORT; > + goto put; > + } > + > + error = vxlan_addmulti(sc, ifp0); > + if (error != 0) > + goto put; > + > + /* Register callback if parent wants to unregister */ > + if_detachhook_add(ifp0, &sc->sc_dtask); > + } else { > + if (sc->sc_if_index0 != 0) { > + error = EPROTONOSUPPORT; > + goto down; > + } > } > > - /* not found */ > + error = vxlan_tep_up(sc); > + if (error != 0) > + goto del; > + > + if_put(ifp0); > + > + NET_LOCK(); > + SET(ifp->if_flags, IFF_RUNNING); > + rw_exit(&vxlan_lock); > + > return (0); > > - found: > - if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) { > - m_freem(m); > - return (EINVAL); > +del: > + if (ifp0 != NULL) > + if_detachhook_del(ifp0, &sc->sc_dtask); > + vxlan_delmulti(sc); > +put: > + if_put(ifp0); > +down: > + if (sc->sc_mode != VXLAN_TMODE_P2P) > + 
etherbridge_down(&sc->sc_eb); > +unlock: > + rw_exit(&vxlan_lock); > +netlock: > + NET_LOCK(); > + > + return (error); > +} > + > +static int > +vxlan_down(struct vxlan_softc *sc) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + struct ifnet *ifp0; > + int error; > + > + KASSERT(ISSET(ifp->if_flags, IFF_RUNNING)); > + NET_UNLOCK(); > + > + error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); > + if (error != 0) { > + NET_LOCK(); > + return (error); > } > > - m_adj(m, skip); > - ifp = &sc->sc_ac.ac_if; > + NET_LOCK(); > + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { > + /* something else beat us */ > + rw_exit(&vxlan_lock); > + return (0); > + } > + NET_UNLOCK(); > > -#if NBRIDGE > 0 > - /* Store the tunnel src/dst IP and vni for the bridge or switch */ > - if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) && > - srcsa->sa_family != AF_UNSPEC && > - ((brtag = bridge_tunneltag(m)) != NULL)) { > - memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len); > - memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len); > - brtag->brtag_id = vni; > + vxlan_tep_down(sc); > + > + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { > + vxlan_delmulti(sc); > + ifp0 = if_get(sc->sc_if_index0); > + if (ifp0 != NULL) { > + if_detachhook_del(ifp0, &sc->sc_dtask); > + } > + if_put(ifp0); > } > -#endif > > - m->m_flags &= ~(M_BCAST|M_MCAST); > + if (sc->sc_mode != VXLAN_TMODE_P2P) > + etherbridge_down(&sc->sc_eb); > > -#if NPF > 0 > - pf_pkt_addr_changed(m); > -#endif > - if ((m->m_len < sizeof(struct ether_header)) && > - (m = m_pullup(m, sizeof(struct ether_header))) == NULL) > - return (ENOBUFS); > + taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task); > + NET_LOCK(); > + CLR(ifp->if_flags, IFF_RUNNING); > + rw_exit(&vxlan_lock); > > - n = m_getptr(m, sizeof(struct ether_header), &off); > - if (n == NULL) { > - m_freem(m); > - return (EINVAL); > + return (0); > +} > + > +static int > +vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0) > +{ > + int error = 0; > + > + 
NET_LOCK(); > + > + switch (sc->sc_af) { > + case AF_INET: > + sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0); > + if (sc->sc_inmulti == NULL) > + error = EADDRNOTAVAIL; > + break; > +#ifdef INET6 > + case AF_INET6: > + sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error); > + break; > +#endif > + default: > + unhandled_af(sc->sc_af); > } > - if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { > - n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); > - /* Dispose of the original mbuf chain */ > - m_freem(m); > - if (n == NULL) > - return (ENOBUFS); > - m = n; > + > + NET_UNLOCK(); > + > + return (error); > +} > + > +static void > +vxlan_delmulti(struct vxlan_softc *sc) > +{ > + NET_LOCK(); > + > + switch (sc->sc_af) { > + case AF_INET: > + in_delmulti(sc->sc_inmulti); > + break; > +#ifdef INET6 > + case AF_INET6: > + in6_delmulti(sc->sc_inmulti); > + break; > +#endif > + default: > + unhandled_af(sc->sc_af); > } > > - if_vinput(ifp, m); > + sc->sc_inmulti = NULL; /* keep it tidy */ > > - /* success */ > - return (1); > + NET_UNLOCK(); > } > > -struct mbuf * > -vxlan_encap4(struct ifnet *ifp, struct mbuf *m, > - struct sockaddr *src, struct sockaddr *dst) > -{ > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > - struct ip *ip; > - > - /* > - * Remove multicast and broadcast flags or encapsulated packet > - * ends up as multicast or broadcast packet. 
> - */ > - m->m_flags &= ~(M_BCAST|M_MCAST); > +static int > +vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > > - M_PREPEND(m, sizeof(*ip), M_DONTWAIT); > - if (m == NULL) > - return (NULL); > + if (ifr->ifr_rdomainid < 0 || > + ifr->ifr_rdomainid > RT_TABLEID_MAX) > + return (EINVAL); > + if (!rtable_exists(ifr->ifr_rdomainid)) > + return (EADDRNOTAVAIL); > > - ip = mtod(m, struct ip *); > - ip->ip_v = IPVERSION; > - ip->ip_hl = sizeof(struct ip) >> 2; > - ip->ip_id = htons(ip_randomid()); > - ip->ip_off = sc->sc_df; > - ip->ip_p = IPPROTO_UDP; > - ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? > - m->m_pkthdr.pf.prio : sc->sc_txhprio); > - ip->ip_len = htons(m->m_pkthdr.len); > + if (sc->sc_rdomain == ifr->ifr_rdomainid) > + return (0); > > - ip->ip_src = satosin(src)->sin_addr; > - ip->ip_dst = satosin(dst)->sin_addr; > + if (!ISSET(ifp->if_flags, IFF_RUNNING)) > + return (EBUSY); > > - if (sc->sc_ttl > 0) > - ip->ip_ttl = sc->sc_ttl; > - else > - ip->ip_ttl = IPDEFTTL; > + /* commit */ > + sc->sc_rdomain = ifr->ifr_rdomainid; > + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); > > - return (m); > + return (0); > +} > + > +static int > +vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr) > +{ > + ifr->ifr_rdomainid = sc->sc_rdomain; > + > + return (0); > } > > +static int > +vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + struct sockaddr *src = (struct sockaddr *)&req->addr; > + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; > + struct sockaddr_in *src4, *dst4; > #ifdef INET6 > -struct mbuf * > -vxlan_encap6(struct ifnet *ifp, struct mbuf *m, > - struct sockaddr *src, struct sockaddr *dst) > -{ > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > - struct ip6_hdr *ip6; > - struct in6_addr *in6a; > - uint32_t flow; > - > - /* > - * Remove multicast and broadcast flags or 
encapsulated packet > - * ends up as multicast or broadcast packet. > - */ > - m->m_flags &= ~(M_BCAST|M_MCAST); > + struct sockaddr_in6 *src6, *dst6; > + int error; > +#endif > + union vxlan_addr saddr, daddr; > + unsigned int mode = VXLAN_TMODE_ENDPOINT; > + in_port_t port = htons(VXLAN_PORT); > > - M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); > - if (m == NULL) > - return (NULL); > + memset(&saddr, 0, sizeof(saddr)); > + memset(&daddr, 0, sizeof(daddr)); > > - flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? > - m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20; > + /* validate */ > + switch (src->sa_family) { > + case AF_INET: > + src4 = (struct sockaddr_in *)src; > + if (in_nullhost(src4->sin_addr) || > + IN_MULTICAST(src4->sin_addr.s_addr)) > + return (EINVAL); > > - ip6 = mtod(m, struct ip6_hdr *); > - ip6->ip6_flow = htonl(flow); > - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; > - ip6->ip6_vfc |= IPV6_VERSION; > - ip6->ip6_nxt = IPPROTO_UDP; > - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); > - if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0) > - goto drop; > - if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0) > - goto drop; > + if (src4->sin_port != htons(0)) > + port = src4->sin_port; > > - if (sc->sc_ttl > 0) > - ip6->ip6_hlim = sc->sc_ttl; > - else > - ip6->ip6_hlim = ip6_defhlim; > + if (dst->sa_family != AF_UNSPEC) { > + if (dst->sa_family != AF_INET) > + return (EINVAL); > + > + dst4 = (struct sockaddr_in *)dst; > + if (in_nullhost(dst4->sin_addr)) > + return (EINVAL); > + > + /* all good */ > + mode = IN_MULTICAST(dst4->sin_addr.s_addr) ? 
> + VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P; > + daddr.in4 = dst4->sin_addr; > + } > > - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { > - if (in6_selectsrc(&in6a, satosin6(dst), NULL, > - sc->sc_rdomain) != 0) > - goto drop; > + saddr.in4 = src4->sin_addr; > + break; > + > +#ifdef INET6 > + case AF_INET6: > + src6 = (struct sockaddr_in6 *)src; > + if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) || > + IN6_IS_ADDR_MULTICAST(&src6->sin6_addr)) > + return (EINVAL); > > - ip6->ip6_src = *in6a; > + if (src6->sin6_port != htons(0)) > + port = src6->sin6_port; > + > + if (dst->sa_family != AF_UNSPEC) { > + if (dst->sa_family != AF_INET6) > + return (EINVAL); > + > + dst6 = (struct sockaddr_in6 *)dst; > + if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr)) > + return (EINVAL); > + > + if (src6->sin6_scope_id != dst6->sin6_scope_id) > + return (EINVAL); > + > + /* all good */ > + mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ? > + VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P; > + error = in6_embedscope(&daddr.in6, dst6, NULL); > + if (error != 0) > + return (error); > + } > + > + error = in6_embedscope(&saddr.in6, src6, NULL); > + if (error != 0) > + return (error); > + > + break; > +#endif > + default: > + return (EAFNOSUPPORT); > } > > - if (sc->sc_df) > - SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); > + if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 && > + memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 && > + sc->sc_port == port) > + return (0); > > - /* > - * The UDP checksum of VXLAN packets should be set to zero, > - * but the IPv6 UDP checksum is not optional. There is an RFC 6539 > - * to relax the IPv6 UDP checksum requirement for tunnels, but it > - * is currently not supported by most implementations. 
> - */ > - m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + return (EBUSY); > > - return (m); > + /* commit */ > + sc->sc_af = src->sa_family; > + sc->sc_src = saddr; > + sc->sc_dst = daddr; > + sc->sc_port = port; > + sc->sc_mode = mode; > + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); > > -drop: > - m_freem(m); > - return (NULL); > + return (0); > } > -#endif /* INET6 */ > > -int > -vxlan_output(struct ifnet *ifp, struct mbuf *m) > +static int > +vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req) > { > - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; > - struct vxlanudphdr *vu; > - struct sockaddr *src, *dst; > -#if NBRIDGE > 0 > - struct bridge_tunneltag *brtag; > -#endif > - int error, af; > - uint32_t tag; > - struct mbuf *m0; > - > - /* VXLAN header, needs new mbuf because of alignment issues */ > - MGET(m0, M_DONTWAIT, m->m_type); > - if (m0 == NULL) { > - ifp->if_oerrors++; > - return (ENOBUFS); > - } > - M_MOVE_PKTHDR(m0, m); > - m0->m_next = m; > - m = m0; > - m_align(m, sizeof(*vu)); > - m->m_len = sizeof(*vu); > - m->m_pkthdr.len += sizeof(*vu); > - > - src = sstosa(&sc->sc_src); > - dst = sstosa(&sc->sc_dst); > - af = src->sa_family; > - > - vu = mtod(m, struct vxlanudphdr *); > - vu->vu_u.uh_sport = sc->sc_dstport; > - vu->vu_u.uh_dport = sc->sc_dstport; > - vu->vu_u.uh_ulen = htons(m->m_pkthdr.len); > - vu->vu_u.uh_sum = 0; > - tag = sc->sc_vnetid; > - > -#if NBRIDGE > 0 > - if ((brtag = bridge_tunnel(m)) != NULL) { > - dst = &brtag->brtag_peer.sa; > - > - /* If accepting any VNI, source ip address is from brtag */ > - if (sc->sc_vnetid == VXLAN_VNI_ANY) { > - src = &brtag->brtag_local.sa; > - tag = (uint32_t)brtag->brtag_id; > - af = src->sa_family; > - } > - > - if (dst->sa_family != af) { > - ifp->if_oerrors++; > - m_freem(m); > - return (EINVAL); > - } > - } else > + struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr; > + struct sockaddr_in *sin; > +#ifdef INET6 > + 
struct sockaddr_in6 *sin6; > #endif > - if (sc->sc_vnetid == VXLAN_VNI_ANY) { > - /* > - * If accepting any VNI, build the vxlan header only by > - * bridge_tunneltag or drop packet if the tag does not exist. > - */ > - ifp->if_oerrors++; > - m_freem(m); > - return (ENETUNREACH); > - } > > - if (sc->sc_vnetid != VXLAN_VNI_UNSET) { > - vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); > - vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S); > - } else { > - vu->vu_v.vxlan_flags = htonl(0); > - vu->vu_v.vxlan_id = htonl(0); > - } > + if (sc->sc_af == AF_UNSPEC) > + return (EADDRNOTAVAIL); > + KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); > > - switch (af) { > + memset(&req->addr, 0, sizeof(req->addr)); > + memset(&req->dstaddr, 0, sizeof(req->dstaddr)); > + > + /* default to endpoint */ > + dstaddr->sa_len = 2; > + dstaddr->sa_family = AF_UNSPEC; > + > + switch (sc->sc_af) { > case AF_INET: > - m = vxlan_encap4(ifp, m, src, dst); > + sin = (struct sockaddr_in *)&req->addr; > + sin->sin_len = sizeof(*sin); > + sin->sin_family = AF_INET; > + sin->sin_addr = sc->sc_src.in4; > + sin->sin_port = sc->sc_port; > + > + if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) > + break; > + > + sin = (struct sockaddr_in *)&req->dstaddr; > + sin->sin_len = sizeof(*sin); > + sin->sin_family = AF_INET; > + sin->sin_addr = sc->sc_dst.in4; > break; > + > #ifdef INET6 > case AF_INET6: > - m = vxlan_encap6(ifp, m, src, dst); > + sin6 = (struct sockaddr_in6 *)&req->addr; > + sin6->sin6_len = sizeof(*sin6); > + sin6->sin6_family = AF_INET6; > + in6_recoverscope(sin6, &sc->sc_src.in6); > + sin6->sin6_port = sc->sc_port; > + > + if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) > + break; > + > + sin6 = (struct sockaddr_in6 *)&req->dstaddr; > + sin6->sin6_len = sizeof(*sin6); > + sin6->sin6_family = AF_INET6; > + in6_recoverscope(sin6, &sc->sc_dst.in6); > break; > -#endif /* INET6 */ > +#endif > default: > - m_freem(m); > - m = NULL; > + unhandled_af(sc->sc_af); > } > > - if (m == NULL) { > - ifp->if_oerrors++; > - return 
(ENOBUFS); > + return (0); > +} > + > +static int > +vxlan_del_tunnel(struct vxlan_softc *sc) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + > + if (sc->sc_af == AF_UNSPEC) > + return (0); > + > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + return (EBUSY); > + > + /* commit */ > + sc->sc_af = AF_UNSPEC; > + memset(&sc->sc_src, 0, sizeof(sc->sc_src)); > + memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); > + sc->sc_port = htons(0); > + sc->sc_mode = VXLAN_TMODE_UNSET; > + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); > + > + return (0); > +} > + > +static int > +vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + uint32_t vni; > + > + if (ifr->ifr_vnetid < VXLAN_VNI_MIN || > + ifr->ifr_vnetid > VXLAN_VNI_MAX) > + return (EINVAL); > + > + vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT); > + if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) && > + sc->sc_header.vxlan_id == vni) > + return (0); > + > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + return (EBUSY); > + > + /* commit */ > + SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)); > + sc->sc_header.vxlan_id = vni; > + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); > + > + return (0); > +} > + > +static int > +vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr) > +{ > + uint32_t vni; > + > + if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I))) > + return (EADDRNOTAVAIL); > + > + vni = ntohl(sc->sc_header.vxlan_id); > + vni &= VXLAN_VNI_MASK; > + vni >>= VXLAN_VNI_SHIFT; > + > + ifr->ifr_vnetid = vni; > + > + return (0); > +} > + > +static int > +vxlan_del_vnetid(struct vxlan_softc *sc) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + > + if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I))) > + return (0); > + > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + return (EBUSY); > + > + /* commit */ > + CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)); > + sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT); > + etherbridge_flush(&sc->sc_eb, 
IFBF_FLUSHALL); > + > + return (0); > +} > + > +static int > +vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + struct ifnet *ifp0; > + int error = 0; > + > + ifp0 = if_unit(p->ifp_parent); > + if (ifp0 == NULL) > + return (ENXIO); > + > + if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { > + error = ENXIO; > + goto put; > } > > -#if NBRIDGE > 0 > - if (brtag != NULL) > - bridge_tunneluntag(m); > -#endif > + if (sc->sc_if_index0 == ifp0->if_index) > + goto put; > > - m->m_pkthdr.ph_rtableid = sc->sc_rdomain; > + if (ISSET(ifp->if_flags, IFF_RUNNING)) { > + error = EBUSY; > + goto put; > + } > > -#if NPF > 0 > - pf_pkt_addr_changed(m); > + /* commit */ > + sc->sc_if_index0 = ifp0->if_index; > + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); > + > +put: > + if_put(ifp0); > + return (error); > +} > + > +static int > +vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p) > +{ > + struct ifnet *ifp0; > + int error = 0; > + > + ifp0 = if_get(sc->sc_if_index0); > + if (ifp0 == NULL) > + error = EADDRNOTAVAIL; > + else > + strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent)); > + if_put(ifp0); > + > + return (error); > +} > + > +static int > +vxlan_del_parent(struct vxlan_softc *sc) > +{ > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + > + if (sc->sc_if_index0 == 0) > + return (0); > + > + if (ISSET(ifp->if_flags, IFF_RUNNING)) > + return (EBUSY); > + > + /* commit */ > + sc->sc_if_index0 = 0; > + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); > + > + return (0); > +} > + > +static int > +vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) > +{ > + struct sockaddr_in *sin; > +#ifdef INET6 > + struct sockaddr_in6 *sin6; > + struct sockaddr_in6 src6 = { > + .sin6_len = sizeof(src6), > + .sin6_family = AF_UNSPEC, > + }; > + int error; > #endif > + union vxlan_addr endpoint; > + unsigned int type; > + > + switch (sc->sc_mode) { > + case VXLAN_TMODE_UNSET: > + return (ENOPROTOOPT); > + 
case VXLAN_TMODE_P2P: > + return (EPROTONOSUPPORT); > + default: > + break; > + } > + > + /* ignore ifba_ifsname */ > + > + if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK)) > + return (EINVAL); > + switch (ifba->ifba_flags & IFBAF_TYPEMASK) { > + case IFBAF_DYNAMIC: > + type = EBE_DYNAMIC; > + break; > + case IFBAF_STATIC: > + type = EBE_STATIC; > + break; > + default: > + return (EINVAL); > + } > + > + memset(&endpoint, 0, sizeof(endpoint)); > > - switch (af) { > + if (ifba->ifba_dstsa.ss_family != sc->sc_af) > + return (EAFNOSUPPORT); > + switch (ifba->ifba_dstsa.ss_family) { > case AF_INET: > - error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, > - &sc->sc_imo, NULL, 0); > + sin = (struct sockaddr_in *)&ifba->ifba_dstsa; > + if (in_nullhost(sin->sin_addr) || > + IN_MULTICAST(sin->sin_addr.s_addr)) > + return (EADDRNOTAVAIL); > + > + if (sin->sin_port != htons(0)) > + return (EADDRNOTAVAIL); > + > + endpoint.in4 = sin->sin_addr; > break; > + > #ifdef INET6 > case AF_INET6: > - error = ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); > + sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa; > + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || > + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) > + return (EADDRNOTAVAIL); > + > + in6_recoverscope(&src6, &sc->sc_src.in6); > + if (src6.sin6_scope_id != sin6->sin6_scope_id) > + return (EADDRNOTAVAIL); > + > + if (sin6->sin6_port != htons(0)) > + return (EADDRNOTAVAIL); > + > + error = in6_embedscope(&endpoint.in6, sin6, NULL); > + if (error != 0) > + return (error); > + > break; > -#endif /* INET6 */ > - default: > - m_freem(m); > - error = EAFNOSUPPORT; > +#endif > + default: /* AF_UNSPEC */ > + return (EADDRNOTAVAIL); > } > > - if (error) > - ifp->if_oerrors++; > + return (etherbridge_add_addr(&sc->sc_eb, &endpoint, > + &ifba->ifba_dst, type)); > +} > > - return (error); > +static int > +vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) > +{ > + return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst)); > } > > 
void > -vxlan_addr_change(void *arg) > +vxlan_detach_hook(void *arg) > { > - struct vxlan_softc *sc = arg; > - struct ifnet *ifp = &sc->sc_ac.ac_if; > - int error; > - > - /* > - * Reset the configuration after resume or any possible address > - * configuration changes. > - */ > - if ((error = vxlan_config(ifp, NULL, NULL))) { > - /* > - * The source address of the tunnel can temporarily disappear, > - * after a link state change when running the DHCP client, > - * so keep it configured. > - */ > + struct vxlan_softc *sc = arg; > + struct ifnet *ifp = &sc->sc_ac.ac_if; > + > + if (ISSET(ifp->if_flags, IFF_RUNNING)) { > + vxlan_down(sc); > + CLR(ifp->if_flags, IFF_UP); > } > + > + sc->sc_if_index0 = 0; > } > > -void > -vxlan_if_change(void *arg) > +static int > +vxlan_eb_port_eq(void *arg, void *a, void *b) > { > - struct vxlan_softc *sc = arg; > - struct ifnet *ifp = &sc->sc_ac.ac_if; > + const union vxlan_addr *va = a, *vb = b; > + size_t i; > > - /* > - * Reset the configuration after the parent interface disappeared. > - */ > - vxlan_multicast_cleanup(ifp); > - memset(&sc->sc_src, 0, sizeof(sc->sc_src)); > - memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); > - sc->sc_dstport = htons(VXLAN_PORT); > + for (i = 0; i < nitems(va->in6.s6_addr32); i++) { > + if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i]) > + return (0); > + } > + > + return (1); > } > > -void > -vxlan_link_change(void *arg) > +static void * > +vxlan_eb_port_take(void *arg, void *port) > { > - struct vxlan_softc *sc = arg; > - struct ifnet *ifp = &sc->sc_ac.ac_if; > + union vxlan_addr *endpoint; > > - /* > - * The machine might have lost its multicast associations after > - * link state changes. This fixes a problem with VMware after > - * suspend/resume of the host or guest. 
> - */ > - (void)vxlan_config(ifp, NULL, NULL); > + endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT); > + if (endpoint == NULL) > + return (NULL); > + > + *endpoint = *(union vxlan_addr *)port; > + > + return (endpoint); > } > + > +static void > +vxlan_eb_port_rele(void *arg, void *port) > +{ > + union vxlan_addr *endpoint = port; > + > + pool_put(&vxlan_endpoint_pool, endpoint); > +} > + > +static size_t > +vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port) > +{ > + struct vxlan_softc *sc = arg; > + > + return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len)); > +} > + > +static void > +vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port) > +{ > + struct vxlan_softc *sc = arg; > + union vxlan_addr *endpoint = port; > + > + switch (sc->sc_af) { > + case AF_INET: { > + struct sockaddr_in *sin = (struct sockaddr_in *)ss; > + > + sin->sin_len = sizeof(*sin); > + sin->sin_family = AF_INET; > + sin->sin_addr = endpoint->in4; > + break; > + } > +#ifdef INET6 > + case AF_INET6: { > + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; > + > + sin6->sin6_len = sizeof(*sin6); > + sin6->sin6_family = AF_INET6; > + in6_recoverscope(sin6, &endpoint->in6); > + break; > + } > +#endif /* INET6 */ > + default: > + unhandled_af(sc->sc_af); > + } > +} > + > +static inline int > +vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp) > +{ > + size_t i; > + > + if (ap->p_header.vxlan_id > bp->p_header.vxlan_id) > + return (1); > + if (ap->p_header.vxlan_id < bp->p_header.vxlan_id) > + return (-1); > + if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags) > + return (1); > + if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags) > + return (-1); > + > + for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) { > + if (ap->p_addr.in6.s6_addr32[i] > > + bp->p_addr.in6.s6_addr32[i]) > + return (1); > + if (ap->p_addr.in6.s6_addr32[i] < > + bp->p_addr.in6.s6_addr32[i]) > + return (-1); > + } > + > + return (0); > +} > + > 
+RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); > Index: netinet/udp_usrreq.c > =================================================================== > RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v > retrieving revision 1.268 > diff -u -p -r1.268 udp_usrreq.c > --- netinet/udp_usrreq.c 4 Jan 2022 06:32:40 -0000 1.268 > +++ netinet/udp_usrreq.c 11 Feb 2022 05:11:13 -0000 > @@ -112,11 +112,6 @@ > #include <net/pipex.h> > #endif > > -#include "vxlan.h" > -#if NVXLAN > 0 > -#include <net/if_vxlan.h> > -#endif > - > /* > * UDP protocol implementation. > * Per RFC 768, August, 1980. > @@ -345,15 +340,6 @@ udp_input(struct mbuf **mp, int *offp, i > break; > #endif /* INET6 */ > } > - > -#if NVXLAN > 0 > - if (vxlan_enable > 0 && > -#if NPF > 0 > - !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) && > -#endif > - vxlan_lookup(m, uh, iphlen, &srcsa.sa, &dstsa.sa) != 0) > - return IPPROTO_DONE; > -#endif > > if (m->m_flags & (M_BCAST|M_MCAST)) { > struct inpcb *last; > Index: conf/files > =================================================================== > RCS file: /cvs/src/sys/conf/files,v > retrieving revision 1.709 > diff -u -p -r1.709 files > --- conf/files 8 Feb 2022 17:25:11 -0000 1.709 > +++ conf/files 11 Feb 2022 05:11:13 -0000 > @@ -573,7 +573,7 @@ pseudo-device mpip: ifnet, mpls > pseudo-device bpe: ifnet, ether, ifmedia, etherbridge > pseudo-device vether: ifnet, ether > pseudo-device pppx: ifnet > -pseudo-device vxlan: ifnet, ether, ifmedia > +pseudo-device vxlan: ifnet, ether, etherbridge > pseudo-device wg: ifnet > > pseudo-device ksyms