The branch stable/12 has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=992ef0f90deab792aef67c8e66167a7ad48ea905

commit 992ef0f90deab792aef67c8e66167a7ad48ea905
Author:     Hans Petter Selasky <[email protected]>
AuthorDate: 2020-12-29 17:01:57 +0000
Commit:     Hans Petter Selasky <[email protected]>
CommitDate: 2021-01-12 16:34:32 +0000

    MFC ec52ff6d1411 and 747feea146d8:
    Streamline the infiniband code according to the ethernet code.
    
    Specifically implement the if_requestencap callback function for infiniband.
    Most of the changes are simply a cut and paste of the equivalent ethernet part.
    
    Reviewed by:    melifaro @
    Differential Revision:  https://reviews.freebsd.org/D27631
    Sponsored by:   Mellanox Technologies // NVIDIA Networking
---
 sys/net/if_infiniband.c | 297 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 197 insertions(+), 100 deletions(-)

diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c
index 19f7cdf7ffea..b644f91f2cda 100644
--- a/sys/net/if_infiniband.c
+++ b/sys/net/if_infiniband.c
@@ -143,139 +143,236 @@ infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
        mb->m_pkthdr.len += sizeof(*ibh);
 }
 
+static void
+update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
+{
+       int csum_flags = 0;
+
+       if (src->m_pkthdr.csum_flags & CSUM_IP)
+               csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+       if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+               csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+       if (src->m_pkthdr.csum_flags & CSUM_SCTP)
+               csum_flags |= CSUM_SCTP_VALID;
+       dst->m_pkthdr.csum_flags |= csum_flags;
+       if (csum_flags & CSUM_DATA_VALID)
+               dst->m_pkthdr.csum_data = 0xffff;
+}
+
 /*
- * Infiniband output routine.
+ * Handle link-layer encapsulation requests.
  */
 static int
-infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
-    struct route *ro)
+infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req)
 {
-       uint8_t edst[INFINIBAND_ADDR_LEN];
-#if defined(INET) || defined(INET6)
-       struct llentry *lle = NULL;
-#endif
-       struct infiniband_header *ibh;
-       int error = 0;
-       uint16_t type;
-       bool is_gw;
+       struct infiniband_header *ih;
+       struct arphdr *ah;
+       uint16_t etype;
+       const uint8_t *lladdr;
 
-       is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
+       if (req->rtype != IFENCAP_LL)
+               return (EOPNOTSUPP);
 
-#ifdef MAC
-       error = mac_ifnet_check_transmit(ifp, m);
-       if (error)
-               goto bad;
-#endif
+       if (req->bufsize < INFINIBAND_HDR_LEN)
+               return (ENOMEM);
 
-       M_PROFILE(m);
-       if (ifp->if_flags & IFF_MONITOR) {
-               error = ENETDOWN;
-               goto bad;
-       }
-       if (!((ifp->if_flags & IFF_UP) &&
-           (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
-               error = ENETDOWN;
-               goto bad;
-       }
+       ih = (struct infiniband_header *)req->buf;
+       lladdr = req->lladdr;
+       req->lladdr_off = 0;
 
-       switch (dst->sa_family) {
-       case AF_LINK:
-               goto output;
-#ifdef INET
+       switch (req->family) {
        case AF_INET:
-               if (lle != NULL && (lle->la_flags & LLE_VALID)) {
-                       memcpy(edst, lle->ll_addr, sizeof(edst));
-               } else if (m->m_flags & M_MCAST) {
-                       infiniband_ipv4_multicast_map(
-                           ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
-                           ifp->if_broadcastaddr, edst);
-               } else {
-                       error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
-                       if (error) {
-                               if (error == EWOULDBLOCK)
-                                       error = 0;
-                               m = NULL; /* mbuf is consumed by resolver */
-                               goto bad;
-                       }
-               }
-               type = htons(ETHERTYPE_IP);
+               etype = htons(ETHERTYPE_IP);
                break;
-       case AF_ARP: {
-               struct arphdr *ah;
-
-               if (m->m_len < sizeof(*ah)) {
-                       error = EINVAL;
-                       goto bad;
-               }
-
-               ah = mtod(m, struct arphdr *);
-
-               if (m->m_len < arphdr_len(ah)) {
-                       error = EINVAL;
-                       goto bad;
-               }
+       case AF_INET6:
+               etype = htons(ETHERTYPE_IPV6);
+               break;
+       case AF_ARP:
+               ah = (struct arphdr *)req->hdata;
                ah->ar_hrd = htons(ARPHRD_INFINIBAND);
 
                switch (ntohs(ah->ar_op)) {
                case ARPOP_REVREQUEST:
                case ARPOP_REVREPLY:
-                       type = htons(ETHERTYPE_REVARP);
+                       etype = htons(ETHERTYPE_REVARP);
                        break;
                case ARPOP_REQUEST:
                case ARPOP_REPLY:
                default:
-                       type = htons(ETHERTYPE_ARP);
+                       etype = htons(ETHERTYPE_ARP);
                        break;
                }
 
-               if (m->m_flags & M_BCAST) {
-                       memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+               if (req->flags & IFENCAP_FLAG_BROADCAST)
+                       lladdr = ifp->if_broadcastaddr;
+               break;
+       default:
+               return (EAFNOSUPPORT);
+       }
+
+       ih->ib_protocol = etype;
+       ih->ib_reserved = 0;
+       memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN);
+       req->bufsize = sizeof(struct infiniband_header);
+
+       return (0);
+}
+
+static int
+infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+    const struct sockaddr *dst, struct route *ro, uint8_t *phdr,
+    uint32_t *pflags, struct llentry **plle)
+{
+       struct infiniband_header *ih;
+       uint32_t lleflags = 0;
+       int error = 0;
+
+       if (plle)
+               *plle = NULL;
+       ih = (struct infiniband_header *)phdr;
+
+       switch (dst->sa_family) {
+#ifdef INET
+       case AF_INET:
+               if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
+                       error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle);
                } else {
-                       if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
-                               error = EINVAL;
-                               goto bad;
+                       if (m->m_flags & M_BCAST) {
+                               memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr,
+                                   INFINIBAND_ADDR_LEN);
+                       } else {
+                               infiniband_ipv4_multicast_map(
+                                   ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
+                                   ifp->if_broadcastaddr, ih->ib_hwaddr);
                        }
-                       memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
+                       ih->ib_protocol = htons(ETHERTYPE_IP);
+                       ih->ib_reserved = 0;
                }
                break;
-       }
 #endif
 #ifdef INET6
-       case AF_INET6: {
-               const struct ip6_hdr *ip6;
-
-               ip6 = mtod(m, const struct ip6_hdr *);
-               if (m->m_len < sizeof(*ip6)) {
-                       error = EINVAL;
-                       goto bad;
-               } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
-                       memcpy(edst, lle->ll_addr, sizeof(edst));
-               } else if (m->m_flags & M_MCAST) {
+       case AF_INET6:
+               if ((m->m_flags & M_MCAST) == 0) {
+                       error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle);
+               } else {
                        infiniband_ipv6_multicast_map(
                            &((const struct sockaddr_in6 *)dst)->sin6_addr,
-                           ifp->if_broadcastaddr, edst);
-               } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
-                       memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
-               } else {
-                       error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
-                       if (error) {
-                               if (error == EWOULDBLOCK)
-                                       error = 0;
-                               m = NULL; /* mbuf is consumed by resolver */
-                               goto bad;
-                       }
+                           ifp->if_broadcastaddr, ih->ib_hwaddr);
+                       ih->ib_protocol = htons(ETHERTYPE_IPV6);
+                       ih->ib_reserved = 0;
                }
-               type = htons(ETHERTYPE_IPV6);
                break;
-       }
 #endif
        default:
-               error = EAFNOSUPPORT;
+               if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+               if (m != NULL)
+                       m_freem(m);
+               return (EAFNOSUPPORT);
+       }
+
+       if (error == EHOSTDOWN) {
+               if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
+                       error = EHOSTUNREACH;
+       }
+
+       if (error != 0)
+               return (error);
+
+       *pflags = RT_MAY_LOOP;
+       if (lleflags & LLE_IFADDR)
+               *pflags |= RT_L2_ME;
+
+       return (0);
+}
+
+/*
+ * Infiniband output routine.
+ */
+static int
+infiniband_output(struct ifnet *ifp, struct mbuf *m,
+    const struct sockaddr *dst, struct route *ro)
+{
+       uint8_t linkhdr[INFINIBAND_HDR_LEN];
+       uint8_t *phdr;
+       struct llentry *lle = NULL;
+       struct infiniband_header *ih;
+       int error = 0;
+       int hlen;       /* link layer header length */
+       uint32_t pflags;
+       bool addref;
+
+       addref = false;
+       phdr = NULL;
+       pflags = 0;
+       if (ro != NULL) {
+               /* XXX BPF uses ro_prepend */
+               if (ro->ro_prepend != NULL) {
+                       phdr = ro->ro_prepend;
+                       hlen = ro->ro_plen;
+               } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
+                       if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
+                               lle = ro->ro_lle;
+                               if (lle != NULL &&
+                                   (lle->la_flags & LLE_VALID) == 0) {
+                                       LLE_FREE(lle);
+                                       lle = NULL;     /* redundant */
+                                       ro->ro_lle = NULL;
+                               }
+                               if (lle == NULL) {
+                                       /* if we lookup, keep cache */
+                                       addref = 1;
+                               } else
+                                       /*
+                                        * Notify LLE code that
+                                        * the entry was used
+                                        * by datapath.
+                                        */
+                                       llentry_mark_used(lle);
+                       }
+                       if (lle != NULL) {
+                               phdr = lle->r_linkdata;
+                               hlen = lle->r_hdrlen;
+                               pflags = lle->r_flags;
+                       }
+               }
+       }
+
+#ifdef MAC
+       error = mac_ifnet_check_transmit(ifp, m);
+       if (error)
+               goto bad;
+#endif
+
+       M_PROFILE(m);
+       if (ifp->if_flags & IFF_MONITOR) {
+               error = ENETDOWN;
                goto bad;
        }
+       if (!((ifp->if_flags & IFF_UP) &&
+           (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+               error = ENETDOWN;
+               goto bad;
+       }
+
+       if (phdr == NULL) {
+               /* No prepend data supplied. Try to calculate ourselves. */
+               phdr = linkhdr;
+               hlen = INFINIBAND_HDR_LEN;
+               error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
+                   addref ? &lle : NULL);
+               if (addref && lle != NULL)
+                       ro->ro_lle = lle;
+               if (error != 0)
+                       return (error == EWOULDBLOCK ? 0 : error);
+       }
+
+       if ((pflags & RT_L2_ME) != 0) {
+               update_mbuf_csumflags(m, m);
+               return (if_simloop(ifp, m, dst->sa_family, 0));
+       }
 
        /*
-        * Add local net header.  If no space in first mbuf,
+        * Add local infiniband header. If no space in first mbuf,
         * allocate another.
         */
        M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
@@ -283,16 +380,15 @@ infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
                error = ENOBUFS;
                goto bad;
        }
-       ibh = mtod(m, struct infiniband_header *);
-
-       ibh->ib_protocol = type;
-       memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
+       if ((pflags & RT_HAS_HEADER) == 0) {
+               ih = mtod(m, struct infiniband_header *);
+               memcpy(ih, phdr, hlen);
+       }
 
        /*
         * Queue message on interface, update output statistics if
         * successful, and start output if interface not yet active.
         */
-output:
        return (ifp->if_transmit(ifp, m));
 bad:
        if (m != NULL)
@@ -482,6 +578,7 @@ infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
        ifp->if_output = infiniband_output;
        ifp->if_input = infiniband_input;
        ifp->if_resolvemulti = infiniband_resolvemulti;
+       ifp->if_requestencap = infiniband_requestencap;
 
        if (ifp->if_baudrate == 0)
                ifp->if_baudrate = IF_Gbps(10); /* default value */
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to