The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=051e7d78b03944d5910d4f7ad2f1fd6f2cfac382

commit 051e7d78b03944d5910d4f7ad2f1fd6f2cfac382
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2021-10-17 15:00:34 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2022-05-24 20:59:32 +0000

    Kernel-side infrastructure to implement nvlist-based set/get ifcaps
    
    Reviewed by:    hselasky, jhb, kp (previous version)
    Sponsored by:   NVIDIA Networking
    MFC after:      3 weeks
    Differential revision:  https://reviews.freebsd.org/D32551
---
 sys/net/if.c     | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 sys/net/if.h     |  59 +++++++++++++++++-
 sys/sys/sockio.h |   3 +
 3 files changed, 236 insertions(+), 4 deletions(-)

diff --git a/sys/net/if.c b/sys/net/if.c
index bc0240035ea3..c50cc2d291e2 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -58,6 +58,7 @@
 #include <sys/lock.h>
 #include <sys/refcount.h>
 #include <sys/module.h>
+#include <sys/nv.h>
 #include <sys/rwlock.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
@@ -2391,6 +2392,88 @@ ifr_data_get_ptr(void *ifrp)
                return (ifrup->ifr.ifr_ifru.ifru_data);
 }
 
+struct ifcap_nv_bit_name {
+       int cap_bit;
+       const char *cap_name;
+};
+#define CAPNV(x) {.cap_bit = IFCAP_##x, \
+    .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
+const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
+       CAPNV(RXCSUM),
+       CAPNV(TXCSUM),
+       CAPNV(NETCONS),
+       CAPNV(VLAN_MTU),
+       CAPNV(VLAN_HWTAGGING),
+       CAPNV(JUMBO_MTU),
+       CAPNV(POLLING),
+       CAPNV(VLAN_HWCSUM),
+       CAPNV(TSO4),
+       CAPNV(TSO6),
+       CAPNV(LRO),
+       CAPNV(WOL_UCAST),
+       CAPNV(WOL_MCAST),
+       CAPNV(WOL_MAGIC),
+       CAPNV(TOE4),
+       CAPNV(TOE6),
+       CAPNV(VLAN_HWFILTER),
+       CAPNV(VLAN_HWTSO),
+       CAPNV(LINKSTATE),
+       CAPNV(NETMAP),
+       CAPNV(RXCSUM_IPV6),
+       CAPNV(TXCSUM_IPV6),
+       CAPNV(HWSTATS),
+       CAPNV(TXRTLMT),
+       CAPNV(HWRXTSTMP),
+       CAPNV(MEXTPG),
+       CAPNV(TXTLS4),
+       CAPNV(TXTLS6),
+       CAPNV(VXLAN_HWCSUM),
+       CAPNV(VXLAN_HWTSO),
+       CAPNV(TXTLS_RTLMT),
+       {0, NULL}
+};
+#define CAP2NV(x) {.cap_bit = IFCAP2_##x, \
+    .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) }
+const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
+       CAP2NV(RXTLS4),
+       CAP2NV(RXTLS6),
+       {0, NULL}
+};
+#undef CAPNV
+#undef CAP2NV
+
+int
+if_capnv_to_capint(const nvlist_t *nv, int *old_cap,
+    const struct ifcap_nv_bit_name *nn, bool all)
+{
+       int i, res;
+
+       res = 0;
+       for (i = 0; nn[i].cap_name != NULL; i++) {
+               if (nvlist_exists_bool(nv, nn[i].cap_name)) {
+                       if (all || nvlist_get_bool(nv, nn[i].cap_name))
+                               res |= nn[i].cap_bit;
+               } else {
+                       res |= *old_cap & nn[i].cap_bit;
+               }
+       }
+       return (res);
+}
+
+void
+if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn,
+    int ifr_cap, int ifr_req)
+{
+       int i;
+
+       for (i = 0; nn[i].cap_name != NULL; i++) {
+               if ((nn[i].cap_bit & ifr_cap) != 0) {
+                       nvlist_add_bool(nv, nn[i].cap_name,
+                           (nn[i].cap_bit & ifr_req) != 0);
+               }
+       }
+}
+
 /*
  * Hardware specific interface ioctls.
  */
@@ -2401,12 +2484,15 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
        int error = 0, do_ifup = 0;
        int new_flags, temp_flags;
        size_t namelen, onamelen;
-       size_t descrlen;
+       size_t descrlen, nvbuflen;
        char *descrbuf, *odescrbuf;
        char new_name[IFNAMSIZ];
        char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8];
        struct ifaddr *ifa;
        struct sockaddr_dl *sdl;
+       void *buf;
+       nvlist_t *nvcap;
+       struct siocsifcapnv_driver_data drv_ioctl_data;
 
        ifr = (struct ifreq *)data;
        switch (cmd) {
@@ -2425,6 +2511,47 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
                ifr->ifr_curcap = ifp->if_capenable;
                break;
 
+       case SIOCGIFCAPNV:
+               if ((ifp->if_capabilities & IFCAP_NV) == 0) {
+                       error = EINVAL;
+                       break;
+               }
+               buf = NULL;
+               nvcap = nvlist_create(0);
+               for (;;) {
+                       if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
+                           ifp->if_capabilities, ifp->if_capenable);
+                       if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
+                           ifp->if_capabilities2, ifp->if_capenable2);
+                       error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
+                           __DECONST(caddr_t, nvcap));
+                       if (error != 0) {
+                               if_printf(ifp,
+                           "SIOCGIFCAPNV driver mistake: nvlist error %d\n",
+                                   error);
+                               break;
+                       }
+                       buf = nvlist_pack(nvcap, &nvbuflen);
+                       if (buf == NULL) {
+                               error = nvlist_error(nvcap);
+                               if (error == 0)
+                                       error = EDOOFUS;
+                               break;
+                       }
+                       if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
+                               ifr->ifr_cap_nv.length = nvbuflen;
+                               ifr->ifr_cap_nv.buffer = NULL;
+                               error = EFBIG;
+                               break;
+                       }
+                       ifr->ifr_cap_nv.length = nvbuflen;
+                       error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
+                       break;
+               }
+               free(buf, M_NVLIST);
+               nvlist_destroy(nvcap);
+               break;
+
        case SIOCGIFDATA:
        {
                struct if_data ifd;
@@ -2563,7 +2690,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 
        case SIOCSIFCAP:
                error = priv_check(td, PRIV_NET_SETIFCAP);
-               if (error)
+               if (error != 0)
                        return (error);
                if (ifp->if_ioctl == NULL)
                        return (EOPNOTSUPP);
@@ -2574,6 +2701,53 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
                        getmicrotime(&ifp->if_lastchange);
                break;
 
+       case SIOCSIFCAPNV:
+               error = priv_check(td, PRIV_NET_SETIFCAP);
+               if (error != 0)
+                       return (error);
+               if (ifp->if_ioctl == NULL)
+                       return (EOPNOTSUPP);
+               if ((ifp->if_capabilities & IFCAP_NV) == 0)
+                       return (EINVAL);
+               if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
+                       return (EINVAL);
+               nvcap = NULL;
+               buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
+               for (;;) {
+                       error = copyin(ifr->ifr_cap_nv.buffer, buf,
+                           ifr->ifr_cap_nv.length);
+                       if (error != 0)
+                               break;
+                       nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
+                       if (nvcap == NULL) {
+                               error = EINVAL;
+                               break;
+                       }
+                       drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
+                           &ifp->if_capenable, ifcap_nv_bit_names, false);
+                       if ((drv_ioctl_data.reqcap &
+                           ~ifp->if_capabilities) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
+                           &ifp->if_capenable2, ifcap2_nv_bit_names, false);
+                       if ((drv_ioctl_data.reqcap2 &
+                           ~ifp->if_capabilities2) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       drv_ioctl_data.nvcap = nvcap;
+                       error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
+                           (caddr_t)&drv_ioctl_data);
+                       break;
+               }
+               nvlist_destroy(nvcap);
+               free(buf, M_TEMP);
+               if (error == 0)
+                       getmicrotime(&ifp->if_lastchange);
+               break;
+
 #ifdef MAC
        case SIOCSIFMAC:
                error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
diff --git a/sys/net/if.h b/sys/net/if.h
index 782e792cf87c..4bf29193e7ce 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -236,7 +236,7 @@ struct if_data {
 #define        IFCAP_TOE4              0x04000 /* interface can offload TCP */
 #define        IFCAP_TOE6              0x08000 /* interface can offload TCP6 */
 #define        IFCAP_VLAN_HWFILTER     0x10000 /* interface hw can filter vlan tag */
-/*     available               0x20000 */
+#define        IFCAP_NV                0x20000 /* can do SIOCGIFCAPNV/SIOCSIFCAPNV */
 #define        IFCAP_VLAN_HWTSO        0x40000 /* can do IFCAP_TSO on VLANs */
 #define        IFCAP_LINKSTATE         0x80000 /* the runtime link state is dynamic */
 #define        IFCAP_NETMAP            0x100000 /* netmap mode supported/enabled */
@@ -260,7 +260,40 @@ struct if_data {
 #define        IFCAP_TOE       (IFCAP_TOE4 | IFCAP_TOE6)
 #define        IFCAP_TXTLS     (IFCAP_TXTLS4 | IFCAP_TXTLS6)
 
-#define        IFCAP_CANTCHANGE        (IFCAP_NETMAP)
+#define        IFCAP_CANTCHANGE        (IFCAP_NETMAP | IFCAP_NV)
+#define        IFCAP_ALLCAPS           0xffffffff
+
+#define        IFCAP_RXCSUM_NAME       "RXCSUM"
+#define        IFCAP_TXCSUM_NAME       "TXCSUM"
+#define        IFCAP_NETCONS_NAME      "NETCONS"
+#define        IFCAP_VLAN_MTU_NAME     "VLAN_MTU"
+#define        IFCAP_VLAN_HWTAGGING_NAME "VLAN_HWTAGGING"
+#define        IFCAP_JUMBO_MTU_NAME    "JUMBO_MTU"
+#define        IFCAP_POLLING_NAME      "POLLING"
+#define        IFCAP_VLAN_HWCSUM_NAME  "VLAN_HWCSUM"
+#define        IFCAP_TSO4_NAME         "TSO4"
+#define        IFCAP_TSO6_NAME         "TSO6"
+#define        IFCAP_LRO_NAME          "LRO"
+#define        IFCAP_WOL_UCAST_NAME    "WOL_UCAST"
+#define        IFCAP_WOL_MCAST_NAME    "WOL_MCAST"
+#define        IFCAP_WOL_MAGIC_NAME    "WOL_MAGIC"
+#define        IFCAP_TOE4_NAME         "TOE4"
+#define        IFCAP_TOE6_NAME         "TOE6"
+#define        IFCAP_VLAN_HWFILTER_NAME "VLAN_HWFILTER"
+#define        IFCAP_VLAN_HWTSO_NAME   "VLAN_HWTSO"
+#define        IFCAP_LINKSTATE_NAME    "LINKSTATE"
+#define        IFCAP_NETMAP_NAME       "NETMAP"
+#define        IFCAP_RXCSUM_IPV6_NAME  "RXCSUM_IPV6"
+#define        IFCAP_TXCSUM_IPV6_NAME  "TXCSUM_IPV6"
+#define        IFCAP_HWSTATS_NAME      "HWSTATS"
+#define        IFCAP_TXRTLMT_NAME      "TXRTLMT"
+#define        IFCAP_HWRXTSTMP_NAME    "HWRXTSTMP"
+#define        IFCAP_MEXTPG_NAME       "MEXTPG"
+#define        IFCAP_TXTLS4_NAME       "TXTLS4"
+#define        IFCAP_TXTLS6_NAME       "TXTLS6"
+#define        IFCAP_VXLAN_HWCSUM_NAME "VXLAN_HWCSUM"
+#define        IFCAP_VXLAN_HWTSO_NAME  "VXLAN_HWTSO"
+#define        IFCAP_TXTLS_RTLMT_NAME  "TXTLS_RTLMT"
 
 #define        IFQ_MAXLEN      50
 #define        IFNET_SLOWHZ    1               /* granularity is 1 second */
@@ -387,6 +420,15 @@ struct ifreq_buffer {
        void    *buffer;
 };
 
+struct ifreq_nv_req {
+       u_int   buf_length;     /* Total size of buffer,
+                                  u_int for ABI struct ifreq */
+       u_int   length;         /* Length of the filled part */
+       void    *buffer;        /* Buffer itself, containing packed nv */
+};
+
+#define        IFR_CAP_NV_MAXBUFSIZE   (2 * 1024 * 1024)
+
 /*
  * Interface request structure used for socket
  * ioctl's.  All interface ioctl's must have parameter
@@ -411,6 +453,7 @@ struct ifreq {
                int     ifru_cap[2];
                u_int   ifru_fib;
                u_char  ifru_vlan_pcp;
+               struct  ifreq_nv_req ifru_nv;
        } ifr_ifru;
 #define        ifr_addr        ifr_ifru.ifru_addr      /* address */
 #define        ifr_dstaddr     ifr_ifru.ifru_dstaddr   /* other end of p-to-p link */
@@ -434,6 +477,7 @@ struct ifreq {
 #define        ifr_fib         ifr_ifru.ifru_fib       /* interface fib */
 #define        ifr_vlan_pcp    ifr_ifru.ifru_vlan_pcp  /* VLAN priority */
 #define        ifr_lan_pcp     ifr_ifru.ifru_vlan_pcp  /* VLAN priority */
+#define        ifr_cap_nv      ifr_ifru.ifru_nv        /* nv-based cap interface */
 };
 
 #define        _SIZEOF_ADDR_IFREQ(ifr) \
@@ -605,6 +649,17 @@ MALLOC_DECLARE(M_IFMADDR);
 
 extern struct sx ifnet_detach_sxlock;
 
+struct nvlist;
+struct ifcap_nv_bit_name;
+int if_capnv_to_capint(const struct nvlist *nv, int *old_cap,
+    const struct ifcap_nv_bit_name *nn, bool all);
+void if_capint_to_capnv(struct nvlist *nv,
+    const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req);
+struct siocsifcapnv_driver_data {
+       int reqcap;
+       int reqcap2;
+       struct nvlist *nvcap;
+};
 #endif
 
 #ifndef _KERNEL
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
index 93b8af28e171..b9ed4a439995 100644
--- a/sys/sys/sockio.h
+++ b/sys/sys/sockio.h
@@ -147,4 +147,7 @@
 
 #define        SIOCGIFDOWNREASON       _IOWR('i', 154, struct ifdownreason)
 
+#define        SIOCSIFCAPNV    _IOW('i', 155, struct ifreq)    /* set IF features */
+#define        SIOCGIFCAPNV    _IOWR('i', 156, struct ifreq)   /* get IF features */
+
 #endif /* !_SYS_SOCKIO_H_ */

Reply via email to