From: Magnus Karlsson <magnus.karls...@intel.com> Here, the bind syscall is added. Binding an AF_XDP socket, means associating the socket to an umem, a netdev and a queue index. This can be done in two ways.
The first way, creating a "socket from scratch". Create the umem using the XDP_UMEM_REG setsockopt and an associated fill queue with XDP_UMEM_FILL_QUEUE. Create the Rx queue using the XDP_RX_QUEUE setsockopt. Call bind passing ifindex and queue index ("channel" in ethtool speak). The second way to bind a socket, is simply skipping the umem/netdev/queue index, and passing another already setup AF_XDP socket. The new socket will then have the same umem/netdev/queue index as the parent so it will share the same umem. You must also set the flags field in the socket address to XDP_SHARED_UMEM. Signed-off-by: Magnus Karlsson <magnus.karls...@intel.com> --- include/uapi/linux/if_xdp.h | 11 ++++ net/xdp/xdp_umem.c | 9 ++++ net/xdp/xdp_umem.h | 2 + net/xdp/xsk.c | 125 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 146 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 118456064e01..e3593df7323c 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -20,6 +20,17 @@ #include <linux/types.h> +/* Options for the sxdp_flags field */ +#define XDP_SHARED_UMEM 1 + +struct sockaddr_xdp { + __u16 sxdp_family; + __u32 sxdp_ifindex; + __u32 sxdp_queue_id; + __u32 sxdp_shared_umem_fd; + __u16 sxdp_flags; +}; + /* XDP socket options */ #define XDP_RX_QUEUE 1 #define XDP_UMEM_REG 3 diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index f2c0768ca63e..96395beb980e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -97,6 +97,11 @@ static void xdp_umem_release(struct xdp_umem *umem) kfree(umem); } +void xdp_get_umem(struct xdp_umem *umem) +{ + atomic_inc(&umem->users); +} + void xdp_put_umem(struct xdp_umem *umem) { if (!umem) @@ -243,3 +248,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return err; } +bool xdp_umem_validate_queues(struct xdp_umem *umem) +{ + return umem->fq; +} diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index b3538dd2118c..ad041b911b38 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -36,7 +36,9 @@ struct xdp_umem { struct user_struct *user; }; +bool xdp_umem_validate_queues(struct xdp_umem *umem); int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr); +void xdp_get_umem(struct xdp_umem *umem); void xdp_put_umem(struct xdp_umem *umem); int xdp_umem_create(struct xdp_umem **umem); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f82f750cadc2..d99a1b830f94 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -43,6 +43,8 @@ struct xdp_sock { /* Protects multiple processes in the control path */ struct mutex mutex; struct xdp_umem *umem; + u32 ifindex; + u16 queue_id; }; static struct xdp_sock *xdp_sk(struct sock *sk) @@ -66,9 +68,18 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, return 0; } +static void __xsk_release(struct xdp_sock *xs) +{ + /* Wait for driver to stop using the xdp socket. */ + synchronize_net(); + + dev_put(xs->dev); +} + static int xsk_release(struct socket *sock) { struct sock *sk = sock->sk; + struct xdp_sock *xs = xdp_sk(sk); struct net *net; if (!sk) @@ -80,6 +91,11 @@ static int xsk_release(struct socket *sock) sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); + if (xs->dev) { + __xsk_release(xs); + xs->dev = NULL; + } + sock_orphan(sk); sock->sk = NULL; @@ -89,6 +105,113 @@ static int xsk_release(struct socket *sock) return 0; } +static struct socket *xsk_lookup_xsk_from_fd(int fd, int *err) +{ + struct socket *sock; + + *err = -ENOTSOCK; + sock = sockfd_lookup(fd, err); + if (!sock) + return NULL; + + if (sock->sk->sk_family != PF_XDP) { + *err = -ENOPROTOOPT; + sockfd_put(sock); + return NULL; + } + + *err = 0; + return sock; +} + +static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; + struct sock *sk = sock->sk; + struct net_device *dev, *dev_curr; + struct xdp_sock *xs = xdp_sk(sk); + struct xdp_umem *old_umem = NULL; + int err = 0; + + if (addr_len < sizeof(struct sockaddr_xdp)) + return -EINVAL; + if (sxdp->sxdp_family != AF_XDP) + return -EINVAL; + + mutex_lock(&xs->mutex); + dev_curr = xs->dev; + dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex); + if (!dev) { + err = -ENODEV; + goto out_release; + } + + if (!xs->rx) { + err = -EINVAL; + goto out_unlock; + } + + if (sxdp->sxdp_queue_id >= dev->num_rx_queues) { + err = -EINVAL; + goto out_unlock; + } + + if (sxdp->sxdp_flags & XDP_SHARED_UMEM) { + struct xdp_sock *umem_xs; + struct socket *sock; + + if (xs->umem) { + /* We have already our own. */ + err = -EINVAL; + goto out_unlock; + } + + sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd, &err); + if (!sock) + goto out_unlock; + + umem_xs = xdp_sk(sock->sk); + if (!umem_xs->umem) { + /* No umem to inherit. */ + err = -EBADF; + sockfd_put(sock); + goto out_unlock; + } else if (umem_xs->dev != dev || + umem_xs->queue_id != sxdp->sxdp_queue_id) { + err = -EINVAL; + sockfd_put(sock); + goto out_unlock; + } + + xdp_get_umem(umem_xs->umem); + old_umem = xs->umem; + xs->umem = umem_xs->umem; + sockfd_put(sock); + } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { + err = -EINVAL; + goto out_unlock; + } + + /* Rebind? */ + if (dev_curr && (dev_curr != dev || + xs->queue_id != sxdp->sxdp_queue_id)) { + __xsk_release(xs); + if (old_umem) + xdp_put_umem(old_umem); + } + + xs->dev = dev; + xs->ifindex = sxdp->sxdp_ifindex; + xs->queue_id = sxdp->sxdp_queue_id; + +out_unlock: + if (err) + dev_put(dev); +out_release: + mutex_unlock(&xs->mutex); + return err; +} + static int xsk_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { @@ -212,7 +335,7 @@ static const struct proto_ops xsk_proto_ops = { .family = PF_XDP, .owner = THIS_MODULE, .release = xsk_release, - .bind = sock_no_bind, + .bind = xsk_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, -- 2.14.1