This commit adds AF_XDP support to libbpf. The main reason for
this is to facilitate writing applications that use AF_XDP by offering
higher-level APIs that hide many of the details of the AF_XDP
uapi. This is in the same vein as libbpf facilitates XDP adoption by
offering easy-to-use higher level interfaces of XDP
functionality. Hopefully this will facilitate adoption of AF_XDP, make
applications using it simpler and smaller, and finally also make it
possible for applications to benefit from optimizations in the AF_XDP
user space access code. Previously, people just copied and pasted the
code from the sample application into their application, which is not
desirable.

The interface is composed of two parts:

* Low-level access interface to the four rings and the packet data
* High-level control plane interface for creating and setting
  up umems and af_xdp sockets.

Signed-off-by: Magnus Karlsson <magnus.karls...@intel.com>
---
 tools/include/uapi/linux/if_xdp.h |  78 ++++++
 tools/lib/bpf/Build               |   2 +-
 tools/lib/bpf/Makefile            |   5 +-
 tools/lib/bpf/README.rst          |  11 +-
 tools/lib/bpf/libbpf.h            |  93 +++++++
 tools/lib/bpf/libbpf.map          |  12 +
 tools/lib/bpf/xsk.c               | 506 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 704 insertions(+), 3 deletions(-)
 create mode 100644 tools/include/uapi/linux/if_xdp.h
 create mode 100644 tools/lib/bpf/xsk.c

diff --git a/tools/include/uapi/linux/if_xdp.h 
b/tools/include/uapi/linux/if_xdp.h
new file mode 100644
index 0000000..caed8b1
--- /dev/null
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * Author(s): Björn Töpel <bjorn.to...@intel.com>
+ *           Magnus Karlsson <magnus.karls...@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM        (1 << 0)
+#define XDP_COPY       (1 << 1) /* Force copy-mode */
+#define XDP_ZEROCOPY   (1 << 2) /* Force zero-copy mode */
+
+struct sockaddr_xdp {
+       __u16 sxdp_family;
+       __u16 sxdp_flags;
+       __u32 sxdp_ifindex;
+       __u32 sxdp_queue_id;
+       __u32 sxdp_shared_umem_fd;
+};
+
+struct xdp_ring_offset {
+       __u64 producer;
+       __u64 consumer;
+       __u64 desc;
+};
+
+struct xdp_mmap_offsets {
+       struct xdp_ring_offset rx;
+       struct xdp_ring_offset tx;
+       struct xdp_ring_offset fr; /* Fill */
+       struct xdp_ring_offset cr; /* Completion */
+};
+
+/* XDP socket options */
+#define XDP_MMAP_OFFSETS               1
+#define XDP_RX_RING                    2
+#define XDP_TX_RING                    3
+#define XDP_UMEM_REG                   4
+#define XDP_UMEM_FILL_RING             5
+#define XDP_UMEM_COMPLETION_RING       6
+#define XDP_STATISTICS                 7
+
+struct xdp_umem_reg {
+       __u64 addr; /* Start of packet data area */
+       __u64 len; /* Length of packet data area */
+       __u32 chunk_size;
+       __u32 headroom;
+};
+
+struct xdp_statistics {
+       __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+       __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+       __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING                        0
+#define XDP_PGOFF_TX_RING               0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING       0x100000000ULL
+#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+
+/* Rx/Tx descriptor */
+struct xdp_desc {
+       __u64 addr;
+       __u32 len;
+       __u32 options;
+};
+
+/* UMEM descriptor is __u64 */
+
+#endif /* _LINUX_IF_XDP_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 197b40f..91780e8 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o 
bpf_prog_linfo.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o 
bpf_prog_linfo.o xsk.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 34d9c36..ddaa147 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -179,6 +179,9 @@ $(BPF_IN): force elfdep bpfdep
        @(test -f ../../include/uapi/linux/if_link.h -a -f 
../../../include/uapi/linux/if_link.h && ( \
        (diff -B ../../include/uapi/linux/if_link.h 
../../../include/uapi/linux/if_link.h >/dev/null) || \
        echo "Warning: Kernel ABI header at 
'tools/include/uapi/linux/if_link.h' differs from latest version at 
'include/uapi/linux/if_link.h'" >&2 )) || true
+       @(test -f ../../include/uapi/linux/if_xdp.h -a -f 
../../../include/uapi/linux/if_xdp.h && ( \
+       (diff -B ../../include/uapi/linux/if_xdp.h 
../../../include/uapi/linux/if_xdp.h >/dev/null) || \
+       echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' 
differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
        $(Q)$(MAKE) $(build)=libbpf
 
 $(OUTPUT)libbpf.so: $(BPF_IN)
@@ -189,7 +192,7 @@ $(OUTPUT)libbpf.a: $(BPF_IN)
        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
 $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
-       $(QUIET_LINK)$(CXX) $^ -lelf -o $@
+       $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@
 
 check: check_abi
 
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
index 056f383..5a4d644 100644
--- a/tools/lib/bpf/README.rst
+++ b/tools/lib/bpf/README.rst
@@ -9,7 +9,7 @@ described here. It's recommended to follow these conventions 
whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``.
 
 System call wrappers
 --------------------
@@ -62,6 +62,15 @@ Auxiliary functions and types that don't fit well in any of 
categories
 described above should have ``libbpf_`` prefix, e.g.
 ``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
 
+AF_XDP functions
+-------------------
+
+AF_XDP functions should have ``xsk_`` prefix, e.g. ``xsk_get_data``
+or ``xsk_create_umem``. The interface consists of both low-level ring
+access functions and high-level configuration functions. These can be
+mixed and matched. Note that these functions are not reentrant for
+performance reasons.
+
 libbpf ABI
 ==========
 
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5f68d7b..7f30af1 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -15,6 +15,7 @@
 #include <stdbool.h>
 #include <sys/types.h>  // for size_t
 #include <linux/bpf.h>
+#include <linux/if_xdp.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -355,6 +356,98 @@ LIBBPF_API const struct bpf_line_info *
 bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
                      __u32 insn_off, __u32 nr_skip);
 
+/* Do not access these members directly. Use the functions below. */
+struct xsk_prod_ring {
+       __u32 cached_prod;
+       __u32 cached_cons;
+       __u32 mask;
+       __u32 size;
+       __u32 *producer;
+       __u32 *consumer;
+       void *ring;
+};
+
+/* Do not access these members directly. Use the functions below. */
+struct xsk_cons_ring {
+       __u32 cached_prod;
+       __u32 cached_cons;
+       __u32 mask;
+       __u32 size;
+       __u32 *producer;
+       __u32 *consumer;
+       void *ring;
+};
+
+static inline __u64 *xsk_get_fill_desc(struct xsk_prod_ring *fill,
+                                      __u64 idx)
+{
+       __u64 *descs = (__u64 *)fill->ring;
+
+       return &descs[idx & fill->mask];
+}
+
+static inline __u64 *xsk_get_completion_desc(struct xsk_cons_ring *comp,
+                                            __u64 idx)
+{
+       __u64 *descs = (__u64 *)comp->ring;
+
+       return &descs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_get_tx_desc(struct xsk_prod_ring *tx,
+                                              __u64 idx)
+{
+       struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+       return &descs[idx & tx->mask];
+}
+
+static inline struct xdp_desc *xsk_get_rx_desc(struct xsk_cons_ring *rx,
+                                              __u64 idx)
+{
+       struct xdp_desc *descs = (struct xdp_desc *)rx->ring;
+
+       return &descs[idx & rx->mask];
+}
+
+LIBBPF_API size_t xsk_peek_cons(struct xsk_cons_ring *ring, size_t nb,
+                               __u32 *idx);
+LIBBPF_API void xsk_release_cons(struct xsk_cons_ring *ring);
+LIBBPF_API size_t xsk_reserve_prod(struct xsk_prod_ring *ring, size_t nb,
+                                  __u32 *idx);
+LIBBPF_API void xsk_submit_prod(struct xsk_prod_ring *ring);
+
+LIBBPF_API void *xsk_get_data(void *umem_area, __u64 addr);
+
+#define XSK_DEFAULT_NUM_DESCS      2048
+#define XSK_DEFAULT_FRAME_SHIFT    11 /* 2048 bytes */
+#define XSK_DEFAULT_FRAME_SIZE     (1 << XSK_DEFAULT_FRAME_SHIFT)
+#define XSK_DEFAULT_FRAME_HEADROOM 0
+
+struct xsk_umem_config {
+       __u32 fq_size;
+       __u32 cq_size;
+       __u32 frame_size;
+       __u32 frame_headroom;
+};
+
+struct xsk_xdp_socket_config {
+       __u32 rx_size;
+       __u32 tx_size;
+};
+
+/* Set config to XSK_DEFAULT_CONFIG to get the default configuration. */
+LIBBPF_API int xsk_create_umem(void *umem_area, __u64 size,
+                              struct xsk_prod_ring *fq,
+                              struct xsk_cons_ring *cq,
+                              struct xsk_umem_config *config);
+LIBBPF_API int xsk_create_xdp_socket(int umem_fd, struct xsk_cons_ring *rx,
+                                    struct xsk_prod_ring *tx,
+                                    struct xsk_xdp_socket_config *config);
+/* Returns 0 for success. */
+LIBBPF_API int xsk_delete_umem(int fd);
+LIBBPF_API int xsk_delete_xdp_socket(int fd);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index cd02cd4..0dc79c8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -124,3 +124,15 @@ LIBBPF_0.0.1 {
        local:
                *;
 };
+LIBBPF_0.0.2 {
+       global:
+               xsk_peek_cons;
+               xsk_release_cons;
+               xsk_reserve_prod;
+               xsk_submit_prod;
+               xsk_get_data;
+               xsk_create_umem;
+               xsk_create_xdp_socket;
+               xsk_delete_umem;
+               xsk_delete_xdp_socket;
+} LIBBPF_0.0.1;
\ No newline at end of file
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
new file mode 100644
index 0000000..a9c05d4
--- /dev/null
+++ b/tools/lib/bpf/xsk.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/if_xdp.h>
+#include <linux/list.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "libbpf.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+#define MMAP2_SYSCALL 192
+
+/* This has to be a power of 2 for performance reasons. */
+#define HASH_TABLE_ENTRIES 128
+
+struct xsk_umem_info {
+       struct xsk_prod_ring *fq;
+       struct xsk_cons_ring *cq;
+       char *umem_area;
+       struct list_head list;
+       struct xsk_umem_config config;
+       int fd;
+       int refcount;
+};
+
+struct xsk_xdp_socket_info {
+       struct xsk_cons_ring *rx;
+       struct xsk_prod_ring *tx;
+       __u64 outstanding_tx;
+       struct list_head list;
+       struct xsk_umem_info *umem;
+       struct xsk_xdp_socket_config config;
+       int fd;
+};
+
+static struct xsk_xdp_socket_info *xsk_hash_table[HASH_TABLE_ENTRIES];
+static struct xsk_umem_info *umem_hash_table[HASH_TABLE_ENTRIES];
+
+/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit.
+ * Unfortunately, it is not part of glibc.
+ */
+static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags,
+                            int fd, __u64 offset)
+{
+#ifdef __NR_mmap2
+       unsigned int page_shift = __builtin_ffs(getpagesize()) - 1;
+       long ret = syscall(MMAP2_SYSCALL, addr, length, prot, flags, fd,
+                          (off_t)(offset >> page_shift));
+
+       return (void *)ret;
+#else
+       return mmap(addr, length, prot, flags, fd, offset);
+#endif
+}
+
+static __u32 xsk_prod_nb_free(struct xsk_prod_ring *r, __u32 nb)
+{
+       __u32 free_entries = r->cached_cons - r->cached_prod;
+
+       if (free_entries >= nb)
+               return free_entries;
+
+       /* Refresh the local tail pointer.
+        * cached_cons is r->size bigger than the real consumer pointer so
+        * that this addition can be avoided in the more frequently
+        * executed code that computes free_entries in the beginning of
+        * this function. Without this optimization it would have been
+        * free_entries = r->cached_prod - r->cached_cons + r->size.
+        */
+       r->cached_cons = *r->consumer + r->size;
+
+       return r->cached_cons - r->cached_prod;
+}
+
+static __u32 xsk_cons_nb_avail(struct xsk_cons_ring *r, __u32 nb)
+{
+       __u32 entries = r->cached_prod - r->cached_cons;
+
+       if (entries == 0) {
+               r->cached_prod = *r->producer;
+               entries = r->cached_prod - r->cached_cons;
+       }
+
+       return (entries > nb) ? nb : entries;
+}
+
+size_t xsk_reserve_prod(struct xsk_prod_ring *prod, size_t nb, __u32 *idx)
+{
+       if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
+               return 0;
+
+       *idx = prod->cached_prod;
+       prod->cached_prod += nb;
+
+       return nb;
+}
+
+void xsk_submit_prod(struct xsk_prod_ring *prod)
+{
+       /* Make sure everything has been written to the ring before signalling
+        * this to the kernel.
+        */
+       smp_wmb();
+
+       *prod->producer = prod->cached_prod;
+}
+
+size_t xsk_peek_cons(struct xsk_cons_ring *cons, size_t nb,
+                    __u32 *idx)
+{
+       size_t entries = xsk_cons_nb_avail(cons, nb);
+
+       if (likely(entries > 0)) {
+               /* Make sure we do not speculatively read the data before
+                * we have received the packet buffers from the ring.
+                */
+               smp_rmb();
+
+               *idx = cons->cached_cons;
+               cons->cached_cons += entries;
+       }
+
+       return entries;
+}
+
+void xsk_release_cons(struct xsk_cons_ring *cons)
+{
+       *cons->consumer = cons->cached_cons;
+}
+
+void *xsk_get_data(void *umem_area, __u64 addr)
+{
+       return &((char *)umem_area)[addr];
+}
+
+static bool xsk_page_aligned(void *buffer)
+{
+       unsigned long addr = (unsigned long)buffer;
+
+       return !(addr & (getpagesize() - 1));
+}
+
+/* Since the file descriptors are generally allocated sequentially, and also
+ * for performance reasons, we pick the simplest possible hash function:
+ * just a single "and" operation (from the modulo operator).
+ */
+static void xsk_hash_insert_umem(int fd, struct xsk_umem_info *umem)
+{
+       struct xsk_umem_info *umem_in_hash =
+               umem_hash_table[fd % HASH_TABLE_ENTRIES];
+
+       if (umem_in_hash) {
+               list_add_tail(&umem->list, &umem_in_hash->list);
+               return;
+       }
+
+       INIT_LIST_HEAD(&umem->list);
+       umem_hash_table[fd % HASH_TABLE_ENTRIES] = umem;
+}
+
+static struct xsk_umem_info *xsk_hash_find_umem(int fd)
+{
+       struct xsk_umem_info *umem = umem_hash_table[fd % HASH_TABLE_ENTRIES];
+
+       while (umem && umem->fd != fd)
+               umem = list_next_entry(umem, list);
+
+       return umem;
+}
+
+static void xsk_hash_remove_umem(int fd)
+{
+       struct xsk_umem_info *umem = umem_hash_table[fd % HASH_TABLE_ENTRIES];
+
+       while (umem && umem->fd != fd)
+               umem = list_next_entry(umem, list);
+
+       if (umem) {
+               if (list_empty(&umem->list)) {
+                       umem_hash_table[fd % HASH_TABLE_ENTRIES] = NULL;
+                       return;
+               }
+
+               if (umem == umem_hash_table[fd % HASH_TABLE_ENTRIES])
+                       umem_hash_table[fd % HASH_TABLE_ENTRIES] =
+                               list_next_entry(umem, list);
+               list_del(&umem->list);
+       }
+}
+
+static void xsk_hash_insert_xdp_socket(int fd, struct xsk_xdp_socket_info *xsk)
+{
+       struct xsk_xdp_socket_info *xsk_in_hash =
+               xsk_hash_table[fd % HASH_TABLE_ENTRIES];
+
+       if (xsk_in_hash) {
+               list_add_tail(&xsk->list, &xsk_in_hash->list);
+               return;
+       }
+
+       INIT_LIST_HEAD(&xsk->list);
+       xsk_hash_table[fd % HASH_TABLE_ENTRIES] = xsk;
+}
+
+static struct xsk_xdp_socket_info *xsk_hash_find_xdp_socket(int fd)
+{
+       struct xsk_xdp_socket_info *xsk =
+               xsk_hash_table[fd % HASH_TABLE_ENTRIES];
+
+       while (xsk && xsk->fd != fd)
+               xsk = list_next_entry(xsk, list);
+
+       return xsk;
+}
+
+static void xsk_hash_remove_xdp_socket(int fd)
+{
+       struct xsk_xdp_socket_info *xsk =
+               xsk_hash_table[fd % HASH_TABLE_ENTRIES];
+
+       while (xsk && xsk->fd != fd)
+               xsk = list_next_entry(xsk, list);
+
+       if (xsk) {
+               if (list_empty(&xsk->list)) {
+                       xsk_hash_table[fd % HASH_TABLE_ENTRIES] = NULL;
+                       return;
+               }
+
+               if (xsk == xsk_hash_table[fd % HASH_TABLE_ENTRIES])
+                       xsk_hash_table[fd % HASH_TABLE_ENTRIES] =
+                               list_next_entry(xsk, list);
+               list_del(&xsk->list);
+       }
+}
+
+static void xsk_set_umem_config(struct xsk_umem_config *config,
+                               struct xsk_umem_config *usr_config)
+{
+       if (!usr_config) {
+               config->fq_size = XSK_DEFAULT_NUM_DESCS;
+               config->cq_size = XSK_DEFAULT_NUM_DESCS;
+               config->frame_size = XSK_DEFAULT_FRAME_SIZE;
+               config->frame_headroom = XSK_DEFAULT_FRAME_HEADROOM;
+               return;
+       }
+
+       config->fq_size = usr_config->fq_size;
+       config->cq_size = usr_config->cq_size;
+       config->frame_size = usr_config->frame_size;
+       config->frame_headroom = usr_config->frame_headroom;
+}
+
+static void xsk_set_xdp_socket_config(struct xsk_xdp_socket_config *config,
+                                     struct xsk_xdp_socket_config *usr_config)
+{
+       if (!usr_config) {
+               config->rx_size = XSK_DEFAULT_NUM_DESCS;
+               config->tx_size = XSK_DEFAULT_NUM_DESCS;
+               return;
+       }
+
+       config->rx_size = usr_config->rx_size;
+       config->tx_size = usr_config->tx_size;
+}
+
+int xsk_create_umem(void *umem_area, __u64 size, struct xsk_prod_ring *fq,
+                   struct xsk_cons_ring *cq,
+                   struct xsk_umem_config *usr_config)
+{
+       struct xdp_mmap_offsets off;
+       struct xsk_umem_info *umem;
+       struct xdp_umem_reg mr;
+       socklen_t optlen;
+       int err, fd;
+       void *map;
+
+       if (!umem_area)
+               return -EFAULT;
+       if (!size && !xsk_page_aligned(umem_area))
+               return -EINVAL;
+
+       fd = socket(AF_XDP, SOCK_RAW, 0);
+       if (fd < 0)
+               return -errno;
+
+       umem = calloc(1, sizeof(*umem));
+       if (!umem)
+               return -ENOMEM;
+
+       xsk_hash_insert_umem(fd, umem);
+       xsk_set_umem_config(&umem->config, usr_config);
+
+       mr.addr = (uintptr_t)umem_area;
+       mr.len = size;
+       mr.chunk_size = umem->config.frame_size;
+       mr.headroom = umem->config.frame_headroom;
+
+       err = setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+       if (err)
+               return -errno;
+       err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+                        &umem->config.fq_size, sizeof(umem->config.fq_size));
+       if (err)
+               return -errno;
+       err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+                        &umem->config.cq_size, sizeof(umem->config.cq_size));
+       if (err)
+               return -errno;
+
+       optlen = sizeof(off);
+       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       if (err)
+               return -errno;
+
+       map = xsk_mmap(NULL, off.fr.desc + umem->config.fq_size * sizeof(__u64),
+                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                      fd, XDP_UMEM_PGOFF_FILL_RING);
+       if (map == MAP_FAILED)
+               return -errno;
+
+       umem->fq = fq;
+       fq->mask = umem->config.fq_size - 1;
+       fq->size = umem->config.fq_size;
+       fq->producer = map + off.fr.producer;
+       fq->consumer = map + off.fr.consumer;
+       fq->ring = map + off.fr.desc;
+       fq->cached_cons = umem->config.fq_size;
+
+       map = xsk_mmap(NULL, off.cr.desc + umem->config.cq_size * sizeof(__u64),
+                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                   fd, XDP_UMEM_PGOFF_COMPLETION_RING);
+       if (map == MAP_FAILED)
+               return -errno;
+
+       umem->cq = cq;
+       cq->mask = umem->config.cq_size - 1;
+       cq->size = umem->config.cq_size;
+       cq->producer = map + off.cr.producer;
+       cq->consumer = map + off.cr.consumer;
+       cq->ring = map + off.cr.desc;
+
+       umem->umem_area = umem_area;
+       umem->fd = fd;
+
+       return fd;
+}
+
+int xsk_create_xdp_socket(int umem_fd, struct xsk_cons_ring *rx,
+                         struct xsk_prod_ring *tx,
+                         struct xsk_xdp_socket_config *usr_config)
+{
+       struct xsk_xdp_socket_info *xsk;
+       struct xdp_mmap_offsets off;
+       struct xsk_umem_info *umem;
+       socklen_t optlen;
+       int err, fd;
+       void *map;
+
+       umem = xsk_hash_find_umem(umem_fd);
+       if (!umem)
+               return -EBADF;
+
+       if (umem->refcount++ == 0) {
+               fd = umem_fd;
+       } else {
+               fd = socket(AF_XDP, SOCK_RAW, 0);
+               if (fd < 0)
+                       return -errno;
+       }
+
+       xsk = calloc(1, sizeof(*xsk));
+       if (!xsk)
+               return -ENOMEM;
+
+       xsk->fd = fd;
+       xsk->outstanding_tx = 0;
+       xsk_hash_insert_xdp_socket(fd, xsk);
+       xsk_set_xdp_socket_config(&xsk->config, usr_config);
+
+       err = setsockopt(fd, SOL_XDP, XDP_RX_RING,
+                        &xsk->config.rx_size, sizeof(xsk->config.rx_size));
+       if (err)
+               return -errno;
+       err = setsockopt(fd, SOL_XDP, XDP_TX_RING,
+                        &xsk->config.tx_size, sizeof(xsk->config.tx_size));
+       if (err)
+               return -errno;
+
+       optlen = sizeof(off);
+       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       if (err)
+               return -errno;
+
+       map = xsk_mmap(NULL, off.rx.desc +
+                      xsk->config.rx_size * sizeof(struct xdp_desc),
+                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                      fd, XDP_PGOFF_RX_RING);
+       if (map == MAP_FAILED)
+               return -errno;
+
+       xsk->rx = rx;
+       rx->mask = xsk->config.rx_size - 1;
+       rx->size = xsk->config.rx_size;
+       rx->producer = map + off.rx.producer;
+       rx->consumer = map + off.rx.consumer;
+       rx->ring = map + off.rx.desc;
+
+       map = xsk_mmap(NULL, off.tx.desc +
+                      xsk->config.tx_size * sizeof(struct xdp_desc),
+                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                      fd, XDP_PGOFF_TX_RING);
+       if (map == MAP_FAILED)
+               return -errno;
+
+       xsk->tx = tx;
+       tx->mask = xsk->config.tx_size - 1;
+       tx->size = xsk->config.tx_size;
+       tx->producer = map + off.tx.producer;
+       tx->consumer = map + off.tx.consumer;
+       tx->ring = map + off.tx.desc;
+       tx->cached_cons = xsk->config.tx_size;
+
+       return fd;
+}
+
+int xsk_delete_umem(int fd)
+{
+       struct xdp_mmap_offsets off;
+       struct xsk_umem_info *umem;
+       socklen_t optlen;
+       int err;
+
+       umem = xsk_hash_find_umem(fd);
+       if (!umem)
+               return -EBADF;
+
+       if (umem->refcount > 0)
+               return -EBUSY;
+
+       optlen = sizeof(off);
+       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       if (!err) {
+               munmap(umem->fq->ring,
+                      off.fr.desc + umem->config.fq_size * sizeof(__u64));
+               munmap(umem->cq->ring,
+                      off.cr.desc + umem->config.cq_size * sizeof(__u64));
+       }
+
+       xsk_hash_remove_umem(fd);
+       free(umem);
+
+       return 0;
+}
+
+int xsk_delete_xdp_socket(int fd)
+{
+       struct xsk_xdp_socket_info *xsk;
+       struct xdp_mmap_offsets off;
+       socklen_t optlen;
+       int err;
+
+       xsk = xsk_hash_find_xdp_socket(fd);
+       if (!xsk)
+               return -EBADF;
+
+       optlen = sizeof(off);
+       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+       if (!err) {
+               munmap(xsk->rx->ring,
+                      off.rx.desc +
+                      xsk->config.rx_size * sizeof(struct xdp_desc));
+               munmap(xsk->tx->ring,
+                      off.tx.desc +
+                      xsk->config.tx_size * sizeof(struct xdp_desc));
+       }
+
+       xsk->umem->refcount--;
+       xsk_hash_remove_xdp_socket(fd);
+       free(xsk);
+
+       return 0;
+}
-- 
2.7.4

Reply via email to