On 02/17/2017 04:07 PM, Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 16, 2017 at 10:51:44PM +0100, Vincent JARDIN wrote:
>> On 16/02/2017 at 14:36, Konrad Rzeszutek Wilk wrote:
>>>> Is it time now to officially remove Dom0 support?
>>> So we do have a prototype implementation of netback, but it is waiting
>>> on xen-devel's review of the spec.
>>>
>>> And I believe the implementation does utilize some of the dom0
>>> parts of code in DPDK.
>>
>> Please, do you have URLs/pointers about it? It would be interesting to share
>> it with DPDK community too.
> 
> Joao, would it be possible to include a tarball of the patches? I know
> they are not in the right state given the ongoing review of the staging
> grants API - they are incompatible - but it may help folks get
> a feel for which DPDK APIs you used?
OK, see attached - I should note that it's a WIP as Konrad mentioned, but once
the staging grants work is finished, the code will be brought into better shape
(as well as to feature parity) for a proper RFC [and made to adhere to the
project coding style].
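
For anyone who wants to poke at it in the meantime: the PMD registers as a
vdev ("eth_xnb", taking "iface" and "queues" kvargs, see xnb_ethdev.c below),
so an application would instantiate it roughly as in the sketch below. The
vdev instance name and the iface value are illustrative only - iface is
matched against the vif name the xenbus code hands to xnb_connect().

/* minimal usage sketch - not part of the patch */
#include <stdlib.h>
#include <string.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_debug.h>

int main(int argc, char **argv)
{
	/* e.g. ./app -c 0x3 -n 4 --vdev 'eth_xnb0,iface=vif-test,queues=1' */
	struct rte_eth_conf port_conf;
	uint8_t port_id = 0;	/* first (and only) ethdev port */

	if (rte_eal_init(argc, argv) < 0)
		rte_exit(EXIT_FAILURE, "EAL init failed\n");

	memset(&port_conf, 0, sizeof(port_conf));
	if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) < 0)
		rte_exit(EXIT_FAILURE, "cannot configure port %u\n",
			 (unsigned)port_id);

	/* rx/tx queue setup and rte_eth_dev_start() follow as for any PMD;
	 * eth_dev_start() spawns the xenbus session thread, and the port
	 * only reports link up once a frontend connects. */
	return 0;
}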

Joao
>From 3bced1452e1e619e7f4701cf67ba88c2627aa376 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.mart...@oracle.com>
Date: Mon, 20 Feb 2017 13:33:34 +0000
Subject: [PATCH WIP 1/2] drivers/net: add xen-netback PMD

Introduce Xen network backend support, namely xen-netback.
This mostly means adding a boilerplate driver with an initially
reduced set of features (i.e. without feature-sg and without
multi-queue). It handles grant operations and notifications correctly,
and almost all of the state machine. Additionally it supports an early
version of staging grants (hereafter feature-persistent=1), which lets
DPDK keep a set of premapped grants and hence avoid the grant-copy
(slow) paths. The driver is implemented using the Xen-provided
libraries for event channel, gnttab and xenstore operations.

Signed-off-by: Joao Martins <joao.m.mart...@oracle.com>
---
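Note: the Makefiles below reference CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK but this
WIP does not touch the build config, so (unless a later patch in the series
adds it) you would need something along these lines in config/common_base to
actually build it - an assumption on my side, not part of the patch:

  # enable the xen-netback PMD
  CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK=y
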
 drivers/net/Makefile                               |   1 +
 drivers/net/xen-netback/Makefile                   |  68 ++
 .../xen-netback/rte_pmd_xen-netback_version.map    |   3 +
 drivers/net/xen-netback/xnb.h                      | 159 ++++
 drivers/net/xen-netback/xnb_ethdev.c               | 701 +++++++++++++++
 drivers/net/xen-netback/xnb_ethdev.h               |  34 +
 drivers/net/xen-netback/xnb_ring.c                 | 240 +++++
 drivers/net/xen-netback/xnb_rxtx.c                 | 683 +++++++++++++++
 drivers/net/xen-netback/xnb_xenbus.c               | 975 +++++++++++++++++++++
 mk/rte.app.mk                                      |   1 +
 10 files changed, 2865 insertions(+)
 create mode 100644 drivers/net/xen-netback/Makefile
 create mode 100644 drivers/net/xen-netback/rte_pmd_xen-netback_version.map
 create mode 100644 drivers/net/xen-netback/xnb.h
 create mode 100644 drivers/net/xen-netback/xnb_ethdev.c
 create mode 100644 drivers/net/xen-netback/xnb_ethdev.h
 create mode 100644 drivers/net/xen-netback/xnb_ring.c
 create mode 100644 drivers/net/xen-netback/xnb_rxtx.c
 create mode 100644 drivers/net/xen-netback/xnb_xenbus.c

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..a4bf7cb 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += xen-netback
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
new file mode 100644
index 0000000..c6299b0
--- /dev/null
+++ b/drivers/net/xen-netback/Makefile
@@ -0,0 +1,68 @@
+# BSD LICENSE
+#
+# Copyright(c) 2016, Oracle and/or its affiliates. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in
+#     the documentation and/or other materials provided with the
+#     distribution.
+#   * Neither the name of the copyright holder nor the names of its
+#     contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_xen-netback.a
+LIBABIVER  := 1
+EXPORT_MAP := rte_pmd_xen-netback_version.map
+
+LDLIBS += -lpthread
+LDLIBS += -lxenstore -lxenctrl
+# OL6 and OL7 have it in /usr/lib64
+LDLIBS += -L/usr/lib64
+
+CFLAGS += -O0 -D_GNU_SOURCE -g
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D__XEN_TOOLS__
+#CFLAGS += -DDEBUG
+#CFLAGS += -DDEBUG_PACKET
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += xnb_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += xnb_xenbus.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += xnb_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += xnb_ring.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_hash
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_kvargs
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK) += lib/librte_net
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/xen-netback/rte_pmd_xen-netback_version.map b/drivers/net/xen-netback/rte_pmd_xen-netback_version.map
new file mode 100644
index 0000000..dc4d417
--- /dev/null
+++ b/drivers/net/xen-netback/rte_pmd_xen-netback_version.map
@@ -0,0 +1,3 @@
+DPDK_16.04 {
+	local: *;
+};
diff --git a/drivers/net/xen-netback/xnb.h b/drivers/net/xen-netback/xnb.h
new file mode 100644
index 0000000..39c92d2
--- /dev/null
+++ b/drivers/net/xen-netback/xnb.h
@@ -0,0 +1,159 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <rte_mempool.h>
+
+#include <xen/io/xenbus.h>
+#include <xen/io/netif.h>
+#include <xen/xen-compat.h>
+#include <xenstore.h>
+
+#if __XEN_LATEST_INTERFACE_VERSION__ >= 0x00040700
+#define XC_WANT_COMPAT_GNTTAB_API
+#define XC_WANT_COMPAT_EVTCHN_API
+#endif
+
+#include <xenctrl.h>
+
+#define RTE_XEN_MAX_PKT_BURST 256
+
+struct pending_req {
+	union {
+		netif_tx_request_t txreq;
+		netif_rx_request_t rxreq;
+	} u;
+	bool more;
+	struct rte_mbuf *mbuf;
+};
+
+struct xenvif_ring {
+	xc_interface *xch;
+	xc_gnttab *gnttabh;
+	xc_evtchn *evtchnh;
+
+	struct netif_tx_sring *tx_addr;
+	netif_tx_back_ring_t  tx_ring;
+	struct netif_rx_sring *rx_addr;
+	netif_rx_back_ring_t  rx_ring;
+
+	grant_ref_t ring_ref;
+	evtchn_port_t evtchn;
+	evtchn_port_or_error_t port;
+
+	struct gnttab_copy *gop;
+	struct pending_req *pending;
+	struct rte_hash *grants;
+	uint16_t grants_cnt;
+
+	domid_t dom;
+	char *name;
+};
+
+
+/* The Xenbus related domain state entries */
+struct xenvif_state {
+	XenbusState state;
+	domid_t domid;
+	unsigned handle;
+	char *path;
+};
+
+/* The Xen virtual interface queues */
+struct xenvif_queue {
+	unsigned int id;
+	char *path;
+
+	struct xenvif_ring tx;
+	struct xenvif_ring rx;
+
+	struct xenvif *vif;
+};
+
+/* The Frontend features capabilities */
+struct xenvif_features {
+	uint8_t rx_poll;
+	uint8_t sg;
+	uint8_t tcp4;
+	uint8_t tcp4_prefix;
+	uint8_t tcp6;
+	uint8_t tcp6_prefix;
+	uint8_t ip4_csum;
+	uint8_t ip6_csum;
+	uint8_t mcast_ctrl;
+	uint8_t pgnt;
+	uint8_t zc;
+};
+
+/* The Domain related backend and frontend state */
+struct xenvif {
+	struct xenvif_state fe;
+	struct xenvif_state be;
+
+	void *priv;
+	char *ifname;
+	struct xenvif_queue *queues;
+	unsigned num_queues;
+
+	struct xenvif_features features;
+};
+
+struct xenbus_ops {
+	/* xenstore ids /backend/<ids> (NULL terminated) */
+	const char **ids;
+
+	/* device state changes */
+	int (*init)(struct xenvif *);
+	int (*connect)(struct xenvif *);
+	int (*disconnect)(struct xenvif *);
+	int (*close)(struct xenvif *);
+};
+
+int rte_xen_ring_map(struct xenvif_ring *ring);
+int rte_xen_ring_unmap(struct xenvif_ring *ring);
+
+void *rte_xen_ring_get_page(struct xenvif_ring *ring, grant_ref_t ref,
+			    bool writable);
+
+int rte_xenbus_backend_register(struct xenbus_ops *,
+				unsigned max_cores);
+
+int rte_xenbus_backend_start(void);
+void rte_xenbus_backend_stop(void);
+
+uint16_t rte_xen_enqueue_burst(struct xenvif *dev, uint16_t queue_id,
+			       struct rte_mbuf **pkts, uint16_t count);
+
+uint16_t rte_xen_dequeue_burst(struct xenvif *dev, uint16_t queue_id,
+			       struct rte_mempool *mbuf_pool,
+			       struct rte_mbuf **pkts, uint16_t count);
diff --git a/drivers/net/xen-netback/xnb_ethdev.c b/drivers/net/xen-netback/xnb_ethdev.c
new file mode 100644
index 0000000..67cd1b3
--- /dev/null
+++ b/drivers/net/xen-netback/xnb_ethdev.c
@@ -0,0 +1,701 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <errno.h>
+#include <limits.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_tcp.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_kvargs.h>
+#include <rte_spinlock.h>
+#include "xnb.h"
+
+#define ETH_XNB_IFACE_ARG	"iface"
+#define ETH_XNB_QUEUES_ARG	"queues"
+
+static const char *drivername = "XEN NETBACK PMD";
+
+static const char *valid_arguments[] = {
+	ETH_XNB_IFACE_ARG,
+	ETH_XNB_QUEUES_ARG,
+	NULL
+};
+
+static struct ether_addr base_eth_addr = {
+	.addr_bytes = {
+		0x58 /* X */,
+		0x45 /* E */,
+		0x4E /* N */,
+		0x42 /* B */,
+		0x45 /* E */,
+		0x00
+	}
+};
+
+struct xnb_queue {
+	rte_atomic32_t allow_queuing;
+	rte_atomic32_t while_queuing;
+	struct xenvif *device;
+	struct pmd_internal *internal;
+	struct rte_mempool *mb_pool;
+	uint8_t port;
+	uint16_t queue_id;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
+	uint64_t missed_pkts;
+	uint64_t rx_bytes;
+	uint64_t tx_bytes;
+};
+
+struct pmd_internal {
+	char *dev_name;
+	char *iface_name;
+	uint16_t max_queues;
+
+	volatile uint16_t once;
+};
+
+struct internal_list {
+	TAILQ_ENTRY(internal_list) next;
+	struct rte_eth_dev *eth_dev;
+};
+
+TAILQ_HEAD(internal_list_head, internal_list);
+static struct internal_list_head internal_list =
+	TAILQ_HEAD_INITIALIZER(internal_list);
+
+static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static rte_atomic16_t nb_started_ports;
+static pthread_t session_th;
+
+static struct rte_eth_link pmd_link = {
+		.link_speed = 10000,
+		.link_duplex = ETH_LINK_FULL_DUPLEX,
+		.link_status = ETH_LINK_DOWN
+};
+
+static int eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+static inline struct internal_list *find_internal_resource(char *ifname)
+{
+	int found = 0;
+	struct internal_list *list;
+	struct pmd_internal *internal;
+
+	if (ifname == NULL)
+		return NULL;
+
+	pthread_mutex_lock(&internal_list_lock);
+
+	TAILQ_FOREACH(list, &internal_list, next) {
+		internal = list->eth_dev->data->dev_private;
+		if (!strcmp(internal->iface_name, ifname)) {
+			found = 1;
+			break;
+		}
+	}
+
+	pthread_mutex_unlock(&internal_list_lock);
+
+	if (!found)
+		return NULL;
+
+	return list;
+}
+
+static void eth_dev_infos_get(struct rte_eth_dev *dev,
+			      struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internal *internal;
+
+	internal = dev->data->dev_private;
+	if (internal == NULL) {
+		RTE_LOG(ERR, PMD, "Invalid device specified\n");
+		return;
+	}
+
+	dev_info->driver_name = drivername;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)-1;
+	dev_info->max_rx_queues = internal->max_queues;
+	dev_info->max_tx_queues = internal->max_queues;
+	dev_info->min_rx_bufsize = 0;
+}
+
+static void eth_stats_get(struct rte_eth_dev *dev,
+			  struct rte_eth_stats *stats)
+{
+	unsigned i;
+	unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
+	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
+	struct xnb_queue *xnbq;
+
+	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
+			i < dev->data->nb_rx_queues; i++) {
+		if (dev->data->rx_queues[i] == NULL)
+			continue;
+		xnbq = dev->data->rx_queues[i];
+		stats->q_ipackets[i] = xnbq->rx_pkts;
+		rx_total += stats->q_ipackets[i];
+
+		stats->q_ibytes[i] = xnbq->rx_bytes;
+		rx_total_bytes += stats->q_ibytes[i];
+	}
+
+	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
+			i < dev->data->nb_tx_queues; i++) {
+		if (dev->data->tx_queues[i] == NULL)
+			continue;
+		xnbq = dev->data->tx_queues[i];
+		stats->q_opackets[i] = xnbq->tx_pkts;
+		tx_missed_total += xnbq->missed_pkts;
+		tx_total += stats->q_opackets[i];
+
+		stats->q_obytes[i] = xnbq->tx_bytes;
+		tx_total_bytes += stats->q_obytes[i];
+	}
+
+	stats->ipackets = rx_total;
+	stats->opackets = tx_total;
+	stats->imissed = tx_missed_total;
+	stats->ibytes = rx_total_bytes;
+	stats->obytes = tx_total_bytes;
+}
+
+static void eth_stats_reset(struct rte_eth_dev *dev)
+{
+	struct xnb_queue *xnbq;
+	unsigned i;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		if (dev->data->rx_queues[i] == NULL)
+			continue;
+		xnbq = dev->data->rx_queues[i];
+		xnbq->rx_pkts = 0;
+		xnbq->rx_bytes = 0;
+	}
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		if (dev->data->tx_queues[i] == NULL)
+			continue;
+		xnbq = dev->data->tx_queues[i];
+		xnbq->tx_pkts = 0;
+		xnbq->tx_bytes = 0;
+		xnbq->missed_pkts = 0;
+	}
+}
+
+static int xnb_init(struct xenvif *vif __rte_unused)
+{
+	return 0;
+}
+
+static int xnb_close(struct xenvif *vif __rte_unused)
+{
+	return 0;
+}
+
+static int xnb_connect(struct xenvif *dev)
+{
+	struct rte_eth_dev *eth_dev;
+	struct internal_list *list;
+	struct pmd_internal *internal;
+	struct xnb_queue *xnbq;
+	unsigned i;
+
+	if (dev == NULL) {
+		RTE_LOG(INFO, PMD, "Invalid argument\n");
+		return -1;
+	}
+
+	list = find_internal_resource(dev->ifname);
+	if (list == NULL) {
+		RTE_LOG(INFO, PMD, "Invalid device name\n");
+		return -1;
+	}
+
+	eth_dev = list->eth_dev;
+	internal = eth_dev->data->dev_private;
+
+	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+		xnbq = eth_dev->data->rx_queues[i];
+		if (xnbq == NULL)
+			continue;
+
+		if (rte_xen_ring_map(&dev->queues[i].rx)) {
+			RTE_LOG(INFO, PMD, "Cannot map RX%d\n", i);
+			return -1;
+		}
+		xnbq->device = dev;
+		xnbq->internal = internal;
+		xnbq->port = eth_dev->data->port_id;
+	}
+	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+		xnbq = eth_dev->data->tx_queues[i];
+		if (xnbq == NULL)
+			continue;
+
+		if (rte_xen_ring_map(&dev->queues[i].tx)) {
+			RTE_LOG(INFO, PMD, "Cannot map TX%d\n", i);
+			return -1;
+		}
+
+		xnbq->device = dev;
+		xnbq->internal = internal;
+		xnbq->port = eth_dev->data->port_id;
+	}
+
+	dev->priv = eth_dev;
+	eth_dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+		xnbq = eth_dev->data->rx_queues[i];
+		if (xnbq == NULL)
+			continue;
+
+		rte_atomic32_set(&xnbq->allow_queuing, 1);
+	}
+	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+		xnbq = eth_dev->data->tx_queues[i];
+		if (xnbq == NULL)
+			continue;
+
+		rte_atomic32_set(&xnbq->allow_queuing, 1);
+	}
+
+	RTE_LOG(INFO, PMD, "New connection established\n");
+	return 0;
+}
+
+static int xnb_disconnect(struct xenvif *vif __rte_unused)
+{
+	return 0;
+}
+
+static void * xnb_session(void *param __rte_unused)
+{
+	static const char *xnb_ids[] = {
+		"vif", NULL
+	};
+
+	static struct xenbus_ops xnb_ops = {
+		.ids        = xnb_ids,
+		.init       = xnb_init,
+		.connect    = xnb_connect,
+		.disconnect = xnb_disconnect,
+		.close      = xnb_close
+	};
+
+	if (rte_xenbus_backend_register(&xnb_ops, rte_lcore_count()) < 0)
+		RTE_LOG(ERR, PMD, "Can't register callbacks\n");
+
+	/* start event handling */
+	rte_xenbus_backend_start();
+
+	return NULL;
+}
+
+static int xnb_session_start(void)
+{
+	int ret;
+
+	ret = pthread_create(&session_th, NULL, xnb_session, NULL);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't create a thread\n");
+
+	return ret;
+}
+
+static void xnb_session_stop(void)
+{
+	int ret;
+
+	ret = pthread_cancel(session_th);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
+
+	ret = pthread_join(session_th, NULL);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't join the thread\n");
+}
+
+static int eth_dev_start(struct rte_eth_dev *dev __rte_unused)
+{
+	int ret = 0;
+
+	/* We need only one message handling thread */
+	if (rte_atomic16_add_return(&nb_started_ports, 1) == 1)
+		ret = xnb_session_start();
+
+	return ret;
+}
+
+static void eth_dev_stop(struct rte_eth_dev *dev __rte_unused)
+{
+	if (rte_atomic16_sub_return(&nb_started_ports, 1) == 0)
+		xnb_session_stop();
+}
+
+static int eth_link_update(struct rte_eth_dev *dev __rte_unused,
+			   int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+static uint16_t eth_dev_rx_pkt_burst(void *queue,
+				     struct rte_mbuf **pkts,
+				     uint16_t nb_pkts)
+{
+	struct xnb_queue *r = queue;
+	uint16_t i, nb_rx = 0;
+
+	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
+		return 0;
+
+	rte_atomic32_set(&r->while_queuing, 1);
+
+	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
+		goto out;
+
+	/* Dequeue packets from guest TX queue */
+	nb_rx = rte_xen_dequeue_burst(r->device, r->queue_id, r->mb_pool,
+				      pkts, nb_pkts);
+
+	r->rx_pkts += nb_rx;
+
+	for (i = 0; likely(i < nb_rx); i++) {
+		pkts[i]->port = r->port;
+		r->rx_bytes += pkts[i]->pkt_len;
+	}
+
+out:
+	rte_atomic32_set(&r->while_queuing, 0);
+
+	return nb_rx;
+}
+
+static int eth_rx_queue_setup(struct rte_eth_dev *dev,
+			      uint16_t queue_id,
+			      uint16_t nb_desc __rte_unused,
+			      unsigned int socket_id,
+			      const struct rte_eth_rxconf *rx_conf __rte_unused,
+			      struct rte_mempool *mp)
+{
+	struct xnb_queue *xnbq;
+
+	xnbq = rte_zmalloc_socket(NULL, sizeof(struct xnb_queue),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (xnbq == NULL) {
+		RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n");
+		return -ENOMEM;
+	}
+
+	xnbq->mb_pool = mp;
+	xnbq->queue_id = queue_id;
+	dev->data->rx_queues[queue_id] = xnbq;
+	return 0;
+}
+
+static void eth_rx_queue_release(void *queue)
+{
+	rte_free(queue);
+}
+
+static uint16_t eth_dev_tx_pkt_burst(void *queue,
+				     struct rte_mbuf **pkts,
+				     uint16_t nb_pkts)
+{
+	struct xnb_queue *r = queue;
+	uint16_t i, nb_tx = 0;
+
+	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
+		return 0;
+
+	rte_atomic32_set(&r->while_queuing, 1);
+
+	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
+		goto out;
+
+	/* Enqueue packets to guest RX queue */
+	nb_tx = rte_xen_enqueue_burst(r->device, r->queue_id, pkts, nb_pkts);
+
+	r->tx_pkts += nb_tx;
+	r->missed_pkts += nb_pkts - nb_tx;
+
+	for (i = 0; likely(i < nb_tx); i++)
+		r->tx_bytes += pkts[i]->pkt_len;
+
+	for (i = 0; likely(i < nb_tx); i++)
+		rte_pktmbuf_free(pkts[i]);
+out:
+	rte_atomic32_set(&r->while_queuing, 0);
+
+	return nb_tx;
+}
+
+static int eth_tx_queue_setup(struct rte_eth_dev *dev,
+			      uint16_t queue_id,
+			      uint16_t nb_desc __rte_unused,
+			      unsigned int socket_id,
+			      const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct xnb_queue *xnbq;
+
+	xnbq = rte_zmalloc_socket(NULL, sizeof(struct xnb_queue),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (xnbq == NULL) {
+		RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n");
+		return -ENOMEM;
+	}
+
+	xnbq->queue_id = queue_id;
+	dev->data->tx_queues[queue_id] = xnbq;
+	return 0;
+}
+
+static void eth_tx_queue_release(void *queue)
+{
+	rte_free(queue);
+}
+
+static struct eth_dev_ops xnb_dev_ops = {
+	/* device */
+	.dev_configure        = eth_dev_configure,
+	.dev_infos_get        = eth_dev_infos_get,
+	.dev_start            = eth_dev_start,
+	.dev_stop             = eth_dev_stop,
+	.link_update          = eth_link_update,
+
+	/* queue setup */
+	.rx_queue_setup       = eth_rx_queue_setup,
+	.rx_queue_release     = eth_rx_queue_release,
+	.tx_queue_setup       = eth_tx_queue_setup,
+	.tx_queue_release     = eth_tx_queue_release,
+
+	/* statistics */
+	.stats_get            = eth_stats_get,
+	.stats_reset          = eth_stats_reset,
+};
+
+static int xnb_eth_dev_create(const char *name, char *iface_name, int16_t queues,
+			      const unsigned numa_node)
+{
+	struct rte_eth_dev_data *data = NULL;
+	struct pmd_internal *internal = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	struct ether_addr *eth_addr = NULL;
+	struct internal_list *list = NULL;
+
+	RTE_LOG(INFO, PMD, "Creating Xen netback backend on numa socket %u\n",
+		numa_node);
+
+	/* now do all data allocation - for the eth_dev structure
+	 * and the internal (private) data
+	 */
+	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
+	if (data == NULL)
+		goto error;
+
+	internal = rte_zmalloc_socket(name, sizeof(*internal), 0, numa_node);
+	if (internal == NULL)
+		goto error;
+
+	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
+	if (list == NULL)
+		goto error;
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (eth_dev == NULL)
+		goto error;
+
+	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
+	if (eth_addr == NULL)
+		goto error;
+	*eth_addr = base_eth_addr;
+	eth_addr->addr_bytes[5] = eth_dev->data->port_id;
+
+	TAILQ_INIT(&eth_dev->link_intr_cbs);
+
+	/* now put it all together
+	 * - store queue data in internal,
+	 * - store numa_node info in ethdev data
+	 * - point eth_dev_data to internals
+	 * - and point eth_dev structure to new eth_dev_data structure
+	 */
+	internal->dev_name = strdup(name);
+	if (internal->dev_name == NULL)
+		goto error;
+	internal->iface_name = strdup(iface_name);
+	if (internal->iface_name == NULL)
+		goto error;
+
+	list->eth_dev = eth_dev;
+	pthread_mutex_lock(&internal_list_lock);
+	TAILQ_INSERT_TAIL(&internal_list, list, next);
+	pthread_mutex_unlock(&internal_list_lock);
+
+	data->dev_private = internal;
+	data->port_id = eth_dev->data->port_id;
+	memmove(data->name, eth_dev->data->name, sizeof(data->name));
+	data->nb_rx_queues = queues;
+	data->nb_tx_queues = queues;
+	internal->max_queues = queues;
+	data->dev_link = pmd_link;
+	data->mac_addrs = eth_addr;
+
+	/* We'll replace the 'data' originally allocated by eth_dev, so the
+	 * PMD resources won't be shared between multiple processes.
+	 */
+	eth_dev->data = data;
+	eth_dev->dev_ops = &xnb_dev_ops;
+	eth_dev->driver = NULL;
+	data->dev_flags =
+		RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = internal->dev_name;
+	data->numa_node = numa_node;
+
+	/* finally assign rx and tx ops */
+	eth_dev->rx_pkt_burst = eth_dev_rx_pkt_burst;
+	eth_dev->tx_pkt_burst = eth_dev_tx_pkt_burst;
+
+	return data->port_id;
+
+error:
+	if (internal)
+		free(internal->dev_name);
+	rte_free(eth_addr);
+	if (eth_dev)
+		rte_eth_dev_release_port(eth_dev);
+	rte_free(internal);
+	rte_free(list);
+	rte_free(data);
+
+	return -1;
+}
+
+static inline int open_iface(const char *key __rte_unused, const char *value,
+			     void *extra_args)
+{
+	const char **iface_name = extra_args;
+
+	if (value == NULL)
+		return -1;
+
+	*iface_name = value;
+
+	return 0;
+}
+
+static inline int open_queues(const char *key __rte_unused, const char *value,
+			      void *extra_args)
+{
+	uint16_t *q = extra_args;
+
+	if (value == NULL || extra_args == NULL)
+		return -EINVAL;
+
+	*q = (uint16_t)strtoul(value, NULL, 0);
+	if (*q == USHRT_MAX && errno == ERANGE)
+		return -1;
+
+	if (*q > RTE_MAX_QUEUES_PER_PORT)
+		return -1;
+
+	return 0;
+}
+
+static int rte_pmd_xnb_devinit(const char *name,
+			       const char *params)
+{
+	struct rte_kvargs *kvlist = NULL;
+	int ret = 0;
+	char *iface_name;
+	uint16_t queues;
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_xnb for %s\n", name);
+
+	kvlist = rte_kvargs_parse(params, valid_arguments);
+	if (kvlist == NULL)
+		return -1;
+
+	if (rte_kvargs_count(kvlist, ETH_XNB_IFACE_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_XNB_IFACE_ARG,
+					 &open_iface, &iface_name);
+		if (ret < 0)
+			goto out_free;
+	} else {
+		ret = -1;
+		goto out_free;
+	}
+
+	if (rte_kvargs_count(kvlist, ETH_XNB_QUEUES_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_XNB_QUEUES_ARG,
+					 &open_queues, &queues);
+		if (ret < 0)
+			goto out_free;
+
+	} else
+		queues = 1;
+
+	xnb_eth_dev_create(name, iface_name, queues, rte_socket_id());
+
+out_free:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+static int rte_pmd_xnb_devuninit(const char *name __rte_unused)
+{
+	return 0;
+}
+
+struct rte_driver pmd_xnb_drv = {
+	.type = PMD_VDEV,
+	.init = rte_pmd_xnb_devinit,
+	.uninit = rte_pmd_xnb_devuninit,
+};
+
+PMD_REGISTER_DRIVER(pmd_xnb_drv, eth_xnb);
+DRIVER_REGISTER_PARAM_STRING(eth_xnb,
+	"iface=<ifc> "
+	"queues=<int>");
diff --git a/drivers/net/xen-netback/xnb_ethdev.h b/drivers/net/xen-netback/xnb_ethdev.h
new file mode 100644
index 0000000..a88792f
--- /dev/null
+++ b/drivers/net/xen-netback/xnb_ethdev.h
@@ -0,0 +1,34 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_xen.h>
diff --git a/drivers/net/xen-netback/xnb_ring.c b/drivers/net/xen-netback/xnb_ring.c
new file mode 100644
index 0000000..7067589
--- /dev/null
+++ b/drivers/net/xen-netback/xnb_ring.c
@@ -0,0 +1,240 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include <rte_hash.h>
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#else
+#include <rte_jhash.h>
+#endif
+
+#include "xnb.h"
+
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#define RTE_LOGTYPE_XENRING RTE_LOGTYPE_USER1
+
+#define RTE_XEN_MAX_PKT_GRANTS (4 * RTE_XEN_MAX_PKT_BURST)
+
+struct grant {
+	grant_ref_t ref;
+	void *page;
+};
+
+/* Hash functions for the domains table */
+static uint32_t grants_hash_crc(const void *data,
+				__rte_unused uint32_t data_len,
+				uint32_t init_val)
+{
+	const grant_ref_t *gref = data;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	init_val = rte_hash_crc_4byte(*gref, init_val);
+#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+	init_val = rte_jhash_1word(*gref, init_val);
+#endif
+
+	return init_val;
+}
+
+static struct rte_hash* grants_hash_init(struct xenvif_ring *ring)
+{
+	char s[64] = { 0 };
+	struct rte_hash_parameters grants_hash_params = {
+		.name = NULL,
+		.entries = RTE_XEN_MAX_PKT_GRANTS,
+		.key_len = sizeof(uint32_t),
+		.hash_func = grants_hash_crc,
+		.hash_func_init_val = 0,
+	};
+
+	snprintf(s, sizeof(s), "grants_hash_%u_%s", ring->dom, ring->name);
+	grants_hash_params.name = s;
+	grants_hash_params.socket_id = 0;
+
+	return rte_hash_create(&grants_hash_params);
+}
+
+int rte_xen_ring_map(struct xenvif_ring *ring)
+{
+	int sz;
+
+	/* Open event channel handle */
+	ring->evtchnh = xc_evtchn_open(NULL, 0);
+	fcntl(xc_evtchn_fd(ring->evtchnh), F_SETFD, FD_CLOEXEC);
+
+	/* Bind the event channel.
+	 * Failure implies another xen-netback is already running for this
+	 * interface. */
+	ring->port = xc_evtchn_bind_interdomain(ring->evtchnh,
+						ring->dom, ring->evtchn);
+	if (ring->port == -1) {
+		RTE_LOG(ERR, XENRING, "%s: failed to bind evtchn %d\n",
+			ring->name, ring->evtchn);
+		xc_evtchn_close(ring->evtchnh);
+		return -1;
+	}
+
+	ring->xch = xc_interface_open(0, 0, 0);
+	if (!ring->xch) {
+		RTE_LOG(ERR, XENRING, "%s: failed to open xc", ring->name);
+		return -1;
+	}
+
+	ring->grants = grants_hash_init(ring);
+
+	sz = sizeof(struct gnttab_copy) * RTE_XEN_MAX_PKT_GRANTS;
+	ring->gop = malloc(sz);
+	if (!ring->gop) {
+		RTE_LOG(ERR, XENRING, "%s: failed to init copy ops", ring->name);
+		return -1;
+	}
+	memset(ring->gop, 0, sz);
+
+	sz = sizeof(struct pending_req) * RTE_XEN_MAX_PKT_GRANTS;
+	ring->pending = malloc(sz);
+	if (!ring->pending) {
+		RTE_LOG(ERR, XENRING, "%s: failed to init pending", ring->name);
+		return -1;
+	}
+
+	/* Open grant table handle */
+	ring->gnttabh = xc_gnttab_open(NULL, 0);
+
+	if (xc_gnttab_set_max_grants(ring->gnttabh,
+				     RTE_XEN_MAX_PKT_BURST) < 0) {
+		RTE_LOG(ERR, XENRING, "%s: failed to set max grants",
+			ring->name);
+		return -1;
+	}
+
+	RTE_LOG(INFO, XENRING, "%s: gnttab %p evtchn %p (fd %d)\n",
+		ring->name, ring->gnttabh, ring->evtchnh,
+		xc_evtchn_fd(ring->evtchnh));
+
+	/* Map ring */
+	if (!strncmp(ring->name, "TX", 2)) {
+		ring->tx_addr = xc_gnttab_map_grant_ref(ring->gnttabh,
+						     ring->dom, ring->ring_ref,
+						     PROT_READ | PROT_WRITE);
+		if (!ring->tx_addr)
+			return -1;
+
+		BACK_RING_INIT(&ring->tx_ring, ring->tx_addr, XC_PAGE_SIZE);
+
+		RTE_LOG(ERR, XENRING, "%s: ref %u dom %u -> addr %p\n",
+			ring->name, ring->ring_ref, ring->dom, ring->tx_addr);
+	} else {
+		ring->rx_addr = xc_gnttab_map_grant_ref(ring->gnttabh,
+						     ring->dom, ring->ring_ref,
+						     PROT_READ | PROT_WRITE);
+		if (!ring->rx_addr)
+			return -1;
+
+		BACK_RING_INIT(&ring->rx_ring, ring->rx_addr, XC_PAGE_SIZE);
+
+		RTE_LOG(ERR, XENRING, "%s: ref %u dom %u -> addr %p\n",
+			ring->name, ring->ring_ref, ring->dom, ring->rx_addr);
+	}
+
+	RTE_LOG(ERR, XENRING, "%s: evtchn %d -> port %d\n",
+		ring->name, ring->evtchn, ring->port);
+
+	return 0;
+}
+
+int rte_xen_ring_unmap(struct xenvif_ring *ring)
+{
+	/* Deallocate event channel */
+	xc_evtchn_unbind(ring->evtchnh, ring->port);
+	ring->port = -1;
+
+	/* Unmap ring */
+	if (!strncmp(ring->name, "TX", 2))
+		xc_gnttab_munmap(ring->gnttabh, ring->tx_addr, 1);
+	else
+		xc_gnttab_munmap(ring->gnttabh, ring->rx_addr, 1);
+
+	/* Unmap initial buffers */
+
+	/* Close event channel handle
+	 * Close grant table handle */
+	xc_evtchn_close(ring->evtchnh);
+	xc_gnttab_close(ring->gnttabh);
+
+	RTE_LOG(INFO, XENRING, "%s: closed gnttab %p evtchn %p (fd %d)\n",
+		ring->name, ring->gnttabh, ring->evtchnh,
+		xc_evtchn_fd(ring->evtchnh));
+
+	return 0;
+}
+
+void *rte_xen_ring_get_page(struct xenvif_ring *ring, grant_ref_t ref,
+			    bool writable)
+{
+	struct grant *gnt = NULL;
+	unsigned flags = writable ? (PROT_READ | PROT_WRITE) : PROT_READ;
+
+	rte_hash_lookup_data(ring->grants, &ref, (void**) &gnt);
+
+	if (gnt)
+		return gnt->page;
+
+	if (ring->grants_cnt >= RTE_XEN_MAX_PKT_GRANTS)
+		return NULL;
+
+	gnt = malloc(sizeof(struct grant));
+	if (!gnt) {
+		RTE_LOG(ERR, XENRING, "%s: error allocating grant ref %u\n",
+			ring->name, ref);
+		return NULL;
+	}
+
+	gnt->ref = ref;
+	gnt->page = xc_gnttab_map_grant_ref(ring->gnttabh, ring->dom, ref,
+					    flags);
+	if (!gnt->page)
+		return NULL;
+
+	rte_hash_add_key_data(ring->grants, &ref, gnt);
+	ring->grants_cnt++;
+	return gnt->page;
+}
+
diff --git a/drivers/net/xen-netback/xnb_rxtx.c b/drivers/net/xen-netback/xnb_rxtx.c
new file mode 100644
index 0000000..3177883
--- /dev/null
+++ b/drivers/net/xen-netback/xnb_rxtx.c
@@ -0,0 +1,683 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#include <xen/xen.h>
+#include <xen/grant_table.h>
+
+#include "xnb.h"
+
+#define XEN_PAGE_SHIFT	    12
+#define XEN_PFN_DOWN(x)     ((x) >> XEN_PAGE_SHIFT)
+#define XEN_PAGE_SIZE	    XC_PAGE_SIZE
+#define XEN_PAGE_MASK	    (~(XEN_PAGE_SIZE-1))
+
+#define RTE_LOGTYPE_XENRING RTE_LOGTYPE_USER1
+
+#define RTE_XEN_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, XEN_PAGE_SIZE)
+#define RTE_XEN_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, XEN_PAGE_SIZE)
+
+#define BUG_ON(x) do {\
+	if (x) \
+		rte_panic("XEN: BUG_ON(%s)\n", #x); \
+} while (0)
+
+#ifdef DEBUG_PACKET
+#define PRINT_PACKET_BUFF 6072
+#define PRINT_PACKET(str, addr, size, header) do {				\
+	char *pkt_addr = (char*)(addr);						\
+	unsigned int index;							\
+	char packet[PRINT_PACKET_BUFF];						\
+										\
+	if ((header))								\
+		snprintf(packet, PRINT_PACKET_BUFF, "Header size %d: ", (size));\
+	else									\
+		snprintf(packet, PRINT_PACKET_BUFF, "Packet size %d: ", (size));\
+	for (index = 0; index < (size); index++) {				\
+		snprintf(packet + strnlen(packet, PRINT_PACKET_BUFF),		\
+			PRINT_PACKET_BUFF - strnlen(packet, PRINT_PACKET_BUFF),	\
+			"%02hhx ", pkt_addr[index]);				\
+	}									\
+	snprintf(packet + strnlen(packet, PRINT_PACKET_BUFF),			\
+		PRINT_PACKET_BUFF - strnlen(packet, PRINT_PACKET_BUFF), "\n");	\
+										\
+	RTE_LOG(DEBUG, XENRING, "%s %s", str, packet);				\
+} while(0)
+
+#define XEN_LOG_PKT(ring, mbuf)	do {					   \
+	PRINT_PACKET(ring->name,					   \
+		     (uintptr_t) rte_pktmbuf_mtod_offset(mbuf, void *, 0), \
+		     mbuf->pkt_len, 0);					   \
+} while (0)
+
+#else
+#define PRINT_PACKET(str, addr, size, header) do{} while(0)
+#define XEN_LOG_PKT(ring, mbuf)		    do {} while(0)
+#endif
+
+#ifdef DEBUG
+#define XEN_LOG_GOP(gmfn_buf, gref_buf, flags, len) do {		\
+		RTE_LOG(ERR, XENRING,					\
+			"gop %s: gmfn %x offset %u "			\
+			"%s: ref %u offset %u size %d\n",		\
+			flags & GNTCOPY_dest_gref ? "src" : "dst",	\
+			gmfn_buf->u.gmfn,				\
+			gmfn_buf->offset,				\
+			flags & GNTCOPY_dest_gref ? "dst" : "src",	\
+			gref_buf->u.ref,				\
+			gref_buf->offset,				\
+			len);						\
+} while (0)
+
+#define XEN_LOG_RXGOP(gop) do {					 \
+		RTE_LOG(INFO, XENRING,					 \
+			"(%s) rxgop size %d "				 \
+			"src: gmfn %x offset %u dst: ref %u offset %u\n",\
+			(gop)->status != GNTST_okay ? "not ok" : "ok",	 \
+		        (gop)->len,					 \
+			(gop)->source.u.gmfn,				 \
+		        (gop)->source.offset,				 \
+			(gop)->dest.u.ref,				 \
+			(gop)->dest.offset);				 \
+} while (0)
+
+#define XEN_LOG_TXGOP(gop) do {					 \
+		RTE_LOG(INFO, XENRING,					 \
+			"(%s) txgop size %d "				 \
+			"src: ref %u offset %u dst: gmfn %x offset %u\n",\
+			(gop)->status != GNTST_okay ? "not ok" : "ok",	 \
+		        (gop)->len,					 \
+			(gop)->source.u.ref,				 \
+		        (gop)->source.offset,				 \
+			(gop)->dest.u.gmfn,				 \
+			(gop)->dest.offset);				 \
+} while (0)
+
+#define XEN_LOG_TXREQ(ring, txreq, i)   do {				\
+		RTE_LOG(INFO, XENRING,					\
+			"%s get req[%u]: id=%d ref=%u offset=%d\n",	\
+			(ring)->name, i,				\
+		        (txreq)->id, (txreq)->gref, (txreq)->offset);	\
+} while (0)
+
+#define XEN_LOG_PREQ(ring, p, txreq) do {				   \
+	RTE_LOG(INFO, XENRING, "%s set req[%u]: id=%d ref=%u offset=%d\n", \
+		(ring)->name, (p) - (ring)->pending,			   \
+		(txreq)->id, (txreq)->gref, (txreq)->offset);		   \
+} while (0)
+
+#define XEN_LOG_DEBUG(fmt, ...) RTE_LOG(INFO, XENRING, fmt, ##__VA_ARGS__)
+#else
+#define XEN_LOG_GOP(gmfn, gref, flags, len) do {} while(0)
+#define XEN_LOG_RXGOP(gop)		    do {} while(0)
+#define XEN_LOG_TXGOP(gop)		    do {} while(0)
+#define XEN_LOG_TXREQ(ring, txreq, i)	    do {} while(0)
+#define XEN_LOG_PREQ(ring, p, txreq)	    do {} while(0)
+#define XEN_LOG_DEBUG(fmt, ...)		    do {} while(0)
+#endif
+
+
+/* Sets up a grant copy operation.
+ * The direction of the copy is determined by "flags".
+ */
+static inline int make_copy_gop(struct gnttab_copy *copy_gop,
+				uint16_t len, domid_t dom, uint8_t flags,
+				grant_ref_t gref, uint16_t offset,
+				unsigned long gmfn, uint16_t gofs)
+{
+	struct gnttab_copy_ptr *gref_buf, *gmfn_buf;
+
+	if (flags & GNTCOPY_dest_gref) {
+		gmfn_buf = &copy_gop->source;
+		gmfn_buf->domid = DOMID_SELF;
+
+		gref_buf = &copy_gop->dest;
+		gref_buf->domid = dom;
+	} else {
+		gref_buf = &copy_gop->source;
+		gref_buf->domid = dom;
+
+		gmfn_buf = &copy_gop->dest;
+		gmfn_buf->domid = DOMID_SELF;
+	}
+
+	gmfn_buf->u.gmfn = XEN_PFN_DOWN(gmfn);
+	gmfn_buf->offset = gofs;
+	gref_buf->u.ref = gref;
+	gref_buf->offset = offset;
+	copy_gop->flags = flags;
+
+	if (gofs + len > XEN_PAGE_SIZE)
+		copy_gop->len = XEN_PAGE_SIZE - gofs;
+	else
+		copy_gop->len = len;
+
+	XEN_LOG_GOP(gmfn_buf, gref_buf, flags, copy_gop->len);
+
+	return len - copy_gop->len;
+}
+
+/* Assumes an mbuf.size <= XEN_PAGE_SIZE */
+static inline bool gop_mbuf_copy(struct gnttab_copy **gop,
+				 struct pending_req **r,
+				 unsigned long pfn,
+				 uint8_t flags, domid_t dom,
+				 grant_ref_t ref, uint16_t offset, uint16_t len)
+{
+	struct gnttab_copy *copy_gop = *gop;
+	struct pending_req *req = *r;
+	uint16_t gofs = pfn & ~XEN_PAGE_MASK;
+	uint16_t avail;
+
+	/* Keep issuing copy ops while data remains; the mbuf data
+	 * area may cross a XEN_PAGE_SIZE boundary.
+	 */
+	while (len) {
+		/* */
+		avail = make_copy_gop(copy_gop, len, dom, flags, ref,
+				      offset, pfn, gofs);
+		len -= copy_gop->len;
+		offset += copy_gop->len;
+
+		if (avail) {
+			pfn++;
+			gofs = 0;
+		}
+
+		/* Copy as much to a gref as possible. "more" is set when the
+		 * mbuf needs more than one grant operation for the same
+		 * reference; such extra entries carry no ring request and
+		 * therefore get no response - responses are only made for
+		 * requests actually provided by the frontend. */
+		if (*gop != copy_gop) {
+			req->more = 1;
+			req->mbuf = NULL;
+		}
+
+		++copy_gop;
+		++req;
+	};
+
+	*gop = copy_gop;
+	*r = req;
+	return !len;
+}
+
+static void make_rx_response(struct xenvif_ring *ring, netif_rx_request_t *req,
+			     int16_t size, uint16_t flags)
+{
+	RING_IDX i = ring->rx_ring.rsp_prod_pvt;
+	netif_rx_response_t *resp;
+
+	resp = RING_GET_RESPONSE(&ring->rx_ring, i);
+	resp->offset     = 0;
+	resp->flags      = flags;
+	resp->id         = req->id;
+	resp->status     = size;
+
+	ring->rx_ring.rsp_prod_pvt = ++i;
+}
+
+static inline uint16_t get_rx_flags(struct rte_mbuf *m)
+{
+	uint16_t flags = 0;
+
+	if ((m->ol_flags & PKT_TX_UDP_CKSUM) ||
+	    (m->ol_flags & PKT_TX_TCP_CKSUM))
+		flags |= NETRXF_csum_blank | NETRXF_data_validated;
+	else
+		flags |= NETRXF_data_validated;
+
+	return flags;
+}
+
+/* Sets up grant copy operations *from* an mbuf to frontend grant refs */
+static int gop_from_mbuf(struct xenvif_ring *ring,
+			 struct xenvif *vif,
+			 struct rte_mbuf *m,
+			 RING_IDX *rc,
+			 struct gnttab_copy **gop)
+{
+	struct pending_req *last_req;
+	struct gnttab_copy *copy_gop = *gop;
+	struct pending_req *p = &ring->pending[copy_gop - ring->gop];
+	netif_rx_request_t *rxreq = &p->u.rxreq;
+	netif_rx_back_ring_t *rx_ring = &ring->rx_ring;
+	unsigned long addr;
+
+	RING_COPY_REQUEST(rx_ring, *rc, rxreq);
+	rx_ring->req_cons = ++(*rc);
+
+	BUG_ON(m->pkt_len > XEN_PAGE_SIZE);
+
+	if (vif->features.pgnt) {
+		void *page;
+
+		page = rte_xen_ring_get_page(ring, rxreq->gref, true);
+		if (page) {
+			rte_memcpy(page, rte_pktmbuf_mtod_offset(m, void *, 0),
+				   m->pkt_len);
+			make_rx_response(ring, rxreq, m->pkt_len,
+					 get_rx_flags(m));
+			return 1;
+		}
+	}
+
+	/* First is always NULL. On receive side, the last
+	 * fragment of the mbuf grant operations is the one
+	 * that contains the sent mbuf.
+	 */
+	p->mbuf = NULL;
+
+	addr = rte_mbuf_data_dma_addr_default(m);
+	gop_mbuf_copy(gop, &p, addr,
+		      GNTCOPY_dest_gref, ring->dom,
+		      rxreq->gref, 0, m->pkt_len);
+
+	/* The last pending_req gets the mbuf pointer set;
+	 * the pending_req with mbuf != NULL is the one that
+	 * will produce the response (carrying pkt_len) to
+	 * the guest.
+	 */
+	last_req = --p;
+	last_req->mbuf = m;
+
+	return 0;
+}
+
+uint16_t
+rte_xen_enqueue_burst(struct xenvif *dev, uint16_t queue_id,
+		      struct rte_mbuf **pkts, uint16_t count)
+{
+	struct xenvif_queue *queue = &dev->queues[queue_id];
+	struct xenvif_ring *ring = &queue->rx;
+	netif_rx_back_ring_t *rx_ring = &ring->rx_ring;
+	struct gnttab_copy *gop = ring->gop;
+	struct pending_req *pending = ring->pending;
+	RING_IDX rc, rp;
+	uint32_t recv = 0;
+	int notify = 0;
+	uint16_t nr_gops = 0;
+	int ret = -1;
+	int i = 0;
+
+	rc = rx_ring->req_cons;
+	rp = rx_ring->sring->req_prod;
+	xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+	/* mbufs are contiguous within a 1G/2M hugepage and we don't support GSO,
+	 * which means that in the worst case a packet crosses a XEN_PAGE_SIZE
+	 * boundary. */
+	count = RTE_MIN((uint32_t) rp - rc, count);
+	if (count == 0)
+		return 0;
+
+	for (recv = 0; recv < count; recv++) {
+		if (gop_from_mbuf(ring, queue->vif,
+				  pkts[recv], &rc, &gop))
+			continue;
+
+		rte_compiler_barrier();
+	}
+
+	/* Grant copy the refs to the mbufs */
+	nr_gops = gop - ring->gop;
+	if (nr_gops) {
+		ret = xc_gnttab_op(ring->xch, GNTTABOP_copy,
+				   ring->gop, sizeof(struct gnttab_copy),
+				   nr_gops);
+
+		if (unlikely(ret))
+			RTE_LOG(ERR, XENRING,"%s: grant copy failed (err %d).\n",
+				queue->rx.name, ret);
+	}
+
+	/* Produce the responses */
+	for (i = 0; i < nr_gops; i++) {
+		struct rte_mbuf *m = pending[i].mbuf;
+		netif_rx_request_t *rxreq = &pending[i].u.rxreq;
+		int16_t st = 0;
+
+		if (unlikely(ring->gop[i].status != GNTST_okay))
+			st = NETIF_RSP_ERROR;
+
+		if (m != NULL) {
+			XEN_LOG_RXGOP(&ring->gop[i]);
+			XEN_LOG_PKT(ring, m);
+			make_rx_response(ring, rxreq,
+					 !st ? (int16_t) m->pkt_len : st,
+					 get_rx_flags(m));
+		}
+	}
+
+	if (likely(recv)) {
+		/* Notify the guest if necessary. */
+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&ring->rx_ring, notify);
+		if (notify)
+			xc_evtchn_notify(ring->evtchnh, ring->port);
+	}
+
+	return recv;
+}
+
+static void make_tx_response(struct xenvif_ring *ring, netif_tx_request_t *req,
+			     int8_t status)
+{
+	RING_IDX i = ring->tx_ring.rsp_prod_pvt;
+	netif_tx_response_t *resp;
+
+	resp = RING_GET_RESPONSE(&ring->tx_ring, i);
+	resp->id     = req->id;
+	resp->status = status;
+
+#ifdef DEBUG
+	RTE_LOG(INFO, XENRING, "%s resp id=%d (ref=%u offset=%d)\n",
+		ring->name, resp->id, req->gref, req->offset);
+#endif
+
+	ring->tx_ring.rsp_prod_pvt = ++i;
+}
+
+static int8_t count_tx_requests(struct xenvif_ring *ring,
+				netif_tx_request_t *first,
+				RING_IDX rc)
+{
+	netif_tx_back_ring_t *tx_ring;
+	netif_tx_request_t txreq;
+	int slots = 0;
+
+	if (!(first->flags & NETTXF_more_data))
+		return 0;
+
+	tx_ring = &ring->tx_ring;
+	slots = 0;
+
+	do {
+		RING_COPY_REQUEST(tx_ring, rc + slots, &txreq);
+		first->size -= txreq.size;
+		slots++;
+	} while (txreq.flags & NETTXF_more_data);
+
+	XEN_LOG_DEBUG("slots %u\n", slots);
+	return slots;
+}
+
+/* Sets a tx request that is pending validation */
+static inline void set_tx_request(struct xenvif_ring *ring __rte_unused,
+				  struct pending_req *req,
+				  netif_tx_request_t *txreq)
+{
+	XEN_LOG_PREQ(ring, req, txreq);
+	memcpy(&req->u.txreq, txreq, sizeof(*txreq));
+	req->more = 0;
+	req->mbuf = NULL;
+}
+
+
+static void parse_ethernet(struct rte_mbuf *m,
+			   uint16_t *l4_proto, void **l4_hdr)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	void *l3_hdr = NULL;
+	struct ether_hdr *eth_hdr;
+	uint16_t ethertype;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	m->l2_len = sizeof(struct ether_hdr);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	if (ethertype == ETHER_TYPE_VLAN) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		m->l2_len += sizeof(struct vlan_hdr);
+		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+	}
+
+	l3_hdr = (char *)eth_hdr + m->l2_len;
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		*l4_proto = ipv4_hdr->next_proto_id;
+		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV4;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+		*l4_proto = ipv6_hdr->proto;
+		m->l3_len = sizeof(struct ipv6_hdr);
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV6;
+		break;
+	default:
+		m->l3_len = 0;
+		*l4_proto = 0;
+		break;
+	}
+}
+
+static inline void set_mbuf_chksum(netif_tx_request_t *txreq,
+				   struct rte_mbuf *m)
+{
+	uint16_t l4_proto = 0;
+	void *l4_hdr = NULL;
+
+	if (txreq->flags & NETTXF_csum_blank &&
+	    txreq->flags & NETTXF_data_validated) {
+		parse_ethernet(m, &l4_proto, &l4_hdr);
+		switch (l4_proto) {
+			case IPPROTO_TCP:
+				m->ol_flags |= PKT_TX_TCP_CKSUM;
+				break;
+			case IPPROTO_UDP:
+				m->ol_flags |= PKT_TX_UDP_CKSUM;
+				break;
+			case IPPROTO_SCTP:
+				m->ol_flags |= PKT_TX_SCTP_CKSUM;
+				break;
+		}
+	}
+}
+
+/* Sets up grant operations from frontend grant refs *to* an mbuf */
+static int gop_to_mbuf(struct xenvif_ring *ring,
+		       struct xenvif *vif,
+		       struct rte_mbuf *m,
+		       RING_IDX *rc,
+		       struct gnttab_copy **gop)
+{
+	struct gnttab_copy *copy_gop = *gop;
+	struct pending_req *p = &ring->pending[copy_gop - ring->gop];
+	struct pending_req *first = p;
+	netif_tx_request_t *txreq = &p->u.txreq;
+	netif_tx_back_ring_t *tx_ring = &ring->tx_ring;
+	unsigned long addr;
+	uint16_t ofs, len;
+	RING_IDX ri = *rc;
+	bool pgnt = vif->features.pgnt;
+	char *page = NULL;
+
+	RING_COPY_REQUEST(tx_ring, ri, txreq);
+
+	m->pkt_len = len = txreq->size;
+	count_tx_requests(ring, txreq, ++ri);
+	set_mbuf_chksum(txreq, m);
+
+	BUG_ON(txreq->size > XEN_PAGE_SIZE);
+
+	ofs = 0;
+	addr = rte_mbuf_data_dma_addr_default(m);
+	set_tx_request(ring, p, txreq);
+
+	while (len) {
+		if (pgnt)
+			page = rte_xen_ring_get_page(ring, txreq->gref, false);
+
+		if (page) {
+			rte_memcpy(rte_pktmbuf_mtod_offset(m, void*, ofs),
+				   page + txreq->offset, txreq->size);
+			make_tx_response(ring, txreq, NETIF_RSP_OKAY);
+		} else {
+			gop_mbuf_copy(gop, &p, addr + ofs, GNTCOPY_source_gref,
+				      ring->dom, txreq->gref, txreq->offset,
+				      txreq->size);
+		}
+
+		len -= txreq->size;
+		ofs += txreq->size;
+
+		/* More slots remaining */
+		if (len) {
+			txreq = &p->u.txreq;
+			RING_COPY_REQUEST(tx_ring, ri++, txreq);
+			set_tx_request(ring, p, txreq);
+		}
+	}
+
+	tx_ring->req_cons = ri;
+	*rc = ri;
+
+	/* The first pending_req gets the mbuf pointer set;
+	 * in the dequeue loop, pending_reqs with mbuf != NULL
+	 * are the ones handed back to the caller as received
+	 * packets.
+	 */
+	first->mbuf = m;
+
+	/* If no grant ops were set up */
+	return (copy_gop - *gop) == 0;
+}
+
+uint16_t
+rte_xen_dequeue_burst(struct xenvif *dev, uint16_t queue_id,
+		      struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
+		      uint16_t count)
+{
+	struct xenvif_queue *queue = &dev->queues[queue_id];
+	struct xenvif_ring *ring = &queue->tx;
+	struct gnttab_copy *gop = ring->gop;
+	struct pending_req *pending = ring->pending;
+	netif_tx_back_ring_t *tx_ring = &ring->tx_ring;
+	RING_IDX rc, rp;
+	uint16_t sent = 0;
+	int notify = 0;
+	uint16_t nr_gops = 0;
+	int ret = -1;
+	int i = 0;
+
+	rc = tx_ring->req_cons;
+	rp = tx_ring->sring->req_prod;
+	xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+	count = RTE_MIN((uint32_t) rp - rc, count);
+
+	rte_prefetch0(&queue->vif->features);
+	rte_prefetch0(&tx_ring->sring->ring[rc & (RTE_XEN_TX_RING_SIZE - 1)]);
+
+	/* Get requests and setup the pages */
+	while ((rc != rp) && sent < count) {
+		struct rte_mbuf *m;
+
+		/* Allocate an mbuf and populate the structure. */
+		m = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(m == NULL)) {
+			RTE_LOG(ERR, XENRING,
+				"%s: Failed to allocate memory for mbuf.\n",
+				 queue->tx.name);
+			break;
+		}
+
+		/* Copy if data is inline */
+		if (likely(gop_to_mbuf(ring, queue->vif, m, &rc, &gop))) {
+			pkts[sent] = m;
+			sent++;
+			continue;
+		}
+	}
+
+	/* Grant copy the refs to the mbufs */
+	nr_gops = gop - ring->gop;
+	if (nr_gops) {
+		ret = xc_gnttab_op(ring->xch, GNTTABOP_copy,
+				   ring->gop, sizeof(struct gnttab_copy),
+				   nr_gops);
+
+		if (unlikely(ret))
+			RTE_LOG(ERR, XENRING,"%s: grant copy failed (err %d).\n",
+				queue->tx.name, ret);
+	}
+
+	/* Produce the responses */
+	for (i = 0; i < nr_gops; i++) {
+		struct rte_mbuf *m = pending[i].mbuf;
+		netif_tx_request_t *txreq = &pending[i].u.txreq;
+		bool more = pending[i].more;
+		int8_t st = NETIF_RSP_OKAY;
+
+		if (unlikely(ring->gop[i].status != GNTST_okay))
+			st = NETIF_RSP_ERROR;
+
+		XEN_LOG_TXGOP(&ring->gop[i]);
+
+		if (!more) {
+			XEN_LOG_TXREQ(ring, txreq, i);
+			make_tx_response(ring, txreq, st);
+		}
+
+		if (m == NULL)
+			continue;
+
+		XEN_LOG_PKT(ring, m);
+		pkts[sent] = m;
+		sent++;
+	}
+
+	if (likely(sent)) {
+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&ring->tx_ring, notify);
+		if (notify)
+			xc_evtchn_notify(ring->evtchnh, ring->port);
+	}
+
+	return sent;
+}
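+
+/*
+ * Illustrative sketch (not part of this patch): how a polling lcore might
+ * consume guest transmit traffic with rte_xen_dequeue_burst() above.
+ * 'vif' and 'pool' are assumed to come from the application setup
+ * (e.g. handed over by the xenbus connect callback).
+ */
+#if 0
+static void example_poll_tx(struct xenvif *vif, struct rte_mempool *pool)
+{
+	struct rte_mbuf *pkts[32];
+	uint16_t i, nb;
+
+	/* dequeue up to 32 packets from queue 0 of this vif */
+	nb = rte_xen_dequeue_burst(vif, 0, pool, pkts, 32);
+	for (i = 0; i < nb; i++)
+		rte_pktmbuf_free(pkts[i]); /* or hand off to another port */
+}
+#endif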
diff --git a/drivers/net/xen-netback/xnb_xenbus.c b/drivers/net/xen-netback/xnb_xenbus.c
new file mode 100644
index 0000000..3d4c8e1
--- /dev/null
+++ b/drivers/net/xen-netback/xnb_xenbus.c
@@ -0,0 +1,975 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "xnb.h"
+
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_hash.h>
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#else
+#include <rte_jhash.h>
+#endif
+
+#include <unistd.h>
+#include <poll.h>
+#include <sys/queue.h>
+
+#define RTE_LOGTYPE_XEN RTE_LOGTYPE_USER1
+
+#define XENBUS_MAX_ENTRY 64
+#define XENBUS_DOMAINS_HASH_ENTRIES 1024
+
+/* Key for the domain hash table */
+struct xenvif_hash_key {
+	domid_t domid;
+	unsigned handle;
+};
+
+/* The Xenbus watch path and token */
+struct xenbus_watch {
+	char *path;
+	const char *token;
+	LIST_ENTRY(xenbus_watch) next;
+};
+
+/* Registered Xenbus client
+ * Listening on /local/domain/<dev->domid>/backend/{vif,dpvif} */
+struct xenbus_device {
+	/* Backend domain id and its base path */
+	unsigned int domid;
+	char *dompath;
+	unsigned int max_cores;
+
+	/* Xenstore handle */
+	struct xs_handle *xsh;
+	/* Xenstore file descriptor */
+	int xsh_fd;
+
+	/* upcalls for registered backend */
+	struct xenbus_ops *callbacks;
+	struct xenbus_watch watch;
+
+	/* domains look up table */
+	struct rte_hash *domains;
+};
+
+static struct xenbus_device *dev = NULL;
+
+static struct xenvif* xenbus_backend_get(struct xenbus_device *dev,
+					 unsigned domid, unsigned int handle);
+
+/*
+ * Xenstore/Xenbus helper functions
+ */
+
+static int xenbus_printf(xs_transaction_t xbt, char *basename,
+		         const char *key, const char *fmt, ...)
+{
+	char *path = NULL;
+	char *buf = NULL;
+	va_list ap;
+	int ret, len;
+
+	va_start(ap, fmt);
+	len = vasprintf(&buf, fmt, ap);
+	va_end(ap);
+
+	if (len <= 0)
+		return -ENOMEM;
+
+	len = asprintf(&path, "%s/%s", basename, key);
+	if (len <= 0) {
+		free(buf);
+		return -ENOMEM;
+	}
+
+	ret = xs_write(dev->xsh, xbt, path, buf, strlen(buf));
+	if (ret)
+		RTE_LOG(INFO, XEN, "xs_write: %s = %s\n", path, buf);
+
+	free(path);
+	free(buf);
+
+	return ret;
+}
+
+static int xenbus_scanf(xs_transaction_t xbt, char *basename, const char *key,
+			const char *fmt, ...)
+{
+	char *path = NULL;
+	char *buf = NULL;
+	va_list ap;
+	unsigned int ret = 0;
+
+	if (asprintf(&path, "%s/%s", basename, key) <= 0)
+		return -ENOMEM;
+
+	buf = xs_read(dev->xsh, xbt, path, &ret);
+	if (!buf) {
+		RTE_LOG(ERR, XEN, "xs_read: failed on path %s\n", path);
+		free(path);
+		return -EINVAL;
+	}
+
+	RTE_LOG(INFO, XEN, "xs_read: %s = %s\n", path, buf);
+
+	va_start(ap, fmt);
+	ret = vsscanf(buf, fmt, ap);
+	va_end(ap);
+
+	free(buf);
+	free(path);
+
+	return ret <= 0 ? -EINVAL : (int) ret;
+}
+
+/* Converts xenbus state to a string */
+static const char *xenbus_strstate(enum xenbus_state state)
+{
+	static const char *names[] = {
+		"Unknown",
+		"Initialising",
+		"InitWait",
+		"Initialised",
+		"Connected",
+		"Closing",
+		"Closed",
+		"Reconfiguring",
+		"Reconfigured",
+	};
+
+	return state < sizeof(names) / sizeof(names[0]) ? names[state] : "Invalid";
+}
+
+/*
+ * Xen Virtual Interface routines (Backend)
+ *
+ * ( Initialising ) -> Announce features
+ *                  -> Update 'state' to "InitWait"
+ *
+ * ( Connected    ) -> Read frontend features
+ *                  -> Fetch TX/RX ring refs and event channels
+ *                  -> Update 'state' to "Connected"
+ *
+ * We propagate an event to tell downstream consumers that the queue
+ * is initialized. This allows a PMD (app) to initialize the TX / RX
+ * grant references and event channels in each lcore.
+ *
+ * Here we handle all xenstore interactions and state transitions with
+ * the frontend; all libxc handling is assumed to be done downstream.
+ */
+
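+/*
+ * Illustrative sketch (not part of this patch): a downstream PMD/app is
+ * expected to register a struct xenbus_ops (whose init/connect/disconnect/
+ * close callbacks are invoked by the state handlers below) and then run
+ * the xenbus event loop. Field and function names are assumed from their
+ * use further down in this file.
+ */
+#if 0
+	static struct xenbus_ops my_ops = {
+		/* .ids, .init, .connect, .disconnect, .close ... */
+	};
+
+	rte_xenbus_backend_register(&my_ops, rte_lcore_count());
+	rte_xenbus_backend_start();	/* runs the watch/event loop */
+#endif
+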
+static int xenvif_queue_init(struct xenvif_queue *queue)
+{
+	char *path = NULL;
+	int ret = -1;
+
+	if (queue->vif->num_queues == 1)
+		path = queue->vif->fe.path;
+	else {
+		if (asprintf(&path, "%s/queue-%u",
+			     queue->vif->fe.path, queue->id) < 0)
+			return ret;
+	}
+
+	if ((xenbus_scanf(XBT_NULL, path, "tx-ring-ref", "%u",
+			 &queue->tx.ring_ref) != 1) ||
+	    (xenbus_scanf(XBT_NULL, path, "rx-ring-ref", "%u",
+			 &queue->rx.ring_ref) != 1))
+		return ret;
+
+	if ((xenbus_scanf(XBT_NULL, path, "event-channel-tx", "%u",
+			 &queue->tx.evtchn) != 1) ||
+	    (xenbus_scanf(XBT_NULL, path, "event-channel-rx", "%u",
+			 &queue->rx.evtchn) != 1))
+		return ret;
+
+	if (asprintf(&queue->tx.name, "TX%u", queue->id) < 0)
+		return ret;
+
+	if (asprintf(&queue->rx.name, "RX%u", queue->id) < 0)
+		return ret;
+
+	queue->tx.dom = queue->rx.dom = queue->vif->fe.domid;
+
+	ret = 0;
+	return ret;
+}
+
+static struct xenvif* xenvif_alloc(unsigned int domid, unsigned int handle,
+				   unsigned max_queues)
+{
+	struct xenvif *vif;
+
+	vif = malloc(sizeof(struct xenvif));
+	if (!vif)
+		return NULL;
+
+	vif->be.domid = domid;
+	vif->be.handle = handle;
+	vif->num_queues = max_queues;
+
+	return vif;
+}
+
+static int xenvif_write_features(struct xenvif *vif)
+{
+	xs_transaction_t xbt;
+	char *path = vif->be.path;
+
+again:
+	xbt = xs_transaction_start(dev->xsh);
+	if (!xbt)
+		goto fail;
+
+	/* Write features supported  */
+	if (xenbus_printf(xbt, path, "feature-sg", "%u", 0) <= 0)
+		goto abort_transaction;
+
+	if (xenbus_printf(xbt, path, "feature-gso-tcpv4", "%u", 0) <= 0)
+		goto abort_transaction;
+
+	if (xenbus_printf(xbt, path, "feature-gso-tcpv6", "%u", 0) <= 0)
+		goto abort_transaction;
+
+	if (xenbus_printf(xbt, path, "feature-ipv6-csum-offload", "%u", 1) <= 0)
+		goto abort_transaction;
+
+	if (xenbus_printf(xbt, path, "feature-rx-copy", "%u", 1) <= 0)
+		goto abort_transaction;
+
+	if (xenbus_printf(xbt, path, "feature-rx-flip", "%u", 0) <= 0)
+		goto abort_transaction;
+
+	if (xenbus_printf(xbt, path, "feature-multicast-control", "%u", 0) <= 0)
+		goto abort_transaction;
+
+	if (!xs_transaction_end(dev->xsh, xbt, false)) {
+		if (errno == EAGAIN)
+			goto again;
+		if (errno)
+			goto fail;
+	}
+
+	/* Optional features supported */
+	if (xenbus_printf(XBT_NULL, path, "feature-split-event-channels", "%u",
+			  1) <= 0)
+		goto fail;
+
+	if (xenbus_printf(XBT_NULL, path, "multi-queue-max-queues", "%u",
+			  vif->num_queues) <= 0)
+		goto fail;
+
+	if (xenbus_printf(XBT_NULL, path, "feature-persistent", "%u", 1) <= 0)
+		goto fail;
+
+	return 0;
+
+abort_transaction:
+	xs_transaction_end(dev->xsh, xbt, true);
+
+fail:
+	RTE_LOG(INFO, XEN, "%s: vif%d.%d: failed to write features\n",
+		__func__, vif->be.domid, vif->be.handle);
+	return -1;
+}
+
+static int xenvif_read_features(struct xenvif *vif)
+{
+	char *path = vif->fe.path;
+	int ret = 0;
+	unsigned int num_queues = 1;
+
+	/* Read request-rx-copy (mandatory) */
+	if (xenbus_scanf(XBT_NULL, path, "request-rx-copy", "%u", &ret) <= 0) {
+		RTE_LOG(ERR, XEN, "%s: error reading request-rx-copy\n",
+			__func__);
+		return -1;
+	}
+
+	/* Read number of supported queues */
+	if (xenbus_scanf(XBT_NULL, path, "multi-queue-num-queues",
+			 "%u", &num_queues) <= 0)
+		num_queues = 1;
+
+	if (num_queues < vif->num_queues)
+		vif->num_queues = num_queues;
+
+	/* Read features supported by the frontend */
+	if (xenbus_scanf(XBT_NULL, path, "feature-rx-notify", "%u",
+			 &vif->features.rx_poll) <= 0)
+		vif->features.rx_poll = 1;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-sg", "%u",
+			 &vif->features.sg) <= 0)
+		vif->features.sg = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-gso-tcpv4", "%u",
+			 &vif->features.tcp4) <= 0)
+		vif->features.tcp4 = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-gso-tcpv4-prefix", "%u",
+			 &vif->features.tcp4_prefix) <= 0)
+		vif->features.tcp4_prefix = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-gso-tcpv6", "%u",
+			 &vif->features.tcp6) <= 0)
+		vif->features.tcp6 = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-gso-tcpv6-prefix", "%u",
+			 &vif->features.tcp6_prefix) <= 0)
+		vif->features.tcp6_prefix = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-no-csum-offload", "%u",
+			 &vif->features.ip4_csum) <= 0)
+		vif->features.ip4_csum = 1;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-ipv6-csum-offload", "%u",
+			 &vif->features.ip6_csum) <= 0)
+		vif->features.ip6_csum = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-multicast-control", "%u",
+			 &vif->features.mcast_ctrl) <= 0)
+		vif->features.mcast_ctrl = 0;
+
+	if (xenbus_scanf(XBT_NULL, path, "feature-persistent", "%u",
+			 &vif->features.pgnt) <= 0)
+		vif->features.pgnt = 0;
+
+	/* Pseudo features internal to device only */
+	vif->features.zc = 0;
+
+	return 0;
+}
+
+static int xenvif_alloc_queues(struct xenvif *vif)
+{
+	struct xenvif_queue *queues;
+	size_t sz = sizeof(struct xenvif_queue) * vif->num_queues;
+	unsigned int i;
+
+	queues = malloc(sz);
+	if (!queues)
+		return -1;
+
+	memset(queues, 0, sz);
+	vif->queues = queues;
+
+	for (i = 0; i < vif->num_queues; i++) {
+		vif->queues[i].id = i;
+		vif->queues[i].vif = vif;
+	}
+
+	return 0;
+}
+
+static int xenvif_read_queues(struct xenvif *vif)
+{
+	int ret = -1;
+	unsigned int i = 0;
+
+	if (xenvif_alloc_queues(vif) < 0)
+		return -1;
+
+	if (vif->num_queues == 1)
+		ret = xenvif_queue_init(&vif->queues[0]);
+	else {
+		for (i = 0; i < vif->num_queues; i++) {
+			ret = xenvif_queue_init(&vif->queues[i]);
+			if (ret < 0)
+				return -1;
+		}
+	}
+
+	return ret;
+}
+
+static int xenvif_connect(struct xenvif *vif)
+{
+	if (xenvif_read_features(vif) < 0)
+		return -1;
+
+	if (xenvif_read_queues(vif) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Backend <-> Frontend state management
+ *
+ * It is called when an entry is added or a watch token matches the same
+ * name. Here we run the main state machine in the backend.
+ *
+ * State transitions:
+ *
+ *    Init(1)
+ *
+ *      |
+ *      |
+ *      v
+ *
+ * InitWait (2)  ---> Connected (4)
+ *
+ *      ^    \           |
+ *      |     \          |
+ *      |      \         |
+ *      |       \        |
+ *      |        \       |
+ *      |         \      |
+ *      |          \     |
+ *      |           \    |
+ *      |            v   v
+ *
+ *  Closed (6) <---> Closing(5)
+ *
+ *  Init(1)       Created by the toolstack
+ *  InitWait(2)   Frontend is initializing
+ *  Connected(4)  Frontend is connected
+ *  Closing(5)    Frontend is closing
+ *  Closed(6)     Frontend is closed
+ */
+
+/* Switches state of the backend */
+static int xenbus_backend_set_state(struct xenvif *domain,
+				    enum xenbus_state state)
+{
+	xs_transaction_t xbt;
+	bool abort;
+
+	/* Do not fire unnecessary watches, or risk recreating the
+	 * base directory if the state is already closed */
+	if (domain->be.state == state)
+		return 0;
+
+again:
+	xbt = xs_transaction_start(dev->xsh);
+	if (!xbt)
+		return 0;
+
+	abort = true;
+	if (xenbus_printf(xbt, domain->be.path, "state", "%u", state))
+		abort = false;
+
+	if (!xs_transaction_end(dev->xsh, xbt, abort)) {
+		if (errno == EAGAIN && !abort)
+			goto again;
+		/* TODO fail here */
+	} else
+		domain->be.state = state;
+
+	if (!abort)
+		RTE_LOG(INFO, XEN, "%s: vif%d.%d -> %s\n", __func__,
+			domain->be.domid, domain->be.handle,
+			xenbus_strstate(domain->be.state));
+
+	return abort ? -EINVAL : 0;
+}
+
+/* Backend initial state */
+static void xenbus_backend_state_init(struct xenvif *domain)
+{
+	if (!xenvif_write_features(domain)) {
+		if (dev && dev->callbacks)
+			dev->callbacks->init(domain);
+
+		xenbus_backend_set_state(domain, XenbusStateInitWait);
+	}
+}
+
+/* Frontend is initializing */
+static void xenbus_backend_state_initwait(struct xenvif *domain __rte_unused)
+{
+}
+
+/* Frontend is connected */
+static void xenbus_backend_state_connect(struct xenvif *domain)
+{
+	if (!xenvif_connect(domain)) {
+		if (dev && dev->callbacks)
+			dev->callbacks->connect(domain);
+
+		xenbus_backend_set_state(domain, XenbusStateConnected);
+	}
+}
+
+/* Frontend is closing */
+static void xenbus_backend_state_closing(struct xenvif *domain)
+{
+	if (dev && dev->callbacks)
+		dev->callbacks->disconnect(domain);
+
+	xenbus_backend_set_state(domain, XenbusStateClosing);
+}
+
+/* Frontend is closed or unknown */
+static void xenbus_backend_state_closed(struct xenvif *domain)
+{
+	if (dev && dev->callbacks)
+		dev->callbacks->close(domain);
+
+	xenbus_backend_set_state(domain, XenbusStateClosed);
+}
+
+/* When frontend state changes */
+static void xenbus_frontend_state_changed(struct xenvif *domain,
+					  XenbusState state)
+{
+	switch (state) {
+		case XenbusStateInitialising:
+			xenbus_backend_state_initwait(domain);
+			break;
+		case XenbusStateInitialised:
+		case XenbusStateInitWait:
+			break;
+		case XenbusStateConnected:
+			xenbus_backend_state_connect(domain);
+			break;
+		case XenbusStateClosing:
+			xenbus_backend_state_closing(domain);
+			break;
+		case XenbusStateClosed:
+		case XenbusStateUnknown:
+			xenbus_backend_state_closed(domain);
+			break;
+		case XenbusStateReconfiguring:
+		case XenbusStateReconfigured:
+			break;
+	}
+}
+
+/* Read frontend state */
+static int xenbus_frontend_state_read(struct xenvif *domain)
+{
+	xs_transaction_t xbt;
+	int ret = -1;
+	bool abort;
+
+again:
+	xbt = xs_transaction_start(dev->xsh);
+	if (!xbt)
+		return -1;
+
+	abort = true;
+	if (xenbus_scanf(xbt, domain->fe.path, "state", "%d", &ret) == 1) {
+		domain->fe.state = ret;
+		abort = false;
+	}
+
+	if (!xs_transaction_end(dev->xsh, xbt, abort)) {
+		if (errno == EAGAIN && !abort)
+			goto again;
+
+		return -1;
+	}
+
+	return 0;
+}
+
+static void xenbus_frontend_changed(struct xenbus_device *dev,
+				    unsigned int domid,
+				    unsigned int handle)
+{
+	struct xenvif *domain = xenbus_backend_get(dev, domid, handle);
+
+	if (!xenbus_frontend_state_read(domain)) {
+		RTE_LOG(INFO, XEN, "%s: vif%d.%d -> %s\n", __func__, domid,
+			handle, xenbus_strstate(domain->fe.state));
+
+		xenbus_frontend_state_changed(domain, domain->fe.state);
+	}
+}
+
+/*
+ * Interface management routines
+ */
+
+/* Hash functions for the domains table */
+static uint32_t xenbus_backend_hash_crc(const void *data,
+					__rte_unused uint32_t data_len,
+					uint32_t init_val)
+{
+	const struct xenvif_hash_key *k = data;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	init_val = rte_hash_crc_4byte(k->domid, init_val);
+	init_val = rte_hash_crc_4byte(k->handle, init_val);
+#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+	init_val = rte_jhash_1word(k->domid, init_val);
+	init_val = rte_jhash_1word(k->handle, init_val);
+#endif
+
+	return init_val;
+}
+
+static void xenbus_backend_init(struct xenbus_device *dev)
+{
+	char s[64] = { 0 };
+	struct rte_hash_parameters domains_hash_params = {
+		.name = NULL,
+		.entries = XENBUS_DOMAINS_HASH_ENTRIES,
+		.key_len = sizeof(struct xenvif_hash_key),
+		.hash_func = xenbus_backend_hash_crc,
+		.hash_func_init_val = 0,
+	};
+
+	snprintf(s, sizeof(s), "xen_domains_hash_%d", 0);
+	domains_hash_params.name = s;
+	domains_hash_params.socket_id = 0;
+
+	dev->domains = rte_hash_create(&domains_hash_params);
+}
+
+static int xenbus_backend_read(struct xenvif *domain)
+{
+	unsigned int handle = domain->be.handle;
+	domid_t domid = domain->be.domid;
+	char *path;
+
+	path = malloc(XENBUS_MAX_ENTRY);
+	if (!path)
+		return -1;
+	snprintf(path, XENBUS_MAX_ENTRY, "%s/%d/%d",
+		 dev->watch.path, domid, handle);
+	domain->be.path = path;
+
+	/* read backend state */
+	if (xenbus_scanf(XBT_NULL, path, "state", "%d",
+			 &domain->be.state) != 1)
+		return -1;
+
+	/* read backend vifname */
+	if (xenbus_scanf(XBT_NULL, path, "vifname", "%ms",
+			 &domain->ifname) != 1)
+		domain->ifname = strdup("");
+
+	/* %ms is a GNU extension
+	 * XXX BSD compatibility */
+	if (xenbus_scanf(XBT_NULL, path, "frontend", "%ms",
+			 &domain->fe.path) != 1)
+		return -1;
+
+	if (xenbus_scanf(XBT_NULL, path, "frontend-id", "%d",
+			 &domain->fe.domid) != 1)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Add a new domain to our table whenever the backend/<type>/<domid> entry
+ * shows up. Here we set up any watches for state changes.
+ */
+static int xenbus_backend_add(struct xenbus_device *dev,
+			      unsigned int domid, unsigned int handle)
+{
+	struct xenvif_hash_key k = { .domid = domid, .handle = handle };
+	struct xenvif *domain;
+	char *path = NULL, *token = NULL;
+	int ret;
+
+	domain = xenvif_alloc(domid, handle, dev->max_cores);
+	if (!domain) {
+		RTE_LOG(ERR, XEN, "%s: error allocating vif%d.%d\n",
+			__func__, domid, handle);
+		return -1;
+	}
+
+	rte_hash_add_key_data(dev->domains, &k, domain);
+
+	RTE_LOG(INFO, XEN, "%s: set key = (%u,%u) data = %p\n",
+		__func__, k.domid, k.handle, domain);
+
+	/* read 'frontend' and 'frontend-id' entries */
+	if (xenbus_backend_read(domain) < 0) {
+		RTE_LOG(ERR, XEN, "%s: error reading backend entries\n",
+			__func__);
+		return -1;
+	}
+
+	/* watch frontend state changes */
+	if (xenbus_scanf(XBT_NULL, domain->fe.path, "state", "%d",
+			 &domain->fe.state) != 1)
+		return -1;
+
+	asprintf(&path, "%s/state", domain->fe.path);
+	asprintf(&token, "%d/%d/fe", domid, handle);
+	ret = xs_watch(dev->xsh, path, token);
+	if (!ret) {
+		RTE_LOG(ERR, XEN, "%s: failed to watch otherend state %s\n",
+			__func__, path);
+		return -1;
+	}
+
+	xenbus_backend_state_init(domain);
+	return 0;
+}
+
+#if 0
+/*
+ * Deletes a domain from the domains table.
+ */
+static void xenbus_backend_del(struct xenbus_device *dev,
+			       unsigned int domid, unsigned int handle)
+{
+	struct xenvif_hash_key k = { 0 };
+	struct xenvif *dom = NULL;
+
+	k.domid = domid;
+	k.handle = handle;
+
+	rte_hash_lookup_data(dev->domains, &k, (void**) &dom);
+	rte_hash_del_key(dev->domains, &k);
+
+	free(dom);
+}
+#endif
+
+static struct xenvif* xenbus_backend_get(struct xenbus_device *dev,
+					 unsigned domid,
+					 unsigned int handle)
+{
+	struct xenvif_hash_key k;
+	struct xenvif *dom = NULL;
+
+	k.domid = domid;
+	k.handle = handle;
+
+	rte_hash_lookup_data(dev->domains, &k, (void**) &dom);
+
+	RTE_LOG(INFO, XEN, "%s: get key = (%u,%u) data = %p\n",
+		__func__, k.domid, k.handle, dom);
+
+	return dom;
+}
+
+/*
+ * Xenbus global state management
+ */
+
+/* Register the dev on xenstore */
+static int xenbus_register(struct xenbus_device *dev, const char *type)
+{
+	struct xenbus_watch *w = &dev->watch;
+	ssize_t sz;
+	int ret;
+
+	sz = strlen(dev->dompath) + 20;
+	w->path = malloc(sz);
+	memset(w->path, 0, sz);
+	w->token = type;
+
+	snprintf(w->path, sz, "backend/%s", w->token);
+	ret = xs_watch(dev->xsh, w->path, w->token);
+	if (!ret) {
+		RTE_LOG(ERR, XEN, "%s: failed to watch backend path %s \n",
+			__func__, w->path);
+		return -1;
+	}
+
+	RTE_LOG(INFO, XEN, "%s: registering %s domid %d cores %d\n", __func__,
+		type, dev->domid, dev->max_cores);
+	return 0;
+}
+
+/*
+ * Quite inefficient, but we listen on /backend/<type>, add new domains
+ * and trigger the initial state change.
+ */
+static void xenbus_update(struct xenbus_device *dev, char *path)
+{
+	char entry[XENBUS_MAX_ENTRY];
+	unsigned domid, handle;
+	int ret, len;
+
+	len = strlen(dev->watch.path);
+	if (strncmp(path, dev->watch.path, len) != 0)
+		return;
+
+	if (!strlen(path+len))
+		return;
+
+	/* Parse path _or_ watch token */
+	ret = sscanf(path + len, "/%u/%u/%63s", &domid, &handle, entry);
+
+	/* Currently we have a global watch on /backend/vif, which means we
+	 * are notified of _all_ changes in that directory. Eventually we
+	 * should switch to the xenstore special watches that fire when a
+	 * domain is created or released.
+	 *
+	 * NB: good enough for the purposes of this prototype.
+	 *
+	 * TODO: * Listen on the @introduceDomain special watch
+	 *       * Listen on the @releaseDomain special watch
+	 *         (see the sketch after this function)
+	 */
+	if (ret == 2 &&
+	    !xenbus_backend_get(dev, domid, handle)) {
+
+		/* domain is introduced */
+		xenbus_backend_add(dev, domid, handle);
+
+		RTE_LOG(INFO, XEN, "%s: new domain (dom = %d handle = %d)\n",
+			__func__, domid, handle);
+	}
+}
+
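+/*
+ * Illustrative sketch (not part of this patch) for the TODO above:
+ * using the xenstore special watches instead of watching the whole
+ * backend/vif directory.
+ */
+#if 0
+	if (!xs_watch(dev->xsh, "@introduceDomain", "introduce") ||
+	    !xs_watch(dev->xsh, "@releaseDomain", "release"))
+		RTE_LOG(ERR, XEN, "failed to set special watches\n");
+#endif
+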
+static void xenbus_event_loop(struct xenbus_device *dev)
+{
+	char **token;
+	char *name, *path;
+	char type[XENBUS_MAX_ENTRY];
+	struct pollfd fds = {
+		.fd = dev->xsh_fd,
+		.events = POLLIN,
+	};
+
+	while (poll(&fds, 1, 1000) >= 0) {
+		unsigned int domid, handle;
+		int ret;
+
+		if (!(token = xs_check_watch(dev->xsh)))
+			continue;
+
+		name = token[XS_WATCH_TOKEN];
+		path = token[XS_WATCH_PATH];
+
+		if (!strcmp(name, "vif")) {
+			xenbus_update(dev, path);
+			free(token);
+			continue;
+		}
+
+		ret = sscanf(name, "%u/%u/%63s", &domid, &handle, type);
+		if (ret == 3 && !strcmp(type, "fe"))
+			xenbus_frontend_changed(dev, domid, handle);
+		else
+			RTE_LOG(INFO, XEN, "%s: (unknown) watch %s path %s\n",
+				__func__, name, path);
+
+		free(token);
+	}
+}
+
+static int xenbus_get_domain_id(void)
+{
+	char *buf;
+	unsigned int len;
+
+	buf = xs_read(dev->xsh, XBT_NULL, "domid", &len);
+	if (!buf) {
+		RTE_LOG(ERR, XEN, "%s: failed read domain id\n", __func__);
+		return -1;
+	}
+
+	errno = 0;
+	dev->domid = strtoul(buf, NULL, 10);
+	if (errno != 0) {
+		free(buf);
+		return -1;
+	}
+
+	free(buf);
+
+	return 0;
+}
+
+static int xenbus_init(void)
+{
+	/* initialize xenstore related state */
+	dev->xsh = xs_domain_open();
+	if (!dev->xsh) {
+		RTE_LOG(ERR, XEN, "%s: failed to open xenstore\n", __func__);
+		return -1;
+	}
+
+	if (xenbus_get_domain_id() < 0) {
+		RTE_LOG(ERR, XEN, "%s: failed read domain id\n", __func__);
+		return -1;
+	}
+
+	dev->dompath = xs_get_domain_path(dev->xsh, dev->domid);
+	if (!dev->dompath) {
+		RTE_LOG(ERR, XEN, "%s: failed read domain path\n", __func__);
+		return -1;
+	}
+
+	dev->xsh_fd = xs_fileno(dev->xsh);
+
+	/* create domains hash table */
+	xenbus_backend_init(dev);
+
+	RTE_LOG(INFO, XEN, "%s: done\n", __func__);
+	return 0;
+}
+
+static int xenbus_exit(void)
+{
+	int ret = 0;
+
+	if (!dev->xsh)
+		return ret;
+
+	xs_daemon_close(dev->xsh);
+	free(dev->dompath);
+
+	return ret;
+}
+
+/*
+ * Top level library functions exported to DPDK
+ */
+
+int rte_xenbus_backend_register(struct xenbus_ops *ops,
+				unsigned max_cores)
+{
+	if (!dev) {
+		dev = malloc(sizeof(struct xenbus_device));
+		if (!dev)
+			return -1;
+		memset(dev, 0, sizeof(struct xenbus_device));
+	}
+
+	dev->callbacks = (struct xenbus_ops*) ops;
+	dev->max_cores = max_cores > 1 ? max_cores : 1;
+
+	return 0;
+}
+
+int rte_xenbus_backend_start(void)
+{
+	if (!dev || (!dev->xsh && xenbus_init() < 0))
+		return -1;
+
+	xenbus_register(dev, *(dev->callbacks->ids));
+	xenbus_event_loop(dev);
+
+	return 0;
+}
+
+void rte_xenbus_backend_stop(void)
+{
+	if (!dev || !dev->xsh)
+		return;
+
+	xenbus_exit();
+}
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index eb28e11..bf050af 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK)+= -lrte_pmd_xen-netback
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
-- 
2.1.4

>From 726567a34c537d27285f65657c2c34a941093e91 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.mart...@oracle.com>
Date: Mon, 20 Feb 2017 13:33:34 +0000
Subject: [PATCH WIP 2/2] config: add xen-netback PMD option

Default is disabled.
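
A quick way to try it (sketch; assumes the usual legacy out-of-tree build
flow, option name as added below):

    make config T=x86_64-native-linuxapp-gcc O=build
    sed -ri 's,(CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK=).*,\1y,' build/.config
    make O=build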

Signed-off-by: Joao Martins <joao.m.mart...@oracle.com>
---
 config/common_base | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/config/common_base b/config/common_base
index 7830535..a2f0330 100644
--- a/config/common_base
+++ b/config/common_base
@@ -563,11 +563,16 @@ CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
 CONFIG_RTE_LIBRTE_PMD_VHOST=n
 
 #
-#Compile Xen domain0 support
+# Compile Xen domain0 support
 #
 CONFIG_RTE_LIBRTE_XEN_DOM0=n
 
 #
+# Compile Xen netback PMD
+#
+CONFIG_RTE_LIBRTE_PMD_XEN_NETBACK=n
+
+#
 # Enable warning directives
 #
 CONFIG_RTE_INSECURE_FUNCTION_WARNING=n
-- 
2.1.4

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel