On Mon, Mar 26, 2018 at 10:21 AM, David Miller <da...@davemloft.net> wrote:
> From: "Md. Islam" <misl...@kent.edu>
> Date: Fri, 23 Mar 2018 02:43:16 -0400
>
>> +#ifdef CONFIG_XDP_ROUTER
>> +        //if IP forwarding is enabled on the receiver, create xdp_buff
>> +        //from skb and call xdp_router_forward()
>
> Never use C++ comments, only use C style.
>
>> +        if(is_forwarding_enabled(rcv)){
>
> There must be a space between 'if' and the openning parenthesis.  You need
> to also have a space before the openning curly braces.
>
> In fact this entire patch is full of coding style issues, please run your
> changes through checkpatch.pl before resubmitting.


Thanks David for the suggestions! I fixed all the formating errors. I
also modified the forwarding logic. Now the iperf throughput improved
from
53.8Gb/s to around 60Gb/s and the median RTT improved from .055 ms to
.35 ms. The patch is attached.
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 944ec3c..8474eef 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -328,6 +328,18 @@ config VETH
 	  When one end receives the packet it appears on its pair and vice
 	  versa.
 
+config XDP_ROUTER
+	bool "XDP router for veth"
+	depends on IP_ADVANCED_ROUTER
+	depends on VETH
+	default y
+	---help---
+	  This option will enable IP forwarding on incoming xdp_buff.
+	  Currently it is only supported by veth. Say y or n.
+
+	  Currently veth uses slow path for packet forwarding. This option
+	  forwards packets as soon as it is received (as XDP generic).
+
 config VIRTIO_NET
 	tristate "Virtio network driver"
 	depends on VIRTIO
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a69ad39..4ce10c9 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -111,6 +111,28 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 	}
 
+#ifdef CONFIG_XDP_ROUTER
+
+	/* if IP forwarding is enabled on the receiver, create xdp_buff
+	 * from skb and call xdp_router_forward()
+	 */
+	if (is_forwarding_enabled(rcv)) {
+		struct xdp_buff *xdp = kmalloc(sizeof(*xdp), GFP_KERNEL);
+
+		xdp->data = skb->data;
+		xdp->data_end = skb->data + (skb->len - skb->data_len);
+		xdp->data_meta = skb;
+		if (likely(xdp_router_forward(rcv, xdp) == NET_RX_SUCCESS)) {
+			struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+
+			u64_stats_update_begin(&stats->syncp);
+			stats->bytes += length;
+			stats->packets++;
+			u64_stats_update_end(&stats->syncp);
+			goto success;
+		}
+	}
+#endif
 	if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
 		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
 
@@ -122,6 +144,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 drop:
 		atomic64_inc(&priv->dropped);
 	}
+success:
 	rcu_read_unlock();
 	return NETDEV_TX_OK;
 }
@@ -276,6 +299,61 @@ static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
 	rcu_read_unlock();
 }
 
+#ifdef CONFIG_XDP_ROUTER
+int veth_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *rcv;
+	struct ethhdr *ethh;
+	struct sk_buff *skb;
+	int length = xdp->data_end - xdp->data;
+
+	rcu_read_lock();
+	rcv = rcu_dereference(priv->peer);
+	if (unlikely(!rcv)) {
+		kfree(xdp);
+		goto drop;
+	}
+
+	/* Update MAC address and checksum */
+	ethh = eth_hdr_xdp(xdp);
+	ether_addr_copy(ethh->h_source, dev->dev_addr);
+	ether_addr_copy(ethh->h_dest, rcv->dev_addr);
+
+	/* if IP forwarding is enabled on the receiver,
+	 * call xdp_router_forward()
+	 */
+	if (is_forwarding_enabled(rcv)) {
+		if (likely(xdp_router_forward(rcv, xdp) == NET_RX_SUCCESS)) {
+			struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+
+			u64_stats_update_begin(&stats->syncp);
+			stats->bytes += length;
+			stats->packets++;
+			u64_stats_update_end(&stats->syncp);
+			goto success;
+		}
+	}
+
+	/* Local deliver */
+	skb = (struct sk_buff *)xdp->data_meta;
+	if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->bytes += length;
+		stats->packets++;
+		u64_stats_update_end(&stats->syncp);
+	} else {
+drop:
+		atomic64_inc(&priv->dropped);
+	}
+success:
+	rcu_read_unlock();
+	return NETDEV_TX_OK;
+}
+#endif
+
 static const struct net_device_ops veth_netdev_ops = {
 	.ndo_init            = veth_dev_init,
 	.ndo_open            = veth_open,
@@ -290,6 +368,9 @@ static const struct net_device_ops veth_netdev_ops = {
 	.ndo_get_iflink		= veth_get_iflink,
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
+#ifdef CONFIG_XDP_ROUTER
+	.ndo_xdp_xmit		= veth_xdp_xmit,
+#endif
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 492bc65..025a3ec 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -19,6 +19,29 @@
 
 #include <linux/skbuff.h>
 #include <uapi/linux/ip.h>
+#include <linux/filter.h>
+
+#ifdef CONFIG_XDP_ROUTER
+
+#define MIN_PACKET_SIZE 55
+
+static inline struct iphdr *ip_hdr_xdp(const struct xdp_buff *xdp)
+{
+	return (struct iphdr *)(xdp->data+ETH_HLEN);
+}
+
+static inline struct ethhdr *eth_hdr_xdp(const struct xdp_buff *xdp)
+{
+	return (struct ethhdr *)(xdp->data);
+}
+
+static inline bool is_xdp_forwardable(const struct xdp_buff *xdp)
+{
+	return xdp->data_end - xdp->data >= MIN_PACKET_SIZE;
+}
+
+#endif
+
 
 static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4c77f39..8369e5e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3290,6 +3290,11 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
 	__dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
 }
 
+#ifdef CONFIG_XDP_ROUTER
+bool is_forwarding_enabled(struct net_device *dev);
+int xdp_router_forward(struct net_device *dev, struct xdp_buff *xdp);
+#endif
+
 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
 int netif_rx(struct sk_buff *skb);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f805243..623b2de 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -369,6 +369,12 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
+#ifdef CONFIG_XDP_ROUTER
+int ip_route_lookup(__be32 daddr, __be32 saddr,
+			       u8 tos, struct net_device *dev,
+			       struct fib_result *res);
+#endif
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 		       const struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b..18c18f3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4090,6 +4090,56 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(do_xdp_generic);
 
+#ifdef CONFIG_XDP_ROUTER
+
+bool is_forwarding_enabled(struct net_device *dev)
+{
+	struct in_device *in_dev;
+
+	/* verify forwarding is enabled on this interface */
+	in_dev = __in_dev_get_rcu(dev);
+	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(is_forwarding_enabled);
+
+int xdp_router_forward(struct net_device *dev, struct xdp_buff *xdp)
+{
+		int err;
+		struct fib_result res;
+		struct iphdr *iph;
+		struct net_device *rcv;
+
+		if (unlikely(xdp->data_end - xdp->data < MIN_PACKET_SIZE))
+			return NET_RX_DROP;
+
+		iph = (struct iphdr *)(xdp->data + ETH_HLEN);
+
+		/*currently only supports IPv4
+		 */
+		if (unlikely(iph->version != 4))
+			return NET_RX_DROP;
+
+		err = ip_route_lookup(iph->daddr, iph->saddr,
+				      iph->tos, dev, &res);
+		if (unlikely(err))
+			return NET_RX_DROP;
+
+		rcv = FIB_RES_DEV(res);
+		if (likely(rcv)) {
+			if (likely(rcv->netdev_ops->ndo_xdp_xmit(rcv, xdp) ==
+					   NETDEV_TX_OK))
+				return NET_RX_SUCCESS;
+		}
+
+		return NET_RX_DROP;
+}
+EXPORT_SYMBOL_GPL(xdp_router_forward);
+
+#endif
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49cc1c1..2333205 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1866,6 +1866,35 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
 }
 
+#ifdef CONFIG_XDP_ROUTER
+
+int ip_route_lookup(__be32 daddr, __be32 saddr,
+		    u8 tos, struct net_device *dev,
+		    struct fib_result *res)
+{
+	struct flowi4	fl4;
+	int		err;
+	struct net    *net = dev_net(dev);
+
+	fl4.flowi4_oif = 0;
+	fl4.flowi4_iif = dev->ifindex;
+	fl4.flowi4_mark = 0;
+	fl4.flowi4_tos = tos & IPTOS_RT_MASK;
+	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+	fl4.flowi4_flags = 0;
+	fl4.daddr = daddr;
+	fl4.saddr = saddr;
+
+	err = fib_lookup(net, &fl4, res, 0);
+
+	if (unlikely(err != 0 || res->type != RTN_UNICAST))
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_route_lookup);
+#endif
+
 /*
  *	NOTE. We drop all the packets that has local source
  *	addresses, because every properly looped back packet

Reply via email to