Hello!
We discussed the reuse of skb's some time ago.
Below is some code to examine how skb's can be reused if the upper layer (RX softirq)
consumes the skb, so that we, within the NAPI path, can detect this and reuse the skb. It
can give new possibilities for TCP optimization (davem), a driver-common copybreak,
etc. In the test below I use my usual lab setup but just let netfilter drop the
packets. We win about 13% in this experiment below.
Single Opteron 252 (2.6 GHz) CPU. e1000 6.2.15 Linux 2.6.14.5
Input rate 2 x 1.16 Mpps a netfilter drop rule for pktgen in the forward chain
Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0 1500 0 6680261 0 3319739 0 5 0 0 0 BRU
eth1 1500 0 0 0 0 0 3 0 0 0 BRU
eth2 1500 0 6680344 0 3319656 0 3 0 0 0 BRU
eth3 1500 0 0 0 0 0 3 0 0 0 BRU
With the consume/recycle code below.
Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0 1500 0 7541191 0 2458809 0 5 0 0 0 BRU
eth1 1500 0 0 0 0 0 3 0 0 0 BRU
eth2 1500 0 7541150 0 2458850 0 3 0 0 0 BRU
eth3 1500 0 0 0 0 0 3 0 0 0 BRU
Here we process (drop) about 13% more packets when skb's get reused.
--- e1000_main.c.060124 2006-01-24 12:30:10.000000000 +0100
+++ e1000_main.c 2006-01-24 12:28:58.000000000 +0100
@@ -3802,6 +3802,10 @@
skb->protocol = eth_type_trans(skb, netdev);
#ifdef CONFIG_E1000_NAPI
+
+ /* Increment users so skb don't get destructed */
+ skb_get(skb);
+
#ifdef NETIF_F_HW_VLAN_TX
if(unlikely(adapter->vlgrp &&
(status & E1000_RXD_STAT_VP))) {
@@ -3814,6 +3818,23 @@
#else
netif_receive_skb(skb);
#endif
+
+ /*
+ * If skb is consumed by RX softirq we can simply use it again
+ * otherwise undo the users increment with kfree
+ */
+
+ if (!multi_descriptor && atomic_read(&skb->users) == 1 &&
+ realloc_skb(skb, adapter->rx_buffer_len, GFP_ATOMIC)) {
+
+ skb_reserve(skb, 16);
+ skb->dev = netdev;
+ buffer_info->skb = skb;
+ adapter->net_stats.rx_compressed++;
+ }
+ else
+ kfree_skb(skb);
+
#else /* CONFIG_E1000_NAPI */
#ifdef NETIF_F_HW_VLAN_TX
if(unlikely(adapter->vlgrp &&
realloc_skb from [EMAIL PROTECTED]
--- linux-2.6.14.5/include/linux/skbuff.h.orig 2006-01-19 15:51:08.000000000 +0100
+++ linux-2.6.14.5/include/linux/skbuff.h 2006-01-24 11:34:44.000000000 +0100
@@ -303,6 +303,9 @@
extern void __kfree_skb(struct sk_buff *skb);
extern struct sk_buff *__alloc_skb(unsigned int size,
gfp_t priority, int fclone);
+extern struct sk_buff *realloc_skb(struct sk_buff *skb, unsigned int size,
+ int priority);
+
static inline struct sk_buff *alloc_skb(unsigned int size,
gfp_t priority)
{
--- linux-2.6.14.5/net/core/skbuff.c.orig 2006-01-19 15:50:37.000000000 +0100
+++ linux-2.6.14.5/net/core/skbuff.c 2006-01-24 11:57:34.000000000 +0100
@@ -276,6 +276,59 @@
}
}
+/**
+ * realloc_skb - reset an skb for reuse with a new packet.
+ * @skb: buffer to reset and reuse
+ * @size: tail room to provide, in bytes
+ * @gfp_mask: allocation mask
+ *
+ * Resets @skb so it can hold a new packet. The buffer has no headroom
+ * and a tail room of @size bytes; the data area is reallocated only if
+ * the existing one is too small. On failure the return is %NULL.
+ *
+ * Callers in interrupt context must use a @gfp_mask of %GFP_ATOMIC.
+ */
+
+struct sk_buff *realloc_skb(struct sk_buff *skb, unsigned int size, int gfp_mask)
+{
+ int truesize = skb->truesize;
+ u8 *data = skb->head;
+
+ memset(skb, 0, offsetof(struct sk_buff, truesize));
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ if ((size+sizeof(struct sk_buff)) > truesize) {
+ skb_release_data(skb);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (data == NULL)
+ goto nodata;
+ }
+
+ /* XXX: does not include slab overhead */
+ skb->truesize = size + sizeof(struct sk_buff);
+
+ /* Load the data pointers. */
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->cloned = 0;
+ skb->data_len = 0;
+
+ atomic_set(&skb->users, 1);
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+ return skb;
+
+nodata:
+ return NULL;
+}
+
/*
* Free an skbuff by memory without cleaning the state.
*/
@@ -1718,6 +1771,7 @@
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
+EXPORT_SYMBOL(realloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
We need to disable the shared check for this.
--- linux-2.6.14.5/net/ipv4/ip_input.c.orig 2006-01-24 12:58:12.000000000 +0100
+++ linux-2.6.14.5/net/ipv4/ip_input.c 2006-01-24 12:57:39.000000000 +0100
@@ -402,6 +402,13 @@
}
}
#endif
+#if 0
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+ IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ goto out;
+ }
+
+#endif
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
Cheers.
--ro
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html