Hello! 
We discussed the reuse of skb's some time ago.

Below is some code to examine how skb's can be reused: if the upper layer
(RX softirq) consumes the skb, we can detect this within the NAPI path and
reuse the skb. It can give new possibilities for TCP optimization (davem),
driver common copybreak, etc. In the test below I use my usual lab setup but
just let netfilter drop the packets. We win about 13% in this experiment.

Single Opteron 252 (2.6 GHz) CPU. e1000 6.2.15 Linux 2.6.14.5
Input rate 2 x 1.16 Mpps, with a netfilter drop rule for pktgen in the forward chain

Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0   1500   0 6680261      0 3319739      0      5      0      0      0 BRU
eth1   1500   0      0      0      0      0      3      0      0      0 BRU
eth2   1500   0 6680344      0 3319656      0      3      0      0      0 BRU
eth3   1500   0      0      0      0      0      3      0      0      0 BRU

With the consume/recycle code below.

Iface   MTU Met  RX-OK RX-ERR RX-DRP RX-OVR  TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0   1500   0 7541191      0 2458809      0      5      0      0      0 BRU
eth1   1500   0      0      0      0      0      3      0      0      0 BRU
eth2   1500   0 7541150      0 2458850      0      3      0      0      0 BRU
eth3   1500   0      0      0      0      0      3      0      0      0 BRU

Here we process (drop) about 13% more packets when skb's get reused.


--- e1000_main.c.060124 2006-01-24 12:30:10.000000000 +0100
+++ e1000_main.c        2006-01-24 12:28:58.000000000 +0100
@@ -3802,6 +3802,10 @@
 
                skb->protocol = eth_type_trans(skb, netdev);
 #ifdef CONFIG_E1000_NAPI
+
+               /* Increment users so skb don't get destructed */
+               skb_get(skb);
+
 #ifdef NETIF_F_HW_VLAN_TX
                if(unlikely(adapter->vlgrp &&
                            (status & E1000_RXD_STAT_VP))) {
@@ -3814,6 +3818,23 @@
 #else
                netif_receive_skb(skb);
 #endif
+
+               /* 
+                * If skb is consumed by RX softirq we can simply use it again 
+                * otherwise undo the users increment with kfree
+                */
+
+               if (!multi_descriptor && atomic_read(&skb->users) == 1 && 
+                   realloc_skb(skb, adapter->rx_buffer_len, GFP_ATOMIC)) {
+                       
+                       skb_reserve(skb, 16);
+                       skb->dev = netdev;
+                       buffer_info->skb = skb;
+                       adapter->net_stats.rx_compressed++; 
+               }
+               else 
+                       kfree_skb(skb);
+
 #else /* CONFIG_E1000_NAPI */
 #ifdef NETIF_F_HW_VLAN_TX
                if(unlikely(adapter->vlgrp &&


realloc_skb from [EMAIL PROTECTED]


--- linux-2.6.14.5/include/linux/skbuff.h.orig  2006-01-19 15:51:08.000000000 +0100
+++ linux-2.6.14.5/include/linux/skbuff.h       2006-01-24 11:34:44.000000000 +0100
@@ -303,6 +303,9 @@
 extern void           __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
                                   gfp_t priority, int fclone);
+extern struct sk_buff *realloc_skb(struct sk_buff *skb, unsigned int size, 
+                                  int priority);
+
 static inline struct sk_buff *alloc_skb(unsigned int size,
                                        gfp_t priority)
 {
--- linux-2.6.14.5/net/core/skbuff.c.orig       2006-01-19 15:50:37.000000000 +0100
+++ linux-2.6.14.5/net/core/skbuff.c    2006-01-24 11:57:34.000000000 +0100
@@ -276,6 +276,59 @@
        }
 }
 
+/**
+ *     realloc_skb     -       reset skb for new packet.
+ *     @size: size to allocate
+ *     @gfp_mask: allocation mask
+ *
+ *     Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *     tail room of size bytes. The object has a reference count of one.
+ *     The return is the buffer. On a failure the return is %NULL.
+ *
+ *     Buffers may only be allocated from interrupts using a @gfp_mask of
+ *     %GFP_ATOMIC.
+ */
+
+struct sk_buff *realloc_skb(struct sk_buff* skb, unsigned int size, int gfp_mask)
+{
+       int truesize = skb->truesize;
+       u8 *data = skb->head;
+
+       memset(skb, 0, offsetof(struct sk_buff, truesize));
+
+       /* Get the DATA. Size must match skb_add_mtu(). */
+       size = SKB_DATA_ALIGN(size);
+       if ((size+sizeof(struct sk_buff)) > truesize) {
+               skb_release_data(skb);
+               data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+               if (data == NULL)
+                       goto nodata;
+       }
+
+       /* XXX: does not include slab overhead */
+       skb->truesize = size + sizeof(struct sk_buff);
+
+       /* Load the data pointers. */
+       skb->head = data;
+       skb->data = data;
+       skb->tail = data;
+       skb->end = data + size;
+
+       /* Set up other state */
+       skb->len = 0;
+       skb->cloned = 0;
+       skb->data_len = 0;
+
+       atomic_set(&skb->users, 1);
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
+       return skb;
+
+nodata:
+       return NULL;
+}
+
 /*
  *     Free an skbuff by memory without cleaning the state.
  */
@@ -1718,6 +1771,7 @@
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
 EXPORT_SYMBOL(__alloc_skb);
+EXPORT_SYMBOL(realloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);


We need to disable the shared check for this.



--- linux-2.6.14.5/net/ipv4/ip_input.c.orig     2006-01-24 12:58:12.000000000 +0100
+++ linux-2.6.14.5/net/ipv4/ip_input.c  2006-01-24 12:57:39.000000000 +0100
@@ -402,6 +402,13 @@
                        }
                }
 #endif
+#if 0
+       if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+               IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+               goto out;
+       }
+
+#endif
 
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto inhdr_error;



Cheers.
                                        --ro
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to