tree 0bfc57e73f0bf9f7bb9d5c8ce7d3d5afe550f94e
parent e92ae93a8aa66aea12935420cb22d4df1c18d023
author David S. Miller <[EMAIL PROTECTED]> Thu, 18 Aug 2005 04:57:30 -0700
committer David S. Miller <[EMAIL PROTECTED]> Tue, 30 Aug 2005 06:01:54 -0700

[NET]: Implement SKB fast cloning.

Protocols that make extensive use of SKB cloning,
for example TCP, eat at least 2 allocations per
packet sent as a result.

To cut the kmalloc() count in half, we implement
a pre-allocation scheme wherein we allocate
2 sk_buff objects in advance, then use a simple
reference count to free up the memory at the
correct time.

Based upon an initial patch by Thomas Graf and
suggestions from Herbert Xu.

Signed-off-by: David S. Miller <[EMAIL PROTECTED]>

 include/linux/skbuff.h |   26 ++++++++++++++-
 include/net/sock.h     |    2 -
 net/core/skbuff.c      |   80 +++++++++++++++++++++++++++++++++++++++++++------
 net/ipv4/tcp_output.c  |    4 +-
 4 files changed, 97 insertions(+), 15 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -162,6 +162,13 @@ struct skb_timeval {
        u32     off_usec;
 };
 
+
+enum {
+       SKB_FCLONE_UNAVAILABLE,
+       SKB_FCLONE_ORIG,
+       SKB_FCLONE_CLONE,
+};
+
 /** 
  *     struct sk_buff - socket buffer
  *     @next: Next buffer in list
@@ -255,7 +262,8 @@ struct sk_buff {
                                ip_summed:2,
                                nohdr:1,
                                nfctinfo:3;
-       __u8                    pkt_type;
+       __u8                    pkt_type:3,
+                               fclone:2;
        __be16                  protocol;
 
        void                    (*destructor)(struct sk_buff *skb);
@@ -295,8 +303,20 @@ struct sk_buff {
 #include <asm/system.h>
 
 extern void           __kfree_skb(struct sk_buff *skb);
-extern struct sk_buff *alloc_skb(unsigned int size,
-                                unsigned int __nocast priority);
+extern struct sk_buff *__alloc_skb(unsigned int size,
+                                  unsigned int __nocast priority, int fclone);
+static inline struct sk_buff *alloc_skb(unsigned int size,
+                                       unsigned int __nocast priority)
+{
+       return __alloc_skb(size, priority, 0);
+}
+
+static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
+                                              unsigned int __nocast priority)
+{
+       return __alloc_skb(size, priority, 1);
+}
+
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                            unsigned int size,
                                            unsigned int __nocast priority);
diff --git a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_
        int hdr_len;
 
        hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
-       skb = alloc_skb(size + hdr_len, gfp);
+       skb = alloc_skb_fclone(size + hdr_len, gfp);
        if (skb) {
                skb->truesize += mem;
                if (sk->sk_forward_alloc >= (int)skb->truesize ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
 #include <asm/system.h>
 
 static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_fclone_cache;
 
 struct timeval __read_mostly skb_tv_base;
 
@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb
  */
 
 /**
- *     alloc_skb       -       allocate a network buffer
+ *     __alloc_skb     -       allocate a network buffer
  *     @size: size to allocate
  *     @gfp_mask: allocation mask
  *
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb
  *     Buffers may only be allocated from interrupts using a @gfp_mask of
  *     %GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+                           int fclone)
 {
        struct sk_buff *skb;
        u8 *data;
 
        /* Get the HEAD */
-       skb = kmem_cache_alloc(skbuff_head_cache,
-                              gfp_mask & ~__GFP_DMA);
+       if (fclone)
+               skb = kmem_cache_alloc(skbuff_fclone_cache,
+                                      gfp_mask & ~__GFP_DMA);
+       else
+               skb = kmem_cache_alloc(skbuff_head_cache,
+                                      gfp_mask & ~__GFP_DMA);
+
        if (!skb)
                goto out;
 
@@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int s
        skb->data = data;
        skb->tail = data;
        skb->end  = data + size;
+       if (fclone) {
+               struct sk_buff *child = skb + 1;
+               atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+               skb->fclone = SKB_FCLONE_ORIG;
+               atomic_set(fclone_ref, 1);
+
+               child->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags  = 0;
        skb_shinfo(skb)->tso_size = 0;
@@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *sk
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+       struct sk_buff *other;
+       atomic_t *fclone_ref;
+
        skb_release_data(skb);
-       kmem_cache_free(skbuff_head_cache, skb);
+       switch (skb->fclone) {
+       case SKB_FCLONE_UNAVAILABLE:
+               kmem_cache_free(skbuff_head_cache, skb);
+               break;
+
+       case SKB_FCLONE_ORIG:
+               fclone_ref = (atomic_t *) (skb + 2);
+               if (atomic_dec_and_test(fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, skb);
+               break;
+
+       case SKB_FCLONE_CLONE:
+               fclone_ref = (atomic_t *) (skb + 1);
+               other = skb - 1;
+
+               /* The clone portion is available for
+                * fast-cloning again.
+                */
+               skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+               if (atomic_dec_and_test(fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, other);
+               break;
+       };
 }
 
 /**
@@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb)
 
 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-       struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+       struct sk_buff *n;
 
-       if (!n) 
-               return NULL;
+       n = skb + 1;
+       if (skb->fclone == SKB_FCLONE_ORIG &&
+           n->fclone == SKB_FCLONE_UNAVAILABLE) {
+               atomic_t *fclone_ref = (atomic_t *) (n + 1);
+               n->fclone = SKB_FCLONE_CLONE;
+               atomic_inc(fclone_ref);
+       } else {
+               n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+               if (!n)
+                       return NULL;
+               n->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
 
 #define C(x) n->x = skb->x
 
@@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_bu
        new->mac.raw    = old->mac.raw + offset;
        memcpy(new->cb, old->cb, sizeof(old->cb));
        new->local_df   = old->local_df;
+       new->fclone     = SKB_FCLONE_UNAVAILABLE;
        new->pkt_type   = old->pkt_type;
        new->tstamp     = old->tstamp;
        new->destructor = NULL;
@@ -1647,13 +1699,23 @@ void __init skb_init(void)
                                              NULL, NULL);
        if (!skbuff_head_cache)
                panic("cannot create skbuff cache");
+
+       skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+                                               (2*sizeof(struct sk_buff)) +
+                                               sizeof(atomic_t),
+                                               0,
+                                               SLAB_HWCACHE_ALIGN,
+                                               NULL, NULL);
+       if (!skbuff_fclone_cache)
+               panic("cannot create skbuff cache");
+
        do_gettimeofday(&skb_tv_base);
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk)
        } else {
                /* Socket is locked, keep trying until memory is available. */
                for (;;) {
-                       skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+                       skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
                        if (skb)
                                break;
                        yield();
@@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk)
 
        tcp_connect_init(sk);
 
-       buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+       buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
        if (unlikely(buff == NULL))
                return -ENOBUFS;
 
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to