On 7/6/2011 4:22 PM, Shirley Ma wrote:
> This patch adds userspace buffers support in skb shared info. A new 
> struct skb_ubuf_info is needed to maintain the userspace buffers
> argument and index, a callback is used to notify userspace to release
> the buffers once lower device has done DMA (Last reference to that skb
> has gone).
> 
> If there is any userspace apps to reference these userspace buffers,
> then these userspaces buffers will be copied into kernel. This way we
> can prevent userspace apps from holding these userspace buffers too long.
> 
> Use destructor_arg to point to the userspace buffer info; a new tx flags
> SKBTX_DEV_ZEROCOPY is added for zero-copy buffer check.
> 
> Signed-off-by: Shirley Ma <x...@...ibm.com>

I was just reading this patch and noticed that you check if
uarg->callback is set before calling it in skb_release_data, but you do
not check before calling it in skb_copy_ubufs.

I was only skimming so I have probably missed something...

> ---
> 
>  include/linux/skbuff.h |   16 ++++++++++
>  net/core/skbuff.c      |   79 
> +++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 94 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 3e54337..08d4507 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -187,6 +187,20 @@ enum {
>  
>       /* ensure the originating sk reference is available on driver level */
>       SKBTX_DRV_NEEDS_SK_REF = 1 << 3,
> +
> +     /* device driver supports TX zero-copy buffers */
> +     SKBTX_DEV_ZEROCOPY = 1 << 4,
> +};
> +
> +/*
> + * The callback notifies userspace to release buffers when skb DMA is done in
> + * lower device, the skb last reference should be 0 when calling this.
> + * The desc is used to track userspace buffer index.
> + */
> +struct ubuf_info {
> +     void (*callback)(void *);
> +     void *arg;
> +     unsigned long desc;
>  };
>  
>  /* This data is invariant across clones and lives at
> @@ -211,6 +225,7 @@ struct skb_shared_info {
>       /* Intermediate layers must ensure that destructor_arg
>        * remains valid until skb destructor */
>       void *          destructor_arg;
> +
>       /* must be last field, see pskb_expand_head() */
>       skb_frag_t      frags[MAX_SKB_FRAGS];
>  };
> @@ -2265,5 +2280,6 @@ static inline void skb_checksum_none_assert(struct 
> sk_buff *skb)
>  }
>  
>  bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
> +
>  #endif       /* __KERNEL__ */
>  #endif       /* _LINUX_SKBUFF_H */
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 46cbd28..42462f5 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -329,6 +329,18 @@ static void skb_release_data(struct sk_buff *skb)
>                               put_page(skb_shinfo(skb)->frags[i].page);
>               }
>  
> +             /*
> +              * If skb buf is from userspace, we need to notify the caller
> +              * the lower device DMA has done;
> +              */
> +             if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
> +                     struct ubuf_info *uarg;
> +
> +                     uarg = skb_shinfo(skb)->destructor_arg;
> +                     if (uarg->callback)
> +                             uarg->callback(uarg);
> +             }
> +
>               if (skb_has_frag_list(skb))
>                       skb_drop_fraglist(skb);
>  
> @@ -481,6 +493,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
>       if (irqs_disabled())
>               return false;
>  
> +     if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
> +             return false;
> +
>       if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
>               return false;
>  
> @@ -596,6 +611,50 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct 
> sk_buff *src)
>  }
>  EXPORT_SYMBOL_GPL(skb_morph);
>  
> +/* skb frags copy userspace buffers to kernel */
> +static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
> +{
> +     int i;
> +     int num_frags = skb_shinfo(skb)->nr_frags;
> +     struct page *page, *head = NULL;
> +     struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
> +
> +     for (i = 0; i < num_frags; i++) {
> +             u8 *vaddr;
> +             skb_frag_t *f = &skb_shinfo(skb)->frags[i];
> +
> +             page = alloc_page(GFP_ATOMIC);
> +             if (!page) {
> +                     while (head) {
> +                             put_page(head);
> +                             head = (struct page *)head->private;
> +                     }
> +                     return -ENOMEM;
> +             }
> +             vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
> +             memcpy(page_address(page),
> +                    vaddr + f->page_offset, f->size);
> +             kunmap_skb_frag(vaddr);
> +             page->private = (unsigned long)head;
> +             head = page;
> +     }
> +
> +     /* skb frags release userspace buffers */
> +     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> +             put_page(skb_shinfo(skb)->frags[i].page);
> +
> +     uarg->callback(uarg);
> +
> +     /* skb frags point to kernel buffers */
> +     for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
> +             skb_shinfo(skb)->frags[i - 1].page_offset = 0;
> +             skb_shinfo(skb)->frags[i - 1].page = head;
> +             head = (struct page *)head->private;
> +     }
> +     return 0;
> +}
> +
> +
>  /**
>   *   skb_clone       -       duplicate an sk_buff
>   *   @skb: buffer to clone
> @@ -614,6 +673,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t 
> gfp_mask)
>  {
>       struct sk_buff *n;
>  
> +     if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
> +             if (skb_copy_ubufs(skb, gfp_mask))
> +                     return NULL;
> +     }
> +
>       n = skb + 1;
>       if (skb->fclone == SKB_FCLONE_ORIG &&
>           n->fclone == SKB_FCLONE_UNAVAILABLE) {
> @@ -731,6 +795,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t 
> gfp_mask)
>       if (skb_shinfo(skb)->nr_frags) {
>               int i;
>  
> +             if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
> +                     if (skb_copy_ubufs(skb, gfp_mask)) {
> +                             kfree(n);
> +                             goto out;
> +                     }
> +             }
>               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
>                       skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
>                       get_page(skb_shinfo(n)->frags[i].page);
> @@ -788,7 +858,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int 
> ntail,
>               fastpath = true;
>       else {
>               int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
> -
>               fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
>       }
>  
> @@ -819,6 +888,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int 
> ntail,
>       if (fastpath) {
>               kfree(skb->head);
>       } else {
> +             /* copy this zero copy skb frags */
> +             if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
> +                     if (skb_copy_ubufs(skb, gfp_mask))
> +                             goto nofrags;
> +             }
>               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
>                       get_page(skb_shinfo(skb)->frags[i].page);
>  
> @@ -853,6 +927,8 @@ adjust_others:
>       atomic_set(&skb_shinfo(skb)->dataref, 1);
>       return 0;
>  
> +nofrags:
> +     kfree(data);
>  nodata:
>       return -ENOMEM;
>  }
> @@ -1354,6 +1430,7 @@ int skb_copy_bits(const struct sk_buff *skb, int 
> offset, void *to, int len)
>               }
>               start = end;
>       }
> +
>       if (!len)
>               return 0;
>  
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to