Hi Michael,

You're absolutely right.

We detected the actual corruption running MS HCK on earlier kernels,
before the 1fd819ec, so the patch was developed as a fix for this issue.

However, 1fd819ec fixes the corruption and now it's only an optimization
that re-enables the zero copy for this case.

We're collecting the numbers right now and will post them as soon as
possible.

Best Regards,
Anton

On 5/20/2014 2:50 PM, Michael S. Tsirkin wrote:
> On Tue, May 20, 2014 at 02:24:21PM +0300, Igor Royzis wrote:
>> Fix accessing GSO fragments memory (and a possible corruption therefore) 
>> after
>> reporting completion in a zero copy callback. The previous fix in the commit 
>> 1fd819ec
>> orphaned frags which eliminates zero copy advantages. The fix makes the 
>> completion
>> called after all the fragments were processed avoiding unnecessary 
>> orphaning/copying
>> from userspace.
>>
>> The GSO fragments corruption issue was observed in a typical QEMU/KVM VM 
>> setup that
>> hosts a Windows guest (since QEMU virtio-net Windows driver doesn't support 
>> GRO).
>> The fix has been verified by running the HCK OffloadLSO test.
>>
>> Signed-off-by: Igor Royzis <[email protected]>
>> Signed-off-by: Anton Nayshtut <[email protected]>
> 
> OK but with 1fd819ec there's no corruption, correct?
> So this patch is in fact an optimization?
> If true, I'd like to see some performance numbers please.
> 
> Thanks!
> 
>> ---
>>  include/linux/skbuff.h |    1 +
>>  net/core/skbuff.c      |   18 +++++++++++++-----
>>  2 files changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 08074a8..8c49edc 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -287,6 +287,7 @@ struct skb_shared_info {
>>      struct sk_buff  *frag_list;
>>      struct skb_shared_hwtstamps hwtstamps;
>>      __be32          ip6_frag_id;
>> +    struct sk_buff  *zcopy_src;
>>  
>>      /*
>>       * Warning : all fields before dataref are cleared in __alloc_skb()
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index 1b62343..6fa6342 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -610,14 +610,18 @@ EXPORT_SYMBOL(__kfree_skb);
>>   */
>>  void kfree_skb(struct sk_buff *skb)
>>  {
>> +    struct sk_buff *zcopy_src;
>>      if (unlikely(!skb))
>>              return;
>>      if (likely(atomic_read(&skb->users) == 1))
>>              smp_rmb();
>>      else if (likely(!atomic_dec_and_test(&skb->users)))
>>              return;
>> +    zcopy_src = skb_shinfo(skb)->zcopy_src;
>>      trace_kfree_skb(skb, __builtin_return_address(0));
>>      __kfree_skb(skb);
>> +    if (unlikely(zcopy_src))
>> +            kfree_skb(zcopy_src);
>>  }
>>  EXPORT_SYMBOL(kfree_skb);
>>  
>> @@ -662,14 +666,18 @@ EXPORT_SYMBOL(skb_tx_error);
>>   */
>>  void consume_skb(struct sk_buff *skb)
>>  {
>> +    struct sk_buff *zcopy_src;
>>      if (unlikely(!skb))
>>              return;
>>      if (likely(atomic_read(&skb->users) == 1))
>>              smp_rmb();
>>      else if (likely(!atomic_dec_and_test(&skb->users)))
>>              return;
>> +    zcopy_src = skb_shinfo(skb)->zcopy_src;
>>      trace_consume_skb(skb);
>>      __kfree_skb(skb);
>> +    if (unlikely(zcopy_src))
>> +            consume_skb(zcopy_src);
>>  }
>>  EXPORT_SYMBOL(consume_skb);
>>  
>> @@ -2867,7 +2875,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>>      skb_frag_t *frag = skb_shinfo(head_skb)->frags;
>>      unsigned int mss = skb_shinfo(head_skb)->gso_size;
>>      unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
>> -    struct sk_buff *frag_skb = head_skb;
>>      unsigned int offset = doffset;
>>      unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
>>      unsigned int headroom;
>> @@ -2913,7 +2920,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>>                      i = 0;
>>                      nfrags = skb_shinfo(list_skb)->nr_frags;
>>                      frag = skb_shinfo(list_skb)->frags;
>> -                    frag_skb = list_skb;
>>                      pos += skb_headlen(list_skb);
>>  
>>                      while (pos < offset + len) {
>> @@ -2975,6 +2981,11 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>>                                               nskb->data - tnl_hlen,
>>                                               doffset + tnl_hlen);
>>  
>> +            if (skb_shinfo(head_skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
>> +                    skb_shinfo(nskb)->zcopy_src = head_skb;
>> +                    atomic_inc(&head_skb->users);
>> +            }
>> +
>>              if (nskb->len == len + doffset)
>>                      goto perform_csum_check;
>>  
>> @@ -3001,7 +3012,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>>                              i = 0;
>>                              nfrags = skb_shinfo(list_skb)->nr_frags;
>>                              frag = skb_shinfo(list_skb)->frags;
>> -                            frag_skb = list_skb;
>>  
>>                              BUG_ON(!nfrags);
>>  
>> @@ -3016,8 +3026,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>>                              goto err;
>>                      }
>>  
>> -                    if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
>> -                            goto err;
>>  
>>                      *nskb_frag = *frag;
>>                      __skb_frag_ref(nskb_frag);
>> -- 
>> 1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to