Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-07-19 Thread Gao Zhenyu
_bw  =  788 KB/sec    send_bw  =  812 KB/sec
> recv_bw  =  800 KB/sec    recv_bw  =  773 KB/sec    recv_bw  =  800 KB/sec
> udp_bw:
> send_bw  =  1.64 MB/sec   send_bw  =  1.59 MB/sec   send_bw  =  1.61 MB/sec
> recv_bw  =  1.63 MB/sec   recv_bw  =  1.53 MB/sec   recv_bw  =  1.58 MB/sec
> udp_bw:
> send_bw  =  3.25 MB/sec   send_bw  =  3.16 MB/sec   send_bw  =  3.24 MB/sec
> recv_bw  =  3.23 MB/sec   recv_bw  =  3.05 MB/sec   recv_bw  =  3.13 MB/sec
> udp_bw:
> send_bw  =  6.59 MB/sec   send_bw  =  6.35 MB/sec   send_bw  =  6.43 MB/sec
> recv_bw  =   6.5 MB/sec   recv_bw  =  6.22 MB/sec   recv_bw  =  6.22 MB/sec
> udp_bw:
> send_bw  =    13 MB/sec   send_bw  =  12.5 MB/sec   send_bw  =  12.8 MB/sec
> recv_bw  =  12.9 MB/sec   recv_bw  =  12.3 MB/sec   recv_bw  =  12.4 MB/sec
> udp_bw:
> send_bw  =  26.1 MB/sec   send_bw  =  25.3 MB/sec   send_bw  =  25.8 MB/sec
> recv_bw  =  25.5 MB/sec   recv_bw  =    25 MB/sec   recv_bw  =  25.1 MB/sec
> udp_bw:
> send_bw  =  51.3 MB/sec   send_bw  =  50.5 MB/sec   send_bw  =  51.7 MB/sec
> recv_bw  =  50.8 MB/sec   recv_bw  =  49.9 MB/sec   recv_bw  =  51.1 MB/sec
> udp_bw:
> send_bw  =   104 MB/sec   send_bw  =   100 MB/sec   send_bw  =   102 MB/sec
> recv_bw  =  99.3 MB/sec   recv_bw  =  91.3 MB/sec   recv_bw  =  99.1 MB/sec
> udp_bw:
> send_bw  =   206 MB/sec   send_bw  =   194 MB/sec   send_bw  =   206 MB/sec
> recv_bw  =   200 MB/sec   recv_bw  =   164 MB/sec   recv_bw  =   199 MB/sec
> udp_bw:
> send_bw  =   403 MB/sec   send_bw  =   385 MB/sec   send_bw  =   402 MB/sec
> recv_bw  =   390 MB/sec   recv_bw  =   351 MB/sec   recv_bw  =   389 MB/sec
> udp_bw:
> send_bw  =   554 MB/sec   send_bw  =   550 MB/sec   send_bw  =   539 MB/sec
> recv_bw  =   367 MB/sec   recv_bw  =   365 MB/sec   recv_bw  =   393 MB/sec
> udp_bw:
> send_bw  =   868 MB/sec   send_bw  =   835 MB/sec   send_bw  =   854 MB/sec
> recv_bw  =   576 MB/sec   recv_bw  =   569 MB/sec   recv_bw  =   652 MB/sec
> udp_bw:
> send_bw  =  1.09 GB/sec   send_bw  =  1.08 GB/sec   send_bw  =  1.06 GB/sec
> recv_bw  =   772 MB/sec   recv_bw  =   770 MB/sec   recv_bw  =   569 MB/sec
> udp_bw:
> send_bw  =  1.22 GB/sec   send_bw  =  1.22 GB/sec   send_bw  =  1.19 GB/sec
> recv_bw  =   676 MB/sec   recv_bw  =   700 MB/sec   recv_bw  =   767 MB/sec
> udp_bw:
> send_bw  =  1.29 GB/sec   send_bw  =  1.28 GB/sec   send_bw  =  1.29 GB/sec
> recv_bw  =   666 MB/sec   recv_bw  =   795 MB/sec   recv_bw  =   671 MB/sec
> udp_bw:
> send_bw  =  0 bytes/sec   send_bw  =  0 bytes/sec   send_bw  =  0 bytes/sec
> recv_bw  =  0 bytes/sec   recv_bw  =  0 bytes/sec   recv_bw  =  0 bytes/sec
> udp_lat:
> latency  =  25.7 us   latency  =  25.8 us   latency  =  25.9 us
> udp_lat:
> latency  =    26 us   latency  =  25.9 us   latency  =  25.9 us
> udp_lat:
> latency  =  25.9 us   latency  =  25.8 us   latency  =    26 us
> udp_lat:
> latency  =  25.8 us   latency  =  25.8 us   latency  =    26 us
> udp_lat:
> latency  =  25.9 us   latency  =  25.9 us   latency  =  26.1 us
> udp_lat:
> latency  =    26 us   latency  =  25.8 us   latency  =  26.1 us
> udp_lat:
> latency  =  26.2 us   latency  =  25.9 us   latency  =  26.3 us
> udp_lat:
> latency  =  26.7 us   latency  =  26.5 us   latency  =    27 us
> udp_lat:
> latency  =  27.3 us   latency  =  27.3 us   latency  =  27.7 us
> udp_lat:
> latency  =  28.3 us   latency  =  28.1 us   latency  =  28.9 us
> udp_lat:
> latency  =    30 us   latency  =  29.7 us   latency  =  30.4 us
> udp_lat:
> latency  =  41.3 us   latency  =  41.3 us   latency  =  41.3 us
> udp_lat:
> latency  =  41.6 us   latency  =  41.6 us   latency  =  41.6 us
> udp_lat:
> latency  =  64.2 us   latency  =  64.2 us   latency  =  64.4 us
> udp_lat:
> latency  =  73.2 us   latency  =  86.9 us   latency  =  72.3 us
> udp_lat:
> latency  =   120 us   latency  =   119 us   latency  =   117 us
> udp_lat:
> latency  =  0 ns

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-07-19 Thread Gao Zhenyu
   send_bw  =  25.8 MB/sec
recv_bw  =  25.5 MB/sec   recv_bw  =    25 MB/sec   recv_bw  =  25.1 MB/sec
udp_bw:
send_bw  =  51.3 MB/sec   send_bw  =  50.5 MB/sec   send_bw  =  51.7 MB/sec
recv_bw  =  50.8 MB/sec   recv_bw  =  49.9 MB/sec   recv_bw  =  51.1 MB/sec
udp_bw:
send_bw  =   104 MB/sec   send_bw  =   100 MB/sec   send_bw  =   102 MB/sec
recv_bw  =  99.3 MB/sec   recv_bw  =  91.3 MB/sec   recv_bw  =  99.1 MB/sec
udp_bw:
send_bw  =   206 MB/sec   send_bw  =   194 MB/sec   send_bw  =   206 MB/sec
recv_bw  =   200 MB/sec   recv_bw  =   164 MB/sec   recv_bw  =   199 MB/sec
udp_bw:
send_bw  =   403 MB/sec   send_bw  =   385 MB/sec   send_bw  =   402 MB/sec
recv_bw  =   390 MB/sec   recv_bw  =   351 MB/sec   recv_bw  =   389 MB/sec
udp_bw:
send_bw  =   554 MB/sec   send_bw  =   550 MB/sec   send_bw  =   539 MB/sec
recv_bw  =   367 MB/sec   recv_bw  =   365 MB/sec   recv_bw  =   393 MB/sec
udp_bw:
send_bw  =   868 MB/sec   send_bw  =   835 MB/sec   send_bw  =   854 MB/sec
recv_bw  =   576 MB/sec   recv_bw  =   569 MB/sec   recv_bw  =   652 MB/sec
udp_bw:
send_bw  =  1.09 GB/sec   send_bw  =  1.08 GB/sec   send_bw  =  1.06 GB/sec
recv_bw  =   772 MB/sec   recv_bw  =   770 MB/sec   recv_bw  =   569 MB/sec
udp_bw:
send_bw  =  1.22 GB/sec   send_bw  =  1.22 GB/sec   send_bw  =  1.19 GB/sec
recv_bw  =   676 MB/sec   recv_bw  =   700 MB/sec   recv_bw  =   767 MB/sec
udp_bw:
send_bw  =  1.29 GB/sec   send_bw  =  1.28 GB/sec   send_bw  =  1.29 GB/sec
recv_bw  =   666 MB/sec   recv_bw  =   795 MB/sec   recv_bw  =   671 MB/sec
udp_bw:
send_bw  =  0 bytes/sec   send_bw  =  0 bytes/sec   send_bw  =  0 bytes/sec
recv_bw  =  0 bytes/sec   recv_bw  =  0 bytes/sec   recv_bw  =  0 bytes/sec
udp_lat:
latency  =  25.7 us   latency  =  25.8 us   latency  =  25.9 us
udp_lat:
latency  =    26 us   latency  =  25.9 us   latency  =  25.9 us
udp_lat:
latency  =  25.9 us   latency  =  25.8 us   latency  =    26 us
udp_lat:
latency  =  25.8 us   latency  =  25.8 us   latency  =    26 us
udp_lat:
latency  =  25.9 us   latency  =  25.9 us   latency  =  26.1 us
udp_lat:
latency  =    26 us   latency  =  25.8 us   latency  =  26.1 us
udp_lat:
latency  =  26.2 us   latency  =  25.9 us   latency  =  26.3 us
udp_lat:
latency  =  26.7 us   latency  =  26.5 us   latency  =    27 us
udp_lat:
latency  =  27.3 us   latency  =  27.3 us   latency  =  27.7 us
udp_lat:
latency  =  28.3 us   latency  =  28.1 us   latency  =  28.9 us
udp_lat:
latency  =    30 us   latency  =  29.7 us   latency  =  30.4 us
udp_lat:
latency  =  41.3 us   latency  =  41.3 us   latency  =  41.3 us
udp_lat:
latency  =  41.6 us   latency  =  41.6 us   latency  =  41.6 us
udp_lat:
latency  =  64.2 us   latency  =  64.2 us   latency  =  64.4 us
udp_lat:
latency  =  73.2 us   latency  =  86.9 us   latency  =  72.3 us
udp_lat:
latency  =   120 us   latency  =   119 us   latency  =   117 us
udp_lat:
latency  =  0 ns      latency  =  0 ns      latency  =  0 ns


2017-07-20 1:00 GMT+08:00 Chandran, Sugesh <sugesh.chand...@intel.com>:

> Hi Gao,
>
> Thank you for working on this.
>
> Great to see it gives some performance improvement.
>
> Some comments/questions below.
>
>
>
> *Regards*
>
> *_Sugesh*
>
>
>
> *From:* Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
> *Sent:* Monday, July 17, 2017 12:55 PM
> *To:* Chandran, Sugesh <sugesh.chand...@intel.com>
> *Cc:* b...@ovn.org; u9012...@gmail.com; d...@openvswitch.org
> *Subject:* Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> checksum offload support for DPDK pnic
>
>
>
> Hi Sugesh,
>
>I did more performance testing on it.
>
>In ovs-dpdk + VM environment, I ran qperf on the VM side and got the
> performance numbers below (left column uses hardware checksum, right column
> uses software checksum).
>
> *[Sugesh] May I know what the test setup looks like?*
>
> *Is it *
>
> *PHY --> VM --> PHY*
>
> *?*
>
>we can see a big improvement in the TCP throughput part. I would like
> to make HW-TCP-CKSUM enabled by default in the next patch.
>
> *[Sugesh] Ok. Looks like the performance improvement is really visible as
> the msg size gets bigger.*
>
> *Wondering what the impact of this is on other types of traffic (due to
> turning off vectorization).*
>
>
>
>
>
>
> [root@localhost ~]# qperf -t 6

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-07-19 Thread Chandran, Sugesh
Hi Gao,
Thank you for working on this.
Great to see it gives some performance improvement.
Some comments/questions below.

Regards
_Sugesh

From: Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
Sent: Monday, July 17, 2017 12:55 PM
To: Chandran, Sugesh <sugesh.chand...@intel.com>
Cc: b...@ovn.org; u9012...@gmail.com; d...@openvswitch.org
Subject: Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum 
offload support for DPDK pnic

Hi Sugesh,

   I did more performance testing on it.
   In ovs-dpdk + VM environment, I ran qperf on the VM side and got the
performance numbers below (left column uses hardware checksum, right column
uses software checksum).
[Sugesh] May I know what the test setup looks like?
Is it
PHY --> VM --> PHY
?
   we can see a big improvement in the TCP throughput part. I would like to
make HW-TCP-CKSUM enabled by default in the next patch.
[Sugesh] Ok. Looks like the performance improvement is really visible as the
msg size gets bigger.
Wondering what the impact of this is on other types of traffic (due to turning
off vectorization).



[root@localhost ~]# qperf -t 60 -oo msg_size:1:64K:*2  10.100.85.247 tcp_bw 
tcp_lat
tcp_bw:   HW-CKSUM   SW-CKSUM(in VM)
bw  =  1.91 MB/sec  1.93 MB/sec
tcp_bw:
bw  =  4 MB/sec  3.97 MB/sec
tcp_bw:
bw  =  7.74 MB/sec  7.76 MB/sec
tcp_bw:
bw  =  14.7 MB/sec  14.7 MB/sec
tcp_bw:
bw  =  27.8 MB/sec  27.4 MB/sec
tcp_bw:
 bw  =  51.3 MB/sec  49.1 MB/sec
tcp_bw:
bw  =  87.5 MB/sec  83.1 MB/sec
tcp_bw:
bw  =  144 MB/sec  129 MB/sec
tcp_bw:
bw  =  203 MB/sec  189 MB/sec
tcp_bw:
bw  =  261 MB/sec  252 MB/sec
tcp_bw:
 bw  =  317 MB/sec  253 MB/sec
tcp_bw:
bw  =  400 MB/sec  307 MB/sec
tcp_bw:
bw  =  611 MB/sec  491 MB/sec
tcp_bw:
bw  =  912 MB/sec  662 MB/sec
tcp_bw:
bw  =  1.11 GB/sec  729 MB/sec
tcp_bw:
 bw  =  1.17 GB/sec  861 MB/sec
tcp_bw:
bw  =  1.17 GB/sec  1.08 GB/sec
tcp_lat:
latency  =  29.1 us  29.4 us
tcp_lat:
latency  =  28.8 us  29.1 us
tcp_lat:
latency  =  29 us  28.9 us
tcp_lat:
 latency  =  28.7 us  29.2 us
tcp_lat:
latency  =  29.2 us  28.9 us
tcp_lat:
latency  =  28.9 us  29.1 us
tcp_lat:
latency  =  29.4 us  29.4 us
tcp_lat:
latency  =  29.6 us  29.9 us
tcp_lat:
 latency  =  30.5 us  30.4 us
tcp_lat:
latency  =  47.1 us  39.8 us
tcp_lat:
latency  =  53.6 us  45.2 us
tcp_lat:
latency  =  43.5 us  44.4 us
tcp_lat:
latency  =  53.8 us  49.1 us
tcp_lat:
 latency  =  81.8 us  78.5 us
tcp_lat:
latency  =  82.3 us  83.3 us
tcp_lat:
latency  =  93.1 us  97.2 us
tcp_lat:
latency  =  237 us  211 us

2017-06-23 15:58 GMT+08:00 Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>:


Regards
_Sugesh

From: Gao Zhenyu 
[mailto:sysugaozhe...@gmail.com<mailto:sysugaozhe...@gmail.com>]
Sent: Wednesday, June 21, 2017 9:32 AM
To: Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>
Cc: b...@ovn.org<mailto:b...@ovn.org>; 
u9012...@gmail.com<mailto:u9012...@gmail.com>; 
ktray...@redhat.com<mailto:ktray...@redhat.com>; Kavanagh, Mark B 
<mark.b.kavan...@intel.com<mailto:mark.b.kavan...@intel.com>>; 
d...@openvswitch.org<mailto:d...@openvswitch.org>
Subject: Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum 
offload support for DPDK pnic

I get it.  Maybe calculating it in the OVS part is doable as well.
So, how about adding more options to let people choose HW-tcp-cksum (reduce cpu
cycles) or SW-tcp-cksum (maybe better performance)?
Then we have NO-TCP-CKSUM, SW-TCP-CKSUM, HW-TCP-CKSUM.
[Sugesh] In OVS-DPDK, I am not sure about the advantage of having HW checksum,
because even if you save CPU cycles, they will get used by the non-vector tx.
So I would prefer to keep these options only if there is really a need for
them.
BTW, when will DPDK support tx checksum offload with vectorization?
[Sugesh] I don’t see any plan to do that in the near future. It could be worth
asking on the DPDK mailing list as well.

Thanks
Zhenyu Gao


2017-06-21 16:03 GMT+08:00 Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>:


Regards
_Sugesh

From: Gao Zhenyu 
[mailto:sysugaozhe...@gmail.com<mailto:sysugaozhe...@gmail.com>]
Sent: Monday, June 19, 2017 1:23 PM
To: Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>
Cc: b...@ovn.org<mailto:b...@ovn.org>; 
u9012...@gmail.com<mailto:u9012...@gmail.com>; 
ktray...@redhat.com<mailto:ktray...@redhat.com>; Kavanagh, Mark B 
<mark.b.kavan...@intel.com<mailto:mark.b.kavan...@intel.com>>; 
d...@openvswitch.org<mailto:d...@openvswitch.org>
Subject: Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum 
offload support for DPDK pnic

Thanks for the comments.

[Sugesh] Any reason, why this patch does only the TCP 

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-07-17 Thread Gao Zhenyu
Hi Sugesh,


   I did more performance testing on it.
   In ovs-dpdk + VM environment, I ran qperf on the VM side and got the
performance numbers below (left column uses hardware checksum, right column
uses software checksum).
   we can see a big improvement in the TCP throughput part. I would like
to make HW-TCP-CKSUM enabled by default in the next patch.

[root@localhost ~]# qperf -t 60 -oo msg_size:1:64K:*2  10.100.85.247 tcp_bw
tcp_lat
tcp_bw:   *HW-CKSUM*   *SW-CKSUM (in VM)*
bw  =  1.91 MB/sec  1.93 MB/sec
tcp_bw:
bw  =  4 MB/sec  3.97 MB/sec
tcp_bw:
bw  =  7.74 MB/sec  7.76 MB/sec
tcp_bw:
bw  =  14.7 MB/sec  14.7 MB/sec
tcp_bw:
bw  =  27.8 MB/sec  27.4 MB/sec
tcp_bw:
 bw  =  51.3 MB/sec  49.1 MB/sec
tcp_bw:
bw  =  87.5 MB/sec  83.1 MB/sec
tcp_bw:
bw  =  144 MB/sec  129 MB/sec
tcp_bw:
bw  =  203 MB/sec  189 MB/sec
tcp_bw:
bw  =  261 MB/sec  252 MB/sec
tcp_bw:
 bw  =  317 MB/sec  253 MB/sec
tcp_bw:
bw  =  400 MB/sec  307 MB/sec
tcp_bw:
bw  =  611 MB/sec  491 MB/sec
tcp_bw:
bw  =  912 MB/sec  662 MB/sec
tcp_bw:
bw  =  1.11 GB/sec  729 MB/sec
tcp_bw:
 bw  =  1.17 GB/sec  861 MB/sec
tcp_bw:
bw  =  1.17 GB/sec  1.08 GB/sec
tcp_lat:
latency  =  29.1 us  29.4 us
tcp_lat:
latency  =  28.8 us  29.1 us
tcp_lat:
latency  =  29 us  28.9 us
tcp_lat:
 latency  =  28.7 us  29.2 us
tcp_lat:
latency  =  29.2 us  28.9 us
tcp_lat:
latency  =  28.9 us  29.1 us
tcp_lat:
latency  =  29.4 us  29.4 us
tcp_lat:
latency  =  29.6 us  29.9 us
tcp_lat:
 latency  =  30.5 us  30.4 us
tcp_lat:
latency  =  47.1 us  39.8 us
tcp_lat:
latency  =  53.6 us  45.2 us
tcp_lat:
latency  =  43.5 us  44.4 us
tcp_lat:
latency  =  53.8 us  49.1 us
tcp_lat:
 latency  =  81.8 us  78.5 us
tcp_lat:
latency  =  82.3 us  83.3 us
tcp_lat:
latency  =  93.1 us  97.2 us
tcp_lat:
latency  =  237 us  211 us

2017-06-23 15:58 GMT+08:00 Chandran, Sugesh <sugesh.chand...@intel.com>:

>
>
>
>
> *Regards*
>
> *_Sugesh*
>
>
>
> *From:* Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
> *Sent:* Wednesday, June 21, 2017 9:32 AM
> *To:* Chandran, Sugesh <sugesh.chand...@intel.com>
> *Cc:* b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh,
> Mark B <mark.b.kavan...@intel.com>; d...@openvswitch.org
> *Subject:* Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> checksum offload support for DPDK pnic
>
>
>
> I get it.  Maybe calculating it in the OVS part is doable as well.
>
> So, how about adding more options to let people choose HW-tcp-cksum(reduce
> cpu cycles) or SW-tcp-cksum(may be better performance)?
>
> Then we have NO-TCP-CKSUM, SW-TCP-CKSUM, HW-TCP-CKSUM.
>
> *[Sugesh] In OVS-DPDK, I am not sure about the advantage of having HW
> checksum. Because even if you save CPU cycles, that will get used for non
> vector tx.*
>
> *So I would prefer to keep these options only if there is really a need
> for them.*
>
> BTW, when will DPDK support tx checksum offload with vectorization?
>
> *[Sugesh] I don’t see any plan to do that in the near future. It could be
> worth asking on the DPDK mailing list as well.*
>
>
>
> Thanks
>
> Zhenyu Gao
>
>
>
>
>
> 2017-06-21 16:03 GMT+08:00 Chandran, Sugesh <sugesh.chand...@intel.com>:
>
>
>
>
>
> *Regards*
>
> *_Sugesh*
>
>
>
> *From:* Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
> *Sent:* Monday, June 19, 2017 1:23 PM
> *To:* Chandran, Sugesh <sugesh.chand...@intel.com>
> *Cc:* b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh,
> Mark B <mark.b.kavan...@intel.com>; d...@openvswitch.org
> *Subject:* Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> checksum offload support for DPDK pnic
>
>
>
> Thanks for the comments.
>
> [Sugesh] Any reason, why this patch does only the TCP checksum offload??
> The command line option says tx_checksum offload (it could be mistakenly
> considered for full checksum offload).
>
> [Zhenyu Gao] DPDK NICs support many HW offload features such as IPv4, IPv6,
> TCP, UDP, VXLAN and GRE. I would like to make them work step by step; a huge
> patch may introduce more potential issues.
>
> TCP offload is a basic and essential feature, so I prefer to implement it
> first.
>
> *[Sugesh] Ok, Fine!*
>
>
>
> [Sugesh] What is the performance improvement offered with this feature? Do
> you have any numbers to share?
> [Zhenyu Gao] I think DPDK uses non-vector functions when Tx checksum
> offload is enabled. Will it give enough performance improvement to mitigate
> that cost?
>
> It is a draft patch to collect advice and suggestions. In my draft
> testing, it doesn't show improvement or regression.
>
> In ovs-dpdk + veth environment, 

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-06-23 Thread Chandran, Sugesh


Regards
_Sugesh

From: Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
Sent: Wednesday, June 21, 2017 9:32 AM
To: Chandran, Sugesh <sugesh.chand...@intel.com>
Cc: b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh, Mark B 
<mark.b.kavan...@intel.com>; d...@openvswitch.org
Subject: Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum 
offload support for DPDK pnic

I get it.  Maybe calculating it in the OVS part is doable as well.
So, how about adding more options to let people choose HW-tcp-cksum (reduce cpu
cycles) or SW-tcp-cksum (maybe better performance)?
Then we have NO-TCP-CKSUM, SW-TCP-CKSUM, HW-TCP-CKSUM.
[Sugesh] In OVS-DPDK, I am not sure about the advantage of having HW checksum,
because even if you save CPU cycles, they will get used by the non-vector tx.
So I would prefer to keep these options only if there is really a need for
them.
BTW, when will DPDK support tx checksum offload with vectorization?
[Sugesh] I don’t see any plan to do that in the near future. It could be worth
asking on the DPDK mailing list as well.

Thanks
Zhenyu Gao


2017-06-21 16:03 GMT+08:00 Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>:


Regards
_Sugesh

From: Gao Zhenyu 
[mailto:sysugaozhe...@gmail.com<mailto:sysugaozhe...@gmail.com>]
Sent: Monday, June 19, 2017 1:23 PM
To: Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>
Cc: b...@ovn.org<mailto:b...@ovn.org>; 
u9012...@gmail.com<mailto:u9012...@gmail.com>; 
ktray...@redhat.com<mailto:ktray...@redhat.com>; Kavanagh, Mark B 
<mark.b.kavan...@intel.com<mailto:mark.b.kavan...@intel.com>>; 
d...@openvswitch.org<mailto:d...@openvswitch.org>
Subject: Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum 
offload support for DPDK pnic

Thanks for the comments.

[Sugesh] Any reason, why this patch does only the TCP checksum offload?? The 
command line option says tx_checksum offload (it could be mistakenly considered 
for full checksum offload).
[Zhenyu Gao] DPDK NICs support many HW offload features such as IPv4, IPv6, TCP,
UDP, VXLAN and GRE. I would like to make them work step by step; a huge patch may
introduce more potential issues.
TCP offload is a basic and essential feature, so I prefer to implement it first.
[Sugesh] Ok, Fine!


[Sugesh] What is the performance improvement offered with this feature? Do you 
have any numbers to share?
[Zhenyu Gao] I think DPDK uses non-vector functions when Tx checksum offload is
enabled. Will it give enough performance improvement to mitigate that cost?
It is a draft patch to collect advice and suggestions. In my draft testing, it
doesn't show improvement or regression.
In an ovs-dpdk + veth environment, veth supports tcp cksum offload by default, but
it introduces a tcp connection issue because veth believes it supports cksum and
offloads to ovs, but the dpdk side doesn't do the offloading.
So I have to use ethtool -K eth1 tx off to disable all tx offloading if using
the original ovs-dpdk. That means we cannot use TSO either.
[Sugesh] This is a concern. We have to consider other use cases as well. Most of
the high-performance ovs-dpdk applications don't use any kernel/veth pair
interfaces in the OVS-DPDK datapath.


It is an ovs-dpdk + veth environment, so it uses sendmsg/recvmsg on RX/TX
on the ovs-dpdk side. The netperf test was executed on the ovs-dpdk + veth side.
The veth side had tx-tcp hw cksum enabled and tso disabled. The bottleneck was not
in cksum, and running the test in a vhost VM is more reasonable.
[Sugesh] I agree with you. But it's worthwhile to know what the performance
delta is. Also, if the cost of vectorization is high, we may consider doing the
checksum calculation in software itself. I feel x86 instructions can do the
checksum calculation pretty efficiently. Have you considered that option?
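
For reference, the software path being discussed here boils down to the standard
RFC 1071 16-bit one's-complement sum (OVS already carries similar helpers in
lib/csum.h). The sketch below is illustrative only and not part of the patch;
the function names are made up, and a real implementation would also feed the
TCP pseudo-header sum in through 'initial' and store the result in network
byte order.

#include <stdint.h>
#include <stddef.h>

/* Fold carries and take the one's complement of the accumulated sum. */
static uint16_t
csum_finish(uint32_t sum)
{
    while (sum >> 16) {
        sum = (sum & 0xffff) + (sum >> 16);
    }
    return (uint16_t) ~sum;
}

/* RFC 1071 one's-complement sum over 'len' bytes; 'initial' can carry a
 * precomputed pseudo-header sum.  Words are read big-endian, so the caller
 * stores the result with htons(). */
static uint16_t
ones_complement_csum(const void *data, size_t len, uint32_t initial)
{
    const uint8_t *p = data;
    uint32_t sum = initial;

    while (len > 1) {
        sum += ((uint32_t) p[0] << 8) | p[1];
        p += 2;
        len -= 2;
    }
    if (len) {
        sum += (uint32_t) p[0] << 8;   /* pad the odd trailing byte */
    }
    return csum_finish(sum);
}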

[root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_RR -l 10
MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
10.100.85.247 () port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size SizeTime Rate
bytes  Bytes  bytesbytes   secs.per sec

16384  87380  11   10.0015001.87(HW tcp-cksum)   15062.72(No HW 
tcp-cksum)
16384  87380


[root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_STREAM -l 10
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.100.85.247 
() port 0 AF_INET
Recv   SendSend
Socket Socket  Message  Elapsed
Size   SizeSize Time Throughput
bytes  bytes   bytessecs.10^6bits/sec
 87380  16384  1638410.02 263.41(HW tcp-cksum)   265.31(No HW tcp-cksum)

I would like to keep it disabled in the default setting unless we implement more tx
offloading like TSO. (Do you have concerns about that?)  BTW, I think I can rename
NETDEV_TX_CHECKSUM_OFFLOAD to NETDEV_TX_TCP_CHECKSUM_OFFLOAD.
Please let me know if you get any questio

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-06-21 Thread Gao Zhenyu
I get it.  Maybe calculating it in the OVS part is doable as well.
So, how about adding more options to let people choose HW-tcp-cksum (reduce
cpu cycles) or SW-tcp-cksum (maybe better performance)?
Then we have NO-TCP-CKSUM, SW-TCP-CKSUM, HW-TCP-CKSUM.
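
A minimal sketch of how such a tri-state knob could be read in
netdev_dpdk_set_config() using the existing smap helpers; the option name,
enum and function below are hypothetical, not from the patch:

#include <string.h>
#include "smap.h"

/* Hypothetical option: options:tx-tcp-cksum=none|sw|hw. */
enum tx_tcp_cksum_mode {
    TX_TCP_CKSUM_NONE,   /* no TCP checksum handling in OVS        */
    TX_TCP_CKSUM_SW,     /* compute in software, keep vector TX    */
    TX_TCP_CKSUM_HW,     /* request NIC offload (non-vector TX)    */
};

static enum tx_tcp_cksum_mode
parse_tx_tcp_cksum_mode(const struct smap *args)
{
    const char *mode = smap_get(args, "tx-tcp-cksum");

    if (mode && !strcmp(mode, "hw")) {
        return TX_TCP_CKSUM_HW;
    } else if (mode && !strcmp(mode, "sw")) {
        return TX_TCP_CKSUM_SW;
    }
    return TX_TCP_CKSUM_NONE;   /* default: behave like today */
}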

BTW, when will DPDK support tx checksum offload with vectorization?

Thanks
Zhenyu Gao


2017-06-21 16:03 GMT+08:00 Chandran, Sugesh <sugesh.chand...@intel.com>:

>
>
>
>
> *Regards*
>
> *_Sugesh*
>
>
>
> *From:* Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
> *Sent:* Monday, June 19, 2017 1:23 PM
> *To:* Chandran, Sugesh <sugesh.chand...@intel.com>
> *Cc:* b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh,
> Mark B <mark.b.kavan...@intel.com>; d...@openvswitch.org
> *Subject:* Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> checksum offload support for DPDK pnic
>
>
>
> Thanks for the comments.
>
> [Sugesh] Any reason, why this patch does only the TCP checksum offload??
> The command line option says tx_checksum offload (it could be mistakenly
> considered for full checksum offload).
>
> [Zhenyu Gao] DPDK NICs support many HW offload features such as IPv4, IPv6,
> TCP, UDP, VXLAN and GRE. I would like to make them work step by step; a huge
> patch may introduce more potential issues.
>
> TCP offload is a basic and essential feature, so I prefer to implement it
> first.
>
> *[Sugesh] Ok, Fine!*
>
>
>
> [Sugesh] What is the performance improvement offered with this feature? Do
> you have any numbers to share?
> [Zhenyu Gao] I think DPDK uses non-vector functions when Tx checksum
> offload is enabled. Will it give enough performance improvement to mitigate
> that cost?
>
> It is a draft patch to collect advice and suggestions. In my draft
> testing, it doesn't show improvement or regression.
>
> In an ovs-dpdk + veth environment, veth supports tcp cksum offload by default,
> but it introduces a tcp connection issue because veth believes it supports
> cksum and offloads to ovs, but the dpdk side doesn't do the offloading.
>
> So I have to use ethtool -K eth1 tx off to disable all tx offloading if
> using the original ovs-dpdk. That means we cannot use TSO either.
>
> *[Sugesh] This is a concern. We have to consider other use cases as well.
> Most of the high-performance ovs-dpdk applications don't use any
> kernel/veth pair interfaces in the OVS-DPDK datapath.*
>
>
>
>
>
> It is an ovs-dpdk + veth environment, so it uses sendmsg/recvmsg on
> RX/TX on the ovs-dpdk side. The netperf test was executed on the ovs-dpdk +
> veth side. The veth side had tx-tcp hw cksum enabled and tso disabled. The
> bottleneck was not in cksum, and running the test in a vhost VM is more
> reasonable.
>
> *[Sugesh] I agree with you. But it's worthwhile to know what the
> performance delta is. Also, if the cost of vectorization is high, we may
> consider doing the checksum calculation in software itself. I feel x86
> instructions can do the checksum calculation pretty efficiently. Have you
> considered that option?*
>
>
> [root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_RR -l 10
> MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 10.100.85.247 () port 0 AF_INET : first burst 0
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size SizeTime Rate
> bytes  Bytes  bytesbytes   secs.per sec
>
> 16384  87380  11   10.0015001.87(HW tcp-cksum)
> 15062.72(No HW tcp-cksum)
> 16384  87380
>
>
> [root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_STREAM -l 10
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 10.100.85.247 () port 0 AF_INET
> Recv   SendSend
> Socket Socket  Message  Elapsed
> Size   SizeSize Time Throughput
> bytes  bytes   bytessecs.10^6bits/sec
>
>  87380  16384  1638410.02 263.41(HW tcp-cksum)   265.31(No HW
> tcp-cksum)
>
>
>
> I would like to keep it disabled in the default setting unless we implement
> more tx offloading like TSO. (Do you have concerns about that?)  BTW, I think I
> can rename NETDEV_TX_CHECKSUM_OFFLOAD to NETDEV_TX_TCP_CHECKSUM_OFFLOAD.
>
> Please let me know if you get any questions. :)
>
> *[Sugesh] On Rx checksum offload case, it works with vector instructions.
> The latest DPDK support rx checksum offload with vectorization. *
>
> Thanks
>
>
>
> 2017-06-19 17:26 GMT+08:00 Chandran, Sugesh <sugesh.chand...@intel.com>:
>
> Hi Zhenyu,
>
> Thank you for working on this,
> I have couple of questions in this patch.
>
> Regards
> _Sugesh
>
>
> > -Original Message-
> > From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> >

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-06-21 Thread Chandran, Sugesh


Regards
_Sugesh

From: Gao Zhenyu [mailto:sysugaozhe...@gmail.com]
Sent: Monday, June 19, 2017 1:23 PM
To: Chandran, Sugesh <sugesh.chand...@intel.com>
Cc: b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh, Mark B 
<mark.b.kavan...@intel.com>; d...@openvswitch.org
Subject: Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum 
offload support for DPDK pnic

Thanks for the comments.

[Sugesh] Any reason, why this patch does only the TCP checksum offload?? The 
command line option says tx_checksum offload (it could be mistakenly considered 
for full checksum offload).
[Zhenyu Gao] DPDK NICs support many HW offload features such as IPv4, IPv6, TCP,
UDP, VXLAN and GRE. I would like to make them work step by step; a huge patch may
introduce more potential issues.
TCP offload is a basic and essential feature, so I prefer to implement it first.
[Sugesh] Ok, Fine!


[Sugesh] What is the performance improvement offered with this feature? Do you 
have any numbers to share?
[Zhenyu Gao] I think DPDK uses non-vector functions when Tx checksum offload is
enabled. Will it give enough performance improvement to mitigate that cost?
It is a draft patch to collect advice and suggestions. In my draft testing, it
doesn't show improvement or regression.
In an ovs-dpdk + veth environment, veth supports tcp cksum offload by default, but
it introduces a tcp connection issue because veth believes it supports cksum and
offloads to ovs, but the dpdk side doesn't do the offloading.
So I have to use ethtool -K eth1 tx off to disable all tx offloading if using
the original ovs-dpdk. That means we cannot use TSO either.
[Sugesh] This is a concern. We have to consider other use cases as well. Most of
the high-performance ovs-dpdk applications don't use any kernel/veth pair
interfaces in the OVS-DPDK datapath.


It is an ovs-dpdk + veth environment, so it uses sendmsg/recvmsg on RX/TX
on the ovs-dpdk side. The netperf test was executed on the ovs-dpdk + veth side.
The veth side had tx-tcp hw cksum enabled and tso disabled. The bottleneck was not
in cksum, and running the test in a vhost VM is more reasonable.
[Sugesh] I agree with you. But it's worthwhile to know what the performance
delta is. Also, if the cost of vectorization is high, we may consider doing the
checksum calculation in software itself. I feel x86 instructions can do the
checksum calculation pretty efficiently. Have you considered that option?

[root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_RR -l 10
MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
10.100.85.247 () port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size SizeTime Rate
bytes  Bytes  bytesbytes   secs.per sec

16384  87380  11   10.0015001.87(HW tcp-cksum)   15062.72(No HW 
tcp-cksum)
16384  87380


[root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_STREAM -l 10
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.100.85.247 
() port 0 AF_INET
Recv   SendSend
Socket Socket  Message  Elapsed
Size   SizeSize Time Throughput
bytes  bytes   bytessecs.10^6bits/sec
 87380  16384  1638410.02 263.41(HW tcp-cksum)   265.31(No HW tcp-cksum)

I would like to keep it disabled in the default setting unless we implement more tx
offloading like TSO. (Do you have concerns about that?)  BTW, I think I can rename
NETDEV_TX_CHECKSUM_OFFLOAD to NETDEV_TX_TCP_CHECKSUM_OFFLOAD.

Please let me know if you get any questions. :)
[Sugesh] On Rx checksum offload case, it works with vector instructions. The 
latest DPDK support rx checksum offload with vectorization.
Thanks

2017-06-19 17:26 GMT+08:00 Chandran, Sugesh 
<sugesh.chand...@intel.com<mailto:sugesh.chand...@intel.com>>:
Hi Zhenyu,

Thank you for working on this,
I have couple of questions in this patch.

Regards
_Sugesh

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org<mailto:ovs-dev-boun...@openvswitch.org> 
> [mailto:ovs-dev-<mailto:ovs-dev->
> boun...@openvswitch.org<mailto:boun...@openvswitch.org>] On Behalf Of Zhenyu 
> Gao
> Sent: Friday, June 16, 2017 1:54 PM
> To: b...@ovn.org<mailto:b...@ovn.org>; 
> u9012...@gmail.com<mailto:u9012...@gmail.com>; 
> ktray...@redhat.com<mailto:ktray...@redhat.com>; Kavanagh,
> Mark B <mark.b.kavan...@intel.com<mailto:mark.b.kavan...@intel.com>>; 
> d...@openvswitch.org<mailto:d...@openvswitch.org>
> Subject: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> checksum offload support for DPDK pnic
>
> This patch introduce TX tcp-checksum offload support for DPDK pnic.
> The feature is disabled by default and can be enabled by setting tx-
> checksum-offload, which like:
> ovs-vsctl set Interface dpdk-eth3 \
>  options:tx-checksum-offload=true
> ---
>  lib/netdev-dpdk.c| 112
> 

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-06-19 Thread Gao Zhenyu
Thanks for the comments.

[Sugesh] Any reason, why this patch does only the TCP checksum offload??
The command line option says tx_checksum offload (it could be mistakenly
considered for full checksum offload).

[Zhenyu Gao] DPDK NICs support many HW offload features such as IPv4, IPv6,
TCP, UDP, VXLAN and GRE. I would like to make them work step by step; a huge
patch may introduce more potential issues.
TCP offload is a basic and essential feature, so I prefer to implement it
first.
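
As background for the per-packet side of a TCP-only TX offload with the DPDK
API of that era (17.05-era names; this is a hedged sketch, not code from the
RFC): the send path marks the mbuf with the offload flags and L2/L3 header
lengths and seeds the TCP checksum field with the pseudo-header checksum, and
the NIC completes the rest.

#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

/* Illustrative only: request TX TCP checksum offload for one IPv4/TCP mbuf. */
static void
mark_mbuf_for_tx_tcp_cksum(struct rte_mbuf *m)
{
    struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
    struct ipv4_hdr *ip = (struct ipv4_hdr *) (eth + 1);
    uint8_t ip_hdr_len = (ip->version_ihl & 0x0f) * 4;
    struct tcp_hdr *tcp = (struct tcp_hdr *) ((char *) ip + ip_hdr_len);

    m->l2_len = sizeof *eth;
    m->l3_len = ip_hdr_len;
    m->ol_flags |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;

    /* The hardware expects the pseudo-header checksum in the TCP checksum
     * field and then sums the TCP header and payload itself. */
    tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);
}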

[Sugesh] What is the performance improvement offered with this feature? Do
you have any numbers to share?
[Zhenyu Gao] I think DPDK uses non-vector functions when Tx checksum offload
is enabled. Will it give enough performance improvement to mitigate that
cost?
It is a draft patch to collect advice and suggestions. In my draft testing,
it doesn't show improvement or regression.
In an ovs-dpdk + veth environment, veth supports tcp cksum offload by default,
but it introduces a tcp connection issue because veth believes it supports
cksum and offloads to ovs, but the dpdk side doesn't do the offloading.
So I have to use ethtool -K eth1 tx off to disable all tx offloading if
using the original ovs-dpdk. That means we cannot use TSO either.

It is an ovs-dpdk + veth environment, so it uses sendmsg/recvmsg on RX/TX
on the ovs-dpdk side. The netperf test was executed on the ovs-dpdk + veth side.
The veth side had tx-tcp hw cksum enabled and tso disabled. The bottleneck was
not in cksum, and running the test in a vhost VM is more reasonable.

[root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_RR -l 10
MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
10.100.85.247 () port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size SizeTime Rate
bytes  Bytes  bytesbytes   secs.per sec

16384  87380  11   10.0015001.87(HW tcp-cksum)
15062.72(No HW tcp-cksum)
16384  87380


[root@16ee46e4b793 ~]# netperf -H 10.100.85.247 -t TCP_STREAM -l 10
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
10.100.85.247 () port 0 AF_INET
Recv   SendSend
Socket Socket  Message  Elapsed
Size   SizeSize Time Throughput
bytes  bytes   bytessecs.10^6bits/sec

 87380  16384  1638410.02 263.41(HW tcp-cksum)   265.31(No HW
tcp-cksum)

I would like to keep it disabled in the default setting unless we implement
more tx offloading like TSO. (Do you have concerns about that?)  BTW, I think I
can rename NETDEV_TX_CHECKSUM_OFFLOAD to NETDEV_TX_TCP_CHECKSUM_OFFLOAD.


Please let me know if you get any questions. :)

Thanks

2017-06-19 17:26 GMT+08:00 Chandran, Sugesh <sugesh.chand...@intel.com>:

> Hi Zhenyu,
>
> Thank you for working on this,
> I have couple of questions in this patch.
>
> Regards
> _Sugesh
>
> > -Original Message-
> > From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> > boun...@openvswitch.org] On Behalf Of Zhenyu Gao
> > Sent: Friday, June 16, 2017 1:54 PM
> > To: b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh,
> > Mark B <mark.b.kavan...@intel.com>; d...@openvswitch.org
> > Subject: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> > checksum offload support for DPDK pnic
> >
> > This patch introduce TX tcp-checksum offload support for DPDK pnic.
> > The feature is disabled by default and can be enabled by setting tx-
> > checksum-offload, which like:
> > ovs-vsctl set Interface dpdk-eth3 \
> >  options:tx-checksum-offload=true
> > ---
> >  lib/netdev-dpdk.c| 112
> > +++
> >  vswitchd/vswitch.xml |  13 --
> >  2 files changed, 115 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index bba4de3..5a68a48
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -32,6 +32,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include "dirs.h"
> >  #include "dp-packet.h"
> > @@ -328,6 +329,7 @@ struct ingress_policer {
> >
> >  enum dpdk_hw_ol_features {
> >  NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
> > +NETDEV_TX_CHECKSUM_OFFLOAD = 1 << 1,
> >  };
> >
> >  struct netdev_dpdk {
> > @@ -649,6 +651,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk
> > *dev, int n_rxq, int n_txq)
> >  int diag = 0;
> >  int i;
> >  struct rte_eth_conf conf = port_conf;
> > +struct rte_eth_txconf *txconf;
> > +struct rte_eth_dev_info dev_info;
> >
> >  if (dev->mtu > ETHER_MTU) {
> >  conf.rxmode.jumbo_frame = 1;
> > @@ -676,9 +680,16 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk
>

Re: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-06-19 Thread Chandran, Sugesh
Hi Zhenyu,

Thank you for working on this,
I have couple of questions in this patch.

Regards
_Sugesh

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> boun...@openvswitch.org] On Behalf Of Zhenyu Gao
> Sent: Friday, June 16, 2017 1:54 PM
> To: b...@ovn.org; u9012...@gmail.com; ktray...@redhat.com; Kavanagh,
> Mark B <mark.b.kavan...@intel.com>; d...@openvswitch.org
> Subject: [ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW
> checksum offload support for DPDK pnic
> 
> This patch introduce TX tcp-checksum offload support for DPDK pnic.
> The feature is disabled by default and can be enabled by setting tx-
> checksum-offload, which like:
> ovs-vsctl set Interface dpdk-eth3 \
>  options:tx-checksum-offload=true
> ---
>  lib/netdev-dpdk.c| 112
> +++
>  vswitchd/vswitch.xml |  13 --
>  2 files changed, 115 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index bba4de3..5a68a48
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -32,6 +32,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "dirs.h"
>  #include "dp-packet.h"
> @@ -328,6 +329,7 @@ struct ingress_policer {
> 
>  enum dpdk_hw_ol_features {
>  NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
> +NETDEV_TX_CHECKSUM_OFFLOAD = 1 << 1,
>  };
> 
>  struct netdev_dpdk {
> @@ -649,6 +651,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk
> *dev, int n_rxq, int n_txq)
>  int diag = 0;
>  int i;
>  struct rte_eth_conf conf = port_conf;
> +struct rte_eth_txconf *txconf;
> +struct rte_eth_dev_info dev_info;
> 
>  if (dev->mtu > ETHER_MTU) {
>  conf.rxmode.jumbo_frame = 1;
> @@ -676,9 +680,16 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk
> *dev, int n_rxq, int n_txq)
>  break;
>  }
> 
> +rte_eth_dev_info_get(dev->port_id, &dev_info);
> +txconf = &dev_info.default_txconf;
> +if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
> +/*Enable tx offload feature on pnic*/
> +txconf->txq_flags = 0;
> +}
> +
>  for (i = 0; i < n_txq; i++) {
>  diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
> -  dev->socket_id, NULL);
> +  dev->socket_id, txconf);
>  if (diag) {
>  VLOG_INFO("Interface %s txq(%d) setup error: %s",
>dev->up.name, i, rte_strerror(-diag));
> @@ -724,11 +735,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
>  {
>  struct rte_eth_dev_info info;
>  bool rx_csum_ol_flag = false;
> +bool tx_csum_ol_flag = false;
>  uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
>   DEV_RX_OFFLOAD_TCP_CKSUM |
>   DEV_RX_OFFLOAD_IPV4_CKSUM;
> +uint32_t tx_chksm_offload_capa = DEV_TX_OFFLOAD_TCP_CKSUM;
[Sugesh] Any reason, why this patch does only the TCP checksum offload?? The 
command line option says tx_checksum offload (it could be mistakenly considered 
for full checksum offload).

> +
>  rte_eth_dev_info_get(dev->port_id, &info);
>  rx_csum_ol_flag = (dev->hw_ol_features &
> NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> +tx_csum_ol_flag = (dev->hw_ol_features &
> + NETDEV_TX_CHECKSUM_OFFLOAD) != 0;
> 
>  if (rx_csum_ol_flag &&
>  (info.rx_offload_capa & rx_chksm_offload_capa) !=
> @@ -736,9 +751,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
>  VLOG_WARN_ONCE("Rx checksum offload is not supported on device
> %"PRIu8,
> dev->port_id);
>  dev->hw_ol_features &= ~NETDEV_RX_CHECKSUM_OFFLOAD;
> -return;
> +} else if (tx_csum_ol_flag &&
> +   (info.tx_offload_capa & tx_chksm_offload_capa) !=
> +tx_chksm_offload_capa) {
> +VLOG_WARN_ONCE("Tx checksum offload is not supported on device
> %"PRIu8,
> +   dev->port_id);
> +dev->hw_ol_features &= ~NETDEV_TX_CHECKSUM_OFFLOAD;
> +} else {
> +netdev_request_reconfigure(&dev->up);
>  }
> -netdev_request_reconfigure(&dev->up);
>  }
> 
> --

[Sugesh] What is the performance improvement offered with this feature? Do you 
have any numbers to share?
I think DPDK uses non-vector functions when Tx checksum offload is enabled

[ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

2017-06-16 Thread Zhenyu Gao
This patch introduces TX tcp-checksum offload support for DPDK pnic.
The feature is disabled by default and can be enabled by setting
tx-checksum-offload, like:
ovs-vsctl set Interface dpdk-eth3 \
 options:tx-checksum-offload=true
---
 lib/netdev-dpdk.c| 112 +++
 vswitchd/vswitch.xml |  13 --
 2 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bba4de3..5a68a48 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -328,6 +329,7 @@ struct ingress_policer {
 
 enum dpdk_hw_ol_features {
 NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
+NETDEV_TX_CHECKSUM_OFFLOAD = 1 << 1,
 };
 
 struct netdev_dpdk {
@@ -649,6 +651,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
n_rxq, int n_txq)
 int diag = 0;
 int i;
 struct rte_eth_conf conf = port_conf;
+struct rte_eth_txconf *txconf;
+struct rte_eth_dev_info dev_info;
 
 if (dev->mtu > ETHER_MTU) {
 conf.rxmode.jumbo_frame = 1;
@@ -676,9 +680,16 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
n_rxq, int n_txq)
 break;
 }
 
+rte_eth_dev_info_get(dev->port_id, &dev_info);
+txconf = &dev_info.default_txconf;
+if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+/*Enable tx offload feature on pnic*/
+txconf->txq_flags = 0;
+}
+
 for (i = 0; i < n_txq; i++) {
 diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
-  dev->socket_id, NULL);
+  dev->socket_id, txconf);
 if (diag) {
 VLOG_INFO("Interface %s txq(%d) setup error: %s",
   dev->up.name, i, rte_strerror(-diag));
@@ -724,11 +735,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk 
*dev)
 {
 struct rte_eth_dev_info info;
 bool rx_csum_ol_flag = false;
+bool tx_csum_ol_flag = false;
 uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
  DEV_RX_OFFLOAD_TCP_CKSUM |
  DEV_RX_OFFLOAD_IPV4_CKSUM;
+uint32_t tx_chksm_offload_capa = DEV_TX_OFFLOAD_TCP_CKSUM;
+
 rte_eth_dev_info_get(dev->port_id, &info);
 rx_csum_ol_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
+tx_csum_ol_flag = (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) != 0;
 
 if (rx_csum_ol_flag &&
 (info.rx_offload_capa & rx_chksm_offload_capa) !=
@@ -736,9 +751,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk 
*dev)
 VLOG_WARN_ONCE("Rx checksum offload is not supported on device %"PRIu8,
dev->port_id);
 dev->hw_ol_features &= ~NETDEV_RX_CHECKSUM_OFFLOAD;
-return;
+} else if (tx_csum_ol_flag &&
+   (info.tx_offload_capa & tx_chksm_offload_capa) !=
+tx_chksm_offload_capa) {
+VLOG_WARN_ONCE("Tx checksum offload is not supported on device %"PRIu8,
+   dev->port_id);
+dev->hw_ol_features &= ~NETDEV_TX_CHECKSUM_OFFLOAD;
+} else {
+netdev_request_reconfigure(&dev->up);
 }
-netdev_request_reconfigure(&dev->up);
 }
 
 static void
@@ -1119,6 +1140,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, 
struct smap *args)
 } else {
 smap_add(args, "rx_csum_offload", "false");
 }
+if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+smap_add(args, "tx_csum_offload", "true");
+} else {
+smap_add(args, "tx_csum_offload", "false");
+}
 }
 ovs_mutex_unlock(>mutex);
 
@@ -1210,7 +1236,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
struct smap *args,
 {RTE_FC_RX_PAUSE, RTE_FC_FULL}
 };
 bool rx_chksm_ofld;
-bool temp_flag;
+bool tx_chksm_ofld;
+bool temp_rx_flag;
+bool temp_tx_flag;
+bool change = false;
 const char *new_devargs;
 int err = 0;
 
@@ -1295,13 +1324,24 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
struct smap *args,
 /* Rx checksum offload configuration */
 /* By default the Rx checksum offload is ON */
 rx_chksm_ofld = smap_get_bool(args, "rx-checksum-offload", true);
-temp_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
+tx_chksm_ofld = smap_get_bool(args, "tx-checksum-offload", false);
+temp_rx_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
 != 0;
-if (temp_flag != rx_chksm_ofld) {
+temp_tx_flag = (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD)
+!= 0;
+if (temp_rx_flag != rx_chksm_ofld) {
 dev->hw_ol_features ^= NETDEV_RX_CHECKSUM_OFFLOAD;
-dpdk_eth_checksum_offload_configure(dev);
+change =