On 8/7/20 12:56 PM, [email protected] wrote:
> From: Yi Yang <[email protected]>
>
> GSO (Generic Segmentation Offload) can segment large UDP
> and TCP packets into smaller packets that fit the MTU of the
> destination. This is especially useful when the physical NIC
> can't offload VXLAN TSO and VXLAN UFO in hardware: GSO makes
> sure userspace TSO still works instead of dropping packets.
>
> In addition, GSO can help improve UDP performance when
> UFO is enabled in the VM.
>
> GSO supports TCP, UDP, VXLAN TCP and VXLAN UDP; it is
> done in the Tx function of the physical NIC.
>
> Signed-off-by: Yi Yang <[email protected]>
> ---
> lib/dp-packet.h | 21 +++-
> lib/netdev-dpdk.c | 358
> +++++++++++++++++++++++++++++++++++++++++++++++++----
> lib/netdev-linux.c | 17 ++-
> lib/netdev.c | 67 +++++++---
> 4 files changed, 417 insertions(+), 46 deletions(-)
>
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 79895f2..c33868d 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -83,6 +83,8 @@ enum dp_packet_offload_mask {
> DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM, 0x800),
> /* VXLAN TCP Segmentation Offload. */
> DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_VXLAN, 0x1000),
> + /* UDP Segmentation Offload. */
> + DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_SEG, PKT_TX_UDP_SEG, 0x2000),
> /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
> };
>
> @@ -97,7 +99,8 @@ enum dp_packet_offload_mask {
> DP_PACKET_OL_TX_IPV6 | \
> DP_PACKET_OL_TX_TCP_CKSUM | \
> DP_PACKET_OL_TX_UDP_CKSUM | \
> - DP_PACKET_OL_TX_SCTP_CKSUM)
> + DP_PACKET_OL_TX_SCTP_CKSUM | \
> + DP_PACKET_OL_TX_UDP_SEG)
>
> #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
> DP_PACKET_OL_TX_UDP_CKSUM | \
> @@ -956,6 +959,13 @@ dp_packet_hwol_is_tso(const struct dp_packet *b)
> return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TCP_SEG);
> }
>
> +/* Returns 'true' if packet 'b' is marked for UDP segmentation offloading. */
> +static inline bool
> +dp_packet_hwol_is_uso(const struct dp_packet *b)
> +{
> + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_UDP_SEG);
> +}
> +
> /* Returns 'true' if packet 'b' is marked for IPv4 checksum offloading. */
> static inline bool
> dp_packet_hwol_is_ipv4(const struct dp_packet *b)
> @@ -1034,6 +1044,15 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
> *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
> }
>
> +/* Mark packet 'b' for UDP segmentation offloading. It implies that
> + * either the packet 'b' is marked for IPv4 or IPv6 checksum offloading
> + * and also for UDP checksum offloading. */
> +static inline void
> +dp_packet_hwol_set_udp_seg(struct dp_packet *b)
> +{
> + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_SEG;
> +}
> +
> #ifdef DPDK_NETDEV
> /* Mark packet 'b' for VXLAN TCP segmentation offloading. */
> static inline void
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 30493ed..888a45e 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -38,13 +38,15 @@
> #include <rte_errno.h>
> #include <rte_ethdev.h>
> #include <rte_flow.h>
> +#include <rte_gso.h>
> +#include <rte_ip.h>
> #include <rte_malloc.h>
> #include <rte_mbuf.h>
> #include <rte_meter.h>
> #include <rte_pci.h>
> #include <rte_version.h>
> #include <rte_vhost.h>
> -#include <rte_ip.h>
> +#include <rte_ip_frag.h>
>
> #include "cmap.h"
> #include "coverage.h"
> @@ -162,6 +164,7 @@ typedef uint16_t dpdk_port_t;
> | DEV_TX_OFFLOAD_UDP_CKSUM \
> | DEV_TX_OFFLOAD_IPV4_CKSUM)
>
> +#define MAX_GSO_MBUFS 64
>
> static const struct rte_eth_conf port_conf = {
> .rxmode = {
> @@ -2171,6 +2174,16 @@ is_local_to_local(uint16_t src_port_id, struct
> netdev_dpdk *dev)
> return ret;
> }
>
> +static uint16_t
> +get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
> +{
> + if (ethertype == htons(RTE_ETHER_TYPE_IPV4)) {
> + return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> + } else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
> + return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> + }
> +}
> +
> /* Prepare the packet for HWOL.
> * Return True if the packet is OK to continue. */
> static bool
> @@ -2203,10 +2216,9 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
> * also can't be handled. So PKT_TX_TUNNEL_VXLAN must be cleared
> * outer_l2_len and outer_l3_len must be zeroed.
> */
> - if (!(dev->up.ol_flags & NETDEV_TX_OFFLOAD_VXLAN_TSO)
> - || (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> + if (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> && (mbuf->pkt_len <= 1450 + mbuf->outer_l2_len +
> mbuf->outer_l3_len
> - + mbuf->l2_len))) {
> + + mbuf->l2_len)) {
> mbuf->ol_flags &= ~PKT_TX_TUNNEL_VXLAN;
> mbuf->l2_len -= sizeof(struct udp_header)
> + sizeof(struct vxlanhdr);
> @@ -2249,7 +2261,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
> /* inner IP checksum offload offload */
> mbuf->ol_flags |= PKT_TX_IP_CKSUM;
> }
> - } else if (mbuf->ol_flags & PKT_TX_L4_MASK) {
> + } else if (mbuf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) {
> /* Handle VLAN TSO */
> /* no inner IP checksum for IPV6 */
> if (mbuf->ol_flags & PKT_TX_IPV4) {
> @@ -2273,6 +2285,18 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
> mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> mbuf->outer_l2_len = 0;
> mbuf->outer_l3_len = 0;
> +
> + /* In case of GRO, PKT_TX_TCP_SEG or PKT_TX_UDP_SEG wasn't set by GRO
> + * APIs, here is a place we can mark it.
> + */
> + if ((mbuf->pkt_len > 1464)
> + && (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)))) {
> + if (l4_proto == IPPROTO_UDP) {
> + mbuf->ol_flags |= PKT_TX_UDP_SEG;
> + } else if (l4_proto == IPPROTO_TCP) {
> + mbuf->ol_flags |= PKT_TX_TCP_SEG;
> + }
> + }
> }
>
> /* It is possible that l4_len isn't set for vhostuserclient */
> @@ -2284,6 +2308,10 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
> mbuf->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
> }
>
> + if ((l4_proto != IPPROTO_UDP) && (l4_proto != IPPROTO_TCP)) {
> + return true;
> + }
> +
> if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
> if (l4_proto != IPPROTO_UDP) {
> VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header"
> @@ -2294,11 +2322,13 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
> mbuf->ol_flags & PKT_TX_TCP_CKSUM) {
> if (l4_proto != IPPROTO_TCP) {
> VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
> - " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
> + " pkt len: %"PRIu32" l4_proto = %d",
> + dev->up.name, mbuf->pkt_len, l4_proto);
> return false;
> }
>
> - if (mbuf->pkt_len - mbuf->l2_len > 1450) {
> + if (mbuf->pkt_len > 1450 + mbuf->outer_l2_len + mbuf->outer_l3_len
> + + mbuf->l2_len) {
> dp_packet_hwol_set_tcp_seg(pkt);
> }
>
> @@ -2308,7 +2338,66 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
> } else {
> mbuf->tso_segsz = 0;
> }
> +
> + if (!(dev->up.ol_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> + /* PKT_TX_TCP_CKSUM must be cleared for GSO because
> + * the TCP checksum can only be calculated by software in
> + * the GSO case.
> + */
> + mbuf->ol_flags &= ~PKT_TX_TCP_CKSUM;
> + }
> }
> +
> + /* UDP GSO if necessary */
> + if (l4_proto == IPPROTO_UDP) {
> + /* VXLAN GSO can be done here */
> + if ((mbuf->ol_flags & PKT_TX_UDP_SEG) ||
> + (mbuf->pkt_len > (1450 + mbuf->outer_l2_len + mbuf->outer_l3_len
> +
> + mbuf->l2_len))) {
> + dp_packet_hwol_set_udp_seg(pkt);
> +
> + /* For UDP GSO, udp checksum must be calculated by software */
> + if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
> + void *l3_hdr, *l4_hdr;
> + struct rte_udp_hdr *udp_hdr;
> +
> + /* PKT_TX_UDP_CKSUM must be cleared for GSO because
> + * the UDP checksum can only be calculated by software in
> + * the GSO case.
> + */
> + mbuf->ol_flags &= ~PKT_TX_UDP_CKSUM;
> +
> + eth_hdr = (struct rte_ether_hdr *)
> + ((uint8_t *)eth_hdr + mbuf->outer_l2_len +
> + mbuf->outer_l3_len +
> + sizeof(struct udp_header) +
> + sizeof(struct vxlanhdr));
> + l3_hdr = (uint8_t *)eth_hdr + mbuf->l2_len -
> + sizeof(struct udp_header) -
> + sizeof(struct vxlanhdr);
> + l4_hdr = (uint8_t *)l3_hdr + mbuf->l3_len;
> + ip_hdr = (struct rte_ipv4_hdr *)l3_hdr;
> + ip_hdr->hdr_checksum = 0;
> + ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
> + /* Don't touch UDP checksum if it is ip fragment */
> + if (!rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) {
> + udp_hdr = (struct rte_udp_hdr *)l4_hdr;
> + udp_hdr->dgram_cksum = 0;
> + udp_hdr->dgram_cksum =
> + get_udptcp_checksum(l3_hdr, l4_hdr,
> + eth_hdr->ether_type);
> + }
> + }
> +
> + /* FOR GSO, gso_size includes l2_len + l3_len */
> + mbuf->tso_segsz = 1450 + mbuf->outer_l2_len + mbuf->outer_l3_len
> +
> + mbuf->l2_len;
> + if (mbuf->tso_segsz > dev->mtu) {
> + mbuf->tso_segsz = dev->mtu;
> + }
> + }
> + }
> +
> return true;
> }
>
> @@ -2339,24 +2428,19 @@ netdev_dpdk_prep_hwol_batch(struct netdev_dpdk *dev,
> struct rte_mbuf **pkts,
> return cnt;
> }
>
> -/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
> - * 'pkts', even in case of failure.
> - *
> - * Returns the number of packets that weren't transmitted. */
> static inline int
> -netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> - struct rte_mbuf **pkts, int cnt)
> +__netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> + struct rte_mbuf **pkts, int cnt)
> {
> uint32_t nb_tx = 0;
> - uint16_t nb_tx_prep = cnt;
> + uint32_t nb_tx_prep;
>
> - if (userspace_tso_enabled()) {
> - nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> - if (nb_tx_prep != cnt) {
> - VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> - "Only %u/%u are valid: %s", dev->up.name,
> nb_tx_prep,
> - cnt, rte_strerror(rte_errno));
> - }
> + nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> + if (nb_tx_prep != cnt) {
> + VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> + "Only %u/%u are valid: %s",
> + dev->up.name, nb_tx_prep,
> + cnt, rte_strerror(rte_errno));
> }
>
> while (nb_tx != nb_tx_prep) {
> @@ -2384,6 +2468,200 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int
> qid,
> return cnt - nb_tx;
> }
>
> +static inline void
> +set_multiseg_udptcp_cksum(struct rte_mbuf *mbuf)
I didn't review the patch, only had a quick glance, but this part
bothers me. OVS doesn't support multi-segment mbufs, so it should not
be possible for such mbufs to be transmitted by OVS. So I do not
understand why this function needs to handle such mbufs.
> +{
> + uint16_t l3_offset = mbuf->outer_l2_len + mbuf->outer_l3_len
> + + mbuf->l2_len;
> + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
> + (rte_pktmbuf_mtod(mbuf, char *) + l3_offset);
> + struct rte_tcp_hdr *tcp_hdr;
> + uint32_t l4_hdr_len;
> + uint8_t *l4_hdr;
> + struct rte_mbuf *next = mbuf->next;
> + uint32_t cksum = 0;
> + uint16_t l4_proto;
> + uint32_t inner_cksum;
> +
> + l4_proto = ipv4_hdr->next_proto_id;
> + if ((l4_proto != IPPROTO_UDP) && (l4_proto != IPPROTO_TCP)) {
> + return;
> + }
> +
> + if (l4_proto == IPPROTO_TCP) {
> + /* For TCP GSO, inner TCP header is in every seg,
> + * TCP checksum has to be calculated by software.
> + */
> +
> + l4_hdr_len = mbuf->data_len - l3_offset
> + - sizeof(struct rte_ipv4_hdr);
> + l4_hdr = (uint8_t *)(ipv4_hdr + 1);
> + tcp_hdr = (struct rte_tcp_hdr *)l4_hdr;
> + tcp_hdr->cksum = 0;
> + }
> +
> + /* Set inner ip checksum */
> + ipv4_hdr->hdr_checksum = 0;
> + ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> +
> + if (l4_proto == IPPROTO_TCP) {
> + cksum = rte_raw_cksum(l4_hdr, l4_hdr_len);
> + } else if (l4_proto == IPPROTO_UDP) {
> + if (next == NULL) {
> + /* It wasn't GSOed */
> + cksum = rte_raw_cksum(ipv4_hdr + 1,
> + ntohs(ipv4_hdr->total_length)
> + - sizeof(struct rte_ipv4_hdr));
> + } else {
> + cksum = 0;
> + }
> + }
> +
> + /* It was GSOed */
> + while (next) {
> + cksum += rte_raw_cksum(rte_pktmbuf_mtod(next, char *),
> next->data_len);
> + next = next->next;
> + }
> +
> + /* Save cksum to inner_cksum, outer udp checksum needs it */
> + inner_cksum = cksum;
> +
> + cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
> + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> + cksum = (~cksum) & 0xffff;
> + if (cksum == 0) {
> + cksum = 0xffff;
> + }
> +
> + /* Set inner TCP checksum */
> + if (l4_proto == IPPROTO_TCP) {
> + tcp_hdr->cksum = (uint16_t)cksum;
> + }
> +
> + /* Set outer udp checksum in case of VXLAN */
> + if (mbuf->outer_l2_len != 0) {
> + ipv4_hdr = (struct rte_ipv4_hdr *)
> + (rte_pktmbuf_mtod(mbuf, char *) + mbuf->outer_l2_len);
> + struct rte_udp_hdr *udp_hdr = (struct rte_udp_hdr *)
> + (ipv4_hdr + 1);
> +
> + /* Set outer ip checksum */
> + ipv4_hdr->hdr_checksum = 0;
> + ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> +
> + udp_hdr->dgram_cksum = 0;
> + cksum = rte_ipv4_phdr_cksum(ipv4_hdr, 0);
> + cksum += rte_raw_cksum(udp_hdr, mbuf->l2_len + mbuf->l3_len);
> + cksum += inner_cksum;
> + if (l4_proto == IPPROTO_TCP) {
> + cksum += tcp_hdr->cksum;
> + }
> + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> + cksum = (~cksum) & 0xffff;
> + if (cksum == 0) {
> + cksum = 0xffff;
> + }
> + udp_hdr->dgram_cksum = (uint16_t)cksum;
> + }
> +}
> +
> +/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
> + * 'pkts', even in case of failure.
> + *
> + * Returns the number of packets that weren't transmitted. */
> +static inline int
> +netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> + struct rte_mbuf **pkts, int cnt)
> +{
> + uint32_t nb_tx = 0;
> + int i;
> + int ret;
> + int failures = 0;
> +
> + if (userspace_tso_enabled()) {
> + /* The best point to do gso */
> + struct rte_gso_ctx gso_ctx;
> + struct rte_mbuf *gso_mbufs[MAX_GSO_MBUFS];
> + int tx_start = -1;
> +
> + /* Setup gso context */
> + gso_ctx.direct_pool = dev->dpdk_mp->mp;
> + gso_ctx.indirect_pool = dev->dpdk_mp->mp;
> +
> + /* Do GSO if needed */
> + for (i = 0; i < cnt; i++) {
> + if (((pkts[i]->ol_flags & PKT_TX_UDP_SEG) &&
> + !(dev->hw_ol_features & DEV_TX_OFFLOAD_UDP_TSO)) ||
> + ((pkts[i]->ol_flags & PKT_TX_TCP_SEG) &&
> + ((!(dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD)
> + && (pkts[i]->ol_flags & PKT_TX_TUNNEL_VXLAN))
> + || !(dev->hw_ol_features & DEV_TX_OFFLOAD_TCP_TSO)))) {
> + /* Send non GSO packets before pkts[i] */
> + if (tx_start != -1) {
> + failures += __netdev_dpdk_eth_tx_burst(
> + dev, qid,
> + pkts + tx_start,
> + i - tx_start);
> + }
> + tx_start = -1;
> +
> + gso_ctx.gso_types = 0;
> + gso_ctx.gso_size = pkts[i]->tso_segsz;
> + gso_ctx.flag = 0;
> + if (pkts[i]->ol_flags & PKT_TX_TUNNEL_VXLAN) {
> + gso_ctx.gso_types |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
> + }
> + if (pkts[i]->ol_flags & PKT_TX_UDP_SEG) {
> + gso_ctx.gso_types |= DEV_TX_OFFLOAD_UDP_TSO;
> + } else if (pkts[i]->ol_flags & PKT_TX_TCP_SEG) {
> + gso_ctx.gso_types |= DEV_TX_OFFLOAD_TCP_TSO;
> + pkts[i]->ol_flags &= ~PKT_TX_TCP_CKSUM;
> + }
> + ret = rte_gso_segment(pkts[i], /* packet to segment */
> + &gso_ctx, /* gso context */
> + /* gso output mbufs */
> + (struct rte_mbuf **)&gso_mbufs,
> + MAX_GSO_MBUFS);
> + if (ret < 0) {
> + rte_pktmbuf_free(pkts[i]);
> + } else {
> + int j, k;
> + struct rte_mbuf * next_part;
> + nb_tx = ret;
> + for (j = 0; j < nb_tx; j++) {
> + set_multiseg_udptcp_cksum(gso_mbufs[j]);
> + /* Clear them because of no offload */
> + gso_mbufs[j]->ol_flags = 0;
> + gso_mbufs[j]->outer_l2_len = 0;
> + gso_mbufs[j]->outer_l3_len = 0;
> + gso_mbufs[j]->l2_len = 0;
> + gso_mbufs[j]->l3_len = 0;
> + gso_mbufs[j]->l4_len = 0;
> + next_part = gso_mbufs[j];
> + for (k = 0; k < gso_mbufs[j]->nb_segs; k++) {
> + next_part = next_part->next;
> + }
> + }
> + __netdev_dpdk_eth_tx_burst(dev, qid, gso_mbufs, nb_tx);
> + }
> + continue;
> + }
> + if (tx_start == -1) {
> + tx_start = i;
> + }
> + }
> +
> + if (tx_start != -1) {
> + /* Send non GSO packets before pkts[i] */
> + failures += __netdev_dpdk_eth_tx_burst(dev, qid, pkts + tx_start,
> + i - tx_start);
> + }
> + return failures;
> + }
> +
> + return __netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
> +}
> +
> static inline bool
> netdev_dpdk_srtcm_policer_pkt_handle(struct rte_meter_srtcm *meter,
> struct rte_meter_srtcm_profile *profile,
> @@ -2786,10 +3064,24 @@ out:
> }
> }
>
> +struct shinfo_arg {
> + void * buf;
> + struct rte_mbuf *mbuf;
> +};
> +
> +/* In the GSO case, the extended mbuf can only be freed by
> + * netdev_dpdk_extbuf_free.
> + */
> static void
> -netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
> +netdev_dpdk_extbuf_free(struct rte_mbuf *m, void *opaque)
> {
> - rte_free(opaque);
> + struct shinfo_arg *arg = (struct shinfo_arg *)opaque;
> +
> + rte_free(arg->buf);
> + if (m != arg->mbuf) {
> + rte_pktmbuf_free(arg->mbuf);
> + }
> + free(arg);
> }
>
> static struct rte_mbuf *
> @@ -2821,8 +3113,11 @@ dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt,
> uint32_t data_len)
>
> /* Initialize shinfo. */
> if (shinfo) {
> + struct shinfo_arg *arg = xmalloc(sizeof(struct shinfo_arg));
> + arg->buf = buf;
> + arg->mbuf = pkt;
> shinfo->free_cb = netdev_dpdk_extbuf_free;
> - shinfo->fcb_opaque = buf;
> + shinfo->fcb_opaque = arg;
> rte_mbuf_ext_refcnt_set(shinfo, 1);
> } else {
> shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> @@ -2852,6 +3147,10 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, uint32_t
> data_len)
> return NULL;
> }
>
> + if (unlikely(pkt->shinfo != NULL)) {
> + pkt->shinfo = NULL;
> + }
> +
> if (rte_pktmbuf_tailroom(pkt) >= data_len) {
> return pkt;
> }
> @@ -5192,6 +5491,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
> *netdev)
> int err;
> uint64_t vhost_flags = 0;
> uint64_t vhost_unsup_flags;
> + uint64_t vhost_supported_flags;
> bool zc_enabled;
>
> ovs_mutex_lock(&dev->mutex);
> @@ -5277,6 +5577,16 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
> *netdev)
> goto unlock;
> }
>
> + err = rte_vhost_driver_get_features(dev->vhost_id,
> + &vhost_supported_flags);
> + if (err) {
> + VLOG_ERR("rte_vhost_driver_get_features failed for "
> + "vhost user client port: %s\n", dev->up.name);
> + goto unlock;
> + }
> + VLOG_INFO("vhostuserclient port %s features: 0x%016lx",
> + dev->up.name, vhost_supported_flags);
> +
> err = rte_vhost_driver_start(dev->vhost_id);
> if (err) {
> VLOG_ERR("rte_vhost_driver_start failed for vhost user "
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 9f830b4..557f139 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -6566,6 +6566,16 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t
> *l4proto)
>
> l4_len = TCP_OFFSET(tcp_hdr->tcp_ctl) * 4;
> dp_packet_hwol_set_l4_len(b, l4_len);
> + } else if (*l4proto == IPPROTO_UDP) {
> + struct udp_header *udp_hdr = dp_packet_at(b, l2_len + l3_len,
> + sizeof(struct udp_header));
> +
> + if (!udp_hdr) {
> + return -EINVAL;
> + }
> +
> + l4_len = sizeof(struct udp_header);
> + dp_packet_hwol_set_l4_len(b, l4_len);
> }
>
> return 0;
> @@ -6581,10 +6591,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
> return -EINVAL;
> }
>
> - if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
> - return 0;
> - }
> -
> if (netdev_linux_parse_l2(b, &l4proto)) {
> return -EINVAL;
> }
> @@ -6609,6 +6615,9 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
> || type == VIRTIO_NET_HDR_GSO_TCPV6) {
> dp_packet_hwol_set_tcp_seg(b);
> }
> + if (type == VIRTIO_NET_HDR_GSO_UDP) {
> + dp_packet_hwol_set_udp_seg(b);
> + }
> }
>
> return 0;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 64583d1..02f28c8 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -825,23 +825,41 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
> {
> uint64_t l4_mask;
>
> - if (dp_packet_hwol_is_vxlan_tcp_seg(packet)
> - && (dp_packet_hwol_is_tso(packet) || dp_packet_hwol_l4_mask(packet))
> - && !(netdev_flags & NETDEV_TX_OFFLOAD_VXLAN_TSO)) {
> - /* Fall back to GSO in software. */
> - VLOG_ERR_BUF(errormsg, "No VXLAN TSO support");
> - return false;
> - }
> -
> - if (dp_packet_hwol_is_tso(packet)
> - && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> - /* Fall back to GSO in software. */
> - VLOG_ERR_BUF(errormsg, "No TSO support");
> - return false;
> - }
> -
> + /* GSO can handle TSO by software even if device can't handle hardware
> + * offload, so needn't check it here.
> + */
> l4_mask = dp_packet_hwol_l4_mask(packet);
> if (l4_mask) {
> + /* Calculate checksum for VLAN TSO case when no hardware offload
> + * feature is available. Note: for VXLAN TSO case, checksum has
> + * been calculated before here, so it won't be done here again
> + * because checksum flags in packet->m.ol_flags have been cleaned.
> + */
> + if (dp_packet_hwol_l4_is_tcp(packet)
> + && !dp_packet_hwol_is_vxlan_tcp_seg(packet)
> + && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
> + packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_TCP_CKSUM;
> + /* Only calculate TCP checksum for non-TSO packet,
> + * it will be calculated after GSO for TSO packet.
> + */
> + if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_TCP_SEG)) {
> + calculate_tcpudp_checksum(packet);
> + }
> + return true;
> + } else if (dp_packet_hwol_l4_is_udp(packet)
> + && !dp_packet_hwol_is_vxlan_tcp_seg(packet)
> + && !(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
> + packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_UDP_CKSUM;
> + /* Only calculate UDP checksum for non-UFO packet,
> + * it will be calculated immediately before GSO for
> + * UFO packet.
> + */
> + if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_UDP_SEG)) {
> + calculate_tcpudp_checksum(packet);
> + }
> + return true;
> + }
> +
> if (dp_packet_hwol_l4_is_tcp(packet)) {
> if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
> /* Fall back to TCP csum in software. */
> @@ -1013,11 +1031,26 @@ netdev_push_header(const struct netdev *netdev,
> /* VXLAN offload can't support udp checksum offload
> * for inner udp packet, so udp checksum must be set
> * before push header in order that outer checksum can
> - * be set correctly.
> + * be set correctly. But GSO code will set udp checksum
> + * if packet->mbuf.ol_flags has DP_PACKET_OL_TX_UDP_SEG.
> */
> if (dp_packet_hwol_l4_is_udp(packet)) {
> packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_UDP_CKSUM;
> - calculate_tcpudp_checksum(packet);
> + /* Only calculate UDP checksum for non-UFO packet,
> + * it will be calculated immediately before GSO for
> + * UFO packet.
> + */
> + if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_UDP_SEG)) {
> + calculate_tcpudp_checksum(packet);
> + }
> + } else if (dp_packet_hwol_l4_is_tcp(packet)) {
> + packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_TCP_CKSUM;
> + /* Only calculate TCP checksum for non-TSO packet,
> + * it will be calculated after GSO for TSO packet.
> + */
> + if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_TCP_SEG)) {
> + calculate_tcpudp_checksum(packet);
> + }
> }
> }
> netdev->netdev_class->push_header(netdev, packet, data);
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev