Hi Mike, Coverity reported a problem on this patch, see below.
Can you take a look and send a fix? Thanks, Eelco On 17 Jan 2024, at 20:26, Mike Pattrick wrote: > From: Dexia Li <[email protected]> > > For userspace datapath, this patch provides vxlan and geneve tunnel tso. > Only support userspace vxlan or geneve tunnel, meanwhile support > tunnel outter and inner csum offload. If netdev do not support offload > features, there is a software fallback.If netdev do not support vxlan > and geneve tso,packets will drop. Front-end devices can close offload > features by ethtool also. > > Signed-off-by: Dexia Li <[email protected]> > Co-authored-by: Mike Pattrick <[email protected]> > Signed-off-by: Mike Pattrick <[email protected]> > --- > v9: Rebased patch > v12: > - Sent in by Dexia > v13: > - Corrected formatting and comment/function naming consistency > - Double encapsulation with TSO will now drop packets > - Corrected packet leak condition. > --- > lib/dp-packet.c | 41 +++++++- > lib/dp-packet.h | 201 +++++++++++++++++++++++++++++++++++++--- > lib/dpif-netdev.c | 4 +- > lib/flow.c | 2 +- > lib/netdev-dpdk.c | 86 +++++++++++++++-- > lib/netdev-dummy.c | 2 +- > lib/netdev-native-tnl.c | 101 +++++++++++++++++++- > lib/netdev-provider.h | 4 + > lib/netdev.c | 53 +++++++++-- > lib/packets.c | 12 +-- > lib/packets.h | 6 +- > tests/dpif-netdev.at | 4 +- > 12 files changed, 462 insertions(+), 54 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index 920402369..e7738c37a 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -546,16 +546,47 @@ dp_packet_compare_offsets(struct dp_packet *b1, struct > dp_packet *b2, > return true; > } > > +void > +dp_packet_tnl_outer_ol_send_prepare(struct dp_packet *p, > + uint64_t flags) > +{ > + if (dp_packet_hwol_is_outer_ipv4_cksum(p)) { > + if (!(flags & NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM)) { > + dp_packet_ip_set_header_csum(p, false); > + dp_packet_ol_set_ip_csum_good(p); > + dp_packet_hwol_reset_outer_ipv4_csum(p); > + } > + } > + > + if 
(!dp_packet_hwol_is_outer_udp_cksum(p)) { > + return; > + } > + > + if (!(flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) { > + packet_udp_complete_csum(p, false); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_outer_udp_csum(p); > + } > +} > + > /* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags' > * and if not, updates the packet with the software fall back. */ > void > dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags) > { > + bool tnl_inner = false; > + > + if (dp_packet_hwol_is_tunnel_geneve(p) || > + dp_packet_hwol_is_tunnel_vxlan(p)) { > + dp_packet_tnl_outer_ol_send_prepare(p, flags); > + tnl_inner = true; > + } > + > if (dp_packet_hwol_tx_ip_csum(p)) { > if (dp_packet_ip_checksum_good(p)) { > dp_packet_hwol_reset_tx_ip_csum(p); > } else if (!(flags & NETDEV_TX_OFFLOAD_IPV4_CKSUM)) { > - dp_packet_ip_set_header_csum(p); > + dp_packet_ip_set_header_csum(p, tnl_inner); > dp_packet_ol_set_ip_csum_good(p); > dp_packet_hwol_reset_tx_ip_csum(p); > } > @@ -565,24 +596,24 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t > flags) > return; > } > > - if (dp_packet_l4_checksum_good(p)) { > + if (dp_packet_l4_checksum_good(p) && !tnl_inner) { > dp_packet_hwol_reset_tx_l4_csum(p); > return; > } > > if (dp_packet_hwol_l4_is_tcp(p) > && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { > - packet_tcp_complete_csum(p); > + packet_tcp_complete_csum(p, tnl_inner); > dp_packet_ol_set_l4_csum_good(p); > dp_packet_hwol_reset_tx_l4_csum(p); > } else if (dp_packet_hwol_l4_is_udp(p) > && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { > - packet_udp_complete_csum(p); > + packet_udp_complete_csum(p, tnl_inner); > dp_packet_ol_set_l4_csum_good(p); > dp_packet_hwol_reset_tx_l4_csum(p); > } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) > && dp_packet_hwol_l4_is_sctp(p)) { > - packet_sctp_complete_csum(p); > + packet_sctp_complete_csum(p, tnl_inner); > dp_packet_ol_set_l4_csum_good(p); > dp_packet_hwol_reset_tx_l4_csum(p); > } > diff --git 
a/lib/dp-packet.h b/lib/dp-packet.h > index 11aa00723..c626acfbd 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -86,22 +86,47 @@ enum dp_packet_offload_mask { > DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800), > /* Offload IP checksum. */ > DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000), > + /* Offload packet is tunnel GENEVE. */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GENEVE, > + RTE_MBUF_F_TX_TUNNEL_GENEVE, 0x2000), > + /* Offload packet is tunnel VXLAN. */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, > + RTE_MBUF_F_TX_TUNNEL_VXLAN, 0x4000), > + /* Offload tunnel packet, out is IPv4 */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV4, > + RTE_MBUF_F_TX_OUTER_IPV4, 0x8000), > + /* Offload tunnel out IPv4 checksum */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IP_CKSUM, > + RTE_MBUF_F_TX_OUTER_IP_CKSUM, 0x10000), > + /* Offload tunnel out UDP checksum */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_UDP_CKSUM, > + RTE_MBUF_F_TX_OUTER_UDP_CKSUM, 0x20000), > + /* Offload tunnel packet, out is IPv6 */ > + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6, > + RTE_MBUF_F_TX_OUTER_IPV6, 0x40000), > + > /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. 
*/ > }; > > -#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ > - DP_PACKET_OL_FLOW_MARK | \ > - DP_PACKET_OL_RX_L4_CKSUM_BAD | \ > - DP_PACKET_OL_RX_IP_CKSUM_BAD | \ > - DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ > - DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ > - DP_PACKET_OL_TX_TCP_SEG | \ > - DP_PACKET_OL_TX_IPV4 | \ > - DP_PACKET_OL_TX_IPV6 | \ > - DP_PACKET_OL_TX_TCP_CKSUM | \ > - DP_PACKET_OL_TX_UDP_CKSUM | \ > - DP_PACKET_OL_TX_SCTP_CKSUM | \ > - DP_PACKET_OL_TX_IP_CKSUM) > +#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ > + DP_PACKET_OL_FLOW_MARK | \ > + DP_PACKET_OL_RX_L4_CKSUM_BAD | \ > + DP_PACKET_OL_RX_IP_CKSUM_BAD | \ > + DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ > + DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ > + DP_PACKET_OL_TX_TCP_SEG | \ > + DP_PACKET_OL_TX_IPV4 | \ > + DP_PACKET_OL_TX_IPV6 | \ > + DP_PACKET_OL_TX_TCP_CKSUM | \ > + DP_PACKET_OL_TX_UDP_CKSUM | \ > + DP_PACKET_OL_TX_SCTP_CKSUM | \ > + DP_PACKET_OL_TX_IP_CKSUM | \ > + DP_PACKET_OL_TX_TUNNEL_GENEVE | \ > + DP_PACKET_OL_TX_TUNNEL_VXLAN | \ > + DP_PACKET_OL_TX_OUTER_IPV4 | \ > + DP_PACKET_OL_TX_OUTER_IP_CKSUM | \ > + DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \ > + DP_PACKET_OL_TX_OUTER_IPV6) > > #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ > DP_PACKET_OL_TX_UDP_CKSUM | \ > @@ -139,6 +164,10 @@ struct dp_packet { > * or UINT16_MAX. */ > uint16_t l4_ofs; /* Transport-level header offset, > or UINT16_MAX. */ > + uint16_t inner_l3_ofs; /* Inner Network-level header offset, > + * or UINT16_MAX. */ > + uint16_t inner_l4_ofs; /* Inner Transport-level header offset, > + or UINT16_MAX. */ > uint32_t cutlen; /* length in bytes to cut from the end. */ > ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ > uint16_t csum_start; /* Position to start checksumming from. 
*/ > @@ -250,6 +279,8 @@ bool dp_packet_compare_offsets(struct dp_packet *good, > struct dp_packet *test, > struct ds *err_str); > void dp_packet_ol_send_prepare(struct dp_packet *, uint64_t); > +void dp_packet_tnl_outer_ol_send_prepare(struct dp_packet *, uint64_t); > + > > > /* Frees memory that 'b' points to, as well as 'b' itself. */ > @@ -482,6 +513,22 @@ dp_packet_l4_size(const struct dp_packet *b) > : 0; > } > > +static inline void * > +dp_packet_inner_l3(const struct dp_packet *b) > +{ > + return b->inner_l3_ofs != UINT16_MAX > + ? (char *) dp_packet_data(b) + b->inner_l3_ofs > + : NULL; > +} > + > +static inline void * > +dp_packet_inner_l4(const struct dp_packet *b) > +{ > + return b->inner_l4_ofs != UINT16_MAX > + ? (char *) dp_packet_data(b) + b->inner_l4_ofs > + : NULL; > +} > + > static inline const void * > dp_packet_get_tcp_payload(const struct dp_packet *b) > { > @@ -539,6 +586,25 @@ dp_packet_get_nd_payload(const struct dp_packet *b) > } > > #ifdef DPDK_NETDEV > +static inline void > +dp_packet_set_l2_len(struct dp_packet *b, size_t l2_len) > +{ > + b->mbuf.l2_len = l2_len; > +} > + > +static inline void > +dp_packet_set_l3_len(struct dp_packet *b, size_t l3_len) > +{ > + b->mbuf.l3_len = l3_len; > +} > + > +static inline void > +dp_packet_set_l4_len(struct dp_packet *b, size_t l4_len) > +{ > + b->mbuf.l4_len = l4_len; > +} > + > + > static inline uint64_t * > dp_packet_ol_flags_ptr(const struct dp_packet *b) > { > @@ -558,6 +624,24 @@ dp_packet_flow_mark_ptr(const struct dp_packet *b) > } > > #else > +static inline void > +dp_packet_set_l2_len(struct dp_packet *b OVS_UNUSED, size_t l2_len > OVS_UNUSED) > +{ > + /* There is no implementation. */ > +} > + > +static inline void > +dp_packet_set_l3_len(struct dp_packet *b OVS_UNUSED, size_t l3_len > OVS_UNUSED) > +{ > + /* There is no implementation. 
*/ > +} > + > +static inline void > +dp_packet_set_l4_len(struct dp_packet *b OVS_UNUSED, size_t l4_len > OVS_UNUSED) > +{ > + /* There is no implementation. */ > +} > + > static inline uint32_t * > dp_packet_ol_flags_ptr(const struct dp_packet *b) > { > @@ -619,6 +703,8 @@ dp_packet_set_size(struct dp_packet *b, uint32_t v) > * (and thus 'v') will always be <= UINT16_MAX; this means that there is > no > * loss of accuracy in assigning 'v' to 'data_len'. > */ > + > + ovs_assert(v <= UINT16_MAX); > b->mbuf.data_len = (uint16_t)v; /* Current seg length. */ > b->mbuf.pkt_len = v; /* Total length of all segments linked > to > * this segment. */ > @@ -1056,6 +1142,36 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) > DP_PACKET_OL_TX_SCTP_CKSUM; > } > > +/* Returns 'true' if packet 'b' is marked for tunnel GENEVE > + * checksum offloading. */ > +static inline bool > +dp_packet_hwol_is_tunnel_geneve(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GENEVE); > +} > + > +/* Returns 'true' if packet 'b' is marked for tunnel VXLAN > + * checksum offloading. */ > +static inline bool > +dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN); > +} > + > +/* Returns 'true' if packet 'b' is marked for outer IPv4 checksum offload. */ > +static inline bool > +dp_packet_hwol_is_outer_ipv4_cksum(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_IP_CKSUM); > +} > + > +/* Returns 'true' if packet 'b' is marked for outer UDP checksum offload. 
*/ > +static inline bool > +dp_packet_hwol_is_outer_udp_cksum(struct dp_packet *b) > +{ > + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_UDP_CKSUM); > +} > + > static inline void > dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) > { > @@ -1078,6 +1194,14 @@ dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) > *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; > } > > +/* Mark packet 'a' as a tunnel packet with outer IPv6 header. */ > +static inline void > +dp_packet_hwol_set_tx_outer_ipv6(struct dp_packet *a) > +{ > + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_OUTER_IPV4; > + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_OUTER_IPV6; > +} > + > /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */ > static inline bool > dp_packet_hwol_tx_ip_csum(const struct dp_packet *p) > @@ -1131,6 +1255,53 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +/* Mark packet 'b' for tunnel GENEVE offloading. */ > +static inline void > +dp_packet_hwol_set_tunnel_geneve(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GENEVE; > +} > + > +/* Mark packet 'b' for tunnel VXLAN offloading. */ > +static inline void > +dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN; > +} > + > +/* Mark packet 'b' for out IPv4 packet. */ > +static inline void > +dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IPV4; > +} > + > +/* Mark packet 'b' for out IPv4 csum offloading. 
*/ > +static inline void > +dp_packet_hwol_set_tx_outer_ipv4_csum(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; > +} > + > +static inline void > +dp_packet_hwol_reset_outer_ipv4_csum(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_OUTER_IP_CKSUM; > +} > + > +static inline void > +dp_packet_hwol_reset_outer_udp_csum(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_OUTER_UDP_CKSUM; > +} > + > +/* Mark packet 'b' for out UDP csum offloading. */ > +static inline void > +dp_packet_hwol_set_outer_udp_csum(struct dp_packet *b) > +{ > + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM; > +} > + > /* Resets TCP Segmentation flag in packet 'p'. */ > static inline void > dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) > @@ -1172,9 +1343,9 @@ dp_packet_ip_checksum_bad(const struct dp_packet *p) > > /* Calculate and set the IPv4 header checksum in packet 'p'. */ > static inline void > -dp_packet_ip_set_header_csum(struct dp_packet *p) > +dp_packet_ip_set_header_csum(struct dp_packet *p, bool inner) > { > - struct ip_header *ip = dp_packet_l3(p); > + struct ip_header *ip = (inner) ? 
dp_packet_inner_l3(p) : dp_packet_l3(p); > > ovs_assert(ip); > ip->ip_csum = 0; > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index df5bbf85a..c1981137f 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -8194,7 +8194,9 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, > struct dp_packet *packet_, > ds_destroy(&ds); > } > > - dp_packet_ol_send_prepare(packet_, 0); > + if (type != DPIF_UC_MISS) { > + dp_packet_ol_send_prepare(packet_, 0); > + } > > return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, > actions, wc, put_actions, dp->upcall_aux); > diff --git a/lib/flow.c b/lib/flow.c > index b8f99f66b..82d93570a 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -3278,7 +3278,7 @@ packet_expand(struct dp_packet *p, const struct flow > *flow, size_t size) > if (dp_packet_hwol_tx_ip_csum(p)) { > dp_packet_ol_reset_ip_csum_good(p); > } else { > - dp_packet_ip_set_header_csum(p); > + dp_packet_ip_set_header_csum(p, false); > dp_packet_ol_set_ip_csum_good(p); > } > pseudo_hdr_csum = packet_csum_pseudoheader(ip); > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 1ff25c246..fb26825ff 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -416,6 +416,10 @@ enum dpdk_hw_ol_features { > NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, > NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, > NETDEV_TX_TSO_OFFLOAD = 1 << 7, > + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 8, > + NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9, > + NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10, > + NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11, > }; > > enum dpdk_rx_steer_flags { > @@ -1075,6 +1079,14 @@ netdev_dpdk_update_netdev_flags(struct netdev_dpdk > *dev) > NETDEV_TX_OFFLOAD_SCTP_CKSUM); > netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, > NETDEV_TX_OFFLOAD_TCP_TSO); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD, > + NETDEV_TX_VXLAN_TNL_TSO); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD, > + 
NETDEV_TX_GENEVE_TNL_TSO); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); > } > > static int > @@ -1129,6 +1141,22 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int > n_rxq, int n_txq) > conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; > } > > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM; > + } > + > /* Limit configured rss hash functions to only those supported > * by the eth device. 
*/ > conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; > @@ -1346,6 +1374,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > } > > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD; > + } > + > dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > if (userspace_tso_enabled()) { > if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { > @@ -1354,6 +1394,20 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > VLOG_WARN("%s: Tx TSO offload is not supported.", > netdev_get_name(&dev->up)); > } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } else { > + VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.", > + netdev_get_name(&dev->up)); > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD; > + } else { > + VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.", > + netdev_get_name(&dev->up)); > + } > } > > n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); > @@ -2479,11 +2533,23 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > return true; > } > > - mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); > - mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); > - mbuf->l4_len = 0; > - mbuf->outer_l2_len = 0; > - mbuf->outer_l3_len = 0; > + /* If packet is vxlan or geneve tunnel packet, calculate outer > + * l2 len and outer l3 len. Inner l2/l3/l4 len are calculated > + * before. 
*/ > + if (mbuf->ol_flags & > + (RTE_MBUF_F_TX_TUNNEL_GENEVE | RTE_MBUF_F_TX_TUNNEL_VXLAN)) { > + mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) - > + (char *) dp_packet_eth(pkt); > + mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) - > + (char *) dp_packet_l3(pkt); > + } else { > + mbuf->l2_len = (char *) dp_packet_l3(pkt) - > + (char *) dp_packet_eth(pkt); > + mbuf->l3_len = (char *) dp_packet_l4(pkt) - > + (char *) dp_packet_l3(pkt); > + mbuf->outer_l2_len = 0; > + mbuf->outer_l3_len = 0; > + } > th = dp_packet_l4(pkt); > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > @@ -2501,8 +2567,14 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > return false; > } > > - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + if (mbuf->ol_flags & (RTE_MBUF_F_TX_TUNNEL_GENEVE | > + RTE_MBUF_F_TX_TUNNEL_VXLAN)) { > + mbuf->tso_segsz = dev->mtu - mbuf->l2_len - mbuf->l3_len - > + mbuf->l4_len - mbuf->outer_l3_len; > + } else { > + mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > + mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + } > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c > index 8c6e6d448..21db9edb5 100644 > --- a/lib/netdev-dummy.c > +++ b/lib/netdev-dummy.c > @@ -1202,7 +1202,7 @@ netdev_dummy_send(struct netdev *netdev, int qid, > > if (dp_packet_hwol_tx_ip_csum(packet) && > !dp_packet_ip_checksum_good(packet)) { > - dp_packet_ip_set_header_csum(packet); > + dp_packet_ip_set_header_csum(packet, false); > dp_packet_ol_set_ip_csum_good(packet); > } > > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index a0682c70f..fa87c6281 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -173,15 +173,29 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, > const void *header, > ip6->ip6_plen = htons(*ip_tot_size); > 
packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label); > packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; > - dp_packet_hwol_set_tx_ipv6(packet); > + > + if (dp_packet_hwol_is_tunnel_geneve(packet) || > + dp_packet_hwol_is_tunnel_vxlan(packet)) { > + dp_packet_hwol_set_tx_outer_ipv6(packet); > + } else { > + dp_packet_hwol_set_tx_ipv6(packet); > + } > + > dp_packet_ol_reset_ip_csum_good(packet); > return ip6 + 1; > } else { > ip = netdev_tnl_ip_hdr(eth); > ip->ip_tot_len = htons(*ip_tot_size); > /* Postpone checksum to when the packet is pushed to the port. */ > - dp_packet_hwol_set_tx_ipv4(packet); > - dp_packet_hwol_set_tx_ip_csum(packet); > + if (dp_packet_hwol_is_tunnel_geneve(packet) || > + dp_packet_hwol_is_tunnel_vxlan(packet)) { > + dp_packet_hwol_set_tx_outer_ipv4(packet); > + dp_packet_hwol_set_tx_outer_ipv4_csum(packet); > + } else { > + dp_packet_hwol_set_tx_ipv4(packet); > + dp_packet_hwol_set_tx_ip_csum(packet); > + } > + > dp_packet_ol_reset_ip_csum_good(packet); > *ip_tot_size -= IP_HEADER_LEN; > packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; > @@ -226,6 +240,74 @@ udp_extract_tnl_md(struct dp_packet *packet, struct > flow_tnl *tnl, > return udp + 1; > } > > +/* Calculate inner l2 l3 l4 len as tunnel outer header is not > + * encapsulated now. */ > +static void > +dp_packet_tnl_ol_process(struct dp_packet *packet, > + const struct ovs_action_push_tnl *data) > +{ > + struct udp_header *udp = NULL; > + uint8_t opt_len = 0; > + struct eth_header *eth = NULL; > + struct ip_header *ip = NULL; > + struct genevehdr *gnh = NULL; > + > + /* l2 l3 l4 len refer to inner len, tunnel outer > + * header is not encapsulated here. 
*/ > + if (dp_packet_hwol_l4_mask(packet)) { > + ip = dp_packet_l3(packet); > + > + if (ip->ip_proto == IPPROTO_TCP) { > + struct tcp_header *th = dp_packet_l4(packet); > + dp_packet_set_l4_len(packet, TCP_OFFSET(th->tcp_ctl) * 4); > + } else if (ip->ip_proto == IPPROTO_UDP) { > + dp_packet_set_l4_len(packet, UDP_HEADER_LEN); > + } else if (ip->ip_proto == IPPROTO_SCTP) { > + dp_packet_set_l4_len(packet, SCTP_HEADER_LEN); > + } > + > + dp_packet_set_l3_len(packet, (char *) dp_packet_l4(packet) - > + (char *) dp_packet_l3(packet)); > + > + if (data->tnl_type == OVS_VPORT_TYPE_GENEVE || > + data->tnl_type == OVS_VPORT_TYPE_VXLAN) { > + > + if (IP_VER(ip->ip_ihl_ver) == 4) { > + dp_packet_hwol_set_tx_ipv4(packet); > + dp_packet_hwol_tx_ip_csum(packet); CID 425094: (#1 of 1): Unchecked return value (CHECKED_RETURN) 4. check_return: Calling dp_packet_hwol_tx_ip_csum without checking return value (as is done elsewhere 9 out of 11 times). dp_packet_hwol_tx_ip_csum() only tests whether the packet is marked for IPv4 checksum offloading, and its result is discarded here, so the call has no effect. I guess this needs to be dp_packet_hwol_set_tx_ip_csum()? > + } else if (IP_VER(ip->ip_ihl_ver) == 6) { > + dp_packet_hwol_set_tx_ipv6(packet); > + } > + } > + > + /* Attention please, tunnel inner l2 len is consist of udp header > + * len and tunnel header len and inner l2 len. 
*/ > + if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) { > + eth = (struct eth_header *)(data->header); > + ip = (struct ip_header *)(eth + 1); > + udp = (struct udp_header *)(ip + 1); > + gnh = (struct genevehdr *)(udp + 1); > + opt_len = gnh->opt_len * 4; > + dp_packet_hwol_set_tunnel_geneve(packet); > + dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) - > + (char *) dp_packet_eth(packet) + > + GENEVE_BASE_HLEN + opt_len); > + > + packet->inner_l3_ofs = packet->l3_ofs + GENEVE_BASE_HLEN + > opt_len; > + packet->inner_l4_ofs = packet->l4_ofs + GENEVE_BASE_HLEN + > opt_len; > + > + } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { > + dp_packet_hwol_set_tunnel_vxlan(packet); > + dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) - > + (char *) dp_packet_eth(packet) + > + VXLAN_HLEN); > + > + packet->inner_l3_ofs = packet->l3_ofs + VXLAN_HLEN; > + packet->inner_l4_ofs = packet->l4_ofs + VXLAN_HLEN; > + } > + } > +} > + > void > netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, > struct dp_packet *packet, > @@ -234,6 +316,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev > OVS_UNUSED, > struct udp_header *udp; > int ip_tot_size; > > + dp_packet_tnl_ol_process(packet, data); > udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, > &ip_tot_size, 0); > > @@ -241,13 +324,21 @@ netdev_tnl_push_udp_header(const struct netdev *netdev > OVS_UNUSED, > udp->udp_src = netdev_tnl_get_src_port(packet); > udp->udp_len = htons(ip_tot_size); > > - /* Postpone checksum to the egress netdev. 
*/ > - dp_packet_hwol_set_csum_udp(packet); > if (udp->udp_csum) { > dp_packet_ol_reset_l4_csum_good(packet); > + if (dp_packet_hwol_is_tunnel_geneve(packet) || > + dp_packet_hwol_is_tunnel_vxlan(packet)) { > + dp_packet_hwol_set_outer_udp_csum(packet); > + } else { > + dp_packet_hwol_set_csum_udp(packet); > + } > } else { > dp_packet_ol_set_l4_csum_good(packet); > } > + > + packet->inner_l3_ofs += packet->l4_ofs; > + packet->inner_l4_ofs += packet->l4_ofs; > + > } > > static void * > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index a7393c7ce..22840a058 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -43,6 +43,10 @@ enum netdev_ol_flags { > NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2, > NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3, > NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4, > + NETDEV_TX_VXLAN_TNL_TSO = 1 << 5, > + NETDEV_TX_GENEVE_TNL_TSO = 1 << 6, > + NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7, > + NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8, > }; > > /* A network device (e.g. an Ethernet device). 
> diff --git a/lib/netdev.c b/lib/netdev.c > index 3ed8049f7..cc2bdbcd2 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -69,6 +69,8 @@ COVERAGE_DEFINE(netdev_received); > COVERAGE_DEFINE(netdev_sent); > COVERAGE_DEFINE(netdev_add_router); > COVERAGE_DEFINE(netdev_get_stats); > +COVERAGE_DEFINE(netdev_vxlan_tso_drops); > +COVERAGE_DEFINE(netdev_geneve_tso_drops); > COVERAGE_DEFINE(netdev_push_header_drops); > COVERAGE_DEFINE(netdev_soft_seg_good); > COVERAGE_DEFINE(netdev_soft_seg_drops); > @@ -912,6 +914,23 @@ netdev_send(struct netdev *netdev, int qid, struct > dp_packet_batch *batch, > !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > if (dp_packet_hwol_is_tso(packet)) { > + if (dp_packet_hwol_is_tunnel_vxlan(packet) > + && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) { > + VLOG_ERR_RL(&rl, "%s: No VXLAN TSO support", > + netdev_get_name(netdev)); > + COVERAGE_INC(netdev_vxlan_tso_drops); > + dp_packet_delete_batch(batch, true); > + return false; > + } > + > + if (dp_packet_hwol_is_tunnel_geneve(packet) > + && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) { > + VLOG_ERR_RL(&rl, "%s: No GENEVE TSO support", > + netdev_get_name(netdev)); > + COVERAGE_INC(netdev_geneve_tso_drops); > + dp_packet_delete_batch(batch, true); > + return false; > + } > return netdev_send_tso(netdev, qid, batch, concurrent_txq); > } > } > @@ -990,17 +1009,31 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { > + if (OVS_UNLIKELY(data->tnl_type != OVS_VPORT_TYPE_GENEVE && > + data->tnl_type != OVS_VPORT_TYPE_VXLAN && > + dp_packet_hwol_is_tso(packet))) { > COVERAGE_INC(netdev_push_header_drops); > dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is " > - "not supported: packet dropped", > - netdev_get_name(netdev)); > + VLOG_WARN_RL(&rl, "%s: 
Tunneling packets with TSO is not " > + "supported for %s tunnels: packet dropped", > + netdev_get_name(netdev), netdev_get_type(netdev)); > } else { > - /* The packet is going to be encapsulated and there is > - * no support yet for inner network header csum offloading. */ > - dp_packet_ol_send_prepare(packet, 0); > - > + if (data->tnl_type != OVS_VPORT_TYPE_GENEVE && > + data->tnl_type != OVS_VPORT_TYPE_VXLAN) { > + dp_packet_ol_send_prepare(packet, 0); > + } else if (dp_packet_hwol_is_tunnel_geneve(packet) || > + dp_packet_hwol_is_tunnel_vxlan(packet)) { > + if (dp_packet_hwol_is_tso(packet)) { > + COVERAGE_INC(netdev_push_header_drops); > + dp_packet_delete(packet); > + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is not > " > + "supported with multiple levels of " > + "VXLAN or GENEVE encapsulation.", > + netdev_get_name(netdev)); > + continue; > + } > + dp_packet_ol_send_prepare(packet, 0); > + } > netdev->netdev_class->push_header(netdev, packet, data); > > pkt_metadata_init(&packet->md, data->out_port); > @@ -1446,6 +1479,10 @@ netdev_get_status(const struct netdev *netdev, struct > smap *smap) > OL_ADD_STAT("udp_csum", NETDEV_TX_OFFLOAD_UDP_CKSUM); > OL_ADD_STAT("sctp_csum", NETDEV_TX_OFFLOAD_SCTP_CKSUM); > OL_ADD_STAT("tcp_seg", NETDEV_TX_OFFLOAD_TCP_TSO); > + OL_ADD_STAT("vxlan_tso", NETDEV_TX_VXLAN_TNL_TSO); > + OL_ADD_STAT("geneve_tso", NETDEV_TX_GENEVE_TNL_TSO); > + OL_ADD_STAT("out_ip_csum", NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); > + OL_ADD_STAT("out_udp_csum", NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); > #undef OL_ADD_STAT > > err = 0; > diff --git a/lib/packets.c b/lib/packets.c > index dab823ba2..d9e41346e 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -1997,9 +1997,9 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) > /* Set TCP checksum field in packet 'p' with complete checksum. > * The packet must have the L3 and L4 offsets. 
*/ > void > -packet_tcp_complete_csum(struct dp_packet *p) > +packet_tcp_complete_csum(struct dp_packet *p, bool inner) > { > - struct tcp_header *tcp = dp_packet_l4(p); > + struct tcp_header *tcp = (inner) ? dp_packet_inner_l4(p) : > dp_packet_l4(p); > > tcp->tcp_csum = 0; > if (dp_packet_hwol_is_ipv4(p)) { > @@ -2020,9 +2020,9 @@ packet_tcp_complete_csum(struct dp_packet *p) > /* Set UDP checksum field in packet 'p' with complete checksum. > * The packet must have the L3 and L4 offsets. */ > void > -packet_udp_complete_csum(struct dp_packet *p) > +packet_udp_complete_csum(struct dp_packet *p, bool inner) > { > - struct udp_header *udp = dp_packet_l4(p); > + struct udp_header *udp = (inner) ? dp_packet_inner_l4(p) : > dp_packet_l4(p); > > /* Skip csum calculation if the udp_csum is zero. */ > if (!udp->udp_csum) { > @@ -2052,9 +2052,9 @@ packet_udp_complete_csum(struct dp_packet *p) > /* Set SCTP checksum field in packet 'p' with complete checksum. > * The packet must have the L3 and L4 offsets. */ > void > -packet_sctp_complete_csum(struct dp_packet *p) > +packet_sctp_complete_csum(struct dp_packet *p, bool inner) > { > - struct sctp_header *sh = dp_packet_l4(p); > + struct sctp_header *sh = (inner) ? 
dp_packet_inner_l4(p) : > dp_packet_l4(p); > uint16_t tp_len = dp_packet_l4_size(p); > ovs_be32 csum; > > diff --git a/lib/packets.h b/lib/packets.h > index 12245b764..8b6994809 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -1682,9 +1682,9 @@ uint32_t packet_csum_pseudoheader(const struct > ip_header *); > bool packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr, > bool *first_frag); > void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); > -void packet_tcp_complete_csum(struct dp_packet *); > -void packet_udp_complete_csum(struct dp_packet *); > -void packet_sctp_complete_csum(struct dp_packet *); > +void packet_tcp_complete_csum(struct dp_packet *, bool is_inner); > +void packet_udp_complete_csum(struct dp_packet *, bool is_inner); > +void packet_sctp_complete_csum(struct dp_packet *, bool is_inner); > > #define DNS_HEADER_LEN 12 > struct dns_header { > diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at > index c9474af0a..24c24207b 100644 > --- a/tests/dpif-netdev.at > +++ b/tests/dpif-netdev.at > @@ -658,11 +658,11 @@ OVS_VSWITCHD_START( > other-config:datapath-id=1234 fail-mode=secure]) > > AT_CHECK([ovs-vsctl get interface p1 status | sed -n 's/^{\(.*\).*}$/\1/p'], > [0], [dnl > -tx_ip_csum_offload="false", tx_sctp_csum_offload="false", > tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", > tx_udp_csum_offload="false" > +tx_geneve_tso_offload="false", tx_ip_csum_offload="false", > tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", > tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", > tx_tcp_seg_offload="false", tx_udp_csum_offload="false", > tx_vxlan_tso_offload="false" > ], []) > > AT_CHECK([ovs-vsctl get interface br0 status | sed -n > 's/^{\(.*\).*}$/\1/p'], [0], [dnl > -tx_ip_csum_offload="false", tx_sctp_csum_offload="false", > tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", > tx_udp_csum_offload="false" > +tx_geneve_tso_offload="false", tx_ip_csum_offload="false", > 
tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", > tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", > tx_tcp_seg_offload="false", tx_udp_csum_offload="false", > tx_vxlan_tso_offload="false" > ], []) > > OVS_VSWITCHD_STOP > -- > 2.39.3 > > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
