On Tue, Feb 20, 2024 at 11:09 PM Mike Pattrick <m...@redhat.com> wrote: > > When sending packets that are flagged as requiring segmentation to an > interface that doesn't support this feature, send the packet to the TSO > software fallback instead of dropping it. > > Signed-off-by: Mike Pattrick <m...@redhat.com>
Recheck-request: github-robot > --- > lib/dp-packet-gso.c | 73 +++++++++++++++++++++++++++++++++-------- > lib/dp-packet.h | 26 +++++++++++++++ > lib/netdev-native-tnl.c | 8 +++++ > lib/netdev.c | 37 +++++++++------------ > tests/system-traffic.at | 58 ++++++++++++++++++++++++++++++++ > 5 files changed, 167 insertions(+), 35 deletions(-) > > diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c > index 847685ad9..f25abf436 100644 > --- a/lib/dp-packet-gso.c > +++ b/lib/dp-packet-gso.c > @@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t > hdr_len, > seg->l2_5_ofs = p->l2_5_ofs; > seg->l3_ofs = p->l3_ofs; > seg->l4_ofs = p->l4_ofs; > + seg->inner_l3_ofs = p->inner_l3_ofs; > + seg->inner_l4_ofs = p->inner_l4_ofs; > > /* The protocol headers remain the same, so preserve hash and mark. */ > *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p); > @@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p) > const char *data_tail; > const char *data_pos; > > - data_pos = dp_packet_get_tcp_payload(p); > + if (dp_packet_hwol_is_tunnel_vxlan(p) || > + dp_packet_hwol_is_tunnel_geneve(p)) { > + data_pos = dp_packet_get_inner_tcp_payload(p); > + } else { > + data_pos = dp_packet_get_tcp_payload(p); > + } > data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p); > > return DIV_ROUND_UP(data_tail - data_pos, segsz); > @@ -91,12 +98,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch > **batches) > struct tcp_header *tcp_hdr; > struct ip_header *ip_hdr; > struct dp_packet *seg; > + const char *data_pos; > uint16_t tcp_offset; > uint16_t tso_segsz; > + uint16_t ip_id = 0; > uint32_t tcp_seq; > - uint16_t ip_id; > + bool outer_ipv4; > int hdr_len; > int seg_len; > + bool tnl; > > tso_segsz = dp_packet_get_tso_segsz(p); > if (!tso_segsz) { > @@ -105,20 +115,35 @@ dp_packet_gso(struct dp_packet *p, struct > dp_packet_batch **batches) > return false; > } > > - tcp_hdr = dp_packet_l4(p); > - tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > - 
tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); > - hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p)) > - + tcp_offset * 4; > - ip_id = 0; > - if (dp_packet_hwol_is_ipv4(p)) { > + if (dp_packet_hwol_is_tunnel_vxlan(p) || > + dp_packet_hwol_is_tunnel_geneve(p)) { > + data_pos = dp_packet_get_inner_tcp_payload(p); > + outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p); > + tcp_hdr = dp_packet_inner_l4(p); > + ip_hdr = dp_packet_inner_l3(p); > + tnl = true; > + if (outer_ipv4) { > + ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id); > + } else if (dp_packet_hwol_is_ipv4(p)) { > + ip_id = ntohs(ip_hdr->ip_id); > + } > + } else { > + data_pos = dp_packet_get_tcp_payload(p); > + outer_ipv4 = dp_packet_hwol_is_ipv4(p); > + tcp_hdr = dp_packet_l4(p); > ip_hdr = dp_packet_l3(p); > - ip_id = ntohs(ip_hdr->ip_id); > + tnl = false; > + if (outer_ipv4) { > + ip_id = ntohs(ip_hdr->ip_id); > + } > } > > + tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > + tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); > + hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p)) > + + tcp_offset * 4; > const char *data_tail = (char *) dp_packet_tail(p) > - dp_packet_l2_pad_size(p); > - const char *data_pos = dp_packet_get_tcp_payload(p); > int n_segs = dp_packet_gso_nr_segs(p); > > for (int i = 0; i < n_segs; i++) { > @@ -130,8 +155,26 @@ dp_packet_gso(struct dp_packet *p, struct > dp_packet_batch **batches) > seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); > data_pos += seg_len; > > + if (tnl) { > + /* Update tunnel L3 header. 
*/ > + if (dp_packet_hwol_is_ipv4(seg)) { > + ip_hdr = dp_packet_inner_l3(seg); > + ip_hdr->ip_tot_len = htons(sizeof *ip_hdr + > + dp_packet_inner_l4_size(seg)); > + ip_hdr->ip_id = htons(ip_id); > + ip_hdr->ip_csum = 0; > + ip_id++; > + } else { > + struct ovs_16aligned_ip6_hdr *ip6_hdr; > + > + ip6_hdr = dp_packet_inner_l3(seg); > + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen > + = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr); > + } > + } > + > /* Update L3 header. */ > - if (dp_packet_hwol_is_ipv4(seg)) { > + if (outer_ipv4) { > ip_hdr = dp_packet_l3(seg); > ip_hdr->ip_tot_len = htons(sizeof *ip_hdr + > dp_packet_l4_size(seg)); > @@ -146,7 +189,11 @@ dp_packet_gso(struct dp_packet *p, struct > dp_packet_batch **batches) > } > > /* Update L4 header. */ > - tcp_hdr = dp_packet_l4(seg); > + if (tnl) { > + tcp_hdr = dp_packet_inner_l4(seg); > + } else { > + tcp_hdr = dp_packet_l4(seg); > + } > put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); > tcp_seq += seg_len; > if (OVS_LIKELY(i < (n_segs - 1))) { > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 2fa17d814..c3dabc5b0 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -529,6 +529,16 @@ dp_packet_inner_l3(const struct dp_packet *b) > : NULL; > } > > +static inline size_t > +dp_packet_inner_l3_size(const struct dp_packet *b) > +{ > + return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX) > + ? 
(const char *) dp_packet_tail(b) > + - (const char *) dp_packet_inner_l3(b) > + - dp_packet_l2_pad_size(b) > + : 0; > +} > + > static inline void * > dp_packet_inner_l4(const struct dp_packet *b) > { > @@ -563,6 +573,22 @@ dp_packet_get_tcp_payload(const struct dp_packet *b) > return NULL; > } > > +static inline const void * > +dp_packet_get_inner_tcp_payload(const struct dp_packet *b) > +{ > + size_t l4_size = dp_packet_inner_l4_size(b); > + > + if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) { > + struct tcp_header *tcp = dp_packet_inner_l4(b); > + int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4; > + > + if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) { > + return (const char *) tcp + tcp_len; > + } > + } > + return NULL; > +} > + > static inline uint32_t > dp_packet_get_tcp_payload_length(const struct dp_packet *pkt) > { > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index dee9ab344..369b92acc 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -348,6 +348,14 @@ netdev_tnl_push_udp_header(const struct netdev *netdev > OVS_UNUSED, > if (l4_ofs != UINT16_MAX) { > packet->inner_l4_ofs = l4_ofs + data->header_len; > } > + > + if (dp_packet_hwol_is_tso(packet)) { > + uint16_t tso_segsz = dp_packet_get_tso_segsz(packet); > + if (tso_segsz > data->header_len) { > + tso_segsz -= data->header_len; > + dp_packet_set_tso_segsz(packet, tso_segsz); > + } > + } > } > > static void * > diff --git a/lib/netdev.c b/lib/netdev.c > index f2d921ed6..1d59bbe5d 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -69,8 +69,6 @@ COVERAGE_DEFINE(netdev_received); > COVERAGE_DEFINE(netdev_sent); > COVERAGE_DEFINE(netdev_add_router); > COVERAGE_DEFINE(netdev_get_stats); > -COVERAGE_DEFINE(netdev_vxlan_tso_drops); > -COVERAGE_DEFINE(netdev_geneve_tso_drops); > COVERAGE_DEFINE(netdev_push_header_drops); > COVERAGE_DEFINE(netdev_soft_seg_good); > COVERAGE_DEFINE(netdev_soft_seg_drops); > @@ -910,28 +908,23 @@ netdev_send(struct netdev 
*netdev, int qid, struct > dp_packet_batch *batch, > struct dp_packet *packet; > int error; > > - if (userspace_tso_enabled() && > - !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > - if (dp_packet_hwol_is_tso(packet)) { > - if (dp_packet_hwol_is_tunnel_vxlan(packet) > - && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) { > - VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support", > - netdev_get_name(netdev)); > - COVERAGE_INC(netdev_vxlan_tso_drops); > - dp_packet_delete_batch(batch, true); > - return false; > + if (userspace_tso_enabled()) { > + if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + return netdev_send_tso(netdev, qid, batch, > concurrent_txq); > } > - > - if (dp_packet_hwol_is_tunnel_geneve(packet) > - && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) { > - VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support", > - netdev_get_name(netdev)); > - COVERAGE_INC(netdev_geneve_tso_drops); > - dp_packet_delete_batch(batch, true); > - return false; > + } > + } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO | > + NETDEV_TX_GENEVE_TNL_TSO))) { > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (!dp_packet_hwol_is_tso(packet)) { > + continue; > + } > + if (dp_packet_hwol_is_tunnel_vxlan(packet) || > + dp_packet_hwol_is_tunnel_geneve(packet)) { > + return netdev_send_tso(netdev, qid, batch, > concurrent_txq); > } > - return netdev_send_tso(netdev, qid, batch, concurrent_txq); > } > } > } > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 98e494abf..c1681506e 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -351,6 +351,64 @@ OVS_WAIT_UNTIL([diff -q payload.bin udp_data]) > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([datapath - tcp over vxlan tunnel with software fallback]) > +AT_SKIP_IF([test $HAVE_NC = no]) > +OVS_CHECK_VXLAN() > + > +OVS_TRAFFIC_VSWITCHD_START() > 
+ADD_BR([br-underlay]) > + > +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) > +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) > + > +ADD_NAMESPACES(at_ns0) > + > +dnl Set up underlay link from host into the namespace using veth pair. > +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24") > +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) > +AT_CHECK([ip link set dev br-underlay up]) > + > +dnl Test the case where only one side has all checksum and tso offload > disabled. > +AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore]) > +AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore]) > + > +dnl Reinitialize. > +AT_CHECK([ovs-vsctl del-port ovs-p0]) > +AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0]) > + > +dnl Set up tunnel endpoints on OVS outside the namespace and with a native > +dnl linux device inside the namespace. > +ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24]) > +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], > [10.1.1.1/24], > + [id 0 dstport 4789]) > + > +dnl First, check the underlay. > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | > FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +dnl Check that the tunnel is up. > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], > [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +dnl Initialize the listener before it is needed. > +NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid]) > + > +dnl Verify that ncat is ready. > +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])]) > + > +dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled. > +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null]) > +AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin]) > + > +dnl Wait until transfer completes before checking. 
> +OVS_WAIT_WHILE([kill -0 $(cat nc.pid)]) > +AT_CHECK([diff -q payload.bin data2], [0]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > AT_SETUP([datapath - ping vlan over vxlan tunnel]) > OVS_CHECK_TUNNEL_TSO() > OVS_CHECK_VXLAN() > -- > 2.39.3 > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev