On Tue, Feb 20, 2024 at 11:09 PM Mike Pattrick <[email protected]> wrote:
>
> When sending packets that are flagged as requiring segmentation to an
> interface that doesn't support this feature, send the packet to the TSO
> software fallback instead of dropping it.
>
> Signed-off-by: Mike Pattrick <[email protected]>
Recheck-request: github-robot
> ---
> lib/dp-packet-gso.c | 73 +++++++++++++++++++++++++++++++++--------
> lib/dp-packet.h | 26 +++++++++++++++
> lib/netdev-native-tnl.c | 8 +++++
> lib/netdev.c | 37 +++++++++------------
> tests/system-traffic.at | 58 ++++++++++++++++++++++++++++++++
> 5 files changed, 167 insertions(+), 35 deletions(-)
>
> diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
> index 847685ad9..f25abf436 100644
> --- a/lib/dp-packet-gso.c
> +++ b/lib/dp-packet-gso.c
> @@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t
> hdr_len,
> seg->l2_5_ofs = p->l2_5_ofs;
> seg->l3_ofs = p->l3_ofs;
> seg->l4_ofs = p->l4_ofs;
> + seg->inner_l3_ofs = p->inner_l3_ofs;
> + seg->inner_l4_ofs = p->inner_l4_ofs;
>
> /* The protocol headers remain the same, so preserve hash and mark. */
> *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
> @@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
> const char *data_tail;
> const char *data_pos;
>
> - data_pos = dp_packet_get_tcp_payload(p);
> + if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> + dp_packet_hwol_is_tunnel_geneve(p)) {
> + data_pos = dp_packet_get_inner_tcp_payload(p);
> + } else {
> + data_pos = dp_packet_get_tcp_payload(p);
> + }
> data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
>
> return DIV_ROUND_UP(data_tail - data_pos, segsz);
> @@ -91,12 +98,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch
> **batches)
> struct tcp_header *tcp_hdr;
> struct ip_header *ip_hdr;
> struct dp_packet *seg;
> + const char *data_pos;
> uint16_t tcp_offset;
> uint16_t tso_segsz;
> + uint16_t ip_id = 0;
> uint32_t tcp_seq;
> - uint16_t ip_id;
> + bool outer_ipv4;
> int hdr_len;
> int seg_len;
> + bool tnl;
>
> tso_segsz = dp_packet_get_tso_segsz(p);
> if (!tso_segsz) {
> @@ -105,20 +115,35 @@ dp_packet_gso(struct dp_packet *p, struct
> dp_packet_batch **batches)
> return false;
> }
>
> - tcp_hdr = dp_packet_l4(p);
> - tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> - tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
> - hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
> - + tcp_offset * 4;
> - ip_id = 0;
> - if (dp_packet_hwol_is_ipv4(p)) {
> + if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> + dp_packet_hwol_is_tunnel_geneve(p)) {
> + data_pos = dp_packet_get_inner_tcp_payload(p);
> + outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
> + tcp_hdr = dp_packet_inner_l4(p);
> + ip_hdr = dp_packet_inner_l3(p);
> + tnl = true;
> + if (outer_ipv4) {
> + ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
> + } else if (dp_packet_hwol_is_ipv4(p)) {
> + ip_id = ntohs(ip_hdr->ip_id);
> + }
> + } else {
> + data_pos = dp_packet_get_tcp_payload(p);
> + outer_ipv4 = dp_packet_hwol_is_ipv4(p);
> + tcp_hdr = dp_packet_l4(p);
> ip_hdr = dp_packet_l3(p);
> - ip_id = ntohs(ip_hdr->ip_id);
> + tnl = false;
> + if (outer_ipv4) {
> + ip_id = ntohs(ip_hdr->ip_id);
> + }
> }
>
> + tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> + tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
> + hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
> + + tcp_offset * 4;
> const char *data_tail = (char *) dp_packet_tail(p)
> - dp_packet_l2_pad_size(p);
> - const char *data_pos = dp_packet_get_tcp_payload(p);
> int n_segs = dp_packet_gso_nr_segs(p);
>
> for (int i = 0; i < n_segs; i++) {
> @@ -130,8 +155,26 @@ dp_packet_gso(struct dp_packet *p, struct
> dp_packet_batch **batches)
> seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
> data_pos += seg_len;
>
> + if (tnl) {
> + /* Update tunnel L3 header. */
> + if (dp_packet_hwol_is_ipv4(seg)) {
> + ip_hdr = dp_packet_inner_l3(seg);
> + ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
> + dp_packet_inner_l4_size(seg));
> + ip_hdr->ip_id = htons(ip_id);
> + ip_hdr->ip_csum = 0;
> + ip_id++;
> + } else {
> + struct ovs_16aligned_ip6_hdr *ip6_hdr;
> +
> + ip6_hdr = dp_packet_inner_l3(seg);
> + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> + = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
> + }
> + }
> +
> /* Update L3 header. */
> - if (dp_packet_hwol_is_ipv4(seg)) {
> + if (outer_ipv4) {
> ip_hdr = dp_packet_l3(seg);
> ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
> dp_packet_l4_size(seg));
> @@ -146,7 +189,11 @@ dp_packet_gso(struct dp_packet *p, struct
> dp_packet_batch **batches)
> }
>
> /* Update L4 header. */
> - tcp_hdr = dp_packet_l4(seg);
> + if (tnl) {
> + tcp_hdr = dp_packet_inner_l4(seg);
> + } else {
> + tcp_hdr = dp_packet_l4(seg);
> + }
> put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
> tcp_seq += seg_len;
> if (OVS_LIKELY(i < (n_segs - 1))) {
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 2fa17d814..c3dabc5b0 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -529,6 +529,16 @@ dp_packet_inner_l3(const struct dp_packet *b)
> : NULL;
> }
>
> +static inline size_t
> +dp_packet_inner_l3_size(const struct dp_packet *b)
> +{
> + return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX)
> + ? (const char *) dp_packet_tail(b)
> + - (const char *) dp_packet_inner_l3(b)
> + - dp_packet_l2_pad_size(b)
> + : 0;
> +}
> +
> static inline void *
> dp_packet_inner_l4(const struct dp_packet *b)
> {
> @@ -563,6 +573,22 @@ dp_packet_get_tcp_payload(const struct dp_packet *b)
> return NULL;
> }
>
> +static inline const void *
> +dp_packet_get_inner_tcp_payload(const struct dp_packet *b)
> +{
> + size_t l4_size = dp_packet_inner_l4_size(b);
> +
> + if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
> + struct tcp_header *tcp = dp_packet_inner_l4(b);
> + int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
> +
> + if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
> + return (const char *) tcp + tcp_len;
> + }
> + }
> + return NULL;
> +}
> +
> static inline uint32_t
> dp_packet_get_tcp_payload_length(const struct dp_packet *pkt)
> {
> diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
> index dee9ab344..369b92acc 100644
> --- a/lib/netdev-native-tnl.c
> +++ b/lib/netdev-native-tnl.c
> @@ -348,6 +348,14 @@ netdev_tnl_push_udp_header(const struct netdev *netdev
> OVS_UNUSED,
> if (l4_ofs != UINT16_MAX) {
> packet->inner_l4_ofs = l4_ofs + data->header_len;
> }
> +
> + if (dp_packet_hwol_is_tso(packet)) {
> + uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
> + if (tso_segsz > data->header_len) {
> + tso_segsz -= data->header_len;
> + dp_packet_set_tso_segsz(packet, tso_segsz);
> + }
> + }
> }
>
> static void *
> diff --git a/lib/netdev.c b/lib/netdev.c
> index f2d921ed6..1d59bbe5d 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -69,8 +69,6 @@ COVERAGE_DEFINE(netdev_received);
> COVERAGE_DEFINE(netdev_sent);
> COVERAGE_DEFINE(netdev_add_router);
> COVERAGE_DEFINE(netdev_get_stats);
> -COVERAGE_DEFINE(netdev_vxlan_tso_drops);
> -COVERAGE_DEFINE(netdev_geneve_tso_drops);
> COVERAGE_DEFINE(netdev_push_header_drops);
> COVERAGE_DEFINE(netdev_soft_seg_good);
> COVERAGE_DEFINE(netdev_soft_seg_drops);
> @@ -910,28 +908,23 @@ netdev_send(struct netdev *netdev, int qid, struct
> dp_packet_batch *batch,
> struct dp_packet *packet;
> int error;
>
> - if (userspace_tso_enabled() &&
> - !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> - if (dp_packet_hwol_is_tso(packet)) {
> - if (dp_packet_hwol_is_tunnel_vxlan(packet)
> - && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) {
> - VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support",
> - netdev_get_name(netdev));
> - COVERAGE_INC(netdev_vxlan_tso_drops);
> - dp_packet_delete_batch(batch, true);
> - return false;
> + if (userspace_tso_enabled()) {
> + if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> + if (dp_packet_hwol_is_tso(packet)) {
> + return netdev_send_tso(netdev, qid, batch,
> concurrent_txq);
> }
> -
> - if (dp_packet_hwol_is_tunnel_geneve(packet)
> - && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) {
> - VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support",
> - netdev_get_name(netdev));
> - COVERAGE_INC(netdev_geneve_tso_drops);
> - dp_packet_delete_batch(batch, true);
> - return false;
> + }
> + } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO |
> + NETDEV_TX_GENEVE_TNL_TSO))) {
> + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> + if (!dp_packet_hwol_is_tso(packet)) {
> + continue;
> + }
> + if (dp_packet_hwol_is_tunnel_vxlan(packet) ||
> + dp_packet_hwol_is_tunnel_geneve(packet)) {
> + return netdev_send_tso(netdev, qid, batch,
> concurrent_txq);
> }
> - return netdev_send_tso(netdev, qid, batch, concurrent_txq);
> }
> }
> }
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 98e494abf..c1681506e 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -351,6 +351,64 @@ OVS_WAIT_UNTIL([diff -q payload.bin udp_data])
> OVS_TRAFFIC_VSWITCHD_STOP
> AT_CLEANUP
>
> +AT_SETUP([datapath - tcp over vxlan tunnel with software fallback])
> +AT_SKIP_IF([test $HAVE_NC = no])
> +OVS_CHECK_VXLAN()
> +
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-underlay])
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
> +
> +ADD_NAMESPACES(at_ns0)
> +
> +dnl Set up underlay link from host into the namespace using veth pair.
> +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
> +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
> +AT_CHECK([ip link set dev br-underlay up])
> +
> +dnl Test the case where only one side has all checksum and tso offload
> disabled.
> +AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
> +AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
> +
> +dnl Reinitialize.
> +AT_CHECK([ovs-vsctl del-port ovs-p0])
> +AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
> +
> +dnl Set up tunnel endpoints on OVS outside the namespace and with a native
> +dnl linux device inside the namespace.
> +ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
> +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100],
> [10.1.1.1/24],
> + [id 0 dstport 4789])
> +
> +dnl First, check the underlay.
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 |
> FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +dnl Check that the tunnel is up.
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING],
> [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +dnl Initialize the listener before it is needed.
> +NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid])
> +
> +dnl Verify that ncat is ready.
> +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])])
> +
> +dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
> +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
> +AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
> +
> +dnl Wait until transfer completes before checking.
> +OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
> +AT_CHECK([diff -q payload.bin data2], [0])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
> AT_SETUP([datapath - ping vlan over vxlan tunnel])
> OVS_CHECK_TUNNEL_TSO()
> OVS_CHECK_VXLAN()
> --
> 2.39.3
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev