On 10/31/23 20:51, Mike Pattrick wrote:
> From: Flavio Leitner <[email protected]>
> 
> This provides a software implementation in the case
> the egress netdev doesn't support segmentation in hardware.
> 
> The challenge here is to guarantee packet ordering in the
> original batch that may be full of TSO packets. Each TSO
> packet can go up to ~64kB, so with segment size of 1440
> that means about 44 packets for each TSO. Each batch has
> 32 packets, so the total batch amounts to 1408 normal
> packets.
> 
> The segmentation estimates the total number of packets
> and then the total number of batches. Then allocate
> enough memory and finally do the work.
> 
> Finally each batch is sent in order to the netdev.
> 
> Signed-off-by: Flavio Leitner <[email protected]>
> Co-authored-by: Mike Pattrick <[email protected]>
> Signed-off-by: Mike Pattrick <[email protected]>
> 
> ---
> 
> v4:
>  - Various formatting changes
>  - Fixed memory leak in soft-gso code if packet is flagged
>    for GSO but incorrectly lacks segment size.
> 
> v5:
>  - Corrected indentation and comments
>  - Fixed pointer arithmatic issue
>  - Added a test to exercise software gso code
> ---
>  lib/automake.mk         |   2 +
>  lib/dp-packet-gso.c     | 168 ++++++++++++++++++++++++++++++++++++++++
>  lib/dp-packet-gso.h     |  23 ++++++
>  lib/dp-packet.h         |   7 ++
>  lib/netdev-dpdk.c       |  44 ++++++++---
>  lib/netdev.c            | 139 +++++++++++++++++++++------------
>  lib/packets.c           |   4 +-
>  tests/system-traffic.at |  45 +++++++++++
>  8 files changed, 367 insertions(+), 65 deletions(-)
>  create mode 100644 lib/dp-packet-gso.c
>  create mode 100644 lib/dp-packet-gso.h
> 
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 24b0ffefe..82dd85c5f 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \
>       lib/dpctl.h \
>       lib/dp-packet.h \
>       lib/dp-packet.c \
> +     lib/dp-packet-gso.c \
> +     lib/dp-packet-gso.h \
>       lib/dpdk.h \
>       lib/dpif-netdev-extract-study.c \
>       lib/dpif-netdev-lookup.h \
> diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
> new file mode 100644
> index 000000000..44dc8061e
> --- /dev/null
> +++ b/lib/dp-packet-gso.c
> @@ -0,0 +1,168 @@
> +/*
> + * Copyright (c) 2023 Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +#include "dp-packet.h"
> +#include "dp-packet-gso.h"
> +#include "netdev-provider.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(dp_packet_gso);
> +
> +/* Retuns a new packet that is a segment of packet 'p'.
> + *
> + * The new packet is initialized with 'hdr_len' bytes from the
> + * start of packet 'p' and then appended with 'data_len' bytes
> + * from the 'data' buffer.
> + *
> + * Note: The packet headers are not updated. */
> +static struct dp_packet *
> +dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
> +                      const char *data, size_t data_len)
> +{
> +    struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len,
> +                                                        
> dp_packet_headroom(p));
> +
> +    /* Append the original packet headers and then the payload. */
> +    dp_packet_put(seg, dp_packet_data(p), hdr_len);
> +    dp_packet_put(seg, data, data_len);
> +
> +    /* The new segment should have the same offsets. */
> +    seg->l2_5_ofs = p->l2_5_ofs;
> +    seg->l3_ofs = p->l3_ofs;
> +    seg->l4_ofs = p->l4_ofs;
> +
> +    /* The protocol headers remain the same, so preserve hash and mark. */
> +    *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
> +    *dp_packet_flow_mark_ptr(seg) = *dp_packet_flow_mark_ptr(p);
> +
> +    /* The segment should inherit all the offloading flags from the
> +     * original packet, except for the TCP segmentation, external
> +     * buffer and indirect buffer flags. */
> +    *dp_packet_ol_flags_ptr(seg) = *dp_packet_ol_flags_ptr(p)
> +        & DP_PACKET_OL_SUPPORTED_MASK;
> +
> +    dp_packet_hwol_reset_tcp_seg(seg);
> +
> +    return seg;
> +}
> +
> +/* Returns the calculated number of TCP segments in packet 'p'. */
> +int
> +dp_packet_gso_nr_segs(struct dp_packet *p)
> +{
> +    uint16_t segsz = dp_packet_get_tso_segsz(p);
> +    const char *data_tail;
> +    const char *data_pos;
> +
> +    data_pos = dp_packet_get_tcp_payload(p);
> +    data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
> +
> +    return DIV_ROUND_UP((data_tail - data_pos), segsz);
> +}
> +
> +/* Perform software segmentation on packet 'p'.
> + *
> + * Segments packet 'p' into the array of preallocated batches in 'batches',
> + * updating the 'batches' pointer as needed and returns true.
> + *
> + * Returns false if the packet cannot be segmented. */
> +bool
> +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +    struct dp_packet_batch *curr_batch = *batches;
> +    struct tcp_header *tcp_hdr;
> +    struct ip_header *ip_hdr;
> +    struct dp_packet *seg;
> +    uint16_t tcp_offset;
> +    uint16_t tso_segsz;
> +    uint32_t tcp_seq;
> +    uint16_t ip_id;
> +    int hdr_len;
> +    int seg_len;
> +
> +    tso_segsz = dp_packet_get_tso_segsz(p);
> +    if (!tso_segsz) {
> +        VLOG_WARN_RL(&rl, "GSO packet with len %d with no segment size.",
> +                     dp_packet_size(p));
> +        return false;
> +    }
> +
> +    tcp_hdr = dp_packet_l4(p);
> +    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> +    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
> +    hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
> +              + tcp_offset * 4;
> +    ip_id = 0;
> +    if (dp_packet_hwol_is_ipv4(p)) {
> +        ip_hdr = dp_packet_l3(p);
> +        ip_id = ntohs(ip_hdr->ip_id);
> +    }
> +
> +    const char *data_tail = (char *) dp_packet_tail(p)
> +                            - dp_packet_l2_pad_size(p);
> +    const char *data_pos = dp_packet_get_tcp_payload(p);
> +    int n_segs = dp_packet_gso_nr_segs(p);
> +
> +    for (int i = 0; i < n_segs; i++) {
> +        seg_len = data_tail - data_pos;
> +        if (seg_len > tso_segsz) {
> +            seg_len = tso_segsz;
> +        }
> +
> +        seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
> +        data_pos += seg_len;
> +
> +        /* Update L3 header. */
> +        if (dp_packet_hwol_is_ipv4(seg)) {
> +            ip_hdr = dp_packet_l3(seg);
> +            ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
> +                                       dp_packet_l4_size(seg));
> +            ip_hdr->ip_id = htons(ip_id);
> +            ip_hdr->ip_csum = 0;
> +            ip_id++;
> +        } else {
> +            struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg);
> +
> +            ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> +                = htons(sizeof *ip_hdr + dp_packet_l4_size(seg));
> +        }
> +
> +        /* Update L4 header. */
> +        tcp_hdr = dp_packet_l4(seg);
> +        put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
> +        tcp_seq += seg_len;
> +        if (OVS_LIKELY(i < (n_segs - 1))) {
> +            /* Reset flags PUSH and FIN unless it is the last segment. */
> +            uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl)
> +                                 & ~(TCP_PSH | TCP_FIN);
> +            tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
> +        }
> +
> +        if (dp_packet_batch_is_full(curr_batch)) {
> +            curr_batch++;
> +        }
> +
> +        dp_packet_batch_add(curr_batch, seg);
> +    }
> +
> +    *batches = curr_batch;
> +    return true;
> +}
> diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h
> new file mode 100644
> index 000000000..9c282fb86
> --- /dev/null
> +++ b/lib/dp-packet-gso.h
> @@ -0,0 +1,23 @@
> +/*
> + * Copyright (c) 2023 Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef DP_PACKET_GSO_H
> +#define DP_PACKET_GSO_H 1
> +
> +bool dp_packet_gso(struct dp_packet *, struct dp_packet_batch **);
> +int dp_packet_gso_nr_segs(struct dp_packet *);
> +
> +#endif /* dp-packet-gso.h */
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 6a924f3ff..239d7a3d4 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -1131,6 +1131,13 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
>      *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
>  }
> 
> +/* Resets TCP Segmentation flag in packet 'p'. */
> +static inline void
> +dp_packet_hwol_reset_tcp_seg(struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_TCP_SEG;
> +}
> +
>  /* Returns 'true' if the IP header has good integrity and the
>   * checksum in it is complete. */
>  static inline bool
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 9f20cc689..6af876ad3 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2438,6 +2438,7 @@ static bool
>  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  {
>      struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
> +    struct tcp_header *th;
> 
>      if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK
>                              | RTE_MBUF_F_TX_TCP_SEG))) {
> @@ -2450,27 +2451,36 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, 
> struct rte_mbuf *mbuf)
>      mbuf->l4_len = 0;
>      mbuf->outer_l2_len = 0;
>      mbuf->outer_l3_len = 0;
> +    th = dp_packet_l4(pkt);
> 
>      if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
> -        struct tcp_header *th = dp_packet_l4(pkt);
> -        int hdr_len;
> -
>          if (!th) {
>              VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
>                           " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
>              return false;
>          }
> +    }
> +
> +    if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) {
> +        if (!th) {
> +            VLOG_WARN_RL(&rl, "%s: TCP offloading without L4 header"
> +                         " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
> +            return false;
> +        }
> 
>          mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
> -        mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
> -        hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
>          mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
> -        if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) 
> {
> -            VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. "
> -                         "hdr: %"PRIu32", gso: %"PRIu32", max len: 
> %"PRIu32"",
> -                         dev->up.name, hdr_len, mbuf->tso_segsz,
> -                         dev->max_packet_len);
> -            return false;
> +
> +        if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
> +            int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
> +            if (OVS_UNLIKELY((hdr_len +
> +                              mbuf->tso_segsz) > dev->max_packet_len)) {
> +                VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: %"PRIu32", 
> "
> +                             "gso: %"PRIu32", max len: %"PRIu32"",
> +                             dev->up.name, hdr_len, mbuf->tso_segsz,
> +                             dev->max_packet_len);
> +                return false;
> +            }
>          }
> 
>          if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
> @@ -2858,6 +2868,7 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, 
> struct dp_packet *pkt_orig)
>      mbuf_dest->packet_type = pkt_orig->mbuf.packet_type;
>      mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags &
>                              ~(RTE_MBUF_F_EXTERNAL | RTE_MBUF_F_INDIRECT));
> +    mbuf_dest->tso_segsz = pkt_orig->mbuf.tso_segsz;
> 
>      memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
>             sizeof(struct dp_packet) - offsetof(struct dp_packet, 
> l2_pad_size));
> @@ -2916,11 +2927,20 @@ netdev_dpdk_common_send(struct netdev *netdev, struct 
> dp_packet_batch *batch,
>      struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      size_t cnt, pkt_cnt = dp_packet_batch_size(batch);
> +    struct dp_packet *packet;
> +    bool need_copy = false;
> 
>      memset(stats, 0, sizeof *stats);
> 
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        if (packet->source != DPBUF_DPDK) {
> +            need_copy = true;
> +            break;
> +        }
> +    }
> +
>      /* Copy dp-packets to mbufs. */
> -    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> +    if (OVS_UNLIKELY(need_copy)) {
>          cnt = dpdk_copy_batch_to_mbuf(netdev, batch);
>          stats->tx_failure_drops += pkt_cnt - cnt;
>          pkt_cnt = cnt;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index e5ac7713d..3ed8049f7 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -35,6 +35,7 @@
>  #include "coverage.h"
>  #include "dpif.h"
>  #include "dp-packet.h"
> +#include "dp-packet-gso.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "fatal-signal.h"
>  #include "hash.h"
> @@ -56,6 +57,7 @@
>  #include "svec.h"
>  #include "openvswitch/vlog.h"
>  #include "flow.h"
> +#include "userspace-tso.h"
>  #include "util.h"
>  #ifdef __linux__
>  #include "tc.h"
> @@ -67,8 +69,9 @@ COVERAGE_DEFINE(netdev_received);
>  COVERAGE_DEFINE(netdev_sent);
>  COVERAGE_DEFINE(netdev_add_router);
>  COVERAGE_DEFINE(netdev_get_stats);
> -COVERAGE_DEFINE(netdev_send_prepare_drops);
>  COVERAGE_DEFINE(netdev_push_header_drops);
> +COVERAGE_DEFINE(netdev_soft_seg_good);
> +COVERAGE_DEFINE(netdev_soft_seg_drops);
> 
>  struct netdev_saved_flags {
>      struct netdev *netdev;
> @@ -792,60 +795,84 @@ netdev_get_pt_mode(const struct netdev *netdev)
>              : NETDEV_PT_LEGACY_L2);
>  }
> 
> -/* Check if a 'packet' is compatible with 'netdev_flags'.
> - * If a packet is incompatible, return 'false' with the 'errormsg'
> - * pointing to a reason. */
> -static bool
> -netdev_send_prepare_packet(const uint64_t netdev_flags,
> -                           struct dp_packet *packet, char **errormsg)
> -{
> -    if (dp_packet_hwol_is_tso(packet)
> -        && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> -            /* Fall back to GSO in software. */
> -            VLOG_ERR_BUF(errormsg, "No TSO support");
> -            return false;
> -    }
> -
> -    /* Packet with IP csum offloading enabled was received with verified 
> csum.
> -     * Leave the IP csum offloading enabled even with good checksum to the
> -     * netdev to decide what would be the best to do.
> -     * Provide a software fallback in case the device doesn't support IP csum
> -     * offloading. Note: Encapsulated packet must have the inner IP header
> -     * csum already calculated.
> -     * Packet with L4 csum offloading enabled was received with verified 
> csum.
> -     * Leave the L4 csum offloading enabled even with good checksum for the
> -     * netdev to decide what would be the best to do.
> -     * Netdev that requires pseudo header csum needs to calculate that.
> -     * Provide a software fallback in case the netdev doesn't support L4 csum
> -     * offloading. Note: Encapsulated packet must have the inner L4 header
> -     * csum already calculated. */
> -    dp_packet_ol_send_prepare(packet, netdev_flags);
> -
> -    return true;
> -}
> -
> -/* Check if each packet in 'batch' is compatible with 'netdev' features,
> - * otherwise either fall back to software implementation or drop it. */
> -static void
> -netdev_send_prepare_batch(const struct netdev *netdev,
> -                          struct dp_packet_batch *batch)
> +/* Attempts to segment GSO flagged packets and send them as multiple bundles.
> + * This function is only used if at least one packet in the current batch is
> + * flagged for TSO and the netdev does not support this.
> + *
> + * The return value is 0 if all batches sent successfully, and an error code
> + * from netdev_class->send() if at least one batch failed to send. */
> +static int
> +netdev_send_tso(struct netdev *netdev, int qid,
> +                struct dp_packet_batch *batch, bool concurrent_txq)
>  {
> +    struct dp_packet_batch *batches;
>      struct dp_packet *packet;
> -    size_t i, size = dp_packet_batch_size(batch);
> -
> -    DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
> -        char *errormsg = NULL;
> +    int retval = 0;
> +    int n_packets;
> +    int n_batches;
> +    int error;
> 
> -        if (netdev_send_prepare_packet(netdev->ol_flags, packet, &errormsg)) 
> {
> -            dp_packet_batch_refill(batch, packet, i);
> +    /* Calculate the total number of packets in the batch after
> +     * the segmentation. */
> +    n_packets = 0;
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        if (dp_packet_hwol_is_tso(packet)) {
> +            n_packets += dp_packet_gso_nr_segs(packet);
>          } else {
> +            n_packets++;
> +        }
> +    }
> +
> +    if (!n_packets) {
> +        return 0;
> +    }
> +
> +    /* Allocate enough batches to store all the packets in order. */
> +    n_batches = DIV_ROUND_UP(n_packets, NETDEV_MAX_BURST);
> +    batches = xmalloc(n_batches * sizeof *batches);
> +
> +    struct dp_packet_batch *curr_batch = batches;
> +    struct dp_packet_batch *last_batch = &batches[n_batches - 1];
> +    for (curr_batch = batches; curr_batch <= last_batch; curr_batch++) {
> +        dp_packet_batch_init(curr_batch);
> +    }
> +
> +    /* Do the packet segmentation if TSO is flagged. */
> +    size_t size = dp_packet_batch_size(batch);
> +    size_t k;
> +    curr_batch = batches;
> +    DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) {
> +        if (dp_packet_hwol_is_tso(packet)) {
> +            if (dp_packet_gso(packet, &curr_batch)) {
> +                COVERAGE_INC(netdev_soft_seg_good);
> +            } else {
> +                COVERAGE_INC(netdev_soft_seg_drops);
> +            }
>              dp_packet_delete(packet);
> -            COVERAGE_INC(netdev_send_prepare_drops);
> -            VLOG_WARN_RL(&rl, "%s: Packet dropped: %s",
> -                         netdev_get_name(netdev), errormsg);
> -            free(errormsg);
> +        } else {
> +            if (dp_packet_batch_is_full(curr_batch)) {
> +                curr_batch++;
> +            }
> +
> +            dp_packet_batch_add(curr_batch, packet);
> +        }
> +    }
> +
> +    for (curr_batch = batches; curr_batch <= last_batch; curr_batch++) {
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, curr_batch) {
> +            dp_packet_ol_send_prepare(packet, netdev->ol_flags);
> +        }
> +
> +        error = netdev->netdev_class->send(netdev, qid, curr_batch,
> +                                           concurrent_txq);
> +        if (!error) {
> +            COVERAGE_INC(netdev_sent);
> +        } else {
> +            retval = error;
>          }
>      }
> +    free(batches);
> +    return retval;
>  }
> 
>  /* Sends 'batch' on 'netdev'.  Returns 0 if successful (for every packet),
> @@ -877,11 +904,21 @@ int
>  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
>              bool concurrent_txq)
>  {
> +    const uint64_t netdev_flags = netdev->ol_flags;
> +    struct dp_packet *packet;
>      int error;
> 
> -    netdev_send_prepare_batch(netdev, batch);
> -    if (OVS_UNLIKELY(dp_packet_batch_is_empty(batch))) {
> -        return 0;
> +    if (userspace_tso_enabled() &&
> +        !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +            if (dp_packet_hwol_is_tso(packet)) {
> +                return netdev_send_tso(netdev, qid, batch, concurrent_txq);
> +            }
> +        }
> +    }
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        dp_packet_ol_send_prepare(packet, netdev_flags);
>      }
> 
>      error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq);
> diff --git a/lib/packets.c b/lib/packets.c
> index 462b51f92..dab823ba2 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -427,7 +427,7 @@ add_mpls(struct dp_packet *packet, ovs_be16 ethtype, 
> ovs_be32 lse,
>      }
> 
>      if (!l3_encap) {
> -        struct mpls_hdr *header = dp_packet_push_uninit(packet, MPLS_HLEN);
> +        struct mpls_hdr *header = dp_packet_resize_l2(packet, MPLS_HLEN);
> 
>          put_16aligned_be32(&header->mpls_lse, lse);
>          packet->l2_5_ofs = 0;
> @@ -513,7 +513,7 @@ push_nsh(struct dp_packet *packet, const struct nsh_hdr 
> *nsh_hdr_src)
>              OVS_NOT_REACHED();
>      }
> 
> -    nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length);
> +    nsh = (struct nsh_hdr *) dp_packet_resize_l2(packet, length);
>      memcpy(nsh, nsh_hdr_src, length);
>      nsh->next_proto = next_proto;
>      packet->packet_type = htonl(PT_NSH);
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 7ea450202..5bb295acb 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -1,5 +1,50 @@
>  AT_BANNER([datapath-sanity])
> 
> +AT_SETUP([datapath - netdev offload software fallback])

While having a system test is fine, it does excersise the netdev-specific
code, it might make sense to have a unit test like we have for checksum
offloading.  e.g. send a large packet from one dummy port and check that
it is correctly split in parts on receive from the other port that doesn't
support TSO.  Might be a separate patch though to avoid blocking this series.

> +AT_SKIP_IF([test $HAVE_NC = no])
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +
> +dnl Test the case where only one side has all checksum and tso offload 
> disabled

Nit: Period at the end of a comment.

> +ethtool -K ovs-p1 tso off
> +ethtool -K ovs-p1 sg off
> +
> +dnl Reinitialize
> +ovs-vsctl del-port ovs-p1
> +ovs-vsctl add-port br0 ovs-p1

Above commends should be under AT_CHECK.

> +
> +NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
> FORMAT_PING], [0], [dnl

We recently changed all the tests to use -W instead of -w.

> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +NS_CHECK_EXEC([at_ns1], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.1 | 
> FORMAT_PING], [0], [dnl

Same, -W.

> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +NETNS_DAEMONIZE([at_ns0], [nc -l 1234 > data_0], [nc1.pid])
> +NETNS_DAEMONIZE([at_ns1], [nc -l 1234 > data_1], [nc2.pid])
> +
> +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
> +on_exit 'rm -f payload.bin'

This should not be necessary.  And we'll lose the file on test failure,
while it might be useful for debugging.

> +
> +NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.2 1234 < payload.bin])
> +NS_CHECK_EXEC([at_ns1], [nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
> +
> +dnl Wait until transfer completes
> +OVS_WAIT_WHILE([kill -0 `cat nc1.pid` `cat nc2.pid`])

Nit: Prefer $() to ``.

> +
> +AT_CHECK([diff -q payload.bin data_0], [0], [])
> +AT_CHECK([diff -q payload.bin data_1], [0], [])

Nit: Empty branckets should not be needed.

> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_SETUP([datapath - ping between two ports])
>  OVS_TRAFFIC_VSWITCHD_START()
> 
> --
> 2.39.3

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to