On 12/11/2025 17:04, David Marchand via dev wrote:
> Refactor software segmentation dp_packet_gso() using two helpers:
> - dp_packet_gso_seg_new: create a segment from the original packet,
>   copying headers and relevant payload,
> - dp_packet_gso_update_segment: update the headers,
> 
> For dp_packet_gso_seg_new, prefer working on a data offset instead of
> passing a pointer to the packet data.
> This simplifies calling this helper: each segment's data is either of size
>   tso_segsz, or the remaining data for the last segment.
> 
> For dp_packet_gso_update_segment, (inner and outer) ip_id is based
> on the value copied from the original packet + the segment index.
> The handling of the first and last segments is separated from the other
> segments:
> - rather than copying data for the first segment, we can simply resize the
>   original packet (once all segments have been copied); this small
>   optimisation saves one tso_segsz copy,
> - the last segment needs special handling wrt its length and TCP flags.
> 
> Finally, replace the check on tso_segsz presence with an ovs_assert() as
> this code should only be invoked when segmenting.
> Thanks to this, dp_packet_gso() can be declared void, and the
> netdev_soft_seg_drops counter has no reason to exist anymore.
> 
> The code is easier to read this way, and it will be beneficial to the next
> commit.
> 
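Just to spell out my reading of the new ip_id handling, a rough sketch of
the relevant line (copied from the hunk below; the example numbers are mine):

    /* Each segment offsets the ip_id copied from the original packet by its
     * own index, so no per-segment counter has to be carried across calls. */
    ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);

    /* E.g. an original ip_id of 100 split into 3 segments gives 100, 101 and
     * 102.  seg_no 0 leaves the id untouched, which is what allows the
     * trimmed original packet to be reused as the first segment. */

Not a request for changes, just checking my understanding.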

> Note: this patch is easier to read if ignoring whitespace changes.
> 

Can drop this from commit msg

One comment below, but LGTM
Acked-by: Kevin Traynor <[email protected]>

> Signed-off-by: David Marchand <[email protected]>
> ---
>  lib/dp-packet-gso.c | 235 ++++++++++++++++++++++----------------------
>  lib/dp-packet-gso.h |   2 +-
>  lib/netdev.c        |  10 +-
>  3 files changed, 122 insertions(+), 125 deletions(-)
> 
> diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
> index fe7186ddf4..6b8da8b370 100644
> --- a/lib/dp-packet-gso.c
> +++ b/lib/dp-packet-gso.c
> @@ -29,19 +29,19 @@ VLOG_DEFINE_THIS_MODULE(dp_packet_gso);
>   *
>   * The new packet is initialized with 'hdr_len' bytes from the
>   * start of packet 'p' and then appended with 'data_len' bytes
> - * from the 'data' buffer.
> + * from the packet 'p' at offset 'data_off'.
>   *
>   * Note: The packet headers are not updated. */
>  static struct dp_packet *
>  dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
> -                      const char *data, size_t data_len)
> +                      size_t data_off, size_t data_len)
>  {
>      struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len,
>                                                           dp_packet_headroom(p));
>  
>      /* Append the original packet headers and then the payload. */
>      dp_packet_put(seg, dp_packet_data(p), hdr_len);
> -    dp_packet_put(seg, data, data_len);
> +    dp_packet_put(seg, (char *) dp_packet_data(p) + data_off, data_len);
>  
>      /* The new segment should have the same offsets. */
>      seg->l2_5_ofs = p->l2_5_ofs;
> @@ -77,151 +77,154 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
>      return DIV_ROUND_UP(data_length, segsz);
>  }
>  
> -/* Perform software segmentation on packet 'p'.
> - *
> - * Segments packet 'p' into the array of preallocated batches in 'batches',
> - * updating the 'batches' pointer as needed and returns true.
> - *
> - * Returns false if the packet cannot be segmented. */
> -bool
> -dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
> +static void
> +dp_packet_gso_update_segment(struct dp_packet *seg, int seg_no, int n_segs,
> +                             uint16_t tso_segsz)
>  {
> -    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> -    struct dp_packet_batch *curr_batch = *batches;

> +    bool udp_tnl = dp_packet_tunnel_vxlan(seg)
> +                   || dp_packet_tunnel_geneve(seg);
> +    bool gre_tnl = dp_packet_tunnel_gre(seg);

The only disadvantage I can see from this change is that these have to be
read for every update. I guess it shouldn't make any real difference, and
adding them to the API doesn't seem very nice either.
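For the record, hoisting them into the API would look something like the
below (rough sketch only; the extra parameters are hypothetical):

    /* Hypothetical alternative: compute the tunnel flags once in
     * dp_packet_gso() and pass them down, instead of re-reading them for
     * every segment. */
    static void
    dp_packet_gso_update_segment(struct dp_packet *seg, int seg_no, int n_segs,
                                 uint16_t tso_segsz, bool udp_tnl, bool gre_tnl);

    /* The caller would compute these once per packet: */
    bool udp_tnl = dp_packet_tunnel_vxlan(p) || dp_packet_tunnel_geneve(p);
    bool gre_tnl = dp_packet_tunnel_gre(p);

but as said, I don't think it is worth the churn.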

>      struct tcp_header *tcp_hdr;
>      struct ip_header *ip_hdr;
> -    uint16_t inner_ip_id = 0;
> -    uint16_t outer_ip_id = 0;
> -    struct dp_packet *seg;
> -    uint16_t tcp_offset;
> -    uint16_t tso_segsz;
>      uint32_t tcp_seq;
> -    bool outer_ipv4;
> -    int hdr_len;
> -    int seg_len;
> -    bool udp_tnl = dp_packet_tunnel_vxlan(p)
> -                   || dp_packet_tunnel_geneve(p);
> -    bool gre_tnl = dp_packet_tunnel_gre(p);
>  
> -    tso_segsz = dp_packet_get_tso_segsz(p);
> -    if (!tso_segsz) {
> -        VLOG_WARN_RL(&rl, "GSO packet with len %d with no segment size.",
> -                     dp_packet_size(p));
> -        return false;
> +    if (udp_tnl) {
> +        /* Update tunnel UDP header length. */
> +        struct udp_header *tnl_hdr;
> +
> +        tnl_hdr = dp_packet_l4(seg);
> +        tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
>      }
>  
>      if (udp_tnl || gre_tnl) {
> -        ip_hdr = dp_packet_inner_l3(p);
> +        /* Update tunnel inner L3 header. */
> +        ip_hdr = dp_packet_inner_l3(seg);
>          if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
> -            inner_ip_id = ntohs(ip_hdr->ip_id);
> +            ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg));
> +            ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);
> +            ip_hdr->ip_csum = 0;
> +            dp_packet_inner_ip_checksum_set_partial(seg);
> +        } else {
> +            struct ovs_16aligned_ip6_hdr *ip6_hdr;
> +
> +            ip6_hdr = dp_packet_inner_l3(seg);
> +            ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> +                = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
>          }
> +    }
>  
> -        tcp_hdr = dp_packet_inner_l4(p);
> +    /* Update L3 header. */
> +    ip_hdr = dp_packet_l3(seg);
> +    if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
> +        ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg));
> +        ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);
> +        ip_hdr->ip_csum = 0;
> +        dp_packet_ip_checksum_set_partial(seg);
>      } else {
> -        tcp_hdr = dp_packet_l4(p);
> -    }
> +        struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg);
>  
> -    ip_hdr = dp_packet_l3(p);
> -    outer_ipv4 = IP_VER(ip_hdr->ip_ihl_ver) == 4;
> -    if (outer_ipv4) {
> -        outer_ip_id = ntohs(ip_hdr->ip_id);
> +        ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> +            = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr);
>      }
>  
> -    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> +    /* Update L4 header. */
> +    if (udp_tnl || gre_tnl) {
> +        tcp_hdr = dp_packet_inner_l4(seg);
> +        dp_packet_inner_l4_checksum_set_partial(seg);
> +    } else {
> +        tcp_hdr = dp_packet_l4(seg);
> +        dp_packet_l4_checksum_set_partial(seg);
> +    }
>      tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
> -    hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
> -              + tcp_offset * 4;
> -    const char *data_tail = (char *) dp_packet_tail(p)
> -                            - dp_packet_l2_pad_size(p);
> -    const char *data_pos = (char *) tcp_hdr + tcp_offset * 4;
> -    int n_segs = dp_packet_gso_nr_segs(p);
> -
> -    for (int i = 0; i < n_segs; i++) {
> -        seg_len = data_tail - data_pos;
> -        if (seg_len > tso_segsz) {
> -            seg_len = tso_segsz;
> -        }
> -
> -        seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
> -        data_pos += seg_len;
> +    tcp_seq += seg_no * tso_segsz;
> +    put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
> +
> +    if (OVS_LIKELY(seg_no < (n_segs - 1))) {
> +        uint16_t tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> +        /* Reset flags PUSH and FIN unless it is the last segment. */
> +        uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl)
> +                             & ~(TCP_PSH | TCP_FIN);
> +        tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
> +    }
>  
> -        if (udp_tnl) {
> -            /* Update tunnel UDP header length. */
> -            struct udp_header *tnl_hdr;
> +    if (gre_tnl) {
> +        struct gre_base_hdr *ghdr;
>  
> -            tnl_hdr = dp_packet_l4(seg);
> -            tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
> -        }
> +        ghdr = dp_packet_l4(seg);
>  
> -        if (udp_tnl || gre_tnl) {
> -            /* Update tunnel inner L3 header. */
> -            ip_hdr = dp_packet_inner_l3(seg);
> -            if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
> -                ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg));
> -                ip_hdr->ip_id = htons(inner_ip_id);
> -                ip_hdr->ip_csum = 0;
> -                dp_packet_inner_ip_checksum_set_partial(seg);
> -                inner_ip_id++;
> -            } else {
> -                struct ovs_16aligned_ip6_hdr *ip6_hdr;
> -
> -                ip6_hdr = dp_packet_inner_l3(seg);
> -                ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> -                    = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
> -            }
> +        if (ghdr->flags & htons(GRE_CSUM)) {
> +            ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
> +            *csum_opt = 0;
> +            *csum_opt = csum(ghdr, dp_packet_l4_size(seg));
>          }
> +    }
> +}
>  
> -        /* Update L3 header. */
> -        if (outer_ipv4) {
> -            ip_hdr = dp_packet_l3(seg);
> -            ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg));
> -            ip_hdr->ip_id = htons(outer_ip_id);
> -            ip_hdr->ip_csum = 0;
> -            dp_packet_ip_checksum_set_partial(seg);
> -            outer_ip_id++;
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg);
> +/* Perform software segmentation on packet 'p'.
> + *
> + * Segments packet 'p' into the array of preallocated batches in 'batches',
> + * updating the 'batches' pointer as needed. */
> +void
> +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
> +{
> +    struct dp_packet_batch *curr_batch = *batches;
> +    struct dp_packet *seg;
> +    uint16_t tso_segsz;
> +    size_t data_len;
> +    size_t hdr_len;
> +    int n_segs;
>  
> -            ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> -                = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr);
> -        }
> +    tso_segsz = dp_packet_get_tso_segsz(p);
> +    ovs_assert(tso_segsz);
> +    n_segs = dp_packet_gso_nr_segs(p);
>  
> -        /* Update L4 header. */
> -        if (udp_tnl || gre_tnl) {
> -            tcp_hdr = dp_packet_inner_l4(seg);
> -            dp_packet_inner_l4_checksum_set_partial(seg);
> -        } else {
> -            tcp_hdr = dp_packet_l4(seg);
> -            dp_packet_l4_checksum_set_partial(seg);
> -        }
> -        put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
> -        tcp_seq += seg_len;
> -        if (OVS_LIKELY(i < (n_segs - 1))) {
> -            /* Reset flags PUSH and FIN unless it is the last segment. */
> -            uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl)
> -                                 & ~(TCP_PSH | TCP_FIN);
> -            tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
> -        }
> +    /* Put back the first segment in the batch, it will be trimmed after
> +     * all segments have been copied. */
> +    if (dp_packet_batch_is_full(curr_batch)) {
> +        curr_batch++;
> +    }
> +    dp_packet_batch_add(curr_batch, p);
>  
> -        if (gre_tnl) {
> -            struct gre_base_hdr *ghdr;
> +    if (n_segs == 1) {
> +        goto out;
> +    }
>  
> -            ghdr = dp_packet_l4(seg);
> +    if (dp_packet_tunnel(p)) {
> +        hdr_len = (char *) dp_packet_get_inner_tcp_payload(p)
> +                  - (char *) dp_packet_eth(p);
> +        data_len = dp_packet_get_inner_tcp_payload_length(p);
> +    } else {
> +        hdr_len = (char *) dp_packet_get_tcp_payload(p)
> +                  - (char *) dp_packet_eth(p);
> +        data_len = dp_packet_get_tcp_payload_length(p);
> +    }
>  
> -            if (ghdr->flags & htons(GRE_CSUM)) {
> -                ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
> -                *csum_opt = 0;
> -                *csum_opt = csum(ghdr, dp_packet_l4_size(seg));
> -            }
> -        }
> +    for (int i = 1; i < n_segs - 1; i++) {
> +        seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + i * tso_segsz,
> +                                    tso_segsz);
> +        dp_packet_gso_update_segment(seg, i, n_segs, tso_segsz);
>  
>          if (dp_packet_batch_is_full(curr_batch)) {
>              curr_batch++;
>          }
> -
>          dp_packet_batch_add(curr_batch, seg);
>      }
>  
> +    /* Create the last segment. */
> +    seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + (n_segs - 1) * tso_segsz,
> +                                data_len - (n_segs - 1) * tso_segsz);
> +    dp_packet_gso_update_segment(seg, n_segs - 1, n_segs, tso_segsz);
> +
> +    if (dp_packet_batch_is_full(curr_batch)) {
> +        curr_batch++;
> +    }
> +    dp_packet_batch_add(curr_batch, seg);
> +
> +    /* Trim the first segment and reset TSO. */
> +    dp_packet_set_size(p, hdr_len + tso_segsz);
> +    dp_packet_set_tso_segsz(p, 0);
> +    dp_packet_gso_update_segment(p, 0, n_segs, tso_segsz);
> +
> +out:
>      *batches = curr_batch;
> -    return true;
>  }
> diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h
> index 9c282fb86c..2abc19ea31 100644
> --- a/lib/dp-packet-gso.h
> +++ b/lib/dp-packet-gso.h
> @@ -17,7 +17,7 @@
>  #ifndef DP_PACKET_GSO_H
>  #define DP_PACKET_GSO_H 1
>  
> -bool dp_packet_gso(struct dp_packet *, struct dp_packet_batch **);
> +void dp_packet_gso(struct dp_packet *, struct dp_packet_batch **);
>  int dp_packet_gso_nr_segs(struct dp_packet *);
>  
>  #endif /* dp-packet-gso.h */
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 6a05e9a7e5..f493dab005 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -71,7 +71,6 @@ COVERAGE_DEFINE(netdev_add_router);
>  COVERAGE_DEFINE(netdev_get_stats);
>  COVERAGE_DEFINE(netdev_push_header_drops);
>  COVERAGE_DEFINE(netdev_soft_seg_good);
> -COVERAGE_DEFINE(netdev_soft_seg_drops);
>  
>  struct netdev_saved_flags {
>      struct netdev *netdev;
> @@ -843,17 +842,12 @@ netdev_send_tso(struct netdev *netdev, int qid,
>      curr_batch = batches;
>      DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) {
>          if (dp_packet_get_tso_segsz(packet)) {
> -            if (dp_packet_gso(packet, &curr_batch)) {
> -                COVERAGE_INC(netdev_soft_seg_good);
> -            } else {
> -                COVERAGE_INC(netdev_soft_seg_drops);
> -            }
> -            dp_packet_delete(packet);
> +            dp_packet_gso(packet, &curr_batch);
> +            COVERAGE_INC(netdev_soft_seg_good);
>          } else {
>              if (dp_packet_batch_is_full(curr_batch)) {
>                  curr_batch++;
>              }
> -
>              dp_packet_batch_add(curr_batch, packet);
>          }
>      }
