On 12/11/2025 17:04, David Marchand via dev wrote: > Refactor software segmentation dp_packet_gso() using two helpers: > - dp_packet_gso_seg_new: create a segment from the original packet, > copying headers and relevant payload, > - dp_packet_gso_update_segment: update the headers, > > For dp_packet_gso_seg_new, prefer working on data offset instead of > passing a pointer to the packet data. > This simplifies calling this helper: segments data are either of size > tso_segsz, or the remaining data for the last segment. > > For dp_packet_gso_update_segment, (inner and outer) ip_id is based > on the value copied from the original packet + the segment index. > The first and last segments handling are separated from the other > segments: > - rather than copying data for the first segment, we can simply resize the > original packet (once all segments have been copied), this small > optimisation saves one tso_segsz copy, > - the last segment needs special handling wrt its length and TCP flags, > > Finally, replace the check on tso_segsz presence with an ovs_assert() as > this code should only be invoked when segmenting. > Thanks to this, dp_packet_gso() can be declared void, and the > netdev_soft_seg_drops counter has no reason to exist anymore. > > The code is easier to read this way, and it will be beneficial to the next > commit. >
> Note: this patch is easier to read if ignoring whitespace changes. > Can drop this from commit msg One comment below, but LGTM Acked-by: Kevin Traynor <[email protected]> > Signed-off-by: David Marchand <[email protected]> > --- > lib/dp-packet-gso.c | 235 ++++++++++++++++++++++---------------------- > lib/dp-packet-gso.h | 2 +- > lib/netdev.c | 10 +- > 3 files changed, 122 insertions(+), 125 deletions(-) > > diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c > index fe7186ddf4..6b8da8b370 100644 > --- a/lib/dp-packet-gso.c > +++ b/lib/dp-packet-gso.c > @@ -29,19 +29,19 @@ VLOG_DEFINE_THIS_MODULE(dp_packet_gso); > * > * The new packet is initialized with 'hdr_len' bytes from the > * start of packet 'p' and then appended with 'data_len' bytes > - * from the 'data' buffer. > + * from the packet 'p' at offset 'data_off'. > * > * Note: The packet headers are not updated. */ > static struct dp_packet * > dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len, > - const char *data, size_t data_len) > + size_t data_off, size_t data_len) > { > struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len, > > dp_packet_headroom(p)); > > /* Append the original packet headers and then the payload. */ > dp_packet_put(seg, dp_packet_data(p), hdr_len); > - dp_packet_put(seg, data, data_len); > + dp_packet_put(seg, (char *) dp_packet_data(p) + data_off, data_len); > > /* The new segment should have the same offsets. */ > seg->l2_5_ofs = p->l2_5_ofs; > @@ -77,151 +77,154 @@ dp_packet_gso_nr_segs(struct dp_packet *p) > return DIV_ROUND_UP(data_length, segsz); > } > > -/* Perform software segmentation on packet 'p'. > - * > - * Segments packet 'p' into the array of preallocated batches in 'batches', > - * updating the 'batches' pointer as needed and returns true. > - * > - * Returns false if the packet cannot be segmented. */ > -bool > -dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) > +static void > +dp_packet_gso_update_segment(struct dp_packet *seg, int seg_no, int n_segs, > + uint16_t tso_segsz) > { > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > - struct dp_packet_batch *curr_batch = *batches; > + bool udp_tnl = dp_packet_tunnel_vxlan(seg) > + || dp_packet_tunnel_geneve(seg); > + bool gre_tnl = dp_packet_tunnel_gre(seg); Only disadvantage I can see from change is that these have to be read for every update. I guess it shouldn't make any real difference and adding them to the API doesn't seem very nice either > struct tcp_header *tcp_hdr; > struct ip_header *ip_hdr; > - uint16_t inner_ip_id = 0; > - uint16_t outer_ip_id = 0; > - struct dp_packet *seg; > - uint16_t tcp_offset; > - uint16_t tso_segsz; > uint32_t tcp_seq; > - bool outer_ipv4; > - int hdr_len; > - int seg_len; > - bool udp_tnl = dp_packet_tunnel_vxlan(p) > - || dp_packet_tunnel_geneve(p); > - bool gre_tnl = dp_packet_tunnel_gre(p); > > - tso_segsz = dp_packet_get_tso_segsz(p); > - if (!tso_segsz) { > - VLOG_WARN_RL(&rl, "GSO packet with len %d with no segment size.", > - dp_packet_size(p)); > - return false; > + if (udp_tnl) { > + /* Update tunnel UDP header length. */ > + struct udp_header *tnl_hdr; > + > + tnl_hdr = dp_packet_l4(seg); > + tnl_hdr->udp_len = htons(dp_packet_l4_size(seg)); > } > > if (udp_tnl || gre_tnl) { > - ip_hdr = dp_packet_inner_l3(p); > + /* Update tunnel inner L3 header. */ > + ip_hdr = dp_packet_inner_l3(seg); > if (IP_VER(ip_hdr->ip_ihl_ver) == 4) { > - inner_ip_id = ntohs(ip_hdr->ip_id); > + ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg)); > + ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no); > + ip_hdr->ip_csum = 0; > + dp_packet_inner_ip_checksum_set_partial(seg); > + } else { > + struct ovs_16aligned_ip6_hdr *ip6_hdr; > + > + ip6_hdr = dp_packet_inner_l3(seg); > + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen > + = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr); > } > + } > > - tcp_hdr = dp_packet_inner_l4(p); > + /* Update L3 header. */ > + ip_hdr = dp_packet_l3(seg); > + if (IP_VER(ip_hdr->ip_ihl_ver) == 4) { > + ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg)); > + ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no); > + ip_hdr->ip_csum = 0; > + dp_packet_ip_checksum_set_partial(seg); > } else { > - tcp_hdr = dp_packet_l4(p); > - } > + struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg); > > - ip_hdr = dp_packet_l3(p); > - outer_ipv4 = IP_VER(ip_hdr->ip_ihl_ver) == 4; > - if (outer_ipv4) { > - outer_ip_id = ntohs(ip_hdr->ip_id); > + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen > + = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr); > } > > - tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > + /* Update L4 header. */ > + if (udp_tnl || gre_tnl) { > + tcp_hdr = dp_packet_inner_l4(seg); > + dp_packet_inner_l4_checksum_set_partial(seg); > + } else { > + tcp_hdr = dp_packet_l4(seg); > + dp_packet_l4_checksum_set_partial(seg); > + } > tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); > - hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p)) > - + tcp_offset * 4; > - const char *data_tail = (char *) dp_packet_tail(p) > - - dp_packet_l2_pad_size(p); > - const char *data_pos = (char *) tcp_hdr + tcp_offset * 4; > - int n_segs = dp_packet_gso_nr_segs(p); > - > - for (int i = 0; i < n_segs; i++) { > - seg_len = data_tail - data_pos; > - if (seg_len > tso_segsz) { > - seg_len = tso_segsz; > - } > - > - seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); > - data_pos += seg_len; > + tcp_seq += seg_no * tso_segsz; > + put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); > + > + if (OVS_LIKELY(seg_no < (n_segs - 1))) { > + uint16_t tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > + /* Reset flags PUSH and FIN unless it is the last segment. */ > + uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl) > + & ~(TCP_PSH | TCP_FIN); > + tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); > + } > > - if (udp_tnl) { > - /* Update tunnel UDP header length. */ > - struct udp_header *tnl_hdr; > + if (gre_tnl) { > + struct gre_base_hdr *ghdr; > > - tnl_hdr = dp_packet_l4(seg); > - tnl_hdr->udp_len = htons(dp_packet_l4_size(seg)); > - } > + ghdr = dp_packet_l4(seg); > > - if (udp_tnl || gre_tnl) { > - /* Update tunnel inner L3 header. */ > - ip_hdr = dp_packet_inner_l3(seg); > - if (IP_VER(ip_hdr->ip_ihl_ver) == 4) { > - ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg)); > - ip_hdr->ip_id = htons(inner_ip_id); > - ip_hdr->ip_csum = 0; > - dp_packet_inner_ip_checksum_set_partial(seg); > - inner_ip_id++; > - } else { > - struct ovs_16aligned_ip6_hdr *ip6_hdr; > - > - ip6_hdr = dp_packet_inner_l3(seg); > - ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen > - = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr); > - } > + if (ghdr->flags & htons(GRE_CSUM)) { > + ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1); > + *csum_opt = 0; > + *csum_opt = csum(ghdr, dp_packet_l4_size(seg)); > } > + } > +} > > - /* Update L3 header. */ > - if (outer_ipv4) { > - ip_hdr = dp_packet_l3(seg); > - ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg)); > - ip_hdr->ip_id = htons(outer_ip_id); > - ip_hdr->ip_csum = 0; > - dp_packet_ip_checksum_set_partial(seg); > - outer_ip_id++; > - } else { > - struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg); > +/* Perform software segmentation on packet 'p'. > + * > + * Segments packet 'p' into the array of preallocated batches in 'batches', > + * updating the 'batches' pointer as needed. */ > +void > +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) > +{ > + struct dp_packet_batch *curr_batch = *batches; > + struct dp_packet *seg; > + uint16_t tso_segsz; > + size_t data_len; > + size_t hdr_len; > + int n_segs; > > - ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen > - = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr); > - } > + tso_segsz = dp_packet_get_tso_segsz(p); > + ovs_assert(tso_segsz); > + n_segs = dp_packet_gso_nr_segs(p); > > - /* Update L4 header. */ > - if (udp_tnl || gre_tnl) { > - tcp_hdr = dp_packet_inner_l4(seg); > - dp_packet_inner_l4_checksum_set_partial(seg); > - } else { > - tcp_hdr = dp_packet_l4(seg); > - dp_packet_l4_checksum_set_partial(seg); > - } > - put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); > - tcp_seq += seg_len; > - if (OVS_LIKELY(i < (n_segs - 1))) { > - /* Reset flags PUSH and FIN unless it is the last segment. */ > - uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl) > - & ~(TCP_PSH | TCP_FIN); > - tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); > - } > + /* Put back the first segment in the batch, it will be trimmed after > + * all segments have been copied. */ > + if (dp_packet_batch_is_full(curr_batch)) { > + curr_batch++; > + } > + dp_packet_batch_add(curr_batch, p); > > - if (gre_tnl) { > - struct gre_base_hdr *ghdr; > + if (n_segs == 1) { > + goto out; > + } > > - ghdr = dp_packet_l4(seg); > + if (dp_packet_tunnel(p)) { > + hdr_len = (char *) dp_packet_get_inner_tcp_payload(p) > + - (char *) dp_packet_eth(p); > + data_len = dp_packet_get_inner_tcp_payload_length(p); > + } else { > + hdr_len = (char *) dp_packet_get_tcp_payload(p) > + - (char *) dp_packet_eth(p); > + data_len = dp_packet_get_tcp_payload_length(p); > + } > > - if (ghdr->flags & htons(GRE_CSUM)) { > - ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1); > - *csum_opt = 0; > - *csum_opt = csum(ghdr, dp_packet_l4_size(seg)); > - } > - } > + for (int i = 1; i < n_segs - 1; i++) { > + seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + i * tso_segsz, > + tso_segsz); > + dp_packet_gso_update_segment(seg, i, n_segs, tso_segsz); > > if (dp_packet_batch_is_full(curr_batch)) { > curr_batch++; > } > - > dp_packet_batch_add(curr_batch, seg); > } > > + /* Create the last segment. */ > + seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + (n_segs - 1) * > tso_segsz, > + data_len - (n_segs - 1) * tso_segsz); > + dp_packet_gso_update_segment(seg, n_segs - 1, n_segs, tso_segsz); > + > + if (dp_packet_batch_is_full(curr_batch)) { > + curr_batch++; > + } > + dp_packet_batch_add(curr_batch, seg); > + > + /* Trim the first segment and reset TSO. */ > + dp_packet_set_size(p, hdr_len + tso_segsz); > + dp_packet_set_tso_segsz(p, 0); > + dp_packet_gso_update_segment(p, 0, n_segs, tso_segsz); > + > +out: > *batches = curr_batch; > - return true; > } > diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h > index 9c282fb86c..2abc19ea31 100644 > --- a/lib/dp-packet-gso.h > +++ b/lib/dp-packet-gso.h > @@ -17,7 +17,7 @@ > #ifndef DP_PACKET_GSO_H > #define DP_PACKET_GSO_H 1 > > -bool dp_packet_gso(struct dp_packet *, struct dp_packet_batch **); > +void dp_packet_gso(struct dp_packet *, struct dp_packet_batch **); > int dp_packet_gso_nr_segs(struct dp_packet *); > > #endif /* dp-packet-gso.h */ > diff --git a/lib/netdev.c b/lib/netdev.c > index 6a05e9a7e5..f493dab005 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -71,7 +71,6 @@ COVERAGE_DEFINE(netdev_add_router); > COVERAGE_DEFINE(netdev_get_stats); > COVERAGE_DEFINE(netdev_push_header_drops); > COVERAGE_DEFINE(netdev_soft_seg_good); > -COVERAGE_DEFINE(netdev_soft_seg_drops); > > struct netdev_saved_flags { > struct netdev *netdev; > @@ -843,17 +842,12 @@ netdev_send_tso(struct netdev *netdev, int qid, > curr_batch = batches; > DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) { > if (dp_packet_get_tso_segsz(packet)) { > - if (dp_packet_gso(packet, &curr_batch)) { > - COVERAGE_INC(netdev_soft_seg_good); > - } else { > - COVERAGE_INC(netdev_soft_seg_drops); > - } > - dp_packet_delete(packet); > + dp_packet_gso(packet, &curr_batch); > + COVERAGE_INC(netdev_soft_seg_good); > } else { > if (dp_packet_batch_is_full(curr_batch)) { > curr_batch++; > } > - > dp_packet_batch_add(curr_batch, packet); > } > } _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
