Refactor software segmentation dp_packet_gso() using two helpers:
- dp_packet_gso_seg_new: create a segment from the original packet,
  copying headers and relevant payload,
- dp_packet_gso_update_segment: update the headers.

For dp_packet_gso_seg_new, prefer working on a data offset instead of
passing a pointer to the packet data.  This simplifies calling this
helper: segment data is either of size tso_segsz, or the remaining
data for the last segment.

For dp_packet_gso_update_segment, the (inner and outer) ip_id is based
on the value copied from the original packet plus the segment index.

Handling of the first and last segments is separated from the other
segments:
- rather than copying data for the first segment, we can simply resize
  the original packet (once all segments have been copied); this small
  optimisation saves one tso_segsz copy,
- the last segment needs special handling with respect to its length
  and TCP flags.

Finally, replace the check on tso_segsz presence with an ovs_assert(),
as this code should only be invoked when segmenting.  Thanks to this,
dp_packet_gso() can be declared void, and the netdev_soft_seg_drops
counter has no reason to exist anymore.

The code is easier to read this way, and it will be beneficial to the
next commit.

Acked-by: Kevin Traynor <[email protected]>
Signed-off-by: David Marchand <[email protected]>
---
Changes since v2:
- passed original packet tunnel info,
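Note for reviewers: below is a minimal, self-contained sketch of the
per-segment layout this refactor implements (payload offsets, the
last-segment remainder, ip_id and TCP sequence progression, PSH/FIN
handling).  All sizes are made up and the arithmetic is a stand-in;
none of the names below belong to the dp-packet API.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        const size_t hdr_len = 54;       /* Eth + IPv4 + TCP header bytes. */
        const size_t data_len = 3000;    /* TCP payload bytes to segment. */
        const uint16_t tso_segsz = 1400; /* Max payload bytes per segment. */
        const unsigned orig_ip_id = 42;  /* ip_id of the original packet. */
        const uint32_t orig_tcp_seq = 1000;
        const int n_segs = (data_len + tso_segsz - 1) / tso_segsz;

        /* Mirrors the ovs_assert(): only called when actually segmenting. */
        assert(tso_segsz != 0);

        for (int i = 0; i < n_segs; i++) {
            /* Segment 'i' takes its payload at offset hdr_len + i * segsz;
             * all segments carry tso_segsz bytes except the last one, which
             * carries the remainder.  Segment 0 is never copied: the
             * original packet is simply trimmed to hdr_len + tso_segsz. */
            size_t off = hdr_len + (size_t) i * tso_segsz;
            size_t len = i < n_segs - 1 ? tso_segsz
                                        : data_len - (size_t) i * tso_segsz;

            /* ip_id is the original value plus the segment index, and
             * tcp_seq advances by i * tso_segsz; PSH/FIN survive only on
             * the last segment. */
            unsigned ip_id = orig_ip_id + i;
            uint32_t tcp_seq = orig_tcp_seq + (uint32_t) i * tso_segsz;

            printf("seg %d: off=%zu len=%zu ip_id=%u seq=%u%s%s\n",
                   i, off, len, ip_id, (unsigned) tcp_seq,
                   i < n_segs - 1 ? " (PSH/FIN cleared)" : " (PSH/FIN kept)",
                   i == 0 ? " [trimmed original, no copy]" : "");
        }
        return 0;
    }

With these made-up numbers the sketch prints three segments: offsets
54/1454/2854, lengths 1400/1400/200, ip_id 42/43/44, sequence numbers
1000/2400/3800, with PSH/FIN kept only on the last segment.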
---
 lib/dp-packet-gso.c | 240 +++++++++++++++++++++++---------------------
 lib/dp-packet-gso.h |   2 +-
 lib/netdev.c        |  10 +-
 3 files changed, 126 insertions(+), 126 deletions(-)

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index fe7186ddf4..a7915dfe56 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -29,19 +29,19 @@ VLOG_DEFINE_THIS_MODULE(dp_packet_gso);
  *
  * The new packet is initialized with 'hdr_len' bytes from the
  * start of packet 'p' and then appended with 'data_len' bytes
- * from the 'data' buffer.
+ * from the packet 'p' at offset 'data_off'.
  *
  * Note: The packet headers are not updated. */
 static struct dp_packet *
 dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
-                      const char *data, size_t data_len)
+                      size_t data_off, size_t data_len)
 {
     struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len,
                                                         dp_packet_headroom(p));
 
     /* Append the original packet headers and then the payload. */
     dp_packet_put(seg, dp_packet_data(p), hdr_len);
-    dp_packet_put(seg, data, data_len);
+    dp_packet_put(seg, (char *) dp_packet_data(p) + data_off, data_len);
 
     /* The new segment should have the same offsets. */
     seg->l2_5_ofs = p->l2_5_ofs;
@@ -77,151 +77,157 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
     return DIV_ROUND_UP(data_length, segsz);
 }
 
-/* Perform software segmentation on packet 'p'.
- *
- * Segments packet 'p' into the array of preallocated batches in 'batches',
- * updating the 'batches' pointer as needed and returns true.
- *
- * Returns false if the packet cannot be segmented. */
-bool
-dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
+static void
+dp_packet_gso_update_segment(struct dp_packet *seg, int seg_no, int n_segs,
+                             uint16_t tso_segsz, bool udp_tnl, bool gre_tnl)
 {
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-    struct dp_packet_batch *curr_batch = *batches;
     struct tcp_header *tcp_hdr;
     struct ip_header *ip_hdr;
-    uint16_t inner_ip_id = 0;
-    uint16_t outer_ip_id = 0;
-    struct dp_packet *seg;
-    uint16_t tcp_offset;
-    uint16_t tso_segsz;
     uint32_t tcp_seq;
-    bool outer_ipv4;
-    int hdr_len;
-    int seg_len;
-    bool udp_tnl = dp_packet_tunnel_vxlan(p)
-                   || dp_packet_tunnel_geneve(p);
-    bool gre_tnl = dp_packet_tunnel_gre(p);
 
-    tso_segsz = dp_packet_get_tso_segsz(p);
-    if (!tso_segsz) {
-        VLOG_WARN_RL(&rl, "GSO packet with len %d with no segment size.",
-                     dp_packet_size(p));
-        return false;
+    if (udp_tnl) {
+        /* Update tunnel UDP header length. */
+        struct udp_header *tnl_hdr;
+
+        tnl_hdr = dp_packet_l4(seg);
+        tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
     }
 
     if (udp_tnl || gre_tnl) {
-        ip_hdr = dp_packet_inner_l3(p);
+        /* Update tunnel inner L3 header. */
+        ip_hdr = dp_packet_inner_l3(seg);
         if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
-            inner_ip_id = ntohs(ip_hdr->ip_id);
+            ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg));
+            ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);
+            ip_hdr->ip_csum = 0;
+            dp_packet_inner_ip_checksum_set_partial(seg);
+        } else {
+            struct ovs_16aligned_ip6_hdr *ip6_hdr;
+
+            ip6_hdr = dp_packet_inner_l3(seg);
+            ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
+                = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
         }
+    }
 
-        tcp_hdr = dp_packet_inner_l4(p);
+    /* Update L3 header. */
+    ip_hdr = dp_packet_l3(seg);
+    if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
+        ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg));
+        ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);
+        ip_hdr->ip_csum = 0;
+        dp_packet_ip_checksum_set_partial(seg);
     } else {
-        tcp_hdr = dp_packet_l4(p);
-    }
+        struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg);
 
-    ip_hdr = dp_packet_l3(p);
-    outer_ipv4 = IP_VER(ip_hdr->ip_ihl_ver) == 4;
-    if (outer_ipv4) {
-        outer_ip_id = ntohs(ip_hdr->ip_id);
+        ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
+            = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr);
     }
 
-    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
+    /* Update L4 header. */
+    if (udp_tnl || gre_tnl) {
+        tcp_hdr = dp_packet_inner_l4(seg);
+        dp_packet_inner_l4_checksum_set_partial(seg);
+    } else {
+        tcp_hdr = dp_packet_l4(seg);
+        dp_packet_l4_checksum_set_partial(seg);
+    }
     tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
-    hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
-              + tcp_offset * 4;
-    const char *data_tail = (char *) dp_packet_tail(p)
-                            - dp_packet_l2_pad_size(p);
-    const char *data_pos = (char *) tcp_hdr + tcp_offset * 4;
-    int n_segs = dp_packet_gso_nr_segs(p);
-
-    for (int i = 0; i < n_segs; i++) {
-        seg_len = data_tail - data_pos;
-        if (seg_len > tso_segsz) {
-            seg_len = tso_segsz;
-        }
-
-        seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
-        data_pos += seg_len;
+    tcp_seq += seg_no * tso_segsz;
+    put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
+
+    if (OVS_LIKELY(seg_no < (n_segs - 1))) {
+        uint16_t tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
+        /* Reset flags PUSH and FIN unless it is the last segment. */
+        uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl)
+                             & ~(TCP_PSH | TCP_FIN);
+        tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
+    }
 
-        if (udp_tnl) {
-            /* Update tunnel UDP header length. */
-            struct udp_header *tnl_hdr;
+    if (gre_tnl) {
+        struct gre_base_hdr *ghdr;
 
-            tnl_hdr = dp_packet_l4(seg);
-            tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
-        }
+        ghdr = dp_packet_l4(seg);
 
-        if (udp_tnl || gre_tnl) {
-            /* Update tunnel inner L3 header. */
-            ip_hdr = dp_packet_inner_l3(seg);
-            if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
-                ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg));
-                ip_hdr->ip_id = htons(inner_ip_id);
-                ip_hdr->ip_csum = 0;
-                dp_packet_inner_ip_checksum_set_partial(seg);
-                inner_ip_id++;
-            } else {
-                struct ovs_16aligned_ip6_hdr *ip6_hdr;
-
-                ip6_hdr = dp_packet_inner_l3(seg);
-                ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
-                    = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
-            }
+        if (ghdr->flags & htons(GRE_CSUM)) {
+            ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
+            *csum_opt = 0;
+            *csum_opt = csum(ghdr, dp_packet_l4_size(seg));
         }
+    }
+}
 
-        /* Update L3 header. */
-        if (outer_ipv4) {
-            ip_hdr = dp_packet_l3(seg);
-            ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg));
-            ip_hdr->ip_id = htons(outer_ip_id);
-            ip_hdr->ip_csum = 0;
-            dp_packet_ip_checksum_set_partial(seg);
-            outer_ip_id++;
-        } else {
-            struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg);
-
-            ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
-                = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr);
-        }
+/* Perform software segmentation on packet 'p'.
+ *
+ * Segments packet 'p' into the array of preallocated batches in 'batches',
+ * updating the 'batches' pointer as needed. */
+void
+dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
+{
+    struct dp_packet_batch *curr_batch = *batches;
+    struct dp_packet *seg;
+    uint16_t tso_segsz;
+    size_t data_len;
+    size_t hdr_len;
+    bool udp_tnl;
+    bool gre_tnl;
+    int n_segs;
 
-        /* Update L4 header. */
-        if (udp_tnl || gre_tnl) {
-            tcp_hdr = dp_packet_inner_l4(seg);
-            dp_packet_inner_l4_checksum_set_partial(seg);
-        } else {
-            tcp_hdr = dp_packet_l4(seg);
-            dp_packet_l4_checksum_set_partial(seg);
-        }
-        put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
-        tcp_seq += seg_len;
-        if (OVS_LIKELY(i < (n_segs - 1))) {
-            /* Reset flags PUSH and FIN unless it is the last segment. */
-            uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl)
-                                 & ~(TCP_PSH | TCP_FIN);
-            tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
-        }
+    tso_segsz = dp_packet_get_tso_segsz(p);
+    ovs_assert(tso_segsz);
+    n_segs = dp_packet_gso_nr_segs(p);
+    udp_tnl = dp_packet_tunnel_vxlan(p) || dp_packet_tunnel_geneve(p);
+    gre_tnl = dp_packet_tunnel_gre(p);
+
+    /* Put back the first segment in the batch, it will be trimmed after
+     * all segments have been copied. */
+    if (dp_packet_batch_is_full(curr_batch)) {
+        curr_batch++;
+    }
+    dp_packet_batch_add(curr_batch, p);
 
-        if (gre_tnl) {
-            struct gre_base_hdr *ghdr;
+    if (n_segs == 1) {
+        goto out;
+    }
 
-            ghdr = dp_packet_l4(seg);
+    if (dp_packet_tunnel(p)) {
+        hdr_len = (char *) dp_packet_get_inner_tcp_payload(p)
+                  - (char *) dp_packet_eth(p);
+        data_len = dp_packet_get_inner_tcp_payload_length(p);
+    } else {
+        hdr_len = (char *) dp_packet_get_tcp_payload(p)
+                  - (char *) dp_packet_eth(p);
+        data_len = dp_packet_get_tcp_payload_length(p);
+    }
 
-            if (ghdr->flags & htons(GRE_CSUM)) {
-                ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
-                *csum_opt = 0;
-                *csum_opt = csum(ghdr, dp_packet_l4_size(seg));
-            }
-        }
+    for (int i = 1; i < n_segs - 1; i++) {
+        seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + i * tso_segsz,
+                                    tso_segsz);
+        dp_packet_gso_update_segment(seg, i, n_segs, tso_segsz, udp_tnl,
+                                     gre_tnl);
 
         if (dp_packet_batch_is_full(curr_batch)) {
             curr_batch++;
         }
-
         dp_packet_batch_add(curr_batch, seg);
     }
 
+    /* Create the last segment. */
+    seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + (n_segs - 1) * tso_segsz,
+                                data_len - (n_segs - 1) * tso_segsz);
+    dp_packet_gso_update_segment(seg, n_segs - 1, n_segs, tso_segsz, udp_tnl,
+                                 gre_tnl);
+
+    if (dp_packet_batch_is_full(curr_batch)) {
+        curr_batch++;
+    }
+    dp_packet_batch_add(curr_batch, seg);
+
+    /* Trim the first segment and reset TSO. */
+    dp_packet_set_size(p, hdr_len + tso_segsz);
+    dp_packet_set_tso_segsz(p, 0);
+    dp_packet_gso_update_segment(p, 0, n_segs, tso_segsz, udp_tnl, gre_tnl);
+
+out:
     *batches = curr_batch;
-    return true;
 }
 
diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h
index 9c282fb86c..2abc19ea31 100644
--- a/lib/dp-packet-gso.h
+++ b/lib/dp-packet-gso.h
@@ -17,7 +17,7 @@
 #ifndef DP_PACKET_GSO_H
 #define DP_PACKET_GSO_H 1
 
-bool dp_packet_gso(struct dp_packet *, struct dp_packet_batch **);
+void dp_packet_gso(struct dp_packet *, struct dp_packet_batch **);
 int dp_packet_gso_nr_segs(struct dp_packet *);
 
 #endif /* dp-packet-gso.h */
diff --git a/lib/netdev.c b/lib/netdev.c
index 6a05e9a7e5..f493dab005 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -71,7 +71,6 @@ COVERAGE_DEFINE(netdev_add_router);
 COVERAGE_DEFINE(netdev_get_stats);
 COVERAGE_DEFINE(netdev_push_header_drops);
 COVERAGE_DEFINE(netdev_soft_seg_good);
-COVERAGE_DEFINE(netdev_soft_seg_drops);
 
 struct netdev_saved_flags {
     struct netdev *netdev;
@@ -843,17 +842,12 @@ netdev_send_tso(struct netdev *netdev, int qid,
     curr_batch = batches;
     DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) {
         if (dp_packet_get_tso_segsz(packet)) {
-            if (dp_packet_gso(packet, &curr_batch)) {
-                COVERAGE_INC(netdev_soft_seg_good);
-            } else {
-                COVERAGE_INC(netdev_soft_seg_drops);
-            }
-            dp_packet_delete(packet);
+            dp_packet_gso(packet, &curr_batch);
+            COVERAGE_INC(netdev_soft_seg_good);
         } else {
             if (dp_packet_batch_is_full(curr_batch)) {
                 curr_batch++;
             }
-
             dp_packet_batch_add(curr_batch, packet);
         }
     }
-- 
2.51.1
