Define a helper capable of segmenting packets from a batch and returning the segments in the input batch. This will allow calling GSO elsewhere in OVS during packet processing.
Using this helper, we still need to ensure that the netdev ->send() op is called with only NETDEV_MAX_BURST packets. Add two more helpers for sending one burst of NETDEV_MAX_BURST, and another to split a large batch into multiple bursts. Signed-off-by: David Marchand <[email protected]> --- Documentation/topics/userspace-tso.rst | 4 +- lib/dp-packet-gso.c | 66 +++++++----- lib/dp-packet-gso.h | 4 +- lib/dp-packet.h | 16 +++ lib/netdev.c | 133 +++++++++---------------- tests/dpif-netdev.at | 31 ++++++ 6 files changed, 135 insertions(+), 119 deletions(-) diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index 9f29e5ee3f..4a1f972c73 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -117,7 +117,7 @@ This case is common for port not supporting TSO at all. A coverage counter exists to reflect when the software fallback helper is called.:: - $ ovs-appctl coverage/read-counter netdev_soft_seg_good + $ ovs-appctl coverage/read-counter dp_packet_gso 178760 Another possibility is when the `TSO` packet involves a header encapsulation @@ -128,7 +128,7 @@ OvS does some partial segmentation based on the segmentation size coming from the receiving port. A coverage counter exists to reflect when this optimisation is called.:: - $ ovs-appctl coverage/read-counter netdev_partial_seg_good + $ ovs-appctl coverage/read-counter dp_packet_partial_gso 1345 ~~~~~~~~~~~ diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c index 83a07c8e31..8c88173520 100644 --- a/lib/dp-packet-gso.c +++ b/lib/dp-packet-gso.c @@ -18,6 +18,7 @@ #include <stdlib.h> #include <string.h> +#include "coverage.h" #include "dp-packet.h" #include "dp-packet-gso.h" #include "netdev-provider.h" @@ -25,6 +26,9 @@ VLOG_DEFINE_THIS_MODULE(dp_packet_gso); +COVERAGE_DEFINE(dp_packet_gso); +COVERAGE_DEFINE(dp_packet_partial_gso); + /* Retuns a new packet that is a segment of packet 'p'. 
* * The new packet is initialized with 'hdr_len' bytes from the @@ -176,10 +180,9 @@ dp_packet_gso_update_segment(struct dp_packet *seg, unsigned int seg_no, } static void -dp_packet_gso__(struct dp_packet *p, struct dp_packet_batch **batches, +dp_packet_gso__(struct dp_packet *p, struct dp_packet_batch *batch, bool partial_seg) { - struct dp_packet_batch *curr_batch = *batches; struct dp_packet *seg; unsigned int n_segs; uint16_t tso_segsz; @@ -196,13 +199,10 @@ dp_packet_gso__(struct dp_packet *p, struct dp_packet_batch **batches, /* Put back the first segment in the batch, it will be trimmed after * all segments have been copied. */ - if (curr_batch->count == NETDEV_MAX_BURST) { - curr_batch++; - } - dp_packet_batch_add(curr_batch, p); + dp_packet_batch_add(batch, p); if (n_segs <= 1) { - goto out; + return; } if (dp_packet_tunnel(p)) { @@ -227,11 +227,7 @@ dp_packet_gso__(struct dp_packet *p, struct dp_packet_batch **batches, tso_segsz); dp_packet_gso_update_segment(seg, i, n_segs, tso_segsz, udp_tnl, gre_tnl); - - if (curr_batch->count == NETDEV_MAX_BURST) { - curr_batch++; - } - dp_packet_batch_add(curr_batch, seg); + dp_packet_batch_add(batch, seg); } last_seg: @@ -240,11 +236,7 @@ last_seg: data_len - (n_segs - 1) * tso_segsz); dp_packet_gso_update_segment(seg, n_segs - 1, n_segs, tso_segsz, udp_tnl, gre_tnl); - - if (curr_batch->count == NETDEV_MAX_BURST) { - curr_batch++; - } - dp_packet_batch_add(curr_batch, seg); + dp_packet_batch_add(batch, seg); first_seg: if (partial_seg) { @@ -261,19 +253,39 @@ first_seg: dp_packet_set_tso_segsz(p, 0); } dp_packet_gso_update_segment(p, 0, n_segs, tso_segsz, udp_tnl, gre_tnl); +} -out: - *batches = curr_batch; +static void +dp_packet_gso_batch__(struct dp_packet_batch *batch, bool partial_seg) +{ + struct dp_packet_batch gso_batch; + struct dp_packet *packet; + + dp_packet_batch_init(&gso_batch); + gso_batch.trunc = batch->trunc; + + DP_PACKET_BATCH_FOR_EACH (k, packet, batch) { + if (dp_packet_get_tso_segsz(packet)) 
{ + dp_packet_gso__(packet, &gso_batch, partial_seg); + if (partial_seg) { + COVERAGE_INC(dp_packet_partial_gso); + } else { + COVERAGE_INC(dp_packet_gso); + } + } else { + dp_packet_batch_add(&gso_batch, packet); + } + } + + dp_packet_batch_swap(batch, &gso_batch); + dp_packet_batch_destroy(&gso_batch); } -/* Perform software segmentation on packet 'p'. - * - * Segments packet 'p' into the array of preallocated batches in 'batches', - * updating the 'batches' pointer as needed. */ +/* Segment GSO packets and send them back to caller in the input batch. */ void -dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) +dp_packet_gso_batch(struct dp_packet_batch *b) { - dp_packet_gso__(p, batches, false); + dp_packet_gso_batch__(b, false); } /* Perform partial software segmentation on packet 'p'. @@ -283,7 +295,7 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) * of preallocated batches in 'batches', updating the 'batches' pointer * as needed. */ void -dp_packet_gso_partial(struct dp_packet *p, struct dp_packet_batch **batches) +dp_packet_gso_batch_partial(struct dp_packet_batch *b) { - dp_packet_gso__(p, batches, true); + dp_packet_gso_batch__(b, true); } diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h index 8d16179026..3549dcf4a3 100644 --- a/lib/dp-packet-gso.h +++ b/lib/dp-packet-gso.h @@ -17,9 +17,9 @@ #ifndef DP_PACKET_GSO_H #define DP_PACKET_GSO_H 1 -void dp_packet_gso(struct dp_packet *, struct dp_packet_batch **); +void dp_packet_gso_batch(struct dp_packet_batch *); unsigned int dp_packet_gso_nr_segs(struct dp_packet *); -void dp_packet_gso_partial(struct dp_packet *, struct dp_packet_batch **); +void dp_packet_gso_batch_partial(struct dp_packet_batch *); unsigned int dp_packet_gso_partial_nr_segs(struct dp_packet *); #endif /* dp-packet-gso.h */ diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 03d1eda6a6..247f64aaa7 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -972,6 +972,22 @@ 
dp_packet_batch_clone(struct dp_packet_batch *dst, dst->trunc = src->trunc; } +/* Swaps content of two batches. */ +static inline void +dp_packet_batch_swap(struct dp_packet_batch *a, struct dp_packet_batch *b) +{ + struct dp_packet_batch c = *a; + + *a = *b; + *b = c; + if (a->packets == b->embedded) { + a->packets = a->embedded; + } + if (b->packets == a->embedded) { + b->packets = b->embedded; + } +} + static inline void dp_packet_batch_destroy(struct dp_packet_batch *batch) { diff --git a/lib/netdev.c b/lib/netdev.c index 1e50d2f007..1288ae7871 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -65,12 +65,11 @@ VLOG_DEFINE_THIS_MODULE(netdev); COVERAGE_DEFINE(netdev_received); +COVERAGE_DEFINE(netdev_send_multi); COVERAGE_DEFINE(netdev_sent); COVERAGE_DEFINE(netdev_add_router); COVERAGE_DEFINE(netdev_get_stats); COVERAGE_DEFINE(netdev_push_header_drops); -COVERAGE_DEFINE(netdev_soft_seg_good); -COVERAGE_DEFINE(netdev_partial_seg_good); struct netdev_saved_flags { struct netdev *netdev; @@ -790,90 +789,54 @@ netdev_get_pt_mode(const struct netdev *netdev) : NETDEV_PT_LEGACY_L2); } -/* Attempts to segment GSO flagged packets and send them as multiple bundles. - * This function is only used if at least one packet in the current batch is - * flagged for TSO and the netdev does not support this. - * - * The return value is 0 if all batches sent successfully, and an error code - * from netdev_class->send() if at least one batch failed to send. */ static int -netdev_send_tso(struct netdev *netdev, int qid, - struct dp_packet_batch *batch, bool concurrent_txq, - bool partial_seg) +netdev_send_burst(struct netdev *netdev, int qid, + struct dp_packet_batch *batch, bool concurrent_txq) { - struct dp_packet_batch *batches; + const uint64_t netdev_flags = netdev->ol_flags; struct dp_packet *packet; - int retval = 0; - int n_packets; - int n_batches; int error; - /* Calculate the total number of packets in the batch after - * the (partial?) segmentation. 
*/ - n_packets = 0; DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - if (dp_packet_get_tso_segsz(packet)) { - if (partial_seg) { - n_packets += dp_packet_gso_partial_nr_segs(packet); - } else { - n_packets += dp_packet_gso_nr_segs(packet); - } - } else { - n_packets++; - } + dp_packet_ol_send_prepare(packet, netdev_flags); } - if (!n_packets) { - return 0; + error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq); + if (!error) { + COVERAGE_INC(netdev_sent); } + return error; +} - /* Allocate enough batches to store all the packets in order. */ - n_batches = DIV_ROUND_UP(n_packets, NETDEV_MAX_BURST); - batches = xmalloc(n_batches * sizeof *batches); - - struct dp_packet_batch *curr_batch = batches; - struct dp_packet_batch *last_batch = &batches[n_batches - 1]; - for (curr_batch = batches; curr_batch <= last_batch; curr_batch++) { - dp_packet_batch_init(curr_batch); - } +static int +netdev_send_multi(struct netdev *netdev, int qid, + struct dp_packet_batch *batch, bool concurrent_txq) +{ + struct dp_packet_batch smaller_batch; + size_t batch_cnt; + size_t sent = 0; + int error; - /* Do the packet segmentation if TSO is flagged. 
*/ - size_t size = dp_packet_batch_size(batch); - size_t k; - curr_batch = batches; - DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) { - if (dp_packet_get_tso_segsz(packet)) { - if (partial_seg) { - dp_packet_gso_partial(packet, &curr_batch); - COVERAGE_INC(netdev_partial_seg_good); - } else { - dp_packet_gso(packet, &curr_batch); - COVERAGE_INC(netdev_soft_seg_good); - } - } else { - if (curr_batch->count == NETDEV_MAX_BURST) { - curr_batch++; + COVERAGE_INC(netdev_send_multi); + batch_cnt = dp_packet_batch_size(batch); + dp_packet_batch_init(&smaller_batch); + do { + size_t count = MIN(batch_cnt - sent, NETDEV_MAX_BURST); + + smaller_batch.count = 0; + dp_packet_batch_add_array(&smaller_batch, &batch->packets[sent], + count); + error = netdev_send_burst(netdev, qid, &smaller_batch, concurrent_txq); + sent += count; + if (error) { + for (; sent < batch_cnt; sent++) { + dp_packet_delete(batch->packets[sent]); } - dp_packet_batch_add(curr_batch, packet); + break; } - } + } while (sent < batch_cnt); - for (curr_batch = batches; curr_batch <= last_batch; curr_batch++) { - DP_PACKET_BATCH_FOR_EACH (i, packet, curr_batch) { - dp_packet_ol_send_prepare(packet, netdev->ol_flags); - } - - error = netdev->netdev_class->send(netdev, qid, curr_batch, - concurrent_txq); - if (!error) { - COVERAGE_INC(netdev_sent); - } else { - retval = error; - } - dp_packet_batch_destroy(curr_batch); - } - free(batches); - return retval; + return error; } /* Sends 'batch' on 'netdev'. 
Returns 0 if successful (for every packet), @@ -907,14 +870,14 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, { const uint64_t netdev_flags = netdev->ol_flags; struct dp_packet *packet; - int error; if (userspace_tso_enabled()) { if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (dp_packet_get_tso_segsz(packet)) { - return netdev_send_tso(netdev, qid, batch, concurrent_txq, - false); + dp_packet_gso_batch(batch); + return netdev_send_multi(netdev, qid, batch, + concurrent_txq); } } } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO | @@ -923,30 +886,24 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (dp_packet_get_tso_segsz(packet) && dp_packet_tunnel(packet)) { - return netdev_send_tso(netdev, qid, batch, concurrent_txq, - false); + dp_packet_gso_batch(batch); + return netdev_send_multi(netdev, qid, batch, + concurrent_txq); } } } else if (!(netdev_flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) { DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (dp_packet_get_tso_segsz(packet) && dp_packet_gso_partial_nr_segs(packet) != 1) { - return netdev_send_tso(netdev, qid, batch, concurrent_txq, - true); + dp_packet_gso_batch_partial(batch); + return netdev_send_multi(netdev, qid, batch, + concurrent_txq); } } } } - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - dp_packet_ol_send_prepare(packet, netdev_flags); - } - - error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq); - if (!error) { - COVERAGE_INC(netdev_sent); - } - return error; + return netdev_send_burst(netdev, qid, batch, concurrent_txq); } /* Pop tunnel header, build tunnel metadata and resize 'batch->packets' diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 4e6aa3ce04..b4eb64ffd0 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -3405,6 +3405,37 @@ AT_CHECK([ovs-pcap p3-tx.pcap | wc -l], [0], [2 
OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([dpif-netdev - tso + dynamic packet batch grow]) +AT_KEYWORDS([userspace offload]) +OVS_VSWITCHD_START( + [set Open_vSwitch . other_config:userspace-tso-enable=true]) +add_of_ports --pcap br0 $(seq 1 2) + +AT_CHECK([ovs-vsctl set Interface p1 options:ol_tso_segsz=1000]) + +AT_CHECK([ovs-ofctl add-flow br0 in_port=1,actions=output:2]) + +OVS_CHECK_BATCH_GROW([0]) +CHECK_COVERAGE([dp_packet_gso], [0]) + +flow_s="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800), \ + ipv4(src=10.0.1.1,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no), \ + tcp(src=1234,dst=80),tcp_flags(ack)" +AT_CHECK([ovs-appctl netdev-dummy/receive p1 "${flow_s}" --len 35000]) + +dnl Interface statistics and coverage counters take some time to refresh. +OVS_WAIT_UNTIL([test `ovs-vsctl get interface p1 statistics:rx_packets` -eq 1]) +CHECK_COVERAGE([netdev_received], [1]) +OVS_CHECK_BATCH_GROW([1]) +CHECK_COVERAGE([dp_packet_gso], [1]) +CHECK_COVERAGE([netdev_send_multi], [1]) +CHECK_COVERAGE([netdev_sent], [2]) +AT_CHECK([ovs-vsctl get interface p2 statistics:tx_packets], [0], [35 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([dpif-netdev - revalidators handle dp modification fail correctly]) OVS_VSWITCHD_START( [add-port br0 p1 \ -- 2.53.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
