Prepare dpif-netdev to handle larger batches.

Metering does not need support for large batches, as it is invoked
before the actions that will produce large batches in later commits
(IP reassembly, GSO).
(Can someone with more knowledge of this code confirm this assumption?)

Split big batches into "small" NETDEV_MAX_BURST-sized batches for
recirculation, as some dpif-netdev input operations (flow extraction and
AVX512 lookups) use stack arrays sized for a burst of NETDEV_MAX_BURST
packets.

Also update the dpif-netdev output action, as it relies on structures
sized against NETDEV_MAX_BURST (for stats/cycles tracking and txq
distribution).

Add coverage counters so that subsequent changes can exercise this code.

Signed-off-by: David Marchand <[email protected]>
---
 lib/dp-packet.h   | 16 ++++++++++++
 lib/dpif-netdev.c | 63 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index d92b9d4730..16b9afbc36 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -890,6 +890,22 @@ dp_packet_batch_add(struct dp_packet_batch *batch, struct 
dp_packet *packet)
     dp_packet_batch_add__(batch, packet, NETDEV_MAX_BURST);
 }
 
+/* Appends up to 'n' packets from the 'packets' array to 'batch', without
+ * letting the batch grow past NETDEV_MAX_BURST packets.  Ownership of all
+ * 'n' packets is transferred: packets that fit are now owned by 'batch',
+ * and any packet that does not fit is freed with dp_packet_delete(). */
+static inline void
+dp_packet_batch_add_array(struct dp_packet_batch *batch,
+                          struct dp_packet *packets[], size_t n)
+{
+    size_t count = MIN(n, NETDEV_MAX_BURST - batch->count);
+
+    if (count) {
+        memcpy(&batch->packets[batch->count], packets,
+               count * sizeof packets[0]);
+        batch->count += count;
+    }
+    /* Drop whatever did not fit in the batch. */
+    for (size_t i = count; i < n; i++) {
+        dp_packet_delete(packets[i]);
+    }
+}
+
 static inline size_t
 dp_packet_batch_size(const struct dp_packet_batch *batch)
 {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 07699df729..0690d22cf2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -123,6 +123,9 @@ COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
 COVERAGE_DEFINE(datapath_drop_hw_post_process);
 COVERAGE_DEFINE(datapath_drop_hw_post_process_consumed);
 
+COVERAGE_DEFINE(dpif_netdev_output_big_batch);
+COVERAGE_DEFINE(dpif_netdev_recirc_big_batch);
+
 /* Protects against changes to 'dp_netdevs'. */
 struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
 
@@ -6798,6 +6801,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
         return;
     }
 
+    ovs_assert(cnt <= NETDEV_MAX_BURST);
+
     /* Initialize as negative values. */
     memset(exceeded_band, 0xff, cnt * sizeof *exceeded_band);
     /* Initialize as zeroes. */
@@ -8221,6 +8226,27 @@ static void
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
                       struct dp_packet_batch *packets)
 {
+    if (dp_packet_batch_size(packets) > NETDEV_MAX_BURST) {
+        struct dp_packet_batch smaller_batch;
+        size_t batch_cnt;
+        size_t sent = 0;
+
+        /* The input path (flow extraction, AVX512 lookups) uses stack
+         * arrays sized for NETDEV_MAX_BURST packets, so feed it chunks
+         * of at most that many packets. */
+        COVERAGE_INC(dpif_netdev_recirc_big_batch);
+        batch_cnt = dp_packet_batch_size(packets);
+        dp_packet_batch_init(&smaller_batch);
+        do {
+            size_t count = MIN(batch_cnt - sent, NETDEV_MAX_BURST);
+
+            /* Reuse 'smaller_batch' for each chunk: reset its count and
+             * carry over the batch-level truncation flag. */
+            smaller_batch.trunc = packets->trunc;
+            smaller_batch.count = 0;
+            dp_packet_batch_add_array(&smaller_batch, &packets->packets[sent],
+                                      count);
+            dp_netdev_input__(pmd, &smaller_batch, true, 0);
+            sent += count;
+        } while (sent < batch_cnt);
+        return;
+    }
+
     dp_netdev_input__(pmd, packets, true, 0);
 }
 
@@ -8378,17 +8404,32 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread 
*pmd,
     }
 }
 
+/* Moves up to 'count' packets from 'packets' into 'p''s pending output
+ * batch, recording the current rx queue (pmd->ctx.last_rxq) for each of
+ * them.  Returns the number of packets actually queued; this is less than
+ * 'count' when the output batch has fewer free slots, in which case the
+ * caller is expected to flush the port and retry with the remainder. */
+static size_t
+dp_execute_output_chunk(struct dp_netdev_pmd_thread *pmd, struct tx_port *p,
+                        struct dp_packet **packets, size_t count)
+{
+    /* Clamp to the free room in the output batch so that the
+     * dp_packet_batch_add_array() call below never drops packets. */
+    count = MIN(count, NETDEV_MAX_BURST - p->output_pkts.count);
+
+    for (unsigned i = 0; i < count; i++) {
+        p->output_pkts_rxqs[p->output_pkts.count + i] = pmd->ctx.last_rxq;
+    }
+    dp_packet_batch_add_array(&p->output_pkts, packets, count);
+
+    return count;
+}
+
 static bool
 dp_execute_output_action(struct dp_netdev_pmd_thread *pmd,
                          struct dp_packet_batch *packets_,
                          bool should_steal, odp_port_t port_no)
 {
     struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no);
+    size_t batch_cnt = dp_packet_batch_size(packets_);
     struct dp_packet_batch out;
+    size_t sent;
 
     if (!OVS_LIKELY(p)) {
-        COVERAGE_ADD(datapath_drop_invalid_port,
-                     dp_packet_batch_size(packets_));
+        COVERAGE_ADD(datapath_drop_invalid_port, batch_cnt);
         dp_packet_delete_batch(packets_, should_steal);
         return false;
     }
@@ -8398,20 +8439,22 @@ dp_execute_output_action(struct dp_netdev_pmd_thread 
*pmd,
         packets_ = &out;
     }
     dp_packet_batch_apply_cutlen(packets_);
-    if (dp_packet_batch_size(&p->output_pkts)
-        + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
-        /* Flush here to avoid overflow. */
+    if (dp_packet_batch_size(&p->output_pkts) == NETDEV_MAX_BURST) {
         dp_netdev_pmd_flush_output_on_port(pmd, p);
     }
     if (dp_packet_batch_is_empty(&p->output_pkts)) {
         pmd->n_output_batches++;
     }
 
-    struct dp_packet *packet;
-    DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
-        p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
-            pmd->ctx.last_rxq;
-        dp_packet_batch_add(&p->output_pkts, packet);
+    sent = dp_execute_output_chunk(pmd, p, packets_->packets, batch_cnt);
+    if (OVS_UNLIKELY(sent < batch_cnt)) {
+        COVERAGE_INC(dpif_netdev_output_big_batch);
+        do {
+            dp_netdev_pmd_flush_output_on_port(pmd, p);
+            pmd->n_output_batches++;
+            sent += dp_execute_output_chunk(pmd, p, &packets_->packets[sent],
+                                            batch_cnt - sent);
+        } while (sent < batch_cnt);
     }
     return true;
 }
-- 
2.53.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to