Hi Ilya,

This series needs to be rebased.  A few comments below.

>While processing incoming batch of packets they are scattered across many
>per-flow batches and sent separately.
>
>This becomes an issue while using more than a few flows.
>
>For example if we have balanced-tcp OvS bonding with 2 ports there will be
>256 datapath internal flows for each dp_hash pattern. This will lead to
>scattering of a single recieved batch across all of that 256 per-flow batches 
>and
>invoking send for each packet separately. This behaviour greatly degrades
>overall performance of netdev_send because of inability to use advantages of
>vectorized transmit functions.
>But the half (if 2 ports in bonding) of datapath flows will have the same 
>output
>actions. This means that we can collect them in a single place back and send at
>once using single call to netdev_send. This patch introduces per-port packet
>batch for output packets for that purpose.
>
>'output_pkts' batch is thread local and located in send port cache.
>
>Signed-off-by: Ilya Maximets <[email protected]>
>---
> lib/dpif-netdev.c | 104
>++++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 82 insertions(+), 22 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e2cd931..a2a25be
>100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -502,6 +502,7 @@ struct tx_port {
>     int qid;
>     long long last_used;
>     struct hmap_node node;
>+    struct dp_packet_batch output_pkts;
> };
>
> /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>@@ -633,9 +634,10 @@ static void dp_netdev_execute_actions(struct
>dp_netdev_pmd_thread *pmd,
>                                       size_t actions_len,
>                                       long long now);  static void 
> dp_netdev_input(struct
>dp_netdev_pmd_thread *,
>-                            struct dp_packet_batch *, odp_port_t port_no);
>+                            struct dp_packet_batch *, odp_port_t port_no,
>+                            long long now);
> static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
>-                                  struct dp_packet_batch *);
>+                                  struct dp_packet_batch *, long long
>+ now);
>
> static void dp_netdev_disable_upcall(struct dp_netdev *);  static void
>dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd); @@ -
>667,6 +669,9 @@ static void dp_netdev_add_rxq_to_pmd(struct
>dp_netdev_pmd_thread *pmd,  static void
>dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
>                                        struct rxq_poll *poll)
>     OVS_REQUIRES(pmd->port_mutex);
>+static void
>+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread
>*pmd,
>+                                   long long now);
> static void reconfigure_datapath(struct dp_netdev *dp)
>     OVS_REQUIRES(dp->port_mutex);
> static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>@@ -2809,6 +2814,7 @@ dpif_netdev_execute(struct dpif *dpif, struct
>dpif_execute *execute)
>     struct dp_netdev *dp = get_dp_netdev(dpif);
>     struct dp_netdev_pmd_thread *pmd;
>     struct dp_packet_batch pp;
>+    long long now = time_msec();

[BHANU] The call to time_msec() can be moved a little further down in this
function, perhaps after the 'probe' check.

>
>     if (dp_packet_size(execute->packet) < ETH_HEADER_LEN ||
>         dp_packet_size(execute->packet) > UINT16_MAX) { @@ -2851,8 +2857,8
>@@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
>
>     dp_packet_batch_init_packet(&pp, execute->packet);
>     dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
>-                              execute->actions, execute->actions_len,
>-                              time_msec());
>+                              execute->actions, execute->actions_len, now);
>+    dp_netdev_pmd_flush_output_packets(pmd, now);

[BHANU] Is this code path mostly run in non-PMD thread context? I can only
think of the BFD case, where all of the above runs in the monitoring
(non-PMD) thread context.

>
>     if (pmd->core_id == NON_PMD_CORE_ID) {
>         ovs_mutex_unlock(&dp->non_pmd_mutex);
>@@ -3101,6 +3107,37 @@ cycles_count_intermediate(struct
>dp_netdev_pmd_thread *pmd,
>     non_atomic_ullong_add(&pmd->cycles.n[type], interval);  }
>
>+static void
>+dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread
>*pmd,
>+                                   struct tx_port *p, long long now) {
>+    int tx_qid;
>+    bool dynamic_txqs;
>+
>+    dynamic_txqs = p->port->dynamic_txqs;
>+    if (dynamic_txqs) {
>+        tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
>+    } else {
>+        tx_qid = pmd->static_tx_qid;
>+    }
>+
>+    netdev_send(p->port->netdev, tx_qid, &p->output_pkts, true,
>dynamic_txqs);
>+    dp_packet_batch_init(&p->output_pkts);
>+}
>+
>+static void
>+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread
>*pmd,
>+                                   long long now) {
>+    struct tx_port *p;
>+
>+    HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
>+        if (!dp_packet_batch_is_empty(&p->output_pkts)) {
>+            dp_netdev_pmd_flush_output_on_port(pmd, p, now);
>+        }
>+    }
>+}
>+
> static int
> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>                            struct netdev_rxq *rx, @@ -3113,10 +3150,13 @@
>dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>     dp_packet_batch_init(&batch);
>     error = netdev_rxq_recv(rx, &batch);
>     if (!error) {
>+        long long now = time_msec();
>+
>         *recirc_depth_get() = 0;
>
>         batch_cnt = batch.count;
>-        dp_netdev_input(pmd, &batch, port_no);
>+        dp_netdev_input(pmd, &batch, port_no, now);
>+        dp_netdev_pmd_flush_output_packets(pmd, now);
>     } else if (error != EAGAIN && error != EOPNOTSUPP) {
>         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>
>@@ -4481,6 +4521,7 @@ dp_netdev_add_port_tx_to_pmd(struct
>dp_netdev_pmd_thread *pmd,
>
>     tx->port = port;
>     tx->qid = -1;
>+    dp_packet_batch_init(&tx->output_pkts);
>
>     hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port-
>>port_no));
>     pmd->need_reload = true;
>@@ -4901,7 +4942,8 @@ fast_path_processing(struct
>dp_netdev_pmd_thread *pmd,  static void  dp_netdev_input__(struct
>dp_netdev_pmd_thread *pmd,
>                   struct dp_packet_batch *packets,
>-                  bool md_is_valid, odp_port_t port_no)
>+                  bool md_is_valid, odp_port_t port_no,
>+                  long long now)
> {
>     int cnt = packets->count;
> #if !defined(__CHECKER__) && !defined(_WIN32) @@ -4913,7 +4955,6 @@
>dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
>     OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
>         struct netdev_flow_key keys[PKT_ARRAY_SIZE];
>     struct packet_batch_per_flow batches[PKT_ARRAY_SIZE];
>-    long long now = time_msec();
>     size_t n_batches;
>     odp_port_t in_port;
>
>@@ -4949,16 +4990,16 @@ dp_netdev_input__(struct
>dp_netdev_pmd_thread *pmd,  static void  dp_netdev_input(struct
>dp_netdev_pmd_thread *pmd,
>                 struct dp_packet_batch *packets,
>-                odp_port_t port_no)
>+                odp_port_t port_no, long long now)
> {
>-    dp_netdev_input__(pmd, packets, false, port_no);
>+    dp_netdev_input__(pmd, packets, false, port_no, now);
> }
>
> static void
> dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
>-                      struct dp_packet_batch *packets)
>+                      struct dp_packet_batch *packets, long long now)
> {
>-    dp_netdev_input__(pmd, packets, true, 0);
>+    dp_netdev_input__(pmd, packets, true, 0, now);
> }
>
> struct dp_netdev_execute_aux {
>@@ -5136,18 +5177,37 @@ dp_execute_cb(void *aux_, struct
>dp_packet_batch *packets_,
>     case OVS_ACTION_ATTR_OUTPUT:
>         p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
>         if (OVS_LIKELY(p)) {
>-            int tx_qid;
>-            bool dynamic_txqs;
>+            struct dp_packet *packet;
>+            struct dp_packet_batch out;
>
>-            dynamic_txqs = p->port->dynamic_txqs;
>-            if (dynamic_txqs) {
>-                tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
>-            } else {
>-                tx_qid = pmd->static_tx_qid;
>+            if (!may_steal) {
>+                dp_packet_batch_clone(&out, packets_);
>+                dp_packet_batch_reset_cutlen(packets_);
>+                packets_ = &out;
>+            }

[BHANU] The above change seems to be independent of the rest and is more
about refactoring the code.  I see that the redundant
dp_packet_batch_reset_cutlen() in netdev_send() is removed in patch 2/4.

>+            dp_packet_batch_apply_cutlen(packets_);

[BHANU] Seems fine, as all the redundant calls are removed in patch 3/4.

>+
>+#ifdef DPDK_NETDEV
>+            if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
>+                             && packets_->packets[0]->source
>+                                != p->output_pkts.packets[0]->source)) {
>+                /* XXX: netdev-dpdk assumes that all packets in a single
>+                 *      outptut batch has the same source. Flush here to

[BHANU]  Typo with 'output'.

>+                 *      avoid memory access issues. */
>+                dp_netdev_pmd_flush_output_on_port(pmd, p, now);

[BHANU] When would we hit this case?

>+            }
>+#endif
>+
>+            if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
>+                       + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST)) {
>+                /* Some packets was generated while input batch processing.
>+                 * Flush here to avoid overflow. */
>+                dp_netdev_pmd_flush_output_on_port(pmd, p, now);
>             }
>
>-            netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
>-                        dynamic_txqs);
>+            DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
>+                dp_packet_batch_add(&p->output_pkts, packet);
>+            }
>             return;
>         }
>         break;
>@@ -5188,7 +5248,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
>*packets_,
>                 }
>
>                 (*depth)++;
>-                dp_netdev_recirculate(pmd, packets_);
>+                dp_netdev_recirculate(pmd, packets_, now);
>                 (*depth)--;
>                 return;
>             }
>@@ -5253,7 +5313,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
>*packets_,
>             }
>
>             (*depth)++;
>-            dp_netdev_recirculate(pmd, packets_);
>+            dp_netdev_recirculate(pmd, packets_, now);
>             (*depth)--;
>
>             return;
>--
>2.7.4

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to