From: Waldemar Kozaczuk <[email protected]>
Committer: WALDEMAR KOZACZUK <[email protected]>
Branch: master

ena: improve driver implementation

This last patch improves certain aspects of the driver implementation:
- completes LRO handling
- adds number of tracepoints to help trubleshoot and analaze performance
- pins cleanup worker thread and corresponding MSIX vector to a cpu

Signed-off-by: Waldemar Kozaczuk <[email protected]>

---
diff --git a/bsd/sys/dev/ena/ena.cc b/bsd/sys/dev/ena/ena.cc
--- a/bsd/sys/dev/ena/ena.cc
+++ b/bsd/sys/dev/ena/ena.cc
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
 #include <osv/aligned_new.hh>
 #include <osv/msi.hh>
 #include <osv/sched.hh>
+#include <osv/trace.hh>
 
 int ena_log_level = ENA_INFO;
 
@@ -398,6 +399,8 @@ ena_free_all_io_rings_resources(struct ena_adapter *adapter)
                ena_free_io_ring_resources(adapter, i);
 }
 
+TRACEPOINT(trace_ena_enqueue_wake, "");
+
 static void
 enqueue_work(ena_ring *ring)
 {
@@ -406,6 +409,7 @@ enqueue_work(ena_ring *ring)
                ring->enqueue_pending = 0;
 
                if (!ring->enqueue_stop) {
+                       trace_ena_enqueue_wake();
                        ena_deferred_mq_start(ring);
                }
        } while (!ring->enqueue_stop);
@@ -941,6 +945,8 @@ ena_destroy_all_io_queues(struct ena_adapter *adapter)
        ena_destroy_all_rx_queues(adapter);
 }
 
+TRACEPOINT(trace_ena_cleanup_wake, "");
+
 static void
 cleanup_work(ena_que *queue)
 {
@@ -952,6 +958,7 @@ cleanup_work(ena_que *queue)
                if (!queue->cleanup_stop) {
                        ena_log(dev, INFO, "cleanup_work: cleaning up queue 
%d", queue->id);
                        ena_cleanup(queue);
+                       trace_ena_cleanup_wake();
                }
        } while (!queue->cleanup_stop);
 }
@@ -1041,8 +1048,12 @@ ena_create_io_queues(struct ena_adapter *adapter)
        for (i = 0; i < adapter->num_io_queues; i++) {
                queue = &adapter->que[i];
 
+               //We pin each cleanup worker thread and corresponding MSIX 
vector
+               //to one of the cpus (queue modulo #cpus) in order to minimize 
IPIs
+               int cpu = i % sched::cpus.size();
                queue->cleanup_thread = sched::thread::make([queue] { 
cleanup_work(queue); },
-                       sched::thread::attr().name("ena_cleanup_queue_" + 
std::to_string(i)));
+                       sched::thread::attr().name("ena_clean_que_" + 
std::to_string(i)).pin(sched::cpus[cpu]));
+               
queue->cleanup_thread->set_priority(sched::thread::priority_infinity);
                queue->cleanup_stop = false;
                queue->cleanup_pending = 0;
                queue->cleanup_thread->start();
@@ -1067,17 +1078,13 @@ ena_create_io_queues(struct ena_adapter *adapter)
  *
  **********************************************************************/
 
-static std::atomic<u64> mgmt_intr_count = {0};
-static std::atomic<u64> io_intr_count = {0};
-
 /**
  * ena_intr_msix_mgmnt - MSIX Interrupt Handler for admin/async queue
  * @arg: interrupt number
  **/
 static void
 ena_intr_msix_mgmnt(void *arg)
 {
-       mgmt_intr_count++;
        struct ena_adapter *adapter = (struct ena_adapter *)arg;
 
        ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
@@ -1096,7 +1103,6 @@ ena_handle_msix(void *arg)
        struct ena_adapter *adapter = queue->adapter;
        if_t ifp = adapter->ifp;
 
-       io_intr_count++;
        if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
                return 0;
 
@@ -1220,7 +1226,7 @@ ena_request_io_irq(struct ena_adapter *adapter)
        }
 
        for (int entry = ENA_IO_IRQ_FIRST_IDX; entry < adapter->msix_vecs; 
entry++) {
-               auto idx = entry - 1;
+               auto idx = entry - ENA_IO_IRQ_FIRST_IDX;
                auto vec = assigned[idx];
                auto queue = &adapter->que[idx];
                if (!_msi.assign_isr(vec, [queue]() { ena_handle_msix(queue); 
})) {
@@ -1239,7 +1245,16 @@ ena_request_io_irq(struct ena_adapter *adapter)
        //Save assigned msix vectors
        for (int entry = ENA_IO_IRQ_FIRST_IDX; entry < adapter->msix_vecs; 
entry++) {
                ena_irq *irq = &adapter->irq_tbl[entry];
-               irq->mvector = assigned[entry - 1];
+               auto idx = entry - ENA_IO_IRQ_FIRST_IDX;
+               auto vec = irq->mvector = assigned[idx];
+               //In our case the worker threads are all pinned so we probably 
do not need
+               //to re-pin the interrupt vector
+               auto cpu = idx % sched::cpus.size();
+               std::atomic_thread_fence(std::memory_order_seq_cst);
+               vec->set_affinity(sched::cpus[cpu]->arch.apic_id);
+               std::atomic_thread_fence(std::memory_order_seq_cst);
+
+               ena_log(pdev, INFO, "pinned MSIX vector on queue %d - cpu 
%d\n", idx, cpu);
        }
 
        _msi.unmask_interrupts(assigned);
@@ -1462,7 +1477,7 @@ ena_up(struct ena_adapter *adapter)
        }
 
        if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
-               if_link_state_change(adapter->ifp, LINK_STATE_UP);
+               adapter->ifp->if_link_state = LINK_STATE_UP;
 
        rc = ena_up_complete(adapter);
        if (unlikely(rc != 0))
@@ -1564,6 +1579,10 @@ ena_get_dev_offloads(struct ena_com_dev_get_features_ctx 
*feat)
 
        caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
 
+       ena_log(nullptr, INFO,
+               "device offloads (caps): TXCSUM=%d, TXCSUM_IPV6=%d, TSO4=%d, 
TSO6=%d, RXCSUM=%d, RXCSUM_IPV6=%d, LRO=1, JUMBO_MTU=1\n",
+               caps & IFCAP_TXCSUM, caps & IFCAP_TXCSUM_IPV6, caps & 
IFCAP_TSO4, caps & IFCAP_TSO6, caps & IFCAP_RXCSUM, caps & IFCAP_RXCSUM_IPV6);
+
        return (caps);
 }
 
@@ -1596,6 +1615,10 @@ ena_update_hwassist(struct ena_adapter *adapter)
                flags |= CSUM_TSO;
 
        adapter->ifp->if_hwassist = flags;
+
+       ena_log(nullptr, INFO,
+               "ena_update_hwassist: CSUM_IP=%d, CSUM_UDP=%d, CSUM_TCP=%d, 
CSUM_UDP_IPV6=%d, CSUM_TCP_IPV6=%d, CSUM_TSO=%d\n",
+               flags & CSUM_IP, flags & CSUM_UDP, flags & CSUM_TCP, flags & 
CSUM_UDP_IPV6, flags & CSUM_TCP_IPV6, flags & CSUM_TSO);
 }
 
 static int
@@ -1942,8 +1965,7 @@ check_for_rx_interrupt_queue(struct ena_adapter *adapter,
        if (rx_ring->no_interrupt_event_cnt ==
            ENA_MAX_NO_INTERRUPT_ITERATIONS) {
                ena_log(adapter->pdev, ERR,
-                   "Potential MSIX issue on Rx side Queue = %d, 
mgmt_intr_count=%lu, io_intr_count=%lu. Reset the device",
-                   rx_ring->qid, mgmt_intr_count.load(), io_intr_count.load());
+                   "Potential MSIX issue on Rx side Queue = %d. Reset the 
device", rx_ring->qid);
                ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
                return (EIO);
        }
@@ -2186,15 +2208,14 @@ ena_timer_service(void *data)
 void
 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 {
-       if_t ifp = adapter->ifp;
        struct ena_com_dev *ena_dev = adapter->ena_dev;
        bool dev_up;
 
        if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
                return;
 
        if (!graceful)
-               if_link_state_change(ifp, LINK_STATE_DOWN);
+               adapter->ifp->if_link_state = LINK_STATE_DOWN;
 
        ENA_TIMER_DRAIN(adapter);
 
@@ -2266,7 +2287,6 @@ ena_restore_device(struct ena_adapter *adapter)
 {
        struct ena_com_dev_get_features_ctx get_feat_ctx;
        struct ena_com_dev *ena_dev = adapter->ena_dev;
-       if_t ifp = adapter->ifp;
        pci::device *dev = adapter->pdev;
        int wd_active;
        int rc;
@@ -2294,7 +2314,7 @@ ena_restore_device(struct ena_adapter *adapter)
        ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
        /* Make sure we don't have a race with AENQ Links state handler */
        if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
-               if_link_state_change(ifp, LINK_STATE_UP);
+               adapter->ifp->if_link_state = LINK_STATE_UP;
 
        rc = ena_enable_msix_and_set_admin_interrupts(adapter);
        if (rc != 0) {
@@ -2653,10 +2673,10 @@ ena_update_on_link_change(void *adapter_data,
                ena_log(adapter->pdev, INFO, "link is UP");
                ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
                if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
-                       if_link_state_change(ifp, LINK_STATE_UP);
+                       ifp->if_link_state = LINK_STATE_UP;
        } else {
                ena_log(adapter->pdev, INFO, "link is DOWN");
-               if_link_state_change(ifp, LINK_STATE_DOWN);
+               ifp->if_link_state = LINK_STATE_DOWN;
                ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
        }
 }
diff --git a/bsd/sys/dev/ena/ena_datapath.cc b/bsd/sys/dev/ena/ena_datapath.cc
--- a/bsd/sys/dev/ena/ena_datapath.cc
+++ b/bsd/sys/dev/ena/ena_datapath.cc
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include "ena_datapath.h"
 
 #include <osv/sched.hh>
+#include <osv/trace.hh>
 
 static inline void critical_enter()  { sched::preempt_disable(); }
 static inline void critical_exit() { sched::preempt_enable(); }
@@ -61,7 +62,7 @@ static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf 
*, bool);
 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
     struct mbuf **mbuf);
 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
-static void ena_start_xmit(struct ena_ring *);
+static void ena_start_xmit(struct ena_ring *, bool);
 
 /*********************************************************************
  *  Global functions
@@ -119,7 +120,7 @@ ena_deferred_mq_start(struct ena_ring *tx_ring )
        while (!buf_ring_empty(tx_ring->br) && tx_ring->running &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
                ENA_RING_MTX_LOCK(tx_ring);
-               ena_start_xmit(tx_ring);
+               ena_start_xmit(tx_ring, true);
                ENA_RING_MTX_UNLOCK(tx_ring);
        }
 }
@@ -158,7 +159,7 @@ ena_mq_start(if_t ifp, struct mbuf *m)
        }
 
        if (is_br_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
-               ena_start_xmit(tx_ring);
+               ena_start_xmit(tx_ring, false);
                ENA_RING_MTX_UNLOCK(tx_ring);
        } else {
                tx_ring->enqueue_thread->wake_with([tx_ring] { 
tx_ring->enqueue_pending++; });
@@ -220,6 +221,8 @@ ena_get_tx_req_id(struct ena_ring *tx_ring, struct 
ena_com_io_cq *io_cq,
        return (EFAULT);
 }
 
+TRACEPOINT(trace_ena_tx_cleanup, "qid=%d pkts=%d", int, int);
+
 /**
  * ena_tx_cleanup - clear sent packets and corresponding descriptors
  * @tx_ring: ring for which we want to clean packets
@@ -293,6 +296,7 @@ ena_tx_cleanup(struct ena_ring *tx_ring)
 
        ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d",
            tx_ring->qid, work_done);
+       trace_ena_tx_cleanup(tx_ring->qid, work_done);
 
        /* If there is still something to commit update ring state */
        if (likely(commit != ENA_TX_COMMIT)) {
@@ -455,6 +459,11 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct 
ena_com_rx_ctx *ena_rx_ctx,
        }
 }
 
+TRACEPOINT(trace_ena_rx_cleanup_packet, "qid=%d", int);
+TRACEPOINT(trace_ena_rx_cleanup_fast_path, "qid=%d", int);
+TRACEPOINT(trace_ena_rx_cleanup_lro, "qid=%d", int);
+TRACEPOINT(trace_ena_rx_cleanup_refill, "qid=%d refill=%u", int, uint32_t);
+
 /**
  * ena_rx_cleanup - handle rx irq
  * @arg: ring for which irq is being handled
@@ -530,6 +539,7 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
                        }
                        break;
                }
+               trace_ena_rx_cleanup_packet(rx_ring->qid);
 
                if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
                    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
@@ -557,15 +567,19 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
                         *  - lro enqueue fails
                         */
                        if ((rx_ring->lro.lro_cnt != 0) &&
-                           (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
+                           (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) {
                                do_if_input = 0;
+                               trace_ena_rx_cleanup_lro(rx_ring->qid);
+                       }
                }
                if (do_if_input != 0) {
                        ena_log_io(adapter->pdev, DBG,
                            "calling if_input() with mbuf %p", mbuf);
                        bool fast_path = ifp->if_classifier.post_packet(mbuf);
                        if (!fast_path) {
                                (*ifp->if_input)(ifp, mbuf);
+                       } else {
+                               trace_ena_rx_cleanup_fast_path(rx_ring->qid);
                        }
                }
 
@@ -584,12 +598,18 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
 
        if (refill_required > refill_threshold) {
                ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+               trace_ena_rx_cleanup_refill(rx_ring->qid, refill_required);
                ena_refill_rx_bufs(rx_ring, refill_required);
        }
 
         //TODO: Can wait? investigate 
https://github.com/freebsd/freebsd-src/commit/e936121d3140af047a498559493b9375a6ba6ba3
         //to port it back
        //tcp_lro_flush_all(&rx_ring->lro);
+       while (!SLIST_EMPTY(&rx_ring->lro.lro_active)) {
+               auto *queued = SLIST_FIRST(&rx_ring->lro.lro_active);
+               SLIST_REMOVE_HEAD(&rx_ring->lro.lro_active, next);
+               tcp_lro_flush(&rx_ring->lro, queued);
+        }
 
        return (ENA_RX_BUDGET - budget);
 }
@@ -903,8 +923,11 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
        return (rc);
 }
 
+TRACEPOINT(trace_ena_start_xmit, "qid=%d pkts=%d", int, int);
+TRACEPOINT(trace_ena_start_xmit_deferred, "qid=%d pkts=%d", int, int);
+
 static void
-ena_start_xmit(struct ena_ring *tx_ring)
+ena_start_xmit(struct ena_ring *tx_ring, bool deferred)
 {
        struct mbuf *mbuf;
        struct ena_adapter *adapter = tx_ring->adapter;
@@ -955,6 +978,11 @@ ena_start_xmit(struct ena_ring *tx_ring)
 
        ena_log_io(adapter->pdev, INFO,
                "dequeued %d mbufs", mnum);
+       if (deferred) {
+               trace_ena_start_xmit_deferred(tx_ring->qid, mnum);
+       } else {
+               trace_ena_start_xmit(tx_ring->qid, mnum);
+       }
 
        if (likely(tx_ring->acum_pkts != 0)) {
                /* Trigger the dma engine */

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/00000000000040ba67060eaf152e%40google.com.

Reply via email to