From: Waldemar Kozaczuk <[email protected]> Committer: WALDEMAR KOZACZUK <[email protected]> Branch: master
ena: improve driver implementation This last patch improves certain aspects of the driver implementation: - completes LRO handling - adds number of tracepoints to help trubleshoot and analaze performance - pins cleanup worker thread and corresponding MSIX vector to a cpu Signed-off-by: Waldemar Kozaczuk <[email protected]> --- diff --git a/bsd/sys/dev/ena/ena.cc b/bsd/sys/dev/ena/ena.cc --- a/bsd/sys/dev/ena/ena.cc +++ b/bsd/sys/dev/ena/ena.cc @@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$"); #include <osv/aligned_new.hh> #include <osv/msi.hh> #include <osv/sched.hh> +#include <osv/trace.hh> int ena_log_level = ENA_INFO; @@ -398,6 +399,8 @@ ena_free_all_io_rings_resources(struct ena_adapter *adapter) ena_free_io_ring_resources(adapter, i); } +TRACEPOINT(trace_ena_enqueue_wake, ""); + static void enqueue_work(ena_ring *ring) { @@ -406,6 +409,7 @@ enqueue_work(ena_ring *ring) ring->enqueue_pending = 0; if (!ring->enqueue_stop) { + trace_ena_enqueue_wake(); ena_deferred_mq_start(ring); } } while (!ring->enqueue_stop); @@ -941,6 +945,8 @@ ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } +TRACEPOINT(trace_ena_cleanup_wake, ""); + static void cleanup_work(ena_que *queue) { @@ -952,6 +958,7 @@ cleanup_work(ena_que *queue) if (!queue->cleanup_stop) { ena_log(dev, INFO, "cleanup_work: cleaning up queue %d", queue->id); ena_cleanup(queue); + trace_ena_cleanup_wake(); } } while (!queue->cleanup_stop); } @@ -1041,8 +1048,12 @@ ena_create_io_queues(struct ena_adapter *adapter) for (i = 0; i < adapter->num_io_queues; i++) { queue = &adapter->que[i]; + //We pin each cleanup worker thread and corresponding MSIX vector + //to one of the cpus (queue modulo #cpus) in order to minimize IPIs + int cpu = i % sched::cpus.size(); queue->cleanup_thread = sched::thread::make([queue] { cleanup_work(queue); }, - sched::thread::attr().name("ena_cleanup_queue_" + std::to_string(i))); + sched::thread::attr().name("ena_clean_que_" + std::to_string(i)).pin(sched::cpus[cpu])); + queue->cleanup_thread->set_priority(sched::thread::priority_infinity); queue->cleanup_stop = false; queue->cleanup_pending = 0; queue->cleanup_thread->start(); @@ -1067,17 +1078,13 @@ ena_create_io_queues(struct ena_adapter *adapter) * **********************************************************************/ -static std::atomic<u64> mgmt_intr_count = {0}; -static std::atomic<u64> io_intr_count = {0}; - /** * ena_intr_msix_mgmnt - MSIX Interrupt Handler for admin/async queue * @arg: interrupt number **/ static void ena_intr_msix_mgmnt(void *arg) { - mgmt_intr_count++; struct ena_adapter *adapter = (struct ena_adapter *)arg; ena_com_admin_q_comp_intr_handler(adapter->ena_dev); @@ -1096,7 +1103,6 @@ ena_handle_msix(void *arg) struct ena_adapter *adapter = queue->adapter; if_t ifp = adapter->ifp; - io_intr_count++; if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)) return 0; @@ -1220,7 +1226,7 @@ ena_request_io_irq(struct ena_adapter *adapter) } for (int entry = ENA_IO_IRQ_FIRST_IDX; entry < adapter->msix_vecs; entry++) { - auto idx = entry - 1; + auto idx = entry - ENA_IO_IRQ_FIRST_IDX; auto vec = assigned[idx]; auto queue = &adapter->que[idx]; if (!_msi.assign_isr(vec, [queue]() { ena_handle_msix(queue); })) { @@ -1239,7 +1245,16 @@ ena_request_io_irq(struct ena_adapter *adapter) //Save assigned msix vectors for (int entry = ENA_IO_IRQ_FIRST_IDX; entry < adapter->msix_vecs; entry++) { ena_irq *irq = &adapter->irq_tbl[entry]; - irq->mvector = assigned[entry - 1]; + auto idx = entry - ENA_IO_IRQ_FIRST_IDX; + auto vec = irq->mvector = assigned[idx]; + //In our case the worker threads are all pinned so we probably do not need + //to re-pin the interrupt vector + auto cpu = idx % sched::cpus.size(); + std::atomic_thread_fence(std::memory_order_seq_cst); + vec->set_affinity(sched::cpus[cpu]->arch.apic_id); + std::atomic_thread_fence(std::memory_order_seq_cst); + + ena_log(pdev, INFO, "pinned MSIX vector on queue %d - cpu %d\n", idx, cpu); } _msi.unmask_interrupts(assigned); @@ -1462,7 +1477,7 @@ ena_up(struct ena_adapter *adapter) } if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) - if_link_state_change(adapter->ifp, LINK_STATE_UP); + adapter->ifp->if_link_state = LINK_STATE_UP; rc = ena_up_complete(adapter); if (unlikely(rc != 0)) @@ -1564,6 +1579,10 @@ ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat) caps |= IFCAP_LRO | IFCAP_JUMBO_MTU; + ena_log(nullptr, INFO, + "device offloads (caps): TXCSUM=%d, TXCSUM_IPV6=%d, TSO4=%d, TSO6=%d, RXCSUM=%d, RXCSUM_IPV6=%d, LRO=1, JUMBO_MTU=1\n", + caps & IFCAP_TXCSUM, caps & IFCAP_TXCSUM_IPV6, caps & IFCAP_TSO4, caps & IFCAP_TSO6, caps & IFCAP_RXCSUM, caps & IFCAP_RXCSUM_IPV6); + return (caps); } @@ -1596,6 +1615,10 @@ ena_update_hwassist(struct ena_adapter *adapter) flags |= CSUM_TSO; adapter->ifp->if_hwassist = flags; + + ena_log(nullptr, INFO, + "ena_update_hwassist: CSUM_IP=%d, CSUM_UDP=%d, CSUM_TCP=%d, CSUM_UDP_IPV6=%d, CSUM_TCP_IPV6=%d, CSUM_TSO=%d\n", + flags & CSUM_IP, flags & CSUM_UDP, flags & CSUM_TCP, flags & CSUM_UDP_IPV6, flags & CSUM_TCP_IPV6, flags & CSUM_TSO); } static int @@ -1942,8 +1965,7 @@ check_for_rx_interrupt_queue(struct ena_adapter *adapter, if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) { ena_log(adapter->pdev, ERR, - "Potential MSIX issue on Rx side Queue = %d, mgmt_intr_count=%lu, io_intr_count=%lu. Reset the device", - rx_ring->qid, mgmt_intr_count.load(), io_intr_count.load()); + "Potential MSIX issue on Rx side Queue = %d. Reset the device", rx_ring->qid); ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT); return (EIO); } @@ -2186,15 +2208,14 @@ ena_timer_service(void *data) void ena_destroy_device(struct ena_adapter *adapter, bool graceful) { - if_t ifp = adapter->ifp; struct ena_com_dev *ena_dev = adapter->ena_dev; bool dev_up; if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) return; if (!graceful) - if_link_state_change(ifp, LINK_STATE_DOWN); + adapter->ifp->if_link_state = LINK_STATE_DOWN; ENA_TIMER_DRAIN(adapter); @@ -2266,7 +2287,6 @@ ena_restore_device(struct ena_adapter *adapter) { struct ena_com_dev_get_features_ctx get_feat_ctx; struct ena_com_dev *ena_dev = adapter->ena_dev; - if_t ifp = adapter->ifp; pci::device *dev = adapter->pdev; int wd_active; int rc; @@ -2294,7 +2314,7 @@ ena_restore_device(struct ena_adapter *adapter) ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); /* Make sure we don't have a race with AENQ Links state handler */ if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) - if_link_state_change(ifp, LINK_STATE_UP); + adapter->ifp->if_link_state = LINK_STATE_UP; rc = ena_enable_msix_and_set_admin_interrupts(adapter); if (rc != 0) { @@ -2653,10 +2673,10 @@ ena_update_on_link_change(void *adapter_data, ena_log(adapter->pdev, INFO, "link is UP"); ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter); if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter)) - if_link_state_change(ifp, LINK_STATE_UP); + ifp->if_link_state = LINK_STATE_UP; } else { ena_log(adapter->pdev, INFO, "link is DOWN"); - if_link_state_change(ifp, LINK_STATE_DOWN); + ifp->if_link_state = LINK_STATE_DOWN; ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter); } } diff --git a/bsd/sys/dev/ena/ena_datapath.cc b/bsd/sys/dev/ena/ena_datapath.cc --- a/bsd/sys/dev/ena/ena_datapath.cc +++ b/bsd/sys/dev/ena/ena_datapath.cc @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include "ena_datapath.h" #include <osv/sched.hh> +#include <osv/trace.hh> static inline void critical_enter() { sched::preempt_disable(); } static inline void critical_exit() { sched::preempt_enable(); } @@ -61,7 +62,7 @@ static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool); static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf); static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); -static void ena_start_xmit(struct ena_ring *); +static void ena_start_xmit(struct ena_ring *, bool); /********************************************************************* * Global functions @@ -119,7 +120,7 @@ ena_deferred_mq_start(struct ena_ring *tx_ring ) while (!buf_ring_empty(tx_ring->br) && tx_ring->running && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ENA_RING_MTX_LOCK(tx_ring); - ena_start_xmit(tx_ring); + ena_start_xmit(tx_ring, true); ENA_RING_MTX_UNLOCK(tx_ring); } } @@ -158,7 +159,7 @@ ena_mq_start(if_t ifp, struct mbuf *m) } if (is_br_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) { - ena_start_xmit(tx_ring); + ena_start_xmit(tx_ring, false); ENA_RING_MTX_UNLOCK(tx_ring); } else { tx_ring->enqueue_thread->wake_with([tx_ring] { tx_ring->enqueue_pending++; }); @@ -220,6 +221,8 @@ ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq, return (EFAULT); } +TRACEPOINT(trace_ena_tx_cleanup, "qid=%d pkts=%d", int, int); + /** * ena_tx_cleanup - clear sent packets and corresponding descriptors * @tx_ring: ring for which we want to clean packets @@ -293,6 +296,7 @@ ena_tx_cleanup(struct ena_ring *tx_ring) ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d", tx_ring->qid, work_done); + trace_ena_tx_cleanup(tx_ring->qid, work_done); /* If there is still something to commit update ring state */ if (likely(commit != ENA_TX_COMMIT)) { @@ -455,6 +459,11 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, } } +TRACEPOINT(trace_ena_rx_cleanup_packet, "qid=%d", int); +TRACEPOINT(trace_ena_rx_cleanup_fast_path, "qid=%d", int); +TRACEPOINT(trace_ena_rx_cleanup_lro, "qid=%d", int); +TRACEPOINT(trace_ena_rx_cleanup_refill, "qid=%d refill=%u", int, uint32_t); + /** * ena_rx_cleanup - handle rx irq * @arg: ring for which irq is being handled @@ -530,6 +539,7 @@ ena_rx_cleanup(struct ena_ring *rx_ring) } break; } + trace_ena_rx_cleanup_packet(rx_ring->qid); if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) || ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) { @@ -557,15 +567,19 @@ ena_rx_cleanup(struct ena_ring *rx_ring) * - lro enqueue fails */ if ((rx_ring->lro.lro_cnt != 0) && - (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) + (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) { do_if_input = 0; + trace_ena_rx_cleanup_lro(rx_ring->qid); + } } if (do_if_input != 0) { ena_log_io(adapter->pdev, DBG, "calling if_input() with mbuf %p", mbuf); bool fast_path = ifp->if_classifier.post_packet(mbuf); if (!fast_path) { (*ifp->if_input)(ifp, mbuf); + } else { + trace_ena_rx_cleanup_fast_path(rx_ring->qid); } } @@ -584,12 +598,18 @@ ena_rx_cleanup(struct ena_ring *rx_ring) if (refill_required > refill_threshold) { ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); + trace_ena_rx_cleanup_refill(rx_ring->qid, refill_required); ena_refill_rx_bufs(rx_ring, refill_required); } //TODO: Can wait? investigate https://github.com/freebsd/freebsd-src/commit/e936121d3140af047a498559493b9375a6ba6ba3 //to port it back //tcp_lro_flush_all(&rx_ring->lro); + while (!SLIST_EMPTY(&rx_ring->lro.lro_active)) { + auto *queued = SLIST_FIRST(&rx_ring->lro.lro_active); + SLIST_REMOVE_HEAD(&rx_ring->lro.lro_active, next); + tcp_lro_flush(&rx_ring->lro, queued); + } return (ENA_RX_BUDGET - budget); } @@ -903,8 +923,11 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) return (rc); } +TRACEPOINT(trace_ena_start_xmit, "qid=%d pkts=%d", int, int); +TRACEPOINT(trace_ena_start_xmit_deferred, "qid=%d pkts=%d", int, int); + static void -ena_start_xmit(struct ena_ring *tx_ring) +ena_start_xmit(struct ena_ring *tx_ring, bool deferred) { struct mbuf *mbuf; struct ena_adapter *adapter = tx_ring->adapter; @@ -955,6 +978,11 @@ ena_start_xmit(struct ena_ring *tx_ring) ena_log_io(adapter->pdev, INFO, "dequeued %d mbufs", mnum); + if (deferred) { + trace_ena_start_xmit_deferred(tx_ring->qid, mnum); + } else { + trace_ena_start_xmit(tx_ring->qid, mnum); + } if (likely(tx_ring->acum_pkts != 0)) { /* Trigger the dma engine */ -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/00000000000040ba67060eaf152e%40google.com.
