In eDMA-backed mode (when using ntb_transport_edma), NTB transport can
scale throughput across multiple queue pairs without being constrained
by scarce BAR/memory window space used for data-plane buffers. It
contrasts with the default ntb_transport, where even with a single queue
pair, only up to 15 in-flight descriptors fit in a 1 MiB MW.

Teach ntb_netdev to allocate multiple ntb_transport queue pairs and
expose them as a multi-queue net_device.

With this patch, up to N queue pairs are created, where N is chosen as
follows:

  - By default, N is num_online_cpus(), to give each CPU its own queue.
  - If the ntb_num_queues module parameter is non-zero, it overrides the
    default and requests that many queues.
  - In both cases the requested value is capped at a fixed upper bound
    to avoid unbounded allocations, and by the number of queue pairs
    actually available from ntb_transport.

If only one queue pair can be created (or ntb_num_queues=1 is set), the
driver effectively falls back to the previous single-queue behavior.

Signed-off-by: Koichiro Den <[email protected]>
---
 drivers/net/ntb_netdev.c | 341 ++++++++++++++++++++++++++++-----------
 1 file changed, 243 insertions(+), 98 deletions(-)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index fbeae05817e9..fc300db66ef7 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -53,6 +53,8 @@
 #include <linux/pci.h>
 #include <linux/ntb.h>
 #include <linux/ntb_transport.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
 
 #define NTB_NETDEV_VER "0.7"
 
@@ -70,26 +72,84 @@ static unsigned int tx_start = 10;
 /* Number of descriptors still available before stop upper layer tx */
 static unsigned int tx_stop = 5;
 
+/*
+ * Upper bound on how many queue pairs we will try to create even if
+ * ntb_num_queues or num_online_cpus() is very large. This is an
+ * arbitrary safety cap to avoid unbounded allocations.
+ */
+#define NTB_NETDEV_MAX_QUEUES  64
+
+/*
+ * ntb_num_queues == 0 (default) means:
+ *   - use num_online_cpus() as the desired queue count, capped by
+ *     NTB_NETDEV_MAX_QUEUES.
+ * ntb_num_queues > 0:
+ *   - try to create exactly ntb_num_queues queue pairs (again capped
+ *     by NTB_NETDEV_MAX_QUEUES), but fall back to the number of queue
+ *     pairs actually available from ntb_transport.
+ */
+static unsigned int ntb_num_queues;
+module_param(ntb_num_queues, uint, 0644);
+MODULE_PARM_DESC(ntb_num_queues,
+                "Number of NTB netdev queue pairs to use (0 = per-CPU)");
+
+struct ntb_netdev;
+
+struct ntb_netdev_queue {
+       struct ntb_netdev *ntdev;
+       struct ntb_transport_qp *qp;
+       struct timer_list tx_timer;
+       u16 qid;
+};
+
 struct ntb_netdev {
        struct pci_dev *pdev;
        struct net_device *ndev;
-       struct ntb_transport_qp *qp;
-       struct timer_list tx_timer;
+       unsigned int num_queues;
+       struct ntb_netdev_queue *queues;
 };
 
 #define        NTB_TX_TIMEOUT_MS       1000
 #define        NTB_RXQ_SIZE            100
 
+static unsigned int ntb_netdev_default_queues(void)
+{
+       unsigned int n;
+
+       if (ntb_num_queues)
+               n = ntb_num_queues;
+       else
+               n = num_online_cpus();
+
+       if (!n)
+               n = 1;
+
+       if (n > NTB_NETDEV_MAX_QUEUES)
+               n = NTB_NETDEV_MAX_QUEUES;
+
+       return n;
+}
+
 static void ntb_netdev_event_handler(void *data, int link_is_up)
 {
-       struct net_device *ndev = data;
-       struct ntb_netdev *dev = netdev_priv(ndev);
+       struct ntb_netdev_queue *q = data;
+       struct ntb_netdev *dev = q->ntdev;
+       struct net_device *ndev = dev->ndev;
+       bool any_up = false;
+       unsigned int i;
 
-       netdev_dbg(ndev, "Event %x, Link %x\n", link_is_up,
-                  ntb_transport_link_query(dev->qp));
+       netdev_dbg(ndev, "Event %x, Link %x, qp %u\n", link_is_up,
+                  ntb_transport_link_query(q->qp), q->qid);
 
        if (link_is_up) {
-               if (ntb_transport_link_query(dev->qp))
+               for (i = 0; i < dev->num_queues; i++) {
+                       if (ntb_transport_link_query(dev->queues[i].qp)) {
+                               any_up = true;
+                               break;
+                       }
+               }
+
+               if (any_up)
                        netif_carrier_on(ndev);
        } else {
                netif_carrier_off(ndev);
@@ -99,7 +159,9 @@ static void ntb_netdev_event_handler(void *data, int 
link_is_up)
 static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
                                  void *data, int len)
 {
-       struct net_device *ndev = qp_data;
+       struct ntb_netdev_queue *q = qp_data;
+       struct ntb_netdev *dev = q->ntdev;
+       struct net_device *ndev = dev->ndev;
        struct sk_buff *skb;
        int rc;
 
@@ -135,7 +197,8 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
        }
 
 enqueue_again:
-       rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN);
+       rc = ntb_transport_rx_enqueue(q->qp, skb, skb->data,
+                                     ndev->mtu + ETH_HLEN);
        if (rc) {
                dev_kfree_skb_any(skb);
                ndev->stats.rx_errors++;
@@ -143,42 +206,37 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
        }
 }
 
-static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev,
-                                     struct ntb_transport_qp *qp, int size)
+static int ntb_netdev_maybe_stop_tx(struct ntb_netdev_queue *q, int size)
 {
-       struct ntb_netdev *dev = netdev_priv(netdev);
+       struct net_device *ndev = q->ntdev->ndev;
+
+       if (ntb_transport_tx_free_entry(q->qp) >= size)
+               return 0;
+
+       netif_stop_subqueue(ndev, q->qid);
 
-       netif_stop_queue(netdev);
        /* Make sure to see the latest value of ntb_transport_tx_free_entry()
         * since the queue was last started.
         */
        smp_mb();
 
-       if (likely(ntb_transport_tx_free_entry(qp) < size)) {
-               mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
+       if (likely(ntb_transport_tx_free_entry(q->qp) < size)) {
+               mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time));
                return -EBUSY;
        }
 
-       netif_start_queue(netdev);
-       return 0;
-}
-
-static int ntb_netdev_maybe_stop_tx(struct net_device *ndev,
-                                   struct ntb_transport_qp *qp, int size)
-{
-       if (netif_queue_stopped(ndev) ||
-           (ntb_transport_tx_free_entry(qp) >= size))
-               return 0;
+       netif_wake_subqueue(ndev, q->qid);
 
-       return __ntb_netdev_maybe_stop_tx(ndev, qp, size);
+       return 0;
 }
 
 static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
                                  void *data, int len)
 {
-       struct net_device *ndev = qp_data;
+       struct ntb_netdev_queue *q = qp_data;
+       struct ntb_netdev *dev = q->ntdev;
+       struct net_device *ndev = dev->ndev;
        struct sk_buff *skb;
-       struct ntb_netdev *dev = netdev_priv(ndev);
 
        skb = data;
        if (!skb || !ndev)
@@ -194,13 +252,12 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
 
        dev_kfree_skb_any(skb);
 
-       if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
+       if (ntb_transport_tx_free_entry(qp) >= tx_start) {
                /* Make sure anybody stopping the queue after this sees the new
                 * value of ntb_transport_tx_free_entry()
                 */
                smp_mb();
-               if (netif_queue_stopped(ndev))
-                       netif_wake_queue(ndev);
+               netif_wake_subqueue(ndev, q->qid);
        }
 }
 
@@ -208,16 +265,26 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
                                         struct net_device *ndev)
 {
        struct ntb_netdev *dev = netdev_priv(ndev);
+       u16 qid = skb_get_queue_mapping(skb);
+       struct ntb_netdev_queue *q;
        int rc;
 
-       ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+       if (unlikely(!dev->num_queues))
+               goto err;
+
+       if (unlikely(qid >= dev->num_queues))
+               qid = qid % dev->num_queues;
 
-       rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
+       q = &dev->queues[qid];
+
+       ntb_netdev_maybe_stop_tx(q, tx_stop);
+
+       rc = ntb_transport_tx_enqueue(q->qp, skb, skb->data, skb->len);
        if (rc)
                goto err;
 
        /* check for next submit */
-       ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+       ntb_netdev_maybe_stop_tx(q, tx_stop);
 
        return NETDEV_TX_OK;
 
@@ -229,80 +296,103 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
 
 static void ntb_netdev_tx_timer(struct timer_list *t)
 {
-       struct ntb_netdev *dev = timer_container_of(dev, t, tx_timer);
+       struct ntb_netdev_queue *q = container_of(t, struct ntb_netdev_queue, 
tx_timer);
+       struct ntb_netdev *dev = q->ntdev;
        struct net_device *ndev = dev->ndev;
 
-       if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
-               mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
+       if (ntb_transport_tx_free_entry(q->qp) < tx_stop) {
+               mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time));
        } else {
-               /* Make sure anybody stopping the queue after this sees the new
+               /*
+                * Make sure anybody stopping the queue after this sees the new
                 * value of ntb_transport_tx_free_entry()
                 */
                smp_mb();
-               if (netif_queue_stopped(ndev))
-                       netif_wake_queue(ndev);
+               netif_wake_subqueue(ndev, q->qid);
        }
 }
 
 static int ntb_netdev_open(struct net_device *ndev)
 {
        struct ntb_netdev *dev = netdev_priv(ndev);
+       struct ntb_netdev_queue *queue;
        struct sk_buff *skb;
-       int rc, i, len;
-
-       /* Add some empty rx bufs */
-       for (i = 0; i < NTB_RXQ_SIZE; i++) {
-               skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
-               if (!skb) {
-                       rc = -ENOMEM;
-                       goto err;
-               }
+       int rc = 0, i, len;
+       unsigned int q;
 
-               rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
-                                             ndev->mtu + ETH_HLEN);
-               if (rc) {
-                       dev_kfree_skb(skb);
-                       goto err;
+       /* Add some empty rx bufs for each queue */
+       for (q = 0; q < dev->num_queues; q++) {
+               queue = &dev->queues[q];
+
+               for (i = 0; i < NTB_RXQ_SIZE; i++) {
+                       skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
+                       if (!skb) {
+                               rc = -ENOMEM;
+                               goto err;
+                       }
+
+                       rc = ntb_transport_rx_enqueue(queue->qp, skb, skb->data,
+                                                     ndev->mtu + ETH_HLEN);
+                       if (rc) {
+                               dev_kfree_skb(skb);
+                               goto err;
+                       }
                }
-       }
 
-       timer_setup(&dev->tx_timer, ntb_netdev_tx_timer, 0);
+               timer_setup(&queue->tx_timer, ntb_netdev_tx_timer, 0);
+       }
 
        netif_carrier_off(ndev);
-       ntb_transport_link_up(dev->qp);
-       netif_start_queue(ndev);
+
+       for (q = 0; q < dev->num_queues; q++)
+               ntb_transport_link_up(dev->queues[q].qp);
+
+       netif_tx_start_all_queues(ndev);
 
        return 0;
 
 err:
-       while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
-               dev_kfree_skb(skb);
+       for (q = 0; q < dev->num_queues; q++) {
+               queue = &dev->queues[q];
+
+               while ((skb = ntb_transport_rx_remove(queue->qp, &len)))
+                       dev_kfree_skb(skb);
+       }
        return rc;
 }
 
 static int ntb_netdev_close(struct net_device *ndev)
 {
        struct ntb_netdev *dev = netdev_priv(ndev);
+       struct ntb_netdev_queue *queue;
        struct sk_buff *skb;
+       unsigned int q;
        int len;
 
-       ntb_transport_link_down(dev->qp);
+       netif_tx_stop_all_queues(ndev);
+
+       for (q = 0; q < dev->num_queues; q++) {
+               queue = &dev->queues[q];
 
-       while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
-               dev_kfree_skb(skb);
+               ntb_transport_link_down(queue->qp);
 
-       timer_delete_sync(&dev->tx_timer);
+               while ((skb = ntb_transport_rx_remove(queue->qp, &len)))
+                       dev_kfree_skb(skb);
 
+               timer_delete_sync(&queue->tx_timer);
+       }
        return 0;
 }
 
 static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
 {
        struct ntb_netdev *dev = netdev_priv(ndev);
+       struct ntb_netdev_queue *queue;
        struct sk_buff *skb;
-       int len, rc;
+       unsigned int q, i;
+       int len, rc = 0;
 
-       if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN)
+       if (new_mtu > ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN)
                return -EINVAL;
 
        if (!netif_running(ndev)) {
@@ -311,41 +401,54 @@ static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
        }
 
        /* Bring down the link and dispose of posted rx entries */
-       ntb_transport_link_down(dev->qp);
+       for (q = 0; q < dev->num_queues; q++)
+               ntb_transport_link_down(dev->queues[q].qp);
 
        if (ndev->mtu < new_mtu) {
-               int i;
-
-               for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++)
-                       dev_kfree_skb(skb);
+               for (q = 0; q < dev->num_queues; q++) {
+                       queue = &dev->queues[q];
 
-               for (; i; i--) {
-                       skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
-                       if (!skb) {
-                               rc = -ENOMEM;
-                               goto err;
-                       }
-
-                       rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
-                                                     new_mtu + ETH_HLEN);
-                       if (rc) {
+                       for (i = 0;
+                            (skb = ntb_transport_rx_remove(queue->qp, &len));
+                            i++)
                                dev_kfree_skb(skb);
-                               goto err;
+
+                       for (; i; i--) {
+                               skb = netdev_alloc_skb(ndev,
+                                                      new_mtu + ETH_HLEN);
+                               if (!skb) {
+                                       rc = -ENOMEM;
+                                       goto err;
+                               }
+
+                               rc = ntb_transport_rx_enqueue(queue->qp, skb,
+                                                             skb->data,
+                                                             new_mtu +
+                                                             ETH_HLEN);
+                               if (rc) {
+                                       dev_kfree_skb(skb);
+                                       goto err;
+                               }
                        }
                }
        }
 
        WRITE_ONCE(ndev->mtu, new_mtu);
 
-       ntb_transport_link_up(dev->qp);
+       for (q = 0; q < dev->num_queues; q++)
+               ntb_transport_link_up(dev->queues[q].qp);
 
        return 0;
 
 err:
-       ntb_transport_link_down(dev->qp);
+       for (q = 0; q < dev->num_queues; q++) {
+               struct ntb_netdev_queue *queue = &dev->queues[q];
+
+               ntb_transport_link_down(queue->qp);
 
-       while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
-               dev_kfree_skb(skb);
+               while ((skb = ntb_transport_rx_remove(queue->qp, &len)))
+                       dev_kfree_skb(skb);
+       }
 
        netdev_err(ndev, "Error changing MTU, device inoperable\n");
        return rc;
@@ -404,6 +507,7 @@ static int ntb_netdev_probe(struct device *client_dev)
        struct net_device *ndev;
        struct pci_dev *pdev;
        struct ntb_netdev *dev;
+       unsigned int q, desired_queues;
        int rc;
 
        ntb = dev_ntb(client_dev->parent);
@@ -411,7 +515,9 @@ static int ntb_netdev_probe(struct device *client_dev)
        if (!pdev)
                return -ENODEV;
 
-       ndev = alloc_etherdev(sizeof(*dev));
+       desired_queues = ntb_netdev_default_queues();
+
+       ndev = alloc_etherdev_mq(sizeof(*dev), desired_queues);
        if (!ndev)
                return -ENOMEM;
 
@@ -420,6 +526,15 @@ static int ntb_netdev_probe(struct device *client_dev)
        dev = netdev_priv(ndev);
        dev->ndev = ndev;
        dev->pdev = pdev;
+       dev->num_queues = 0;
+
+       dev->queues = kcalloc(desired_queues, sizeof(*dev->queues),
+                             GFP_KERNEL);
+       if (!dev->queues) {
+               rc = -ENOMEM;
+               goto err_free_netdev;
+       }
+
        ndev->features = NETIF_F_HIGHDMA;
 
        ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -436,26 +551,51 @@ static int ntb_netdev_probe(struct device *client_dev)
        ndev->min_mtu = 0;
        ndev->max_mtu = ETH_MAX_MTU;
 
-       dev->qp = ntb_transport_create_queue(ndev, client_dev,
-                                            &ntb_netdev_handlers);
-       if (!dev->qp) {
+       for (q = 0; q < desired_queues; q++) {
+               struct ntb_netdev_queue *queue = &dev->queues[q];
+
+               queue->ntdev = dev;
+               queue->qid = q;
+               queue->qp = ntb_transport_create_queue(queue, client_dev,
+                                                      &ntb_netdev_handlers);
+               if (!queue->qp)
+                       break;
+
+               dev->num_queues++;
+       }
+
+       if (!dev->num_queues) {
                rc = -EIO;
-               goto err;
+               goto err_free_queues;
        }
 
-       ndev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
+       rc = netif_set_real_num_tx_queues(ndev, dev->num_queues);
+       if (rc)
+               goto err_free_qps;
+
+       rc = netif_set_real_num_rx_queues(ndev, dev->num_queues);
+       if (rc)
+               goto err_free_qps;
+
+       ndev->mtu = ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN;
 
        rc = register_netdev(ndev);
        if (rc)
-               goto err1;
+               goto err_free_qps;
 
        dev_set_drvdata(client_dev, ndev);
-       dev_info(&pdev->dev, "%s created\n", ndev->name);
+       dev_info(&pdev->dev, "%s created with %u queue pairs\n",
+                ndev->name, dev->num_queues);
        return 0;
 
-err1:
-       ntb_transport_free_queue(dev->qp);
-err:
+err_free_qps:
+       for (q = 0; q < dev->num_queues; q++)
+               ntb_transport_free_queue(dev->queues[q].qp);
+
+err_free_queues:
+       kfree(dev->queues);
+
+err_free_netdev:
        free_netdev(ndev);
        return rc;
 }
@@ -464,9 +604,14 @@ static void ntb_netdev_remove(struct device *client_dev)
 {
        struct net_device *ndev = dev_get_drvdata(client_dev);
        struct ntb_netdev *dev = netdev_priv(ndev);
+       unsigned int q;
+
 
        unregister_netdev(ndev);
-       ntb_transport_free_queue(dev->qp);
+       for (q = 0; q < dev->num_queues; q++)
+               ntb_transport_free_queue(dev->queues[q].qp);
+
+       kfree(dev->queues);
        free_netdev(ndev);
 }
 
-- 
2.51.0


Reply via email to