This patch implements NAPI for iopib. It is a draft implementation.
I would like your opinion on whether we need a module parameter
to control whether NAPI should be activated.
Also, peek_cq still needs to be implemented and called after
ib_req_notify_cq(), so we can tell whether netif_rx_schedule_prep()
needs to be called again.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---

Index: openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- openib-1.1-rc6.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c       
2006-09-21 16:30:35.000000000 +0300
+++ openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_main.c    2006-09-21 
16:30:42.000000000 +0300
@@ -69,6 +69,8 @@
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 #endif
 
+static const int poll_def_weight = 64;
+
 struct ipoib_path_iter {
        struct net_device *dev;
        struct ipoib_path  path;
@@ -91,6 +93,9 @@
        .remove = ipoib_remove_one
 };
 
+
+int ipoib_poll(struct net_device *dev, int *budget);
+
 int ipoib_open(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -689,6 +694,7 @@
                        goto out;
                }
 
+
                if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                        spin_lock(&priv->lock);
                        __skb_queue_tail(&neigh->queue, skb);
@@ -892,6 +898,7 @@
 
        /* Delete any child interfaces first */
        list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+               netif_poll_disable(priv->dev);
                unregister_netdev(cpriv->dev);
                ipoib_dev_cleanup(cpriv->dev);
                free_netdev(cpriv->dev);
@@ -919,6 +926,8 @@
        dev->hard_header         = ipoib_hard_header;
        dev->set_multicast_list  = ipoib_set_mcast_list;
        dev->neigh_setup         = ipoib_neigh_setup_dev;
+       dev->poll                = ipoib_poll;
+       dev->weight              = poll_def_weight;
 
        dev->watchdog_timeo      = HZ;
 
@@ -1097,6 +1106,8 @@
                goto register_failed;
        }
 
+       netif_poll_enable(priv->dev);
+
        ipoib_create_debug_files(priv->dev);
 
        if (ipoib_add_pkey_attr(priv->dev))
@@ -1111,6 +1122,7 @@
        return priv->dev;
 
 sysfs_failed:
+       netif_poll_disable(priv->dev);
        ipoib_delete_debug_files(priv->dev);
        unregister_netdev(priv->dev);
 
@@ -1168,6 +1180,7 @@
        dev_list = ib_get_client_data(device, &ipoib_client);
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
+               netif_poll_disable(priv->dev);
                ib_unregister_event_handler(&priv->event_handler);
                flush_scheduled_work();
 
Index: openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- openib-1.1-rc6.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2006-09-21 
16:30:38.000000000 +0300
+++ openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_ib.c      2006-09-21 
17:24:59.000000000 +0300
@@ -169,7 +169,7 @@
        return 0;
 }
 
-static void ipoib_ib_handle_wc(struct net_device *dev,
+static void ipoib_ib_handle_rwc(struct net_device *dev,
                               struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -178,122 +178,186 @@
        ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
                       wr_id, wc->opcode, wc->status);
 
-       if (wr_id & IPOIB_OP_RECV) {
-               wr_id &= ~IPOIB_OP_RECV;
-
-               if (wr_id < ipoib_recvq_size) {
-                       struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
-                       dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
-
-                       if (unlikely(wc->status != IB_WC_SUCCESS)) {
-                               if (wc->status != IB_WC_WR_FLUSH_ERR)
-                                       ipoib_warn(priv, "failed recv event "
-                                                  "(status=%d, wrid=%d 
vend_err %x)\n",
-                                                  wc->status, wr_id, 
wc->vendor_err);
-                               dma_unmap_single(priv->ca->dma_device, addr,
-                                                IPOIB_BUF_SIZE, 
DMA_FROM_DEVICE);
-                               dev_kfree_skb_any(skb);
-                               priv->rx_ring[wr_id].skb = NULL;
-                               return;
-                       }
-
-                       /*
-                        * If we can't allocate a new RX buffer, dump
-                        * this packet and reuse the old buffer.
-                        */
-                       if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-                               ++priv->stats.rx_dropped;
-                               goto repost;
-                       }
-
-                       ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-                                      wc->byte_len, wc->slid);
+       wr_id &= ~IPOIB_OP_RECV;
 
+       if (wr_id < ipoib_recvq_size) {
+               struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
+               dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
+
+               if (unlikely(wc->status != IB_WC_SUCCESS)) {
+                       if (wc->status != IB_WC_WR_FLUSH_ERR)
+                               ipoib_warn(priv, "failed recv event "
+                                          "(status=%d, wrid=%d vend_err %x)\n",
+                                          wc->status, wr_id, wc->vendor_err);
                        dma_unmap_single(priv->ca->dma_device, addr,
                                         IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+                       dev_kfree_skb_any(skb);
+                       priv->rx_ring[wr_id].skb = NULL;
+                       return;
+               }
 
-                       skb_put(skb, wc->byte_len);
-                       skb_pull(skb, IB_GRH_BYTES);
+               /*
+                * If we can't allocate a new RX buffer, dump
+                * this packet and reuse the old buffer.
+                */
+               if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+                       ++priv->stats.rx_dropped;
+                       goto repost;
+               }
 
-                       if (wc->slid != priv->local_lid ||
-                           wc->src_qp != priv->qp->qp_num) {
-                               skb->protocol = ((struct ipoib_header *) 
skb->data)->proto;
-                               skb->mac.raw = skb->data;
-                               skb_pull(skb, IPOIB_ENCAP_LEN);
-
-                               dev->last_rx = jiffies;
-                               ++priv->stats.rx_packets;
-                               priv->stats.rx_bytes += skb->len;
-
-                               skb->dev = dev;
-                               /* XXX get correct PACKET_ type here */
-                               skb->pkt_type = PACKET_HOST;
-                               netif_rx_ni(skb);
-                       } else {
-                               ipoib_dbg_data(priv, "dropping loopback 
packet\n");
-                               dev_kfree_skb_any(skb);
-                       }
+               ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+                              wc->byte_len, wc->slid);
 
-               repost:
-                       if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
-                               ipoib_warn(priv, "ipoib_ib_post_receive failed "
-                                          "for buf %d\n", wr_id);
-               } else
-                       ipoib_warn(priv, "completion event with wrid %d\n",
-                                  wr_id);
+               dma_unmap_single(priv->ca->dma_device, addr,
+                                IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 
-       } else {
-               struct ipoib_tx_buf *tx_req;
-               unsigned long flags;
+               skb_put(skb, wc->byte_len);
+               skb_pull(skb, IB_GRH_BYTES);
 
-               if (wr_id >= ipoib_sendq_size) {
-                       ipoib_warn(priv, "completion event with wrid %d (> 
%d)\n",
-                                  wr_id, ipoib_sendq_size);
-                       return;
+               if (wc->slid != priv->local_lid ||
+                   wc->src_qp != priv->qp->qp_num) {
+                       skb->protocol = ((struct ipoib_header *) 
skb->data)->proto;
+                       skb->mac.raw = skb->data;
+                       skb_pull(skb, IPOIB_ENCAP_LEN);
+
+                       dev->last_rx = jiffies;
+                       ++priv->stats.rx_packets;
+                       priv->stats.rx_bytes += skb->len;
+
+                       skb->dev = dev;
+                       /* XXX get correct PACKET_ type here */
+                       skb->pkt_type = PACKET_HOST;
+                       netif_receive_skb(skb);
+               } else {
+                       ipoib_dbg_data(priv, "dropping loopback packet\n");
+                       dev_kfree_skb_any(skb);
                }
 
-               ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+       repost:
+               if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+                       ipoib_warn(priv, "ipoib_ib_post_receive failed "
+                                  "for buf %d\n", wr_id);
+       } else
+               ipoib_warn(priv, "completion event with wrid %d\n",
+                          wr_id);
 
-               tx_req = &priv->tx_ring[wr_id];
+}
 
-               dma_unmap_single(priv->ca->dma_device,
-                                pci_unmap_addr(tx_req, mapping),
-                                tx_req->skb->len,
-                                DMA_TO_DEVICE);
 
-               ++priv->stats.tx_packets;
-               priv->stats.tx_bytes += tx_req->skb->len;
+static void ipoib_ib_handle_swc(struct net_device *dev,
+                              struct ib_wc *wc)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned int wr_id = wc->wr_id;
+       struct ipoib_tx_buf *tx_req;
+       unsigned long flags;
 
-               dev_kfree_skb_any(tx_req->skb);
+       ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+                      wr_id, wc->opcode, wc->status);
 
-               spin_lock_irqsave(&priv->tx_lock, flags);
-               ++priv->tx_tail;
-               if (netif_queue_stopped(dev) &&
-                   test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
-                   priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-                       netif_wake_queue(dev);
-               spin_unlock_irqrestore(&priv->tx_lock, flags);
-
-               if (wc->status != IB_WC_SUCCESS &&
-                   wc->status != IB_WC_WR_FLUSH_ERR)
-                       ipoib_warn(priv, "failed send event "
-                                  "(status=%d, wrid=%d vend_err %x)\n",
-                                  wc->status, wr_id, wc->vendor_err);
+       if (wr_id >= ipoib_sendq_size) {
+               ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
+                          wr_id, ipoib_sendq_size);
+               return;
        }
+
+       ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+
+       tx_req = &priv->tx_ring[wr_id];
+
+       dma_unmap_single(priv->ca->dma_device,
+                        pci_unmap_addr(tx_req, mapping),
+                        tx_req->skb->len,
+                        DMA_TO_DEVICE);
+
+       ++priv->stats.tx_packets;
+       priv->stats.tx_bytes += tx_req->skb->len;
+
+       dev_kfree_skb_any(tx_req->skb);
+
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       ++priv->tx_tail;
+       if (netif_queue_stopped(dev) &&
+           test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
+           priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+               netif_wake_queue(dev);
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+       if (wc->status != IB_WC_SUCCESS &&
+           wc->status != IB_WC_WR_FLUSH_ERR)
+               ipoib_warn(priv, "failed send event "
+                          "(status=%d, wrid=%d vend_err %x)\n",
+                          wc->status, wr_id, wc->vendor_err);
 }
 
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+static inline int is_rx_comp(struct ib_wc *wc)
+{
+       unsigned int wr_id = wc->wr_id;
+
+       if (wr_id & IPOIB_OP_RECV)
+               return 1;
+
+       return 0;
+}
+
+int ipoib_poll(struct net_device *dev, int *budget)
 {
-       struct net_device *dev = (struct net_device *) dev_ptr;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int n, i;
+       struct ib_cq *cq = priv->cq;
+       int quota = dev->quota;
+       int wc;
+       int rx = 0;
+       int tx = 0;
 
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
        do {
-               n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
-               for (i = 0; i < n; ++i)
-                       ipoib_ib_handle_wc(dev, priv->ibwc + i);
-       } while (n == IPOIB_NUM_WC);
+               wc = min_t(int, quota, IPOIB_NUM_WC);
+               n = ib_poll_cq(cq, wc, priv->ibwc);
+               for (i = 0; i < n; ++i) {
+                       if (is_rx_comp(priv->ibwc + i)) {
+                               ++rx;
+                               --quota;
+                               ipoib_ib_handle_rwc(dev, priv->ibwc + i);
+                       }
+                       else {
+                               ++tx;
+                               ipoib_ib_handle_swc(dev, priv->ibwc + i);
+                       }
+
+                       if (unlikely(quota <= 0))
+                               goto not_done;
+               }
+       } while (n == wc);
+
+       if (rx || tx)
+               goto not_done;
+
+
+       netif_rx_complete(dev);
+       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+       /* TODO: we need peek_cq here for HW devices that
+          would not generate interrupts for completions
+          arriving between the end of polling and request notify */
+
+       return 0;
+
+not_done:
+       *budget -= rx;
+       dev->quota = quota;
+       return 1;
+}
+
+void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+{
+       struct net_device *dev = (struct net_device *) dev_ptr;
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       /* tell the network layer we have packets */
+        if (netif_rx_schedule_prep(dev))
+               __netif_rx_schedule(dev);
+       else {
+               ipoib_warn(priv, "received interrupt while in polling\n");
+       }
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,



_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to