Reposting - rediff against an updated trunk.
This patch does the following:

1. Split the QP spinlock into separate send and receive locks.
The only place where we have to hold both is in modify_qp,
and that is not on the data path.

2. Avoid taking any QP locks when polling CQ.

This last part is achieved by getting rid of the cur field in
mthca_wq, and instead calculating the number of outstanding WQEs by
comparing the last_comp and next fields: next is only updated by post,
while last_comp is only updated by poll.

In a rare case where an overrun is detected, a CQ is locked
and the overrun condition is re-tested, to avoid any potential
for stale last_comp values.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>


Index: hw/mthca/mthca_provider.h
===================================================================
--- hw/mthca/mthca_provider.h   (revision 1894)
+++ hw/mthca/mthca_provider.h   (working copy)
@@ -166,8 +166,8 @@ struct mthca_cq {
 };
 
 struct mthca_wq {
+       spinlock_t lock;
        int   max;
-       int   cur;
        int   next;
        int   last_comp;
        void *last;
@@ -180,7 +180,6 @@ struct mthca_wq {
 
 struct mthca_qp {
        struct ib_qp           ibqp;
-       spinlock_t             lock;
        atomic_t               refcount;
        u32                    qpn;
        int                    is_direct;
Index: hw/mthca/mthca_cq.c
===================================================================
--- hw/mthca/mthca_cq.c (revision 1894)
+++ hw/mthca/mthca_cq.c (working copy)
@@ -423,15 +423,6 @@ static inline int mthca_poll_one(struct 
        is_send  = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
 
        if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
-               if (*cur_qp) {
-                       if (*freed) {
-                               wmb();
-                               update_cons_index(dev, cq, *freed);
-                               *freed = 0;
-                       }
-                       spin_unlock(&(*cur_qp)->lock);
-               }
-
                /*
                 * We do not have to take the QP table lock here,
                 * because CQs will be locked while QPs are removed
@@ -446,8 +437,6 @@ static inline int mthca_poll_one(struct 
                        err = -EINVAL;
                        goto out;
                }
-
-               spin_lock(&(*cur_qp)->lock);
        }
 
        entry->qp_num = (*cur_qp)->qpn;
@@ -464,11 +453,6 @@ static inline int mthca_poll_one(struct 
                entry->wr_id = (*cur_qp)->wrid[wqe_index];
        }
 
-       if (wq->last_comp < wqe_index)
-               wq->cur -= wqe_index - wq->last_comp;
-       else
-               wq->cur -= wq->max - wq->last_comp + wqe_index;
-
        wq->last_comp = wqe_index;
 
        if (0)
@@ -551,9 +535,6 @@ int mthca_poll_cq(struct ib_cq *ibcq, in
                update_cons_index(dev, cq, freed);
        }
 
-       if (qp)
-               spin_unlock(&qp->lock);
-
        spin_unlock_irqrestore(&cq->lock, flags);
 
        return err == 0 || err == -EAGAIN ? npolled : err;
Index: hw/mthca/mthca_qp.c
===================================================================
--- hw/mthca/mthca_qp.c (revision 1894)
+++ hw/mthca/mthca_qp.c (working copy)
@@ -577,9 +577,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
                else
                        cur_state = attr->cur_qp_state;
        } else {
-               spin_lock_irq(&qp->lock);
+               spin_lock_irq(&qp->sq.lock);
+               spin_lock(&qp->rq.lock);
                cur_state = qp->state;
-               spin_unlock_irq(&qp->lock);
+               spin_unlock(&qp->rq.lock);
+               spin_unlock_irq(&qp->sq.lock);
        }
 
        if (attr_mask & IB_QP_STATE) {
@@ -1076,6 +1078,14 @@ static void mthca_free_memfree(struct mt
        }
 }
 
+static void mthca_wq_init(struct mthca_wq* wq)
+{
+       spin_lock_init(&wq->lock);
+       wq->next      = 0;
+       wq->last_comp = wq->max - 1;
+       wq->last      = NULL;
+}
+
 static int mthca_alloc_qp_common(struct mthca_dev *dev,
                                 struct mthca_pd *pd,
                                 struct mthca_cq *send_cq,
@@ -1087,20 +1097,13 @@ static int mthca_alloc_qp_common(struct 
        int ret;
        int i;
 
-       spin_lock_init(&qp->lock);
        atomic_set(&qp->refcount, 1);
        qp->state        = IB_QPS_RESET;
        qp->atomic_rd_en = 0;
        qp->resp_depth   = 0;
        qp->sq_policy    = send_policy;
-       qp->rq.cur       = 0;
-       qp->sq.cur       = 0;
-       qp->rq.next      = 0;
-       qp->sq.next      = 0;
-       qp->rq.last_comp = qp->rq.max - 1;
-       qp->sq.last_comp = qp->sq.max - 1;
-       qp->rq.last      = NULL;
-       qp->sq.last      = NULL;
+       mthca_wq_init(&qp->sq);
+       mthca_wq_init(&qp->rq);
 
        ret = mthca_alloc_memfree(dev, qp);
        if (ret)
@@ -1394,6 +1397,24 @@ static int build_mlx_header(struct mthca
        return 0;
 }
 
+static inline int mthca_wq_overflow(struct mthca_wq* wq, int nreq,
+        struct ib_cq* ib_cq)
+{
+       int cur;
+       struct mthca_cq* cq;
+
+       cur = (wq->next - wq->last_comp - 1) & (wq->max - 1);
+       if (likely(cur + nreq < wq->max))
+               return 0;
+
+       cq = to_mcq(ib_cq);
+       spin_lock(&cq->lock);
+       cur = (wq->next - wq->last_comp - 1) & (wq->max - 1);
+       spin_unlock(&cq->lock);
+
+       return cur + nreq >= wq->max;
+}
+
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                          struct ib_send_wr **bad_wr)
 {
@@ -1411,16 +1432,18 @@ int mthca_tavor_post_send(struct ib_qp *
        int ind;
        u8 op0 = 0;
 
-       spin_lock_irqsave(&qp->lock, flags);
+       spin_lock_irqsave(&qp->sq.lock, flags);
 
        /* XXX check that state is OK to post send */
 
        ind = qp->sq.next;
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (qp->sq.cur + nreq >= qp->sq.max) {
-                       mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
-                                 qp->sq.cur, qp->sq.max, nreq);
+               if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+                       mthca_err(dev, "SQ %06x full (%d next, %d last_polled,"
+                                       " %d max, %d nreq)\n", qp->qpn,
+                                       qp->sq.next, qp->sq.last_comp,
+                                       qp->sq.max, nreq);
                        err = -ENOMEM;
                        *bad_wr = wr;
                        goto out;
@@ -1591,10 +1614,9 @@ out:
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
        }
 
-       qp->sq.cur += nreq;
        qp->sq.next = ind;
 
-       spin_unlock_irqrestore(&qp->lock, flags);
+       spin_unlock_irqrestore(&qp->sq.lock, flags);
        return err;
 }
 
@@ -1613,15 +1635,18 @@ int mthca_tavor_post_receive(struct ib_q
        void *wqe;
        void *prev_wqe;
 
-       spin_lock_irqsave(&qp->lock, flags);
+       spin_lock_irqsave(&qp->rq.lock, flags);
 
        /* XXX check that state is OK to post receive */
 
        ind = qp->rq.next;
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (unlikely(qp->rq.cur + nreq >= qp->rq.max)) {
-                       mthca_err(dev, "RQ %06x full\n", qp->qpn);
+               if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+                       mthca_err(dev, "RQ %06x full (%d next, %d last_polled,"
+                                       " %d max, %d nreq)\n", qp->qpn,
+                                       qp->rq.next, qp->rq.last_comp,
+                                       qp->rq.max, nreq);
                        err = -ENOMEM;
                        *bad_wr = wr;
                        goto out;
@@ -1688,10 +1713,9 @@ out:
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
        }
 
-       qp->rq.cur += nreq;
        qp->rq.next = ind;
 
-       spin_unlock_irqrestore(&qp->lock, flags);
+       spin_unlock_irqrestore(&qp->rq.lock, flags);
        return err;
 }
 
@@ -1712,16 +1736,18 @@ int mthca_arbel_post_send(struct ib_qp *
        int ind;
        u8 op0 = 0;
 
-       spin_lock_irqsave(&qp->lock, flags);
+       spin_lock_irqsave(&qp->sq.lock, flags);
 
        /* XXX check that state is OK to post send */
 
        ind = qp->sq.next & (qp->sq.max - 1);
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (qp->sq.cur + nreq >= qp->sq.max) {
-                       mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
-                                 qp->sq.cur, qp->sq.max, nreq);
+               if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+                       mthca_err(dev, "SQ %06x full (%d next, %d last_polled,"
+                                       " %d max, %d nreq)\n", qp->qpn,
+                                       qp->sq.next, qp->sq.last_comp,
+                                       qp->sq.max, nreq);
                        err = -ENOMEM;
                        *bad_wr = wr;
                        goto out;
@@ -1835,7 +1861,6 @@ out:
                                          f0 | op0);
                doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
 
-               qp->sq.cur  += nreq;
                qp->sq.next += nreq;
 
                /*
@@ -1855,7 +1880,7 @@ out:
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
        }
 
-       spin_unlock_irqrestore(&qp->lock, flags);
+       spin_unlock_irqrestore(&qp->sq.lock, flags);
        return err;
 }
 
@@ -1871,15 +1896,18 @@ int mthca_arbel_post_receive(struct ib_q
        int i;
        void *wqe;
 
-       spin_lock_irqsave(&qp->lock, flags);
+       spin_lock_irqsave(&qp->rq.lock, flags);
 
        /* XXX check that state is OK to post receive */
 
        ind = qp->rq.next & (qp->rq.max - 1);
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (unlikely(qp->rq.cur + nreq >= qp->rq.max)) {
-                       mthca_err(dev, "RQ %06x full\n", qp->qpn);
+               if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+                       mthca_err(dev, "RQ %06x full (%d next, %d last_polled,"
+                                       " %d max, %d nreq)\n", qp->qpn,
+                                       qp->rq.next, qp->rq.last_comp,
+                                       qp->rq.max, nreq);
                        err = -ENOMEM;
                        *bad_wr = wr;
                        goto out;
@@ -1921,7 +1949,6 @@ int mthca_arbel_post_receive(struct ib_q
        }
 out:
        if (likely(nreq)) {
-               qp->rq.cur  += nreq;
                qp->rq.next += nreq;
 
                /*
@@ -1932,7 +1959,7 @@ out:
                *qp->rq.db = cpu_to_be32(qp->rq.next & 0xffff);
        }
 
-       spin_unlock_irqrestore(&qp->lock, flags);
+       spin_unlock_irqrestore(&qp->rq.lock, flags);
        return err;
 }
 

-- 
MST - Michael S. Tsirkin
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to