On 4/15/2015 8:20 PM, Devesh Sharma wrote:
-----Original Message-----
From: [email protected] [mailto:[email protected]] On Behalf Of Or Gerlitz
Sent: Thursday, April 02, 2015 4:09 PM
To: Roland Dreier; Doug Ledford
Cc: [email protected]; Erez Shitrit; Tal Alon; Amir Vadai; Or Gerlitz
Subject: [PATCH for-next 3/6] IB/ipoib: Handle QP in SQE state

From: Erez Shitrit <[email protected]>

As a result of a completion error, the QP can be moved to the SQE state by the
hardware. Since this is not the Error state, there are no flush completions,
and hence the driver doesn't know about it.
As per the spec, a QP transition to SQE causes flush completions for subsequent
WQEs, but the description says otherwise. Am I missing something?

No you are not :). The description tries to say the following: the driver cannot distinguish between an IB_WC_WR_FLUSH_ERR that should be treated like IB_WC_SUCCESS and an IB_WC_WR_FLUSH_ERR that follows some other error which took the QP to the SQE state. The driver must recognize the first error that is not IB_WC_WR_FLUSH_ERR and handle it accordingly. For example, the driver may take the QP to the ERROR state as part of its normal life cycle (draining it, driver down, etc.); in those situations many IB_WC_WR_FLUSH_ERR completions are returned and there is no need to change the QP state, since the QP is already under the driver's control. That is not the case when some other error arrives. This is the intention of the patch: to bring the QP back to life after such (otherwise unhandled) cases.
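Put differently, the only decision the TX completion handler has to make is a
status check. Here is a minimal sketch of that classification (the helper name
is hypothetical; the IB_WC_* status codes and struct ib_wc come from
<rdma/ib_verbs.h>):

/*
 * Hypothetical helper: decide whether a TX completion carries an error
 * that may have pushed the send QP into the SQE state.
 *
 * IB_WC_SUCCESS      - normal completion, nothing to do.
 * IB_WC_WR_FLUSH_ERR - expected while the driver itself is draining the
 *                      QP (ERROR state on ifdown etc.), so it is not
 *                      treated as a new failure.
 * anything else      - the first "real" error; worth scheduling the QP
 *                      state check so an SQE QP can be moved back to RTS.
 */
static bool ipoib_wc_needs_qp_check(const struct ib_wc *wc)
{
	return wc->status != IB_WC_SUCCESS &&
	       wc->status != IB_WC_WR_FLUSH_ERR;
}

This is exactly the condition the second hunk below adds to
ipoib_ib_handle_tx_wc() before queueing the state-validation work.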

The fix queues a work item after a completion error that is not a flush; the
work item checks the QP state and, if it is in the SQE state, moves it back to RTS.

Signed-off-by: Erez Shitrit <[email protected]>
Signed-off-by: Or Gerlitz <[email protected]>
---
  drivers/infiniband/ulp/ipoib/ipoib.h    |    5 +++
  drivers/infiniband/ulp/ipoib/ipoib_ib.c |   59 ++++++++++++++++++++++++++++++-
  2 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 769044c..2703d9a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -299,6 +299,11 @@ struct ipoib_neigh_table {
        struct completion               deleted;
  };

+struct ipoib_qp_state_validate {
+       struct work_struct work;
+       struct ipoib_dev_priv   *priv;
+};
+
  /*
   * Device private locking: network stack tx_lock protects members used
   * in TX fast path, lock protects everything else.  lock nests inside
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 29b376d..63b92cb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -327,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
        }
  }

+/*
+ * As a result of a completion error, the QP can be transferred to the SQE
+ * state by the hardware. This function checks whether the (send) QP is in
+ * the SQE state and, if so, moves it back to RTS to make it functional again.
+ */
+static void ipoib_qp_state_validate_work(struct work_struct *work)
+{
+       struct ipoib_qp_state_validate *qp_work =
+               container_of(work, struct ipoib_qp_state_validate, work);
+
+       struct ipoib_dev_priv *priv = qp_work->priv;
+       struct ib_qp_attr qp_attr;
+       struct ib_qp_init_attr query_init_attr;
+       int ret;
+
+       ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+       if (ret) {
+               ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
+                          __func__, ret);
+               goto free_res;
+       }
+       pr_info("%s: QP: 0x%x is in state: %d\n",
+               __func__, priv->qp->qp_num, qp_attr.qp_state);
+
+       /* currently we support only the SQE->RTS transition */
+       if (qp_attr.qp_state == IB_QPS_SQE) {
+               qp_attr.qp_state = IB_QPS_RTS;
+
+               ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
+               if (ret) {
+                       pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
+                               ret, priv->qp->qp_num);
+                       goto free_res;
+               }
+               pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
+                       __func__, priv->qp->qp_num);
+       } else {
+               pr_warn("QP (%d) will stay in state: %d\n",
+                       priv->qp->qp_num, qp_attr.qp_state);
+       }
+
+free_res:
+       kfree(qp_work);
+}
+
  static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
  {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -358,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                netif_wake_queue(dev);

        if (wc->status != IB_WC_SUCCESS &&
-           wc->status != IB_WC_WR_FLUSH_ERR)
+           wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct ipoib_qp_state_validate *qp_work;
                ipoib_warn(priv, "failed send event "
                           "(status=%d, wrid=%d vend_err %x)\n",
                           wc->status, wr_id, wc->vendor_err);
+               qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
+               if (!qp_work) {
+                       ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
+                                  __func__, priv->qp->qp_num);
+                       return;
+               }
+
+               INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
+               qp_work->priv = priv;
+               queue_work(priv->wq, &qp_work->work);
+       }
  }

  static int poll_tx(struct ipoib_dev_priv *priv)
--
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html