> -----Original Message----- > From: Erez Shitrit [mailto:[email protected]] > Sent: Thursday, April 16, 2015 12:14 PM > To: Devesh Sharma; Or Gerlitz; Roland Dreier; Doug Ledford > Cc: [email protected]; Erez Shitrit; Tal Alon; Amir Vadai > Subject: Re: [PATCH for-next 3/6] IB/ipoib: Handle QP in SQE state > > On 4/15/2015 8:20 PM, Devesh Sharma wrote: > >> -----Original Message----- > >> From: [email protected] [mailto:linux-rdma- > >> [email protected]] On Behalf Of Or Gerlitz > >> Sent: Thursday, April 02, 2015 4:09 PM > >> To: Roland Dreier; Doug Ledford > >> Cc: [email protected]; Erez Shitrit; Tal Alon; Amir Vadai; > >> Or Gerlitz > >> Subject: [PATCH for-next 3/6] IB/ipoib: Handle QP in SQE state > >> > >> From: Erez Shitrit <[email protected]> > >> > >> As the result of a completion error the QP can moved to SQE state by > >> the hardware. Since it's not the Error state, there are no flushes > >> and hence the driver doesn't know about that. > > As per spec, QP transition to SQE causes flush completion for subsequent > WQEs, the description is telling other way. Am I missing something? > > No you are not :) . the description tries to say the following: the driver > cannot > distinguish between IB_WC_WR_FLUSH_ERR that threated as IB_WC_SUCCESS > to IB_WC_WR_FLUSH_ERR that comes after other errors that take the QP to > SQE, The driver must recognize the first error that is not > IB_WC_WR_FLUSH_ERR and handle accordingly. > For example, the driver can take the QP to ERROR state as a part of its life > cycle (drain it, driver down, etc.) and at these situations many > IB_WC_WR_FLUSH_ERR return and no need to change the state of the QP, it is > already under the handling of the driver, which is not when other error comes. > this is the intention of that patch, to return the QP back to life after that > (un- > handed) cases.
Okay, makes sense to me. > > >> The fix creates a task that after completion with error which is not > >> a flush tracks the QP state and if it is in SQE state moves it back to RTS. > >> > >> Signed-off-by: Erez Shitrit <[email protected]> > >> Signed-off-by: Or Gerlitz <[email protected]> > >> --- > >> drivers/infiniband/ulp/ipoib/ipoib.h | 5 +++ > >> drivers/infiniband/ulp/ipoib/ipoib_ib.c | 59 > >> ++++++++++++++++++++++++++++++- > >> 2 files changed, 63 insertions(+), 1 deletions(-) > >> > >> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h > >> b/drivers/infiniband/ulp/ipoib/ipoib.h > >> index 769044c..2703d9a 100644 > >> --- a/drivers/infiniband/ulp/ipoib/ipoib.h > >> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > >> @@ -299,6 +299,11 @@ struct ipoib_neigh_table { > >> struct completion deleted; > >> }; > >> > >> +struct ipoib_qp_state_validate { > >> + struct work_struct work; > >> + struct ipoib_dev_priv *priv; > >> +}; > >> + > >> /* > >> * Device private locking: network stack tx_lock protects members used > >> * in TX fast path, lock protects everything else. lock nests > >> inside diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c > >> b/drivers/infiniband/ulp/ipoib/ipoib_ib.c > >> index 29b376d..63b92cb 100644 > >> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c > >> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c > >> @@ -327,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device > *ca, > >> } > >> } > >> > >> +/* > >> + * As the result of a completion error the QP Can be transferred to SQE > states. > >> + * The function checks if the (send)QP is in SQE state and > >> + * moves it back to RTS state, that in order to have it functional again. > >> + */ > >> +static void ipoib_qp_state_validate_work(struct work_struct *work) { > >> + struct ipoib_qp_state_validate *qp_work = > >> + container_of(work, struct ipoib_qp_state_validate, work); > >> + > >> + struct ipoib_dev_priv *priv = qp_work->priv; > >> + struct ib_qp_attr qp_attr; > >> + struct ib_qp_init_attr query_init_attr; > >> + int ret; > >> + > >> + ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr); > >> + if (ret) { > >> + ipoib_warn(priv, "%s: Failed to query QP ret: %d\n", > >> + __func__, ret); > >> + goto free_res; > >> + } > >> + pr_info("%s: QP: 0x%x is in state: %d\n", > >> + __func__, priv->qp->qp_num, qp_attr.qp_state); > >> + > >> + /* currently support only in SQE->RTS transition*/ > >> + if (qp_attr.qp_state == IB_QPS_SQE) { > >> + qp_attr.qp_state = IB_QPS_RTS; > >> + > >> + ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE); > >> + if (ret) { > >> + pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n", > >> + ret, priv->qp->qp_num); > >> + goto free_res; > >> + } > >> + pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to > >> IB_QPS_RTS\n", > >> + __func__, priv->qp->qp_num); > >> + } else { > >> + pr_warn("QP (%d) will stay in state: %d\n", > >> + priv->qp->qp_num, qp_attr.qp_state); > >> + } > >> + > >> +free_res: > >> + kfree(qp_work); > >> +} > >> + > >> static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc > *wc) { > >> struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -358,10 +403,22 > >> @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct > >> ib_wc > >> *wc) > >> netif_wake_queue(dev); > >> > >> if (wc->status != IB_WC_SUCCESS && > >> - wc->status != IB_WC_WR_FLUSH_ERR) > >> + wc->status != IB_WC_WR_FLUSH_ERR) { > >> + struct ipoib_qp_state_validate *qp_work; > >> ipoib_warn(priv, "failed send event " > >> "(status=%d, wrid=%d vend_err %x)\n", > >> wc->status, wr_id, wc->vendor_err); > >> + qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC); > >> + if (!qp_work) { > >> + ipoib_warn(priv, "%s Failed alloc > >> ipoib_qp_state_validate for qp: 0x%x\n", > >> + __func__, priv->qp->qp_num); > >> + return; > >> + } > >> + > >> + INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work); > >> + qp_work->priv = priv; > >> + queue_work(priv->wq, &qp_work->work); > >> + } > >> } > >> > >> static int poll_tx(struct ipoib_dev_priv *priv) > >> -- > >> 1.7.1 > >> > >> -- > >> To unsubscribe from this list: send the line "unsubscribe linux-rdma" > >> in the body of a message to [email protected] More majordomo > >> info at http://vger.kernel.org/majordomo-info.html > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-rdma" > > in the body of a message to [email protected] More majordomo > > info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
