Fix the following race conditions related to session unregistration: - If the memory subsystem does reorder the atomic accesses to the variables ch->state and ch->processing_compl it can happen that a new SCST command is issued for a session after unregistration of that session did start. - If a session was unregistered because a DREQ had been received it could happen that the cm_id was destroyed before a DREP had been sent.
Code changes: - Moved all code for IB disconnection into the function srpt_del_close_ch(). - Invoking scst_unregister_session() is now deferred until the last WQE event has been received. - Destroying the cm_id does now happen when the CM has reported that this is safe instead of when scst_unregister_session() finished. - Eliminated the variable ch->processing_compl. - Document the meaning of the channel states. - Fixed an incorrect comment. - Do not attempt to invoke ib_post_recv() after the QP has been reset. Signed-off-by: Bart Van Assche <[email protected]> --- drivers/scst/srpt/ib_srpt.c | 190 +++++++++++++++++++++++++++---------------- drivers/scst/srpt/ib_srpt.h | 14 +++- 2 files changed, 132 insertions(+), 72 deletions(-) diff --git a/drivers/scst/srpt/ib_srpt.c b/drivers/scst/srpt/ib_srpt.c index edd90f1..73b3fd4 100644 --- a/drivers/scst/srpt/ib_srpt.c +++ b/drivers/scst/srpt/ib_srpt.c @@ -143,7 +143,8 @@ static void srpt_remove_one(struct ib_device *device); static void srpt_unregister_mad_agent(struct srpt_device *sdev); static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx); -static void srpt_release_channel(struct scst_session *scst_sess); +static void srpt_release_channel(struct kref *obj); +static void srpt_release_channel_qp(struct scst_session *scst_sess); static struct ib_client srpt_client = { .name = DRV_NAME, @@ -263,6 +264,31 @@ static void srpt_srq_event(struct ib_event *event, void *ctx) } /** + * srpt_unregister_channel() - Unregister channel resources. + * + * Notes: + * - Must only be called after the channel has been removed from the channel + * list. + * - Invokes ib_destroy_cm_id(ch->cm_id) and kfree(ch) asynchronously. + */ +static void srpt_unregister_channel(struct srpt_rdma_ch *ch) +{ + if (!srpt_test_and_set_channel_state(ch, CH_DRAINING, + CH_DISCONNECTING)) + return; + + PRINT_INFO("unregistering session %s.", ch->sess_name); + + /* + * At this point it is guaranteed that no new commands will be sent to + * the SCST core for channel ch, which is a requirement for + * scst_unregister_session(). + */ + + scst_unregister_session(ch->scst_sess, 0, srpt_release_channel_qp); +} + +/** * srpt_qp_event() - QP event callback function. */ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) @@ -276,11 +302,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) ib_cm_notify(ch->cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: - if (srpt_test_and_set_channel_state(ch, CH_LIVE, - CH_DISCONNECTING)) { - PRINT_INFO("disconnected session %s.", ch->sess_name); - ib_send_cm_dreq(ch->cm_id, NULL, 0); - } + srpt_unregister_channel(ch); break; default: PRINT_ERROR("received unrecognized IB QP event %d", @@ -1707,8 +1729,8 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, goto out; } - if (unlikely(ch_state == CH_DISCONNECTING)) - goto post_recv; + if (unlikely(ch_state != CH_LIVE)) + goto out; if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { if (!send_ioctx) @@ -1720,8 +1742,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, } } - WARN_ON(ch_state != CH_LIVE); - switch (srp_cmd->opcode) { case SRP_CMD: srpt_handle_cmd(ch, recv_ioctx, send_ioctx, context); @@ -1747,7 +1767,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, break; } -post_recv: srpt_post_recv(ch->sport->sdev, recv_ioctx); out: return; @@ -1878,7 +1897,6 @@ static void srpt_completion(struct ib_cq *cq, void *ctx) struct srpt_rdma_ch *ch = ctx; BUG_ON(!ch); - atomic_inc(&ch->processing_compl); switch (thread) { case MODE_IB_COMPLETION_IN_THREAD: wake_up_interruptible(&ch->wait_queue); @@ -1890,7 +1908,6 @@ static void srpt_completion(struct ib_cq *cq, void *ctx) srpt_process_completion(cq, ch, SCST_CONTEXT_TASKLET); break; } - atomic_dec(&ch->processing_compl); } static int srpt_compl_thread(void *arg) @@ -2007,20 +2024,45 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) } /** - * srpt_unregister_channel() - Start RDMA channel disconnection. + * __srpt_del_close_ch() - Delete and close an RDMA channel. + * + * Delete a channel from the channel list, reset the QP and make sure + * all resources associated with the channel will be deallocated at an + * appropriate time. * - * Note: The caller must hold ch->sdev->spinlock. + * Notes: + * - The caller must hold ch->sport->sdev->spinlock. + * - Invokes ib_destroy_cm_id(ch->cm_id) and kfree(ch) either synchronously + * or asynchronously. */ -static void srpt_unregister_channel(struct srpt_rdma_ch *ch) +static void __srpt_del_close_ch(struct srpt_rdma_ch *ch, + enum srpt_channel_close_action a) __acquires(&ch->sport->sdev->spinlock) __releases(&ch->sport->sdev->spinlock) { struct srpt_device *sdev; + enum rdma_ch_state prev_state; int ret; sdev = ch->sport->sdev; list_del(&ch->list); - srpt_set_ch_state(ch, CH_DISCONNECTING); + prev_state = srpt_set_ch_state(ch, CH_DRAINING); + if (prev_state == CH_DRAINING || prev_state == CH_DISCONNECTING) + return; + switch (a) { + case close_action_rej: + if (prev_state == CH_CONNECTING) { + ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, + NULL, 0, NULL, 0); + break; + } + /* fall through */ + case close_action_send_dreq: + ib_send_cm_dreq(ch->cm_id, NULL, 0); + break; + case close_action_nop: + break; + } spin_unlock_irq(&sdev->spinlock); ret = srpt_ch_qp_err(ch); @@ -2028,18 +2070,36 @@ static void srpt_unregister_channel(struct srpt_rdma_ch *ch) PRINT_ERROR("Setting queue pair in error state failed: %d", ret); - while (atomic_read(&ch->processing_compl)) - ; + switch (prev_state) { + case CH_CONNECTING: + srpt_unregister_channel(ch); + case CH_LIVE: + /* Defer srpt_unregister_channel() until last WQE event */ + break; + case CH_DRAINING: + case CH_DISCONNECTING: + __WARN(); + break; + } - /* - * At this point it is guaranteed that no new commands will be sent to - * the SCST core for channel ch, which is a requirement for - * scst_unregister_session(). - */ + spin_lock_irq(&sdev->spinlock); +} - TRACE_DBG("unregistering session %p", ch->scst_sess); - scst_unregister_session(ch->scst_sess, 0, srpt_release_channel); +/** + * srpt_del_close_ch() - Close an RDMA channel. + * + * Note: Invokes ib_destroy_cm_id(ch->cm_id) and kfree(ch) synchronously or + * asynchronously. + */ +static void srpt_del_close_ch(struct srpt_rdma_ch *ch, + enum srpt_channel_close_action a) +{ + struct srpt_device *sdev; + + sdev = ch->sport->sdev; spin_lock_irq(&sdev->spinlock); + __srpt_del_close_ch(ch, a); + spin_unlock_irq(&sdev->spinlock); } /** @@ -2066,7 +2126,7 @@ static void srpt_release_channel_by_cmid(struct ib_cm_id *cm_id) spin_lock_irq(&sdev->spinlock); list_for_each_entry(ch, &sdev->rch_list, list) { if (ch->cm_id == cm_id) { - srpt_unregister_channel(ch); + kref_put(&ch->kref, srpt_release_channel); break; } } @@ -2102,7 +2162,21 @@ static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, } /** - * srpt_release_channel() - Release all resources associated with an RDMA channel. + * srpt_release_channel() - Release non-QP channel resources. + */ +static void srpt_release_channel(struct kref *obj) +{ + struct srpt_rdma_ch *ch = container_of(obj, struct srpt_rdma_ch, kref); + + TRACE_DBG("%s(%p)", __func__, ch); + + ib_destroy_cm_id(ch->cm_id); + + kfree(ch); +} + +/** + * srpt_release_channel_qp() - Release QP channel resources. * * Notes: * - The caller must have removed the channel from the channel list before @@ -2110,11 +2184,12 @@ static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, * - Must be called as a callback function via scst_unregister_session(). Never * call this function directly because doing so would trigger several race * conditions. - * - Do not access ch->sport or ch->sport->sdev in this function because the - * memory that was allocated for the sport and/or sdev data structures may - * already have been freed at the time this function is called. + * - Accessing ch->sport or ch->sport->sdev in this function is safe because + * these data structure are deallocated after scst_unregister_target() + * has returned, and that function only returns after unregistration of all + * associated sessions has finished. */ -static void srpt_release_channel(struct scst_session *scst_sess) +static void srpt_release_channel_qp(struct scst_session *scst_sess) { struct srpt_rdma_ch *ch; @@ -2122,9 +2197,7 @@ static void srpt_release_channel(struct scst_session *scst_sess) BUG_ON(!ch); WARN_ON(srpt_get_ch_state(ch) != CH_DISCONNECTING); - TRACE_DBG("destroying cm_id %p", ch->cm_id); - BUG_ON(!ch->cm_id); - ib_destroy_cm_id(ch->cm_id); + TRACE_DBG("%s(%s/%p)", __func__, scst_sess->initiator_name, ch->cm_id); srpt_destroy_ch_ib(ch); @@ -2132,7 +2205,7 @@ static void srpt_release_channel(struct scst_session *scst_sess) ch->sport->sdev, ch->rq_size, srp_max_rsp_size, DMA_TO_DEVICE); - kfree(ch); + kref_put(&ch->kref, srpt_release_channel); } /** @@ -2256,40 +2329,16 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, && param->port == ch->sport->port && param->listen_id == ch->sport->sdev->cm_id && ch->cm_id) { - enum rdma_ch_state prev_state; - /* found an existing channel */ TRACE_DBG("Found existing channel name= %s" " cm_id= %p state= %d", ch->sess_name, ch->cm_id, srpt_get_ch_state(ch)); - prev_state = srpt_set_ch_state(ch, - CH_DISCONNECTING); - if (prev_state == CH_CONNECTING) - srpt_unregister_channel(ch); - - spin_unlock_irq(&sdev->spinlock); + __srpt_del_close_ch(ch, close_action_rej); rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED; - - if (prev_state == CH_LIVE) { - ib_send_cm_dreq(ch->cm_id, NULL, 0); - PRINT_INFO("disconnected" - " session %s because a new" - " SRP_LOGIN_REQ has been received.", - ch->sess_name); - } else if (prev_state == CH_CONNECTING) { - PRINT_ERROR("%s", "rejected" - " SRP_LOGIN_REQ because another login" - " request is being processed."); - ib_send_cm_rej(ch->cm_id, - IB_CM_REJ_NO_RESOURCES, - NULL, 0, NULL, 0); - } - - spin_lock_irq(&sdev->spinlock); } } @@ -2328,9 +2377,15 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, * for the SRP protocol to the SCST SCSI command queue size. */ ch->rq_size = min(SRPT_RQ_SIZE, scst_get_max_lun_commands(NULL, 0)); - atomic_set(&ch->processing_compl, 0); spin_lock_init(&ch->spinlock); ch->state = CH_CONNECTING; + /* + * Initialize kref to 2 such that the session resources are only + * released after the connection manager has released the cm_id and + * SCST session unregistration finished. + */ + kref_init(&ch->kref); + kref_get(&ch->kref); INIT_LIST_HEAD(&ch->cmd_wait_list); ch->ioctx_ring = (struct srpt_send_ioctx **) @@ -2505,13 +2560,8 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) srpt_handle_new_iu(ch, ioctx, NULL, SCST_CONTEXT_THREAD); } - if (ret && srpt_test_and_set_channel_state(ch, CH_LIVE, - CH_DISCONNECTING)) { - TRACE_DBG("cm_id=%p sess_name=%s state=%d", - cm_id, ch->sess_name, - srpt_get_ch_state(ch)); - ib_send_cm_dreq(ch->cm_id, NULL, 0); - } + if (ret) + srpt_del_close_ch(ch, close_action_send_dreq); } out: @@ -3269,7 +3319,7 @@ static int srpt_release(struct scst_tgt *scst_tgt) spin_lock_irq(&sdev->spinlock); while (!list_empty(&sdev->rch_list)) { ch = list_first_entry(&sdev->rch_list, typeof(*ch), list); - srpt_unregister_channel(ch); + __srpt_del_close_ch(ch, close_action_send_dreq); } spin_unlock_irq(&sdev->spinlock); diff --git a/drivers/scst/srpt/ib_srpt.h b/drivers/scst/srpt/ib_srpt.h index e14d192..26e213e 100644 --- a/drivers/scst/srpt/ib_srpt.h +++ b/drivers/scst/srpt/ib_srpt.h @@ -225,13 +225,24 @@ struct srpt_mgmt_ioctx { /** * enum rdma_ch_state - SRP channel state. + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. + * @CH_LIVE: QP is in RTS state. + * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. + * @CH_DISCONNECTING: Last WQE event has been received. */ enum rdma_ch_state { CH_CONNECTING, CH_LIVE, + CH_DRAINING, CH_DISCONNECTING }; +enum srpt_channel_close_action { + close_action_nop, + close_action_send_dreq, + close_action_rej, +}; + /** * struct srpt_rdma_ch - RDMA channel. * @wait_queue: Allows the kernel thread to wait for more work. @@ -239,7 +250,6 @@ enum rdma_ch_state { * the channel. * @cm_id: IB CM ID associated with the channel. * @rq_size: IB receive queue size. - * @processing_compl: whether or not an IB completion is being processed. * @qp: IB queue pair used for communicating over this channel. * @sq_wr_avail: number of work requests available in the send queue. * @cq: IB completion queue for this channel. @@ -267,7 +277,6 @@ struct srpt_rdma_ch { struct ib_cm_id *cm_id; struct ib_qp *qp; int rq_size; - atomic_t processing_compl; struct ib_cq *cq; atomic_t sq_wr_avail; struct srpt_port *sport; @@ -281,6 +290,7 @@ struct srpt_rdma_ch { struct srpt_send_ioctx **ioctx_ring; struct ib_wc wc[16]; enum rdma_ch_state state; + struct kref kref; struct list_head list; struct list_head cmd_wait_list; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
